In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Function to fetch historical stock data
def fetch_stock_data(symbol, start_date, end_date):
    """Download historical market data for ``symbol`` through ``yf.download``.

    Args:
        symbol: Ticker identifier, passed through unchanged as the first
            positional argument of ``yf.download``.
        start_date: Forwarded as the ``start`` keyword.
        end_date: Forwarded as the ``end`` keyword.

    Returns:
        Whatever ``yf.download`` returns, unmodified (presumably a pandas
        DataFrame with at least 'Close' and 'Volume' columns, since that is
        what generate_features reads).
    """
    return yf.download(symbol, start=start_date, end=end_date)

# Function to generate features for breakout detection
def generate_features(data):
    """Build moving-average features and a breakout label from price/volume data.

    Args:
        data: DataFrame with 'Close' and 'Volume' columns. It is NOT modified;
            all new columns are added to a copy.

    Returns:
        A new DataFrame containing the original columns plus:

        * ``SMA50`` / ``SMA200``: 50- and 200-row rolling means of 'Close'.
        * ``Price_vs_SMA50`` / ``Price_vs_SMA200``: 'Close' minus the
          corresponding moving average.
        * ``Volume_vs_SMA50`` / ``Volume_vs_SMA200``: 'Volume' minus its own
          50- / 200-row rolling mean.
        * ``Breakout``: True where all four ``*_vs_*`` columns are positive.

        Rows containing any NaN are dropped, which removes the warm-up rows
        that do not yet have a full 200-row window.
    """
    # Work on a copy so the caller's frame is left untouched; re-running the
    # cell must not pile derived columns onto the raw download.
    features = data.copy()
    close = features['Close']
    volume = features['Volume']

    features['SMA50'] = close.rolling(window=50).mean()
    features['SMA200'] = close.rolling(window=200).mean()
    features['Price_vs_SMA50'] = close - features['SMA50']
    features['Price_vs_SMA200'] = close - features['SMA200']

    # Volume relative to its own moving average, mirroring Price_vs_*.
    # Storing the bare rolling mean here would make the "> 0" volume checks
    # below always true, so volume would never influence the label.
    features['Volume_vs_SMA50'] = volume - volume.rolling(window=50).mean()
    features['Volume_vs_SMA200'] = volume - volume.rolling(window=200).mean()

    features['Breakout'] = (
        (features['Price_vs_SMA50'] > 0)
        & (features['Price_vs_SMA200'] > 0)
        & (features['Volume_vs_SMA50'] > 0)
        & (features['Volume_vs_SMA200'] > 0)
    )
    return features.dropna()

# Download AAPL history from Yahoo Finance for the date range below
symbol, start_date, end_date = 'AAPL', '2020-01-01', '2022-01-01'
stock_data = fetch_stock_data(symbol, start_date, end_date)

# Derive the moving-average features and the Breakout label
processed_data = generate_features(stock_data)

# Feature matrix (X) and integer-encoded target (y).
# Note: Breakout is computed inside generate_features from the Price_vs_* and
# Volume_vs_* columns selected here, so the classifier can recover the label
# directly from its inputs; read the reported accuracy with that in mind.
feature_columns = [
    'SMA50',
    'SMA200',
    'Price_vs_SMA50',
    'Price_vs_SMA200',
    'Volume_vs_SMA50',
    'Volume_vs_SMA200',
]
X = processed_data[feature_columns]
y = processed_data['Breakout'].astype(int)

# Hold out 20% of the rows for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the decision tree; fit() returns the estimator itself
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
predictions = clf.predict(X_test)

# Report the share of held-out rows classified correctly
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")


In [None]:
import re
from nltk.corpus import stopwords
from transformers import BertTokenizer

# Load the BERT tokenizer for the 'bert-base-uncased' checkpoint; it is used
# further down in this cell to encode the cleaned example tweet.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Stop-word sets keyed by language, filled lazily so the NLTK corpus is read
# once per kernel instead of once per tweet.
_STOPWORD_CACHE = {}


def _get_stopwords(language='english'):
    """Return the NLTK stop-word set for ``language``, loading it only once."""
    if language not in _STOPWORD_CACHE:
        _STOPWORD_CACHE[language] = frozenset(stopwords.words(language))
    return _STOPWORD_CACHE[language]


def clean_tweet(text):
    """Normalize a tweet to lowercase, letters-only, stop-word-free text.

    Steps, in order:

    1. Lowercase the text.
    2. Drop URLs (``http://``, ``https://`` and bare ``www.`` forms).
    3. Drop ``@mentions``.
    4. Replace every remaining non-letter character (``#``, digits,
       punctuation) with a space. Hashtag words are kept; only the ``#``
       symbol is removed.
    5. Remove English stop words and collapse whitespace.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned tweet as a single space-separated string (empty if
        nothing survives cleaning).
    """
    text = text.lower()

    # URLs and mentions are whitespace-delimited tokens; blank them out.
    text = re.sub(r'(?:https?://|www\.)\S+', ' ', text)
    text = re.sub(r'@\w+', ' ', text)

    # Substitute a space (not the empty string) for non-letters so adjacent
    # words are not glued together ("good,bad" -> "good bad") and contractions
    # split into pieces the stop-word list recognises ("don't" -> "don t").
    text = re.sub(r'[^a-z\s]', ' ', text)

    stop_words = _get_stopwords()
    return ' '.join(word for word in text.split() if word not in stop_words)

# Sample tweet used to demonstrate the cleaning + tokenization steps
tweet = "Great article on #Python programming! @user123 http://example.com #coding"

# Normalize the tweet, then encode it with the BERT tokenizer as PyTorch tensors
cleaned_tweet = clean_tweet(tweet)
tokenized_tweet = tokenizer(cleaned_tweet, return_tensors='pt')

# Show the text at each stage
for _label, _value in (
    ("Original Tweet:", tweet),
    ("Cleaned Tweet:", cleaned_tweet),
    ("Tokenized Tweet:", tokenized_tweet),
):
    print(_label, _value)


In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn import functional as F
import torch

# Load the pre-trained BERT checkpoint and its tokenizer.
# NOTE(review): 'bert-base-uncased' is a base checkpoint. Per the transformers
# documentation, loading it into BertForSequenceClassification leaves the
# classification head newly initialized, so the label printed below is not a
# trained sentiment prediction until the model is fine-tuned or a
# sentiment-tuned checkpoint is substituted.
model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
model.eval()  # inference only: make sure dropout is disabled

# Example sentence
text = "I enjoyed the movie. It was fantastic!"

# Tokenize and run the forward pass without autograd tracking; no gradients
# are needed for a prediction, so skip building the graph.
inputs = tokenizer(text, return_tensors='pt')
with torch.no_grad():
    outputs = model(**inputs)

# Raw class scores, one row per input sentence
logits = outputs.logits

# Softmax over the class dimension turns scores into probabilities
probs = F.softmax(logits, dim=1)

# Index of the most probable class; this demo treats index 1 as "Positive"
# (assumes a 0 = negative / 1 = positive label order — confirm for the
# checkpoint actually used).
predicted_sentiment = torch.argmax(probs, dim=1).item()

# Print the result
print(f"Predicted Sentiment: {'Positive' if predicted_sentiment == 1 else 'Negative'}")
