In [1]:
import yfinance as yf
import pandas as pd

In [3]:
selected_stocks = ["BAJFINANCE.NS", "HDFCAMC.NS", "ASIANPAINT.NS", "TCS.NS", "DRREDDY.NS"]



In [4]:
# Updated duration of data: 10 years
start_date = "2014-01-01"
end_date = "2024-01-01"

# Dataframe to store the collected data
stock_data = {}

In [None]:
# Fetching data for each selected stock
for stock in selected_stocks:
    stock_data[stock] = yf.download(stock, start=start_date, end=end_date)

In [6]:
import numpy as np

In [7]:
for stock, df in stock_data.items():
    # Checking for missing values and filling them
    df.fillna(method='ffill', inplace=True)  # forward fill for missing values



    # Calculating log returns
    df['log_return'] = np.log(df['Adj Close'] / df['Adj Close'].shift(1))

In [8]:
def calculate_RSI(data, window=14):
    """ Calculate Relative Strength Index (RSI) """
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

In [9]:
def calculate_MACD(data, n_fast=12, n_slow=26, n_signal=9):
    """ Calculate Moving Average Convergence Divergence (MACD) """
    exp1 = data.ewm(span=n_fast, adjust=False).mean()
    exp2 = data.ewm(span=n_slow, adjust=False).mean()
    macd = exp1 - exp2
    signal = macd.ewm(span=n_signal, adjust=False).mean()
    return macd, signal, macd - signal

In [10]:
def calculate_BB(data, window=20, num_std_dev=2):
    """ Calculate Bollinger Bands """
    mean = data.rolling(window=window).mean()
    std_dev = data.rolling(window=window).std()
    upper_band = mean + (std_dev * num_std_dev)
    lower_band = mean - (std_dev * num_std_dev)
    return upper_band, mean, lower_band

In [11]:
# Apply calculations to each stock
for stock, df in stock_data.items():
    # RSI
    df['RSI'] = calculate_RSI(df['Adj Close'])

    # MACD
    df['MACD'], df['MACD_signal'], df['MACD_hist'] = calculate_MACD(df['Adj Close'])

    # Bollinger Bands
    df['Upper_BB'], df['Middle_BB'], df['Lower_BB'] = calculate_BB(df['Adj Close'])

In [12]:
def calculate_fibonacci_retracement(data):
    """ Calculate Fibonacci Retracement levels """
    max_price = data.max()
    min_price = data.min()
    difference = max_price - min_price
    first_level = max_price - difference * 0.236
    second_level = max_price - difference * 0.382
    third_level = max_price - difference * 0.5
    fourth_level = max_price - difference * 0.618
    return first_level, second_level, third_level, fourth_level

In [13]:
for stock, df in stock_data.items():

    levels = calculate_fibonacci_retracement(df['Adj Close'])
    df['Fibonacci_Level_1'], df['Fibonacci_Level_2'], df['Fibonacci_Level_3'], df['Fibonacci_Level_4'] = levels

In [14]:
# Fetch Nifty 50 data
nifty_data = yf.download("^NSEI", start=start_date, end=end_date)

[*********************100%%**********************]  1 of 1 completed


In [15]:
# Calculating a simple moving average for Nifty 50 as a trend indicator
nifty_data['Nifty_50_SMA'] = nifty_data['Adj Close'].rolling(window=50).mean()

In [16]:
# Merging Nifty trend data with each stock data
for stock, df in stock_data.items():
    df = df.join(nifty_data['Nifty_50_SMA'], on='Date', how='left')

In [17]:
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob
import pandas as pd

In [18]:

# Define the stocks and their corresponding Yahoo News URLs
stocks_urls = {
    "BAJFINANCE.NS": "https://news.yahoo.com/stock/BAJFINANCE.NS",
    "HDFCAMC.NS": "https://news.yahoo.com/stock/HDFCAMC.NS",
    "ASIANPAINT.NS": "https://news.yahoo.com/stock/ASIANPAINT.NS",
    "TCS.NS": "https://news.yahoo.com/stock/TCS.NS",
    "DRREDDY.NS": "https://news.yahoo.com/stock/DRREDDY.NS"
}

In [19]:
# Function to scrape news headlines
def scrape_headlines(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    headlines = soup.find_all('h3')  # Assuming headlines are in <h3> tags
    return [headline.get_text() for headline in headlines]

In [32]:
# Function to analyze sentiment of headlines
def analyze_sentiment(headlines):
    sentiment_scores = []
    for headline in headlines:
        analysis = TextBlob(headline)
        sentiment_scores.append(analysis.sentiment.polarity)
    return sentiment_scores

In [36]:
# Adding Nifty 50 SMA to each stock's DataFrame
for stock, df in stock_data.items():
    df['Nifty_50_SMA'] = nifty_data['Adj Close'].rolling(window=50).mean()

In [38]:
# Ensure all relevant features including the newly added ones are present
X = combined_data[['RSI', 'MACD', 'Upper_BB', 'Lower_BB', 'Fibonacci_Level_1', 'Nifty_50_SMA']]

In [37]:
# Combining all stock data into a single DataFrame
combined_data = pd.concat(stock_data.values())

In [33]:
# Main process
stock_sentiments = {}
for stock, url in stocks_urls.items():
    headlines = scrape_headlines(url)
    sentiments = analyze_sentiment(headlines)
    stock_sentiments[stock] = pd.DataFrame({
        'Headline': headlines,
        'Sentiment': sentiments
    })

In [24]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [25]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [26]:
# Combining stock data and market trend data
all_stock_data = pd.DataFrame()
for stock, df in stock_data.items():
    df['Stock'] = stock  # Add a column to identify the stock
    all_stock_data = pd.concat([all_stock_data, df])

In [27]:
# Calculate weekly returns and identify best performers
all_stock_data['Week_Number'] = all_stock_data.index.week
weekly_best_performers = all_stock_data.groupby('Week_Number').apply(lambda x: x['Adj Close'].pct_change().idxmax())


  all_stock_data['Week_Number'] = all_stock_data.index.week


In [39]:
import numpy as np

# Assuming each DataFrame in stock_data has necessary columns for calculations
for stock, df in stock_data.items():
    # Example calculations
    df['Volatility'] = df['Adj Close'].pct_change().rolling(window=30).std() * np.sqrt(252)  # Annualized volatility
    df['Weekly_Return'] = df['Adj Close'].pct_change(periods=5)
    df['Positive_Indicators'] = (df['RSI'] > 50).astype(int) + (df['MACD'] > df['MACD_signal']).astype(int)  # etc.

    # Risk-Reward Ratio (Sharpe Ratio, adjust as needed)
    df['Risk_Reward'] = df['Weekly_Return'] / df['Volatility']

    # Composite Score ( adjust weights as needed)
    df['Composite_Score'] = 0.4 * df['Weekly_Return'] + 0.3 * df['Risk_Reward'] + 0.3 * df['Positive_Indicators']
    df['Week_Number'] = df.index.isocalendar().week

# Combine all individual stock DataFrames
combined_data = pd.concat(stock_data.values())

# Identify the best performer each week based on the highest composite score
combined_data['Best_Performer'] = combined_data.groupby('Week_Number')['Composite_Score'].transform(lambda x: x == x.max())
combined_data['Best_Performer'] = combined_data['Best_Performer'].astype(int)

# Your target variable
y = combined_data['Best_Performer']


In [45]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Imputing NaN values with the mean of the column
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# Now, scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)


In [46]:
# Reshaping data for LSTM
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_reshaped = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


In [43]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [47]:
# Gradient Boosting
gb_model = GradientBoostingClassifier()
gb_model.fit(X_train, y_train)

In [48]:
# Random Forest
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

In [49]:
# XGBoost
xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)

In [50]:
# LSTM
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(1, X_train.shape[1])))
lstm_model.add(Dense(1, activation='sigmoid'))
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
lstm_model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a92b88f8970>

In [51]:
from sklearn.metrics import classification_report

# Evaluate GB, RF, XGB
for model in [gb_model, rf_model, xgb_model]:
    predictions = model.predict(X_test)
    print(model.__class__.__name__)
    print(classification_report(y_test, predictions))

# Evaluate LSTM
lstm_predictions = lstm_model.predict(X_test_reshaped)
lstm_predictions = (lstm_predictions > 0.5).astype(int)  # Assuming binary classification
print("LSTM Model")
print(classification_report(y_test, lstm_predictions))


GradientBoostingClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2229
           1       0.00      0.00      0.00        10

    accuracy                           0.99      2239
   macro avg       0.50      0.50      0.50      2239
weighted avg       0.99      0.99      0.99      2239

RandomForestClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2229
           1       0.00      0.00      0.00        10

    accuracy                           1.00      2239
   macro avg       0.50      0.50      0.50      2239
weighted avg       0.99      1.00      0.99      2239

XGBClassifier
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2229
           1       0.00      0.00      0.00        10

    accuracy                           0.99      2239
   macro avg       0.50      0.50      0.50      2239
weighted a

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


LSTM Model
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2229
           1       0.00      0.00      0.00        10

    accuracy                           1.00      2239
   macro avg       0.50      0.50      0.50      2239
weighted avg       0.99      1.00      0.99      2239



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
from imblearn.over_sampling import SMOTE

# Resample the training data
smote = SMOTE()
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [53]:
# Gradient Boosting with resampled data
gb_model.fit(X_train_resampled, y_train_resampled)

# Random Forest with class weights
rf_model = RandomForestClassifier(class_weight='balanced')
rf_model.fit(X_train_resampled, y_train_resampled)

# XGBoost with scale_pos_weight parameter
scale_pos_weight = sum(y_train == 0) / sum(y_train == 1)
xgb_model = xgb.XGBClassifier(scale_pos_weight=scale_pos_weight)
xgb_model.fit(X_train_resampled, y_train_resampled)


In [54]:
# Evaluate XGBoost
xgb_predictions = xgb_model.predict(X_test)
print("XGBoost Classifier Evaluation")
print(classification_report(y_test, xgb_predictions))

XGBoost Classifier Evaluation
              precision    recall  f1-score   support

           0       1.00      0.91      0.95      2229
           1       0.03      0.70      0.07        10

    accuracy                           0.91      2239
   macro avg       0.52      0.81      0.51      2239
weighted avg       0.99      0.91      0.95      2239



In [55]:
from sklearn.metrics import precision_recall_curve

# Get probability estimates for class 1
probabilities = gb_model.predict_proba(X_test)[:, 1]

# Calculate precision-recall pairs for different threshold values
precisions, recalls, thresholds = precision_recall_curve(y_test, probabilities)

# Select a threshold that balances precision and recall according to your needs
# This is an illustrative example; choose a threshold based on your specific requirements
selected_threshold = thresholds[np.argmax(precisions >= 0.05)]  # Example threshold criteria
adjusted_predictions = (probabilities >= selected_threshold).astype(int)

# Evaluate with the new threshold
print("Adjusted Gradient Boosting Classifier Evaluation")
print(classification_report(y_test, adjusted_predictions))


Adjusted Gradient Boosting Classifier Evaluation
              precision    recall  f1-score   support

           0       1.00      0.97      0.98      2229
           1       0.05      0.40      0.09        10

    accuracy                           0.96      2239
   macro avg       0.52      0.68      0.54      2239
weighted avg       0.99      0.96      0.98      2239



In [56]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.model_selection import train_test_split
import numpy as np

# Data preparation as before
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_reshaped = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Building an advanced LSTM model
lstm_model = Sequential()
lstm_model.add(Bidirectional(LSTM(50, return_sequences=True, input_shape=(1, X_train.shape[1]))))
lstm_model.add(Dropout(0.2))
lstm_model.add(LSTM(50, return_sequences=False))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(1, activation='sigmoid'))

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training the model
lstm_model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a92a9e17790>

In [57]:
from sklearn.metrics import classification_report, accuracy_score

# Predicting with the LSTM model
lstm_predictions = lstm_model.predict(X_test_reshaped)

# Since we're doing binary classification,  might want to convert probabilities to binary predictions
# can adjust the threshold based on y specific needs (default is 0.5)
lstm_predictions_binary = (lstm_predictions > 0.5).astype(int)

# Evaluating the model
print("LSTM Model Performance:")
print(classification_report(y_test, lstm_predictions_binary))
print("Accuracy:", accuracy_score(y_test, lstm_predictions_binary))


LSTM Model Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2229
           1       0.00      0.00      0.00        10

    accuracy                           1.00      2239
   macro avg       0.50      0.50      0.50      2239
weighted avg       0.99      1.00      0.99      2239

Accuracy: 0.9955337204108977


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [58]:
from sklearn.utils import class_weight

# Calculate class weights
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Use these class weights in model.fit
lstm_model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32, class_weight=class_weight_dict)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a92abe320e0>

In [60]:
from sklearn.metrics import classification_report, accuracy_score

# Predicting with the LSTM model
lstm_predictions = lstm_model.predict(X_test_reshaped)
lstm_predictions_binary = (lstm_predictions > 0.5).astype(int)  # Convert probabilities to binary predictions

# Evaluating the model
print("LSTM Model Performance with Class Weights:")
print(classification_report(y_test, lstm_predictions_binary))
print("Accuracy:", accuracy_score(y_test, lstm_predictions_binary))


LSTM Model Performance with Class Weights:
              precision    recall  f1-score   support

           0       1.00      0.85      0.92      2229
           1       0.02      0.80      0.05        10

    accuracy                           0.85      2239
   macro avg       0.51      0.83      0.48      2239
weighted avg       0.99      0.85      0.92      2239

Accuracy: 0.8530594015185351


In [61]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional

# Enhanced LSTM Model
lstm_model = Sequential()
lstm_model.add(Bidirectional(LSTM(50, return_sequences=True, input_shape=(1, X_train.shape[1]))))
lstm_model.add(Dropout(0.3))
lstm_model.add(LSTM(100, return_sequences=False))
lstm_model.add(Dropout(0.3))
lstm_model.add(Dense(50, activation='relu'))
lstm_model.add(Dense(1, activation='sigmoid'))

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [62]:
# Assuming class_weight_dict is already calculated
lstm_model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32, class_weight=class_weight_dict)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a92b8c285e0>

In [63]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Predicting probabilities
lstm_probabilities = lstm_model.predict(X_test_reshaped)

# Adjusting the decision threshold
threshold = 0.5
lstm_predictions = (lstm_probabilities > threshold).astype(int)

# Evaluating the model
print("Adjusted LSTM Model Performance:")
print(classification_report(y_test, lstm_predictions))
print("Accuracy:", accuracy_score(y_test, lstm_predictions))


Adjusted LSTM Model Performance:
              precision    recall  f1-score   support

           0       1.00      0.78      0.88      2229
           1       0.02      1.00      0.04        10

    accuracy                           0.79      2239
   macro avg       0.51      0.89      0.46      2239
weighted avg       1.00      0.79      0.88      2239

Accuracy: 0.7856185797230907


In [64]:
# Simplified LSTM Model
lstm_model = Sequential()
lstm_model.add(LSTM(50, input_shape=(1, X_train.shape[1])))
lstm_model.add(Dropout(0.2))
lstm_model.add(Dense(1, activation='sigmoid'))

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [65]:
# Retraining the model with class weights
lstm_model.fit(X_train_reshaped, y_train, epochs=100, batch_size=32, class_weight=class_weight_dict)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7a92a34c7b20>

In [66]:
# Predicting probabilities
lstm_probabilities = lstm_model.predict(X_test_reshaped)

# Experiment with different thresholds
thresholds = [0.4, 0.5, 0.6]  # Example thresholds, adjust as needed

for threshold in thresholds:
    lstm_predictions = (lstm_probabilities > threshold).astype(int)
    print(f"LSTM Model Performance at Threshold {threshold}:")
    print(classification_report(y_test, lstm_predictions))
    print("Accuracy:", accuracy_score(y_test, lstm_predictions))
    print("-------------------------------------------")


LSTM Model Performance at Threshold 0.4:
              precision    recall  f1-score   support

           0       1.00      0.73      0.84      2229
           1       0.01      0.90      0.03        10

    accuracy                           0.73      2239
   macro avg       0.51      0.82      0.44      2239
weighted avg       0.99      0.73      0.84      2239

Accuracy: 0.7324698526127735
-------------------------------------------
LSTM Model Performance at Threshold 0.5:
              precision    recall  f1-score   support

           0       1.00      0.76      0.87      2229
           1       0.02      0.90      0.03        10

    accuracy                           0.76      2239
   macro avg       0.51      0.83      0.45      2239
weighted avg       1.00      0.76      0.86      2239

Accuracy: 0.7641804376953998
-------------------------------------------
LSTM Model Performance at Threshold 0.6:
              precision    recall  f1-score   support

           0       1.0