In [None]:
import pandas as pd
import matplotlib.pyplot as plt

filepath = '/content/drive/MyDrive/stocks/GAIL.csv'
df = pd.read_csv(filepath)


def plot_actual_vs_predicted(ytest, ypred, title):
    """Draw actual and predicted values as two dashed lines on one wide figure.

    Parameters
    ----------
    ytest : pandas object or array-like
        Ground-truth values. Objects exposing ``.values`` (Series/DataFrame)
        are unwrapped; anything else (NumPy array, list) is plotted as given.
    ypred : array-like
        Predicted values, plotted against their positional index.
    title : str
        Figure title.
    """
    # Unwrap pandas containers so the x-axis is the 0-based position rather
    # than the frame's index; plain arrays/lists have no `.values` and are
    # used directly instead of raising AttributeError.
    actual = getattr(ytest, 'values', ytest)

    plt.figure(figsize=(20, 6))
    plt.plot(actual, label='Actual Values', color='blue', linestyle='--')
    plt.plot(ypred, label='Predicted Values', color='red', linestyle='--')
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

# Remove the columns that are not used downstream.
stock_data = df.drop(columns=['Date', 'Symbol', 'Series', 'Trades'])

# Target columns: the 'Close' value N rows ahead (negative shift looks forward).
for horizon in (1, 7, 14, 21, 30):
    stock_data[f'Close{horizon}'] = stock_data['Close'].shift(-horizon)

# The forward shifts leave NaN in the trailing rows; drop every incomplete row.
stock_data = stock_data.dropna()

In [None]:
# All indicators below are derived from the same closing-price series.
close = stock_data['Close']

# 1. Lag features - the close 5, 10 and 20 rows back.
for lag in (5, 10, 20):
    stock_data[f'lag_{lag}'] = close.shift(lag)

# 2. Simple moving averages - smooth out volatility and highlight trends.
for window in (5, 20):
    stock_data[f'mavg_{window}'] = close.rolling(window=window).mean()

# 3. Exponential moving averages - weight recent rows more heavily.
for span in (10, 20):
    stock_data[f'EMA_{span}'] = close.ewm(span=span, adjust=False).mean()

# 4. Relative Strength Index (RSI) over 14 rows - a momentum measure.
delta = close.diff(1)
gain = delta.where(delta > 0, 0)    # upward moves, everything else set to 0
loss = -delta.where(delta < 0, 0)   # downward moves as positive magnitudes
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
stock_data['RSI_14'] = 100 - (100 / (1 + rs))

# 5. MACD - difference between the 12-span and 26-span EMAs.
short_ema = close.ewm(span=12, adjust=False).mean()
long_ema = close.ewm(span=26, adjust=False).mean()
stock_data['MACD'] = short_ema - long_ema

# 6. Bollinger Bands - 20-row mean plus/minus two rolling standard deviations.
stock_data['bollinger_std'] = close.rolling(window=20).std()
band_width = stock_data['bollinger_std'] * 2
stock_data['bollinger_upper'] = stock_data['mavg_20'] + band_width
stock_data['bollinger_lower'] = stock_data['mavg_20'] - band_width

# Lags and rolling windows leave NaN in the leading rows; drop them.
stock_data.dropna(inplace=True)

In [None]:
# Display the column labels of the frame after target and indicator columns were added.
stock_data.columns

Index(['Prev Close', 'Open', 'High', 'Low', 'Last', 'Close', 'VWAP', 'Volume',
       'Turnover', 'Deliverable Volume', '%Deliverble', 'Close1', 'Close7',
       'Close14', 'Close21', 'Close30', 'lag_5', 'lag_10', 'lag_20', 'mavg_5',
       'mavg_20', 'EMA_10', 'EMA_20', 'RSI_14', 'MACD', 'bollinger_std',
       'bollinger_upper', 'bollinger_lower'],
      dtype='object')

In [None]:
# Names of the five future-close target columns, in horizon order.
TARGET_COLUMNS = ['Close1', 'Close7', 'Close14', 'Close21', 'Close30']

# Features: every column except the targets and the two delivery columns.
X = stock_data.drop(columns=['Deliverable Volume', '%Deliverble'] + TARGET_COLUMNS)
Y = stock_data[TARGET_COLUMNS]
Y1 = stock_data['Close1']
Y7 = stock_data['Close7']
Y14 = stock_data['Close14']
Y21 = stock_data['Close21']
Y30 = stock_data['Close30']

# One split is enough: with shuffle=False the first 80% of rows become the
# training set and the last 20% the test set, so every target shares the same
# row partition. (Previously the split was repeated six times, overwriting
# xtrain/xtest with identical values each time.)
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, shuffle=False)

# Single-horizon targets are just columns of the multi-output split.
ytrain1, ytest1 = ytrain['Close1'], ytest['Close1']
ytrain7, ytest7 = ytrain['Close7'], ytest['Close7']
ytrain14, ytest14 = ytrain['Close14'], ytest['Close14']
ytrain21, ytest21 = ytrain['Close21'], ytest['Close21']
ytrain30, ytest30 = ytrain['Close30'], ytest['Close30']

# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = MinMaxScaler()
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)

In [None]:
# Fixed seed so the Random Forest results are repeatable from run to run.
RANDOM_STATE = 42


def make_models():
    """Return a fresh, unfitted set of the four baseline regressors keyed by short name."""
    return {
        'LR': LinearRegression(),
        'SVM': SVR(),
        'KNN': KNeighborsRegressor(),
        'RF': RandomForestRegressor(n_estimators=150, random_state=RANDOM_STATE),
    }


# One independent model set per forecast horizon.
models1 = make_models()
models7 = make_models()
models14 = make_models()
models21 = make_models()
models30 = make_models()

# A single wrapper that fits one Random Forest per target column.
mor_model = MultiOutputRegressor(
    RandomForestRegressor(n_estimators=150, random_state=RANDOM_STATE)
)

# Fit each horizon's model set on the shared scaled features and its own target.
for horizon_models, horizon_target in (
    (models1, ytrain1),
    (models7, ytrain7),
    (models14, ytrain14),
    (models21, ytrain21),
    (models30, ytrain30),
):
    for model in horizon_models.values():
        model.fit(xtrain_scaled, horizon_target)

mor_model.fit(xtrain_scaled, ytrain)

In [None]:
# Imported here because this cell runs before the cell that imports the metrics.
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

models1 = {
    'LR': LinearRegression(),
    'SVM': SVR(),
    'KNN': KNeighborsRegressor(),
    'RF': RandomForestRegressor(n_estimators=150)
}

# Split the shared feature frame X against the next-day target. The previous
# code referenced an undefined `X1`, which raises NameError on a fresh kernel.
xtrain1, xtest1, ytrain1, ytest1 = train_test_split(X, Y1, test_size=0.2, shuffle=False)

# Scale the split produced above (previously the scaler was fit on the
# unrelated globals `xtrain`/`xtest` instead of `xtrain1`/`xtest1`).
scaler = MinMaxScaler()
xtrain_scaled1 = scaler.fit_transform(xtrain1)
xtest_scaled1 = scaler.transform(xtest1)

for name, model in models1.items():
    model.fit(xtrain_scaled1, ytrain1)

# pred1: model name -> test predictions; res1: '<name>_<metric>' -> score.
pred1 = {}
res1 = {}
print("Prediction for next day:")
for name, model in models1.items():
    pred1[name] = model.predict(xtest_scaled1)
    res1[f'{name}_mse'] = mean_squared_error(ytest1, pred1[name])
    # MAPE is returned as a fraction; report it as a percentage.
    res1[f'{name}_mape'] = mean_absolute_percentage_error(ytest1, pred1[name]) * 100
    res1[f'{name}_r2'] = r2_score(ytest1, pred1[name])
    print(f"  {name} - MSE: {res1[f'{name}_mse']}, MAPE: {res1[f'{name}_mape']},  R2 Score: {res1[f'{name}_r2']}\n")

Prediction for next day:
  LR - MSE: 120.50442205681081, MAPE: 4.748074936215823,  R2 Score: 0.9937458825873979

  SVM - MSE: 1725.416884308452, MAPE: 20.115584959671477,  R2 Score: 0.9104517527575559

  KNN - MSE: 643.8457179999999, MAPE: 11.17298409831866,  R2 Score: 0.96658473898929

  RF - MSE: 86.40500708222211, MAPE: 3.2695258306124506,  R2 Score: 0.9955156246542208



In [None]:

# Feature names in the column order the models were trained on.
# NOTE(review): this list is hand-maintained; it must match the columns of the
# training feature frame exactly, otherwise importances are mislabelled.
feature_columns = ['Prev Close', 'Open', 'High', 'Low', 'Last', 'Close', 'VWAP', 'Volume',
       'Turnover', 'lag_5', 'lag_10', 'lag_20', 'mavg_5', 'mavg_20', 'EMA_10', 'EMA_20', 'RSI_14', 'MACD', 'bollinger_std',
       'bollinger_upper', 'bollinger_lower']


def show_rf_feature_importance(rf_model, horizon, columns=feature_columns, top_n=22):
    """Print a fitted Random Forest's feature importances, largest first.

    Parameters
    ----------
    rf_model : fitted estimator exposing ``feature_importances_``
    horizon : int
        Forecast horizon in days, used only in the printed heading.
    columns : list of str
        Feature names aligned with ``feature_importances_``.
    top_n : int
        Number of rows to print. The default (22) exceeds the 21 features
        listed above, so the complete ranking is shown.

    Returns
    -------
    pandas.DataFrame
        The full ranking with columns 'Feature' and 'Importance'.
    """
    ranking = pd.DataFrame({'Feature': columns, 'Importance': rf_model.feature_importances_})
    ranking = ranking.sort_values(by='Importance', ascending=False)
    print(f"\nRandom Forest for {horizon} Day ahead")
    print(ranking.head(top_n))
    return ranking


# One ranking per horizon; `feature_importance` ends up holding the last one.
for horizon, horizon_models in (
    (1, models1),
    (7, models7),
    (14, models14),
    (21, models21),
    (30, models30),
):
    feature_importance = show_rf_feature_importance(horizon_models['RF'], horizon)


Random Forest for 1 Day ahead
            Feature  Importance
4              Last    0.522663
5             Close    0.394257
2              High    0.035770
6              VWAP    0.034975
19  bollinger_upper    0.003061
3               Low    0.003012
0        Prev Close    0.001481
13          mavg_20    0.000635
1              Open    0.000624
15           EMA_20    0.000587
12           mavg_5    0.000557
14           EMA_10    0.000465
8          Turnover    0.000272
9             lag_5    0.000261
18    bollinger_std    0.000227
11           lag_20    0.000223
20  bollinger_lower    0.000204
7            Volume    0.000199
17             MACD    0.000198
16           RSI_14    0.000183
10           lag_10    0.000146

Random Forest for 7 Day ahead
            Feature  Importance
4              Last    0.203011
3               Low    0.186160
5             Close    0.177928
12           mavg_5    0.118416
6              VWAP    0.114453
19  bollinger_upper    0.061094
11        

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import cross_val_score
import numpy as np

def evaluate_models(models, x_test, y_true, header):
    """Predict with every model in ``models`` and print MSE, MAPE (%) and R2.

    Parameters
    ----------
    models : dict
        Short name -> fitted regressor.
    x_test : array-like
        Scaled test features passed to ``predict``.
    y_true : array-like
        Ground-truth target for this horizon.
    header : str
        Line printed before the per-model results.

    Returns
    -------
    (dict, dict)
        ``predictions`` maps model name -> predicted array; ``scores`` maps
        '<name>_mse', '<name>_mape', '<name>_r2' -> metric value.
    """
    predictions, scores = {}, {}
    print(header)
    for name, model in models.items():
        y_hat = model.predict(x_test)
        mse = mean_squared_error(y_true, y_hat)
        # MAPE is returned as a fraction; report it as a percentage.
        mape = mean_absolute_percentage_error(y_true, y_hat) * 100
        r2 = r2_score(y_true, y_hat)

        predictions[name] = y_hat
        scores[f'{name}_mse'] = mse
        scores[f'{name}_mape'] = mape
        scores[f'{name}_r2'] = r2
        print(f"  {name} - MSE: {mse}, MAPE: {mape},  R2 Score: {r2}\n")
    return predictions, scores


pred1, res1 = evaluate_models(models1, xtest_scaled, ytest1, "Prediction for next day:")
pred7, res7 = evaluate_models(models7, xtest_scaled, ytest7, "Prediction for day 7")
pred14, res14 = evaluate_models(models14, xtest_scaled, ytest14, "Prediction for day 14")
pred21, res21 = evaluate_models(models21, xtest_scaled, ytest21, "Prediction for day 21")
pred30, res30 = evaluate_models(models30, xtest_scaled, ytest30, "Prediction for day 30")

# Multi-output model: one prediction column per horizon, scored jointly.
pred = mor_model.predict(xtest_scaled)
print("Prediction from multioutput model")
mse = mean_squared_error(ytest, pred)
mape = mean_absolute_percentage_error(ytest, pred) * 100
r2 = r2_score(ytest, pred)

print(f"  MultiOutputRegressor - MSE: {mse}, MAPE: {mape},  R2 Score: {r2}\n")

Prediction for next day:
  LR - MSE: 120.50442205681081, MAPE: 4.748074936215823,  R2 Score: 0.9937458825873979

  SVM - MSE: 1725.416884308452, MAPE: 20.115584959671477,  R2 Score: 0.9104517527575559

  KNN - MSE: 643.8457179999999, MAPE: 11.17298409831866,  R2 Score: 0.96658473898929

  RF - MSE: 86.43411509415184, MAPE: 3.2773133272215595,  R2 Score: 0.9955141139634001

Prediction for day 7
  LR - MSE: 627.1466952953762, MAPE: 10.468125431844125,  R2 Score: 0.967424248072302

  SVM - MSE: 2049.610691979834, MAPE: 21.95207277896195,  R2 Score: 0.893537492980257

  KNN - MSE: 1132.991681789474, MAPE: 14.236234490558516,  R2 Score: 0.9411492458798077

  RF - MSE: 655.2543527879529, MAPE: 9.942039945847833,  R2 Score: 0.9659642577947232

Prediction for day 14
  LR - MSE: 1143.0663698514375, MAPE: 14.172891647295222,  R2 Score: 0.9404932153020817

  SVM - MSE: 2378.1736968445953, MAPE: 23.283542875474726,  R2 Score: 0.8761948790683288

  KNN - MSE: 1720.9729952631583, MAPE: 16.4898688799

In [None]:
def compare_mor_with_single_rf(label, y_true, mor_pred, single_res):
    """Score one column of the multi-output prediction and print it beside the single RF.

    Parameters
    ----------
    label : str
        Heading for this horizon, e.g. "Day 7".
    y_true : array-like
        Ground-truth target for this horizon.
    mor_pred : array-like
        The matching column of the multi-output model's predictions.
    single_res : dict
        Scores of the per-horizon models; the keys 'RF_mse', 'RF_mape' and
        'RF_r2' are read.

    Returns
    -------
    (mse, mape, r2) of the multi-output prediction, with MAPE in percent.
    """
    mse = mean_squared_error(y_true, mor_pred)
    mape = mean_absolute_percentage_error(y_true, mor_pred) * 100
    r2 = r2_score(y_true, mor_pred)
    print(f"{label}:")
    print(f"  MultiOutputRegressor - MSE: {mse}, MAPE: {mape},  R2 Score: {r2}")
    print(f"  Single Random Forest - MSE: {single_res['RF_mse']}, MAPE: {single_res['RF_mape']},  R2 Score: {single_res['RF_r2']}\n")
    return mse, mape, r2


# The column order of `pred` follows the target order: 1, 7, 14, 21, 30 days.
assert pred.shape[1] == 5, f"expected 5 prediction columns, got {pred.shape[1]}"

mse1, mape1, r2_1 = compare_mor_with_single_rf("Day 1", ytest1, pred[:, 0], res1)
mse7, mape7, r2_7 = compare_mor_with_single_rf("Day 7", ytest7, pred[:, 1], res7)
mse14, mape14, r2_14 = compare_mor_with_single_rf("Day 14", ytest14, pred[:, 2], res14)
mse21, mape21, r2_21 = compare_mor_with_single_rf("Day 21", ytest21, pred[:, 3], res21)
mse30, mape30, r2_30 = compare_mor_with_single_rf("Day 30", ytest30, pred[:, 4], res30)

Day 1:
  MultiOutputRegressor - MSE: 84.55730285064311, MAPE: 3.1058333417197894,  R2 Score: 0.9956115195517757
  Single Randome Forest- MSE: 86.43411509415184, MAPE: 3.2773133272215595,  R2 Score: 0.9955141139634001

Day 7:
  MultiOutputRegressor - MSE: 647.888556215907, MAPE: 9.793564721141886,  R2 Score: 0.9663468578525419
  Single Randome Forest- MSE: 655.2543527879529, MAPE: 9.942039945847833,  R2 Score: 0.9659642577947232

Day 14:
  MultiOutputRegressor - MSE: 1131.7894174506453, MAPE: 11.076803034538814,  R2 Score: 0.941080281107062
  Single Randome Forest- MSE: 1144.4280998869026, MAPE: 11.148939350719571,  R2 Score: 0.9404223251261704

Day 21:
  MultiOutputRegressor - MSE: 1993.5020360898254, MAPE: 15.485487581234247,  R2 Score: 0.896089867367017
  Single Randome Forest- MSE: 1989.2681805864345, MAPE: 15.410296291041156,  R2 Score: 0.8963105546193708

Day 30:
  MultiOutputRegressor - MSE: 3052.4768035777806, MAPE: 20.684201978835578,  R2 Score: 0.8410361647959591
  Single Rand

In [None]:
import plotly.graph_objs as go
import plotly.offline as pyo

# Interactive comparison chart for one forecast horizon
def plot_day_predictions(day, ytest, pred_dict, title):
    """Show an interactive Plotly line chart of actual values and each model's predictions.

    Parameters
    ----------
    day : int
        Forecast horizon; accepted for call-site readability, not used in the figure.
    ytest : sequence
        Actual target values, drawn in blue as 'Actual'.
    pred_dict : dict
        Model name -> predicted values; one line is drawn per entry.
    title : str
        Figure title.
    """
    # The actual series goes first so it appears first in the legend.
    traces = [
        go.Scatter(x=list(range(len(ytest))), y=ytest,
                   mode='lines', name='Actual', line=dict(color='blue'))
    ]

    # One line per model, each plotted against its own positional index.
    traces.extend(
        go.Scatter(x=list(range(len(model_pred))), y=model_pred,
                   mode='lines', name=model_name)
        for model_name, model_pred in pred_dict.items()
    )

    fig = go.Figure(data=traces)
    fig.update_layout(
        title=title,
        xaxis_title='Sample Index',
        yaxis_title='Stock Value',
        legend_title="Models",
        hovermode="x unified",
    )

    pyo.iplot(fig)

# Collect each horizon's predictions, keyed and ordered by that horizon's model names.
pred_dict1 = {name: pred1[name] for name in models1}
pred_dict7 = {name: pred7[name] for name in models7}
pred_dict14 = {name: pred14[name] for name in models14}
pred_dict21 = {name: pred21[name] for name in models21}
pred_dict30 = {name: pred30[name] for name in models30}

# One chart per horizon: (day, actual test target, predictions by model).
for day, actual, by_model in (
    (1, ytest1, pred_dict1),
    (7, ytest7, pred_dict7),
    (14, ytest14, pred_dict14),
    (21, ytest21, pred_dict21),
    (30, ytest30, pred_dict30),
):
    plot_day_predictions(day, actual, by_model, f"Day {day} Predictions (All Models)")


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, GRU, Dropout, BatchNormalization, SimpleRNN
from keras.optimizers import Adam
import numpy as np
from keras.callbacks import EarlyStopping

# Stops training after 20 epochs without val_loss improvement and restores the
# best weights. It only takes effect when passed to fit(callbacks=[...]).
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Recurrent layers take (samples, timesteps, features); every row becomes a
# sequence of length 1.
n_train_rows, n_features = xtrain_scaled.shape
n_test_rows = xtest_scaled.shape[0]
xtrain_nn = xtrain_scaled.reshape(n_train_rows, 1, n_features)
xtest_nn = xtest_scaled.reshape(n_test_rows, 1, xtest_scaled.shape[1])

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Actual test targets per horizon, taken column by column from ytest.
ytest_dict = {
    label: ytest.iloc[:, column].values
    for column, label in enumerate(('Day 1', 'Day 7', 'Day 14', 'Day 21', 'Day 30'))
}

# (timesteps, features) as expected by the first recurrent layer.
input_shape = xtrain_nn.shape[1:]

def evaluate_deep_learning_model(model, xtest, ytest, name):
    """Predict with ``model`` and print overall and per-horizon MSE, MAPE (%) and R2.

    Parameters
    ----------
    model : object with ``predict``
        Trained network returning an array with one column per horizon.
    xtest : array-like
        Test inputs in the shape the model expects.
    ytest : DataFrame or 2-D array
        Ground truth with the same five columns, ordered 1, 7, 14, 21, 30 days.
    name : str
        Model label used in the printed summary line.

    Returns
    -------
    dict
        'Day 1' ... 'Day 30' -> the corresponding prediction column.
    """
    horizon_labels = ('Day 1', 'Day 7', 'Day 14', 'Day 21', 'Day 30')

    ypred = model.predict(xtest)
    # Use the ground truth that was passed in rather than a module-level
    # lookup table, so the per-day scores always match the `ytest` argument.
    ytrue = np.asarray(ytest)

    # Split predictions into individual day predictions.
    ypred_dict = {label: ypred[:, column] for column, label in enumerate(horizon_labels)}

    # Metrics over all five outputs together.
    mse = mean_squared_error(ytrue, ypred)
    mape = mean_absolute_percentage_error(ytrue, ypred) * 100
    r2 = r2_score(ytrue, ypred)

    print(f"{name} - MSE: {mse}, MAPE: {mape}, R2: {r2}")

    # Metrics for each horizon separately.
    for column, label in enumerate(horizon_labels):
        day_true = ytrue[:, column]
        day_pred = ypred_dict[label]
        mse = mean_squared_error(day_true, day_pred)
        mape = mean_absolute_percentage_error(day_true, day_pred) * 100
        r2 = r2_score(day_true, day_pred)
        print(f"  {label} - MSE: {mse}, MAPE: {mape}, R2: {r2}")

    return ypred_dict

In [None]:
def build_lstm_model(input_shape):
    """Build and compile a two-layer stacked LSTM regressor with five outputs.

    ``input_shape`` is the (timesteps, features) shape given to the first LSTM
    layer. The model is compiled with Adam (learning rate 0.0003) and MSE loss.
    """
    layers = [
        # Recurrent block 1: 128 units, passes the full sequence onward.
        LSTM(128, activation='tanh', return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.2),
        # Recurrent block 2: 64 units, emits only the final state.
        LSTM(64, activation='tanh', return_sequences=False),
        BatchNormalization(),
        Dropout(0.2),
        # Dense head.
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        # One output per forecast horizon.
        Dense(5),
    ]
    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=0.0003), loss='mse')
    return model

# Build and train the LSTM model. The early_stopping callback is passed to fit
# so training halts once val_loss stops improving and the best weights are
# restored; previously the callback was created but never used.
# NOTE(review): validation_data is the test split, so the stopping epoch is
# chosen on test data - consider carving a validation set out of the training rows.
lstm_model = build_lstm_model(input_shape)
history_lstm = lstm_model.fit(
    xtrain_nn, ytrain,
    epochs=200, batch_size=72,
    validation_data=(xtest_nn, ytest),
    callbacks=[early_stopping],
    verbose=2,
)

pred_lstm = evaluate_deep_learning_model(lstm_model, xtest_nn, ytest, name="LSTM")

Epoch 1/200
53/53 - 6s - loss: 112061.0234 - val_loss: 85431.0469 - 6s/epoch - 104ms/step
Epoch 2/200
53/53 - 0s - loss: 111782.6328 - val_loss: 85298.3984 - 409ms/epoch - 8ms/step
Epoch 3/200
53/53 - 0s - loss: 111577.4609 - val_loss: 85185.3438 - 403ms/epoch - 8ms/step
Epoch 4/200
53/53 - 0s - loss: 111367.9531 - val_loss: 85055.7812 - 420ms/epoch - 8ms/step
Epoch 5/200
53/53 - 0s - loss: 111131.8984 - val_loss: 84897.4375 - 412ms/epoch - 8ms/step
Epoch 6/200
53/53 - 0s - loss: 110862.7109 - val_loss: 84726.1172 - 409ms/epoch - 8ms/step
Epoch 7/200
53/53 - 0s - loss: 110586.2812 - val_loss: 84492.0391 - 426ms/epoch - 8ms/step
Epoch 8/200
53/53 - 0s - loss: 110262.0000 - val_loss: 84275.5312 - 416ms/epoch - 8ms/step
Epoch 9/200
53/53 - 0s - loss: 109916.9688 - val_loss: 84094.9688 - 429ms/epoch - 8ms/step
Epoch 10/200
53/53 - 0s - loss: 109574.3906 - val_loss: 83888.7109 - 416ms/epoch - 8ms/step
Epoch 11/200
53/53 - 0s - loss: 109139.7578 - val_loss: 83266.0469 - 416ms/epoch - 8ms/ste

In [None]:
def build_gru_model(input_shape):
    """Build and compile a three-layer stacked GRU regressor with five outputs.

    ``input_shape`` is the (timesteps, features) shape given to the first GRU
    layer. The model is compiled with Adam (learning rate 0.0001) and MSE loss.
    """
    model = Sequential()

    # 1st GRU layer with 128 units; returns the full sequence for the next GRU
    model.add(GRU(128, activation='tanh', return_sequences=True, input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # 2nd GRU layer with 128 units; still returns the full sequence
    model.add(GRU(128, activation='tanh', return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # 3rd GRU layer with 64 units; emits only the final state
    model.add(GRU(64, activation='tanh', return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # Dense layer for prediction (multi-output)
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(5))  # Final layer with 5 outputs

    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss='mse')
    return model

# Build and train the GRU model. The early_stopping callback is passed to fit
# so training halts once val_loss stops improving and the best weights are
# restored; previously the callback was created but never used.
# NOTE(review): validation_data is the test split, so the stopping epoch is
# chosen on test data - consider carving a validation set out of the training rows.
gru_model = build_gru_model(input_shape)
history_gru = gru_model.fit(
    xtrain_nn, ytrain,
    epochs=200, batch_size=72,
    validation_data=(xtest_nn, ytest),
    callbacks=[early_stopping],
    verbose=2,
)
pred_gru = evaluate_deep_learning_model(gru_model, xtest_nn, ytest, name="GRU")

Epoch 1/200
53/53 - 8s - loss: 111671.2969 - val_loss: 85476.8750 - 8s/epoch - 145ms/step
Epoch 2/200
53/53 - 1s - loss: 110981.9922 - val_loss: 85370.2656 - 547ms/epoch - 10ms/step
Epoch 3/200
53/53 - 1s - loss: 110381.0703 - val_loss: 85224.1797 - 514ms/epoch - 10ms/step
Epoch 4/200
53/53 - 1s - loss: 109750.0938 - val_loss: 84946.3906 - 512ms/epoch - 10ms/step
Epoch 5/200
53/53 - 1s - loss: 109053.4609 - val_loss: 84485.9844 - 507ms/epoch - 10ms/step
Epoch 6/200
53/53 - 0s - loss: 108319.0078 - val_loss: 83772.4141 - 480ms/epoch - 9ms/step
Epoch 7/200
53/53 - 0s - loss: 107501.4219 - val_loss: 82836.3359 - 496ms/epoch - 9ms/step
Epoch 8/200
53/53 - 0s - loss: 106715.7266 - val_loss: 81651.9219 - 492ms/epoch - 9ms/step
Epoch 9/200
53/53 - 0s - loss: 105730.8906 - val_loss: 80111.5234 - 493ms/epoch - 9ms/step
Epoch 10/200
53/53 - 0s - loss: 104728.7969 - val_loss: 78812.7500 - 493ms/epoch - 9ms/step
Epoch 11/200
53/53 - 0s - loss: 103714.7812 - val_loss: 77499.7109 - 486ms/epoch - 9ms

In [None]:
def build_rnn_model(input_shape):
    """Build and compile a two-layer SimpleRNN regressor with five outputs.

    ``input_shape`` is the (timesteps, features) shape given to the first
    recurrent layer. Compiled with Adam (learning rate 0.0003) and MSE loss.
    """
    model = Sequential()

    # Two 128-unit SimpleRNN blocks: the first receives the input shape and
    # forwards the whole sequence, the second emits only its final state.
    recurrent_options = (
        {'return_sequences': True, 'input_shape': input_shape},
        {'return_sequences': False},
    )
    for options in recurrent_options:
        model.add(SimpleRNN(128, activation='tanh', **options))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    # Dense head followed by one output per forecast horizon.
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(5))

    model.compile(optimizer=Adam(learning_rate=0.0003), loss='mse')
    return model

# Build and train the SimpleRNN model. The early_stopping callback is passed to
# fit so training halts once val_loss stops improving and the best weights are
# restored; previously the callback was created but never used.
# NOTE(review): validation_data is the test split, so the stopping epoch is
# chosen on test data - consider carving a validation set out of the training rows.
rnn_model = build_rnn_model(input_shape)
history_rnn = rnn_model.fit(
    xtrain_nn, ytrain,
    epochs=375, batch_size=72,
    validation_data=(xtest_nn, ytest),
    callbacks=[early_stopping],
    verbose=2,
)
pred_rnn = evaluate_deep_learning_model(rnn_model, xtest_nn, ytest, name="RNN")


Epoch 1/375
53/53 - 3s - loss: 111391.4531 - val_loss: 84518.1484 - 3s/epoch - 53ms/step
Epoch 2/375
53/53 - 0s - loss: 109040.2188 - val_loss: 82644.4453 - 266ms/epoch - 5ms/step
Epoch 3/375
53/53 - 0s - loss: 105738.0938 - val_loss: 79979.7891 - 234ms/epoch - 4ms/step
Epoch 4/375
53/53 - 0s - loss: 100998.3281 - val_loss: 76667.1250 - 246ms/epoch - 5ms/step
Epoch 5/375
53/53 - 0s - loss: 94745.5859 - val_loss: 73015.0156 - 256ms/epoch - 5ms/step
Epoch 6/375
53/53 - 0s - loss: 86586.1953 - val_loss: 63800.0625 - 238ms/epoch - 4ms/step
Epoch 7/375
53/53 - 0s - loss: 77200.1250 - val_loss: 60087.1562 - 242ms/epoch - 5ms/step
Epoch 8/375
53/53 - 0s - loss: 66829.8750 - val_loss: 48926.6250 - 253ms/epoch - 5ms/step
Epoch 9/375
53/53 - 0s - loss: 56139.0117 - val_loss: 31857.1152 - 248ms/epoch - 5ms/step
Epoch 10/375
53/53 - 0s - loss: 45511.4297 - val_loss: 17703.1895 - 248ms/epoch - 5ms/step
Epoch 11/375
53/53 - 0s - loss: 36212.1250 - val_loss: 11709.0703 - 248ms/epoch - 5ms/step
Epoch 

In [None]:
def build_ffnn_model(input_shape):
    """Build and compile a feed-forward regressor with five outputs.

    ``input_shape`` is the number of input features (an int); it is wrapped in
    a 1-tuple for the first Dense layer. Compiled with Adam (learning rate
    0.0001) and MSE loss.
    """
    layers = [
        # Hidden block 1
        Dense(256, activation='relu', input_shape=(input_shape,)),
        BatchNormalization(),
        Dropout(0.2),
        # Hidden block 2
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        # Hidden block 3 (no batch normalization)
        Dense(64, activation='relu'),
        Dropout(0.2),
        # One output per forecast horizon
        Dense(5),
    ]
    model = Sequential(layers)
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='mse')
    return model

# Build and train the feed-forward model on the 2-D scaled features. The
# early_stopping callback is passed to fit so training halts once val_loss
# stops improving and the best weights are restored; previously the callback
# was created but never used.
# NOTE(review): validation_data is the test split, so the stopping epoch is
# chosen on test data - consider carving a validation set out of the training rows.
ffnn_model = build_ffnn_model(xtrain_scaled.shape[1])
history_ffnn = ffnn_model.fit(
    xtrain_scaled, ytrain,
    epochs=80, batch_size=32,
    validation_data=(xtest_scaled, ytest),
    callbacks=[early_stopping],
    verbose=2,
)
pred_ffnn = evaluate_deep_learning_model(ffnn_model, xtest_scaled, ytest, name="FFNN")


Epoch 1/80
119/119 - 2s - loss: 111284.2109 - val_loss: 85019.1094 - 2s/epoch - 15ms/step
Epoch 2/80
119/119 - 0s - loss: 109009.1250 - val_loss: 83730.9688 - 374ms/epoch - 3ms/step
Epoch 3/80
119/119 - 0s - loss: 105072.2734 - val_loss: 81082.7031 - 368ms/epoch - 3ms/step
Epoch 4/80
119/119 - 0s - loss: 99138.1797 - val_loss: 76966.0469 - 361ms/epoch - 3ms/step
Epoch 5/80
119/119 - 0s - loss: 91637.9062 - val_loss: 69116.0703 - 368ms/epoch - 3ms/step
Epoch 6/80
119/119 - 0s - loss: 83248.1250 - val_loss: 61415.9883 - 363ms/epoch - 3ms/step
Epoch 7/80
119/119 - 0s - loss: 73791.0234 - val_loss: 52638.6523 - 359ms/epoch - 3ms/step
Epoch 8/80
119/119 - 0s - loss: 64128.4922 - val_loss: 42592.3906 - 369ms/epoch - 3ms/step
Epoch 9/80
119/119 - 0s - loss: 54372.7188 - val_loss: 37111.3125 - 342ms/epoch - 3ms/step
Epoch 10/80
119/119 - 0s - loss: 44792.0742 - val_loss: 21926.9707 - 374ms/epoch - 3ms/step
Epoch 11/80
119/119 - 0s - loss: 35837.9570 - val_loss: 20639.3359 - 363ms/epoch - 3ms/s

In [None]:
from keras.layers import Conv1D, MaxPooling1D, Flatten

def build_cnn_lstm_model(input_shape):
    """Build and compile a Conv1D -> LSTM regressor with five outputs.

    ``input_shape`` is (timesteps, features). Both the convolution and the
    pooling layer use ``padding='same'`` so the time axis never shrinks below
    one step; with the default valid padding a kernel of 3 or a pool of 2
    cannot be applied to the single-timestep input used in this notebook.
    Compiled with Adam (learning rate 0.001) and MSE loss.
    """
    model = Sequential()

    # 1D convolutional feature extractor
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                     padding='same', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2, padding='same'))

    # LSTM layer after CNN; emits only the final state
    model.add(LSTM(128, return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # Dense head and one output per forecast horizon
    model.add(Dense(32, activation='relu'))
    model.add(Dense(5))

    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model


# The network outputs (batch, 5), so it is trained on the same 2-D targets as
# the other models. The earlier reshape of the targets to (n // 10, 10, 5)
# relied on undefined trim sizes and produced the
# "Incompatible shapes: [72,5] vs. [72,10,5]" failure.
# NOTE(review): validation_data is the test split, so early stopping selects
# the epoch on test data - consider a validation set taken from the training rows.
cnn_lstm_model = build_cnn_lstm_model(input_shape)
history_cnn_lstm = cnn_lstm_model.fit(
    xtrain_nn, ytrain,
    epochs=100, batch_size=72,
    validation_data=(xtest_nn, ytest),
    callbacks=[early_stopping],
    verbose=2,
)
pred_cnn_lstm = evaluate_deep_learning_model(cnn_lstm_model, xtest_nn, ytest, name="CNN-LSTM")

Epoch 1/100


InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/mean_squared_error/BroadcastGradientArgs defined at (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-32-8329ca4042af>", line 35, in <cell line: 35>

  File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1807, in fit

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1154, in train_step

  File "/usr/local/lib/python3.10/dist-packages/keras/src/optimizers/optimizer.py", line 543, in minimize

  File "/usr/local/lib/python3.10/dist-packages/keras/src/optimizers/optimizer.py", line 276, in compute_gradients

Incompatible shapes: [72,5] vs. [72,10,5]
	 [[{{node gradient_tape/mean_squared_error/BroadcastGradientArgs}}]] [Op:__inference_train_function_493809]

In [None]:
from keras.layers import MultiHeadAttention, LayerNormalization

def build_transformer_model(input_shape):
    """Build and compile a small self-attention regressor with 5 outputs.

    The network is assembled with the functional API because
    ``MultiHeadAttention`` must be called with both a query and a value
    tensor, which a ``Sequential`` stack cannot provide (it raised
    ``TypeError: MultiHeadAttention.call() missing 1 required positional
    argument: 'value'``).

    Args:
        input_shape: Per-sample input shape passed to ``Input``; expected to
            be ``(timesteps, features)`` so that attention runs over the
            timestep axis.

    Returns:
        A compiled Keras model (Adam, lr=0.001, MSE loss) whose output has
        shape ``(batch, 5)``.
    """
    # Local imports keep this cell runnable on a fresh kernel even if the
    # cells that import these names have not been executed yet.
    from keras.layers import GlobalAveragePooling1D, Input
    from keras.models import Model

    inputs = Input(shape=input_shape)

    # Self-attention: the same sequence serves as query and value (and key).
    attended = MultiHeadAttention(num_heads=8, key_dim=64)(inputs, inputs)
    attended = LayerNormalization()(attended)  # Layer normalization instead of batch normalization
    attended = Dropout(0.3)(attended)

    # Collapse the timestep axis so the head emits one prediction vector per
    # sample instead of one per timestep (which would not match a
    # (batch, 5) target).
    pooled = GlobalAveragePooling1D()(attended)

    # Dense layers for prediction
    hidden = Dense(64, activation='relu')(pooled)
    outputs = Dense(5)(hidden)  # Multi-output for future days

    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Build the attention model, train it, then score it on the held-out split.
transformer_model = build_transformer_model(input_shape)
history_transformer = transformer_model.fit(
    x=xtrain_nn,
    y=ytrain,
    validation_data=(xtest_nn, ytest),
    batch_size=72,
    epochs=100,
    verbose=2,
)
pred_transformer = evaluate_deep_learning_model(
    transformer_model, xtest_nn, ytest, name="Transformer"
)

TypeError: MultiHeadAttention.call() missing 1 required positional argument: 'value'

In [None]:
from keras.layers import Attention

def build_cnn_attention_lstm(input_shape):
    """Build and compile a Conv1D -> LSTM -> self-attention -> LSTM regressor.

    Two problems of the previous ``Sequential`` version are addressed:

    * ``Conv1D(kernel_size=3)`` and ``MaxPooling1D(pool_size=2)`` with the
      default ``'valid'`` padding shrink the time axis, which fails for very
      short sequences (e.g. input shape ``[None, 1, 21]``). ``padding='same'``
      keeps at least one timestep.
    * ``Attention`` expects a list ``[query, value]`` as input, which a
      ``Sequential`` stack cannot supply, so the functional API is used and
      the LSTM sequence attends to itself.

    Args:
        input_shape: Per-sample input shape passed to ``Input``; expected to
            be ``(timesteps, features)``.

    Returns:
        A compiled Keras model (Adam, lr=0.001, MSE loss) whose output has
        shape ``(batch, 5)``.
    """
    # Local imports keep this cell runnable on a fresh kernel even if the
    # cells that import these names have not been executed yet.
    from keras.layers import Input
    from keras.models import Model

    inputs = Input(shape=input_shape)

    # CNN for feature extraction ('same' padding so short sequences survive)
    features = Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(inputs)
    features = MaxPooling1D(pool_size=2, padding='same')(features)

    # LSTM layer (keeps the full sequence for the attention step)
    sequence = LSTM(128, return_sequences=True)(features)
    sequence = BatchNormalization()(sequence)
    sequence = Dropout(0.3)(sequence)

    # Attention layer: self-attention, query and value are the same sequence
    context = Attention()([sequence, sequence])
    summary = LSTM(64, return_sequences=False)(context)

    # Dense layer for output
    hidden = Dense(32, activation='relu')(summary)
    outputs = Dense(5)(hidden)  # Multi-output for future days

    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

# Build the CNN/attention/LSTM hybrid, train it, then score it on the held-out split.
cnn_attention_lstm_model = build_cnn_attention_lstm(input_shape)
history_cnn_attention_lstm = cnn_attention_lstm_model.fit(
    x=xtrain_nn,
    y=ytrain,
    validation_data=(xtest_nn, ytest),
    batch_size=72,
    epochs=100,
    verbose=2,
)

pred_cnn_attention_lstm = evaluate_deep_learning_model(
    cnn_attention_lstm_model, xtest_nn, ytest, name="CNN-Attention-LSTM"
)

ValueError: One of the dimensions in the output is <= 0 due to downsampling in conv1d_4. Consider increasing the input size. Received input shape [None, 1, 21] which would produce output shape with a zero or negative value in a dimension.

In [None]:
from keras.layers import Input, Dense
from keras.models import Model, Sequential

def build_autoencoder_lstm(input_shape, encoding_dim=32):
    """Build a dense autoencoder and a separate LSTM predictor for its codes.

    The two returned models are NOT connected to each other. The predictor
    expects inputs of shape ``(samples, encoding_dim, 1)``, i.e. the
    bottleneck activations of the autoencoder with a trailing axis added;
    the caller is responsible for producing those.

    Args:
        input_shape: Number of input features (an int); the autoencoder takes
            flat ``(samples, input_shape)`` rows and reconstructs them.
        encoding_dim: Width of the bottleneck layer and length of the
            sequence fed to the LSTM predictor. Defaults to 32.

    Returns:
        Tuple ``(autoencoder, model)``: the compiled autoencoder (Adam, MSE)
        and the compiled LSTM predictor (Adam, MSE) with 5 outputs.
    """
    # Autoencoder architecture
    input_layer = Input(shape=(input_shape,))
    encoded = Dense(128, activation='relu')(input_layer)
    encoded = Dense(64, activation='relu')(encoded)
    encoded = Dense(encoding_dim, activation='relu')(encoded)  # bottleneck

    # Decoder to reconstruct input
    decoded = Dense(64, activation='relu')(encoded)
    decoded = Dense(128, activation='relu')(decoded)
    # Sigmoid bounds the reconstruction to (0, 1); presumably the features
    # are min-max scaled to that range -- TODO confirm against the scaler.
    output_layer = Dense(input_shape, activation='sigmoid')(decoded)

    autoencoder = Model(input_layer, output_layer)
    autoencoder.compile(optimizer='adam', loss='mse')

    # LSTM for prediction based on encoded data. An explicit Input replaces
    # the `input_shape=` layer kwarg, and the sequence length is tied to the
    # bottleneck width so the two cannot drift apart.
    model = Sequential()
    model.add(Input(shape=(encoding_dim, 1)))
    model.add(LSTM(128))  # Input to LSTM is the encoded data
    model.add(Dense(5))  # Multi-output prediction
    model.compile(optimizer='adam', loss='mse')

    return autoencoder, model

# Feature count comes from the second axis of the scaled training matrix.
input_shape_encoder = xtrain_scaled.shape[1]
autoencoder, lstm_autoencoder = build_autoencoder_lstm(
    input_shape=input_shape_encoder,
)

  super().__init__(**kwargs)


In [None]:

# The dense autoencoder takes flat (samples, features) rows, but xtrain_nn /
# xtest_nn carry an extra timestep axis (the failing run reported
# (None, 1, 21) against an expected (None, 21)), so flatten before fitting.
xtrain_flat = xtrain_nn.reshape(len(xtrain_nn), -1)
xtest_flat = xtest_nn.reshape(len(xtest_nn), -1)
history_autoencoder = autoencoder.fit(xtrain_flat, xtrain_flat, epochs=100, batch_size=72, validation_data=(xtest_flat, xtest_flat), verbose=2)

# lstm_autoencoder consumes the bottleneck activations, not raw features:
# layers[3] is the 32-unit bottleneck (0 = input, 1-2 = first two Dense
# layers), and the trailing axis gives the (samples, 32, 1) shape it expects.
encoder = Model(autoencoder.input, autoencoder.layers[3].output)
xtest_encoded = encoder.predict(xtest_flat, verbose=0)[..., None]
# NOTE(review): lstm_autoencoder has not been fitted at this point, so this
# scores an untrained predictor (baseline only) -- confirm this is intended.
pred_autoencoder = evaluate_deep_learning_model(lstm_autoencoder, xtest_encoded, ytest, name="Autoencoder")


Epoch 1/100


ValueError: Input 0 of layer "functional_3" is incompatible with the layer: expected shape=(None, 21), found shape=(None, 1, 21)

In [None]:
# lstm_autoencoder was built for (samples, 32, 1) inputs, i.e. the
# autoencoder's bottleneck activations, but xtrain_nn / xtest_nn hold raw
# features shaped (samples, 1, 21). Encode them first: layers[3] is the
# 32-unit bottleneck (0 = input, 1-2 = first two Dense layers), and the
# trailing axis turns each code into a length-32 sequence of scalars.
encoder = Model(autoencoder.input, autoencoder.layers[3].output)
xtrain_encoded = encoder.predict(xtrain_nn.reshape(len(xtrain_nn), -1), verbose=0)[..., None]
xtest_encoded = encoder.predict(xtest_nn.reshape(len(xtest_nn), -1), verbose=0)[..., None]

history_lstm_autoencoder = lstm_autoencoder.fit(xtrain_encoded, ytrain, epochs=100, batch_size=72, validation_data=(xtest_encoded, ytest), verbose=2)
pred_autoencoder_lstm = evaluate_deep_learning_model(lstm_autoencoder, xtest_encoded, ytest, name="Autoencoder-LSTM")

Epoch 1/100


ValueError: Exception encountered when calling LSTMCell.call().

[1mDimensions must be equal, but are 21 and 1 for '{{node sequential_1_1/lstm_1_1/lstm_cell_1/MatMul}} = MatMul[T=DT_FLOAT, grad_a=false, grad_b=false, transpose_a=false, transpose_b=false](sequential_1_1/lstm_1_1/strided_slice_2, sequential_1_1/lstm_1_1/lstm_cell_1/Cast/ReadVariableOp)' with input shapes: [?,21], [1,512].[0m

Arguments received by LSTMCell.call():
  • inputs=tf.Tensor(shape=(None, 21), dtype=float32)
  • states=('tf.Tensor(shape=(None, 128), dtype=float32)', 'tf.Tensor(shape=(None, 128), dtype=float32)')
  • training=True