In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Location of the GAIL price history CSV.
# NOTE(review): this looks like a Colab Google Drive mount path - adjust when running elsewhere.
filepath = '/content/drive/MyDrive/stocks/GAIL.csv'
# Raw frame as read from disk; the cells visible below derive new frames from it rather than editing it.
df = pd.read_csv(filepath)


def plot_actual_vs_predicted(ytest, ypred, title):
    """Draw the actual and predicted series on a single wide matplotlib figure.

    Parameters
    ----------
    ytest : object exposing ``.values`` (e.g. a pandas Series)
        Ground-truth values, drawn in blue.
    ypred : array-like
        Model predictions, drawn in red.
    title : str
        Text placed above the figure.

    Both series are plotted against their positional index with dashed lines.
    """
    plt.figure(figsize=(20, 6))

    # (values, legend label, colour) for each line, in drawing order.
    lines = (
        (ytest.values, 'Actual Values', 'blue'),
        (ypred, 'Predicted Values', 'red'),
    )
    for values, legend_label, colour in lines:
        plt.plot(values, label=legend_label, color=colour, linestyle='--')

    plt.title(title)
    plt.xlabel('Index')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

# Drop the identifier columns and the trade count; everything else is kept.
stock_data = df.drop(columns=['Date', 'Symbol', 'Series', 'Trades'])

# One target column per forecast horizon: the close `horizon` rows further down
# the frame (shift(-h) pulls later rows up to the current row).
for horizon in (1, 7, 14, 21, 30):
    stock_data[f'Close{horizon}'] = stock_data['Close'].shift(-horizon)

# Remove every row containing a missing value. This includes the trailing rows
# for which a shifted target does not exist.
stock_data = stock_data.dropna()

In [None]:
# Names of the five forecast targets, in the column order used for Y.
target_cols = ['Close1', 'Close7', 'Close14', 'Close21', 'Close30']

# Features: every column except the two delivery columns and the targets.
X = stock_data.drop(columns=['Deliverable Volume', '%Deliverble'] + target_cols)
Y = stock_data[target_cols]
Y1 = stock_data['Close1']
Y7 = stock_data['Close7']
Y14 = stock_data['Close14']
Y21 = stock_data['Close21']
Y30 = stock_data['Close30']

# A single chronological 80/20 split. With shuffle=False the first 80% of rows
# become the training set and the remaining rows the test set.
# NOTE(review): targets of the last training rows are closes located up to 30
# rows ahead, i.e. inside the test period - consider leaving a gap between the
# two sets if strict separation matters.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, shuffle=False)
assert len(xtrain) + len(xtest) == len(X)

# The per-horizon targets are columns of that same split, so the rows line up
# with xtrain / xtest without re-splitting for every horizon.
ytrain1, ytest1 = ytrain['Close1'], ytest['Close1']
ytrain7, ytest7 = ytrain['Close7'], ytest['Close7']
ytrain14, ytest14 = ytrain['Close14'], ytest['Close14']
ytrain21, ytest21 = ytrain['Close21'], ytest['Close21']
ytrain30, ytest30 = ytrain['Close30'], ytest['Close30']

# Fit the scaler on the training features only, then reuse it for the test features.
scaler = MinMaxScaler()
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)

In [None]:
# Fixed seed so the random forests (and therefore the reported metrics) are
# reproducible across kernel restarts.
RANDOM_STATE = 42
N_ESTIMATORS = 150


def _make_baseline_models():
    """Return a fresh dict of unfitted baseline regressors keyed by short name.

    A new set is created on every call so that each forecast horizon gets its
    own independent estimators.
    """
    return {
        'LR': LinearRegression(),
        'SVM': SVR(),
        'KNN': KNeighborsRegressor(),
        'RF': RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE),
    }


# One independent model set per forecast horizon.
models1 = _make_baseline_models()
models7 = _make_baseline_models()
models14 = _make_baseline_models()
models21 = _make_baseline_models()
models30 = _make_baseline_models()

# MultiOutputRegressor fits one clone of the wrapped estimator per target column.
# NOTE(review): because every forest shares RANDOM_STATE, its per-target forests
# are expected to match the single-horizon 'RF' models above - confirm.
mor_model = MultiOutputRegressor(
    RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE)
)

# Fit every model set against the target of its own horizon.
for horizon_models, horizon_target in (
    (models1, ytrain1),
    (models7, ytrain7),
    (models14, ytrain14),
    (models21, ytrain21),
    (models30, ytrain30),
):
    for model in horizon_models.values():
        model.fit(xtrain_scaled, horizon_target)

mor_model.fit(xtrain_scaled, ytrain)

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import cross_val_score
import numpy as np

def _evaluate_models(models, xtest, ytest):
    """Predict with every fitted model in ``models`` and print its test metrics.

    Parameters
    ----------
    models : dict
        Maps a short model name to a fitted estimator exposing ``predict``.
    xtest : array-like
        Test features passed to each ``predict`` call.
    ytest : array-like
        True target values for the same rows.

    Returns
    -------
    (preds, scores) : tuple of dict
        ``preds[name]`` holds the predictions of that model.
        ``scores`` holds ``'<name>_mse'``, ``'<name>_mape'`` (in percent) and
        ``'<name>_r2'`` for every model.
    """
    preds, scores = {}, {}
    for name, model in models.items():
        preds[name] = model.predict(xtest)
        model_mse = mean_squared_error(ytest, preds[name])
        # sklearn returns MAPE as a fraction; scale it to percent.
        model_mape = mean_absolute_percentage_error(ytest, preds[name]) * 100
        model_r2 = r2_score(ytest, preds[name])

        scores[f'{name}_mse'] = model_mse
        scores[f'{name}_mape'] = model_mape
        scores[f'{name}_r2'] = model_r2
        print(f"  {name} - MSE: {model_mse}, MAPE: {model_mape},  R2 Score: {model_r2}\n")
    return preds, scores


print("Prediction for next day:")
pred1, res1 = _evaluate_models(models1, xtest_scaled, ytest1)

print("Prediction for day 7")
pred7, res7 = _evaluate_models(models7, xtest_scaled, ytest7)

print("Prediction for day 14")
pred14, res14 = _evaluate_models(models14, xtest_scaled, ytest14)

print("Prediction for day 21")
pred21, res21 = _evaluate_models(models21, xtest_scaled, ytest21)

print("Prediction for day 30")
pred30, res30 = _evaluate_models(models30, xtest_scaled, ytest30)

# Multi-output model: `pred` has one column per target and the metrics below
# are computed over all five targets together.
pred = mor_model.predict(xtest_scaled)
print("Prediction from multioutput model")
mse = mean_squared_error(ytest, pred)
mape = mean_absolute_percentage_error(ytest, pred) * 100
r2 = r2_score(ytest, pred)

print(f"  MultiOutputRegressor - MSE: {mse}, MAPE: {mape},  R2 Score: {r2}\n")

Prediction for next day:
  LR - MSE: 121.26600247454145, MAPE: 4.750465553126939,  R2 Score: 0.9937104586925826

  SVM - MSE: 2873.2628132687223, MAPE: 28.26290032547954,  R2 Score: 0.8509763265684127

  KNN - MSE: 1320.0375126834383, MAPE: 18.885452587735386,  R2 Score: 0.931535382597393

  RF - MSE: 72.11552874178903, MAPE: 2.173437235691817,  R2 Score: 0.9962596804737341

Prediction for day 7
  LR - MSE: 599.7167178815288, MAPE: 9.9484091068179,  R2 Score: 0.9688346704233028

  SVM - MSE: 3202.61794416507, MAPE: 30.128649214659937,  R2 Score: 0.8335703495298163

  KNN - MSE: 1550.367567610063, MAPE: 19.65562116477298,  R2 Score: 0.9194324340660872

  RF - MSE: 546.836955359422, MAPE: 7.149454584596243,  R2 Score: 0.9715826598953331

Prediction for day 14
  LR - MSE: 1054.2933991917553, MAPE: 13.115693136447721,  R2 Score: 0.9451483822834728

  SVM - MSE: 3402.523567135484, MAPE: 30.72147442064821,  R2 Score: 0.822977245120696

  KNN - MSE: 2330.053912368973, MAPE: 23.6068319471071, 

In [None]:
def _horizon_scores(y_true, y_pred):
    """Return ``(MSE, MAPE in percent, R2)`` for one target column."""
    return (
        mean_squared_error(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred) * 100,
        r2_score(y_true, y_pred),
    )


def _print_comparison(heading, mor_scores, rf_results):
    """Print the multi-output scores next to the single random forest scores.

    ``mor_scores`` is an ``(mse, mape, r2)`` tuple; ``rf_results`` is a results
    dict containing the ``'RF_mse'``, ``'RF_mape'`` and ``'RF_r2'`` keys.
    """
    mor_mse, mor_mape, mor_r2 = mor_scores
    print(heading)
    print(f"  MultiOutputRegressor - MSE: {mor_mse}, MAPE: {mor_mape},  R2 Score: {mor_r2}")
    print(f"  Single Random Forest - MSE: {rf_results['RF_mse']}, MAPE: {rf_results['RF_mape']},  R2 Score: {rf_results['RF_r2']}\n")


# The code below indexes columns 0..4 of `pred`, one per forecast horizon.
assert pred.shape[1] == 5

mse1, mape1, r2_1 = _horizon_scores(ytest1, pred[:, 0])
_print_comparison("Day 1:", (mse1, mape1, r2_1), res1)

mse7, mape7, r2_7 = _horizon_scores(ytest7, pred[:, 1])
_print_comparison("Day 7:", (mse7, mape7, r2_7), res7)

mse14, mape14, r2_14 = _horizon_scores(ytest14, pred[:, 2])
_print_comparison("Day 14:", (mse14, mape14, r2_14), res14)

mse21, mape21, r2_21 = _horizon_scores(ytest21, pred[:, 3])
_print_comparison("Day 21:", (mse21, mape21, r2_21), res21)

mse30, mape30, r2_30 = _horizon_scores(ytest30, pred[:, 4])
_print_comparison("Day 30:", (mse30, mape30, r2_30), res30)

Day 1:
  MultiOutputRegressor - MSE: 72.34320857593751, MAPE: 2.166750327356837,  R2 Score: 0.9962478717087668
  Single Randome Forest- MSE: 72.11552874178903, MAPE: 2.173437235691817,  R2 Score: 0.9962596804737341

Day 7:
  MultiOutputRegressor - MSE: 542.2422784947581, MAPE: 7.06625837546634,  R2 Score: 0.9718214303256314
  Single Randome Forest- MSE: 546.836955359422, MAPE: 7.149454584596243,  R2 Score: 0.9715826598953331

Day 14:
  MultiOutputRegressor - MSE: 1044.735483648731, MAPE: 10.202721587224167,  R2 Score: 0.9456456510038638
  Single Randome Forest- MSE: 1047.7650726240397, MAPE: 10.4065652113494,  R2 Score: 0.9454880308798649

Day 21:
  MultiOutputRegressor - MSE: 1588.7353090044273, MAPE: 13.58412453963321,  R2 Score: 0.9171250668846063
  Single Randome Forest- MSE: 1572.0238505849074, MAPE: 13.56227115135192,  R2 Score: 0.9179968049210991

Day 30:
  MultiOutputRegressor - MSE: 2512.778185382018, MAPE: 17.280093110059173,  R2 Score: 0.8689007308378696
  Single Randome For

In [None]:
import plotly.graph_objs as go
import plotly.offline as pyo

# Create interactive plot for a single day
def plot_day_predictions(day, ytest, pred_dict, title):
    """Show an interactive plotly line chart of actual values and model predictions.

    Parameters
    ----------
    day : hashable
        Horizon identifier. It is only used as a lookup key when an entry of
        ``pred_dict`` is itself a dict of per-day predictions.
    ytest : sequence
        Actual values, drawn in blue as the 'Actual' trace.
    pred_dict : dict
        Maps a model name to either the prediction series itself or a dict
        keyed by ``day`` that holds the series.
    title : str
        Figure title.
    """
    fig = go.Figure()

    # Add actual data
    fig.add_trace(go.Scatter(x=list(range(len(ytest))), y=ytest,
                             mode='lines', name='Actual', line=dict(color='blue')))

    # Add predictions from each model
    for model_name, pred in pred_dict.items():
        # Accept both layouts: a plain series, or a {day: series} mapping.
        series = pred[day] if isinstance(pred, dict) else pred
        fig.add_trace(go.Scatter(x=list(range(len(series))), y=series,
                                 mode='lines', name=model_name))

    # Layout configuration
    fig.update_layout(title=title, xaxis_title='Sample Index', yaxis_title='Stock Value',
                      legend_title="Models", hovermode="x unified")

    pyo.iplot(fig)

# Per-horizon prediction dictionaries, keyed by model name in the order of the
# corresponding model dict.
pred_dict1 = {model_name: pred1[model_name] for model_name in models1}
pred_dict7 = {model_name: pred7[model_name] for model_name in models7}
pred_dict14 = {model_name: pred14[model_name] for model_name in models14}
pred_dict21 = {model_name: pred21[model_name] for model_name in models21}
pred_dict30 = {model_name: pred30[model_name] for model_name in models30}

# One interactive figure per forecast horizon.
for horizon, actual, horizon_preds in (
    (1, ytest1, pred_dict1),
    (7, ytest7, pred_dict7),
    (14, ytest14, pred_dict14),
    (21, ytest21, pred_dict21),
    (30, ytest30, pred_dict30),
):
    plot_day_predictions(horizon, actual, horizon_preds,
                         f"Day {horizon} Predictions (All Models)")


In [None]:
import numpy as np
from tensorflow.keras.layers import GRU, LSTM, Dense, Input
from tensorflow.keras.models import Sequential

# The recurrent layers expect 3D input (samples, timesteps, features).
# A length-1 timestep axis is inserted, so every row is a one-step sequence.
xtrain_nn = np.expand_dims(xtrain_scaled, axis=1)
xtest_nn = np.expand_dims(xtest_scaled, axis=1)


def build_lstm_model(input_shape, n_outputs=5):
    """Build and compile a single-layer LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample without the batch axis, i.e. (timesteps, features).
    n_outputs : int, default 5
        Width of the final Dense layer. The default gives one output per
        forecast horizon (1st, 7th, 14th, 21st and 30th day).

    Returns
    -------
    A compiled ``Sequential`` model (adam optimizer, MSE loss).
    """
    model = Sequential()
    # An explicit Input layer replaces the `input_shape=` layer argument that
    # Keras warns about.
    model.add(Input(shape=input_shape))
    model.add(LSTM(50, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(optimizer='adam', loss='mse')
    return model

def build_gru_model(input_shape, n_outputs=5):
    """Build and compile a single-layer GRU regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample without the batch axis, i.e. (timesteps, features).
    n_outputs : int, default 5
        Width of the final Dense layer (one output per forecast horizon).

    Returns
    -------
    A compiled ``Sequential`` model (adam optimizer, MSE loss).
    """
    model = Sequential()
    # An explicit Input layer replaces the `input_shape=` layer argument that
    # Keras warns about.
    model.add(Input(shape=input_shape))
    model.add(GRU(50, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(optimizer='adam', loss='mse')
    return model

def build_ffnn_model(input_shape, n_outputs=5):
    """Build and compile a two-hidden-layer feedforward regressor.

    Parameters
    ----------
    input_shape : int
        Number of input features (a plain integer, not a tuple).
    n_outputs : int, default 5
        Width of the final Dense layer (one output per forecast horizon).

    Returns
    -------
    A compiled ``Sequential`` model (adam optimizer, mean squared error loss).
    """
    model = Sequential()
    # An explicit Input layer replaces the `input_shape=` layer argument that
    # Keras warns about.
    model.add(Input(shape=(input_shape,)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Build and train the LSTM model.
# input_shape is (timesteps, features) and is reused when the GRU model is built.
input_shape = xtrain_nn.shape[1:]
lstm_model = build_lstm_model(input_shape)
history_lstm = lstm_model.fit(
    xtrain_nn,
    ytrain,
    # The test split doubles as validation data, so val_loss is the test loss.
    validation_data=(xtest_nn, ytest),
    batch_size=72,
    epochs=140,
    verbose=2,
)

Epoch 1/140



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



53/53 - 3s - 60ms/step - loss: 111669.9609 - val_loss: 85608.6797
Epoch 2/140
53/53 - 0s - 7ms/step - loss: 111074.6641 - val_loss: 84678.3047
Epoch 3/140
53/53 - 0s - 6ms/step - loss: 109174.7656 - val_loss: 82263.4062
Epoch 4/140
53/53 - 0s - 6ms/step - loss: 105188.9375 - val_loss: 78026.6172
Epoch 5/140
53/53 - 0s - 6ms/step - loss: 99155.0703 - val_loss: 72259.6641
Epoch 6/140
53/53 - 1s - 11ms/step - loss: 91575.9453 - val_loss: 65525.6445
Epoch 7/140
53/53 - 0s - 6ms/step - loss: 83004.3203 - val_loss: 58298.6250
Epoch 8/140
53/53 - 0s - 5ms/step - loss: 73940.3672 - val_loss: 50864.4961
Epoch 9/140
53/53 - 0s - 6ms/step - loss: 64740.8516 - val_loss: 43505.8047
Epoch 10/140
53/53 - 0s - 6ms/step - loss: 55690.1523 - val_loss: 36454.8516
Epoch 11/140
53/53 - 1s - 12ms/step - loss: 47042.0469 - val_loss: 29885.4336
Epoch 12/140
53/53 - 1s - 10ms/step - loss: 38976.2891 - val_loss: 23979.2617
Epoch 13/140
53/53 - 0s - 5ms/step - loss: 31655.1133 - val_loss: 18807.7422
Epoch 14/140

In [None]:
# Build and train the GRU model (same input shape as the LSTM).
gru_model = build_gru_model(input_shape)
history_gru = gru_model.fit(
    xtrain_nn,
    ytrain,
    # The test split doubles as validation data, so val_loss is the test loss.
    validation_data=(xtest_nn, ytest),
    batch_size=72,
    epochs=100,
    verbose=2,
)


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Epoch 1/100
53/53 - 4s - 85ms/step - loss: 111602.8359 - val_loss: 85439.6953
Epoch 2/100
53/53 - 0s - 4ms/step - loss: 110610.8672 - val_loss: 84008.5703
Epoch 3/100
53/53 - 0s - 5ms/step - loss: 107964.7656 - val_loss: 80910.2344
Epoch 4/100
53/53 - 0s - 6ms/step - loss: 103250.7344 - val_loss: 76188.0938
Epoch 5/100
53/53 - 0s - 3ms/step - loss: 96713.3750 - val_loss: 70180.5000
Epoch 6/100
53/53 - 0s - 6ms/step - loss: 88850.9609 - val_loss: 63306.0547
Epoch 7/100
53/53 - 0s - 5ms/step - loss: 80110.3672 - val_loss: 55986.3789
Epoch 8/100
53/53 - 0s - 6ms/step - loss: 70909.8359 - val_loss: 48466.4727
Epoch 9/100
53/53 - 0s - 3ms/step - loss: 61567.1914 - val_loss: 41098.2266
Epoch 10/100
53/53 - 0s - 4ms/step - loss: 52402.1133 - val_loss: 34097.5234
Epoch 11/100
53/53 - 0s - 3ms/step - loss: 43688.3750 - val_loss: 27633.6680
Epoch 12/100
53/53 - 0s - 3ms/step - loss: 35634.8086 - val_loss: 21887.3945
Epoch 13/100
53/53 - 0s - 3ms/step - loss: 28414.5371 - val_loss: 16952.9277
Epo

In [None]:
# Build and train the feedforward network on the flat (2D) scaled features.
n_features = xtrain_scaled.shape[1]
ffnn_model = build_ffnn_model(n_features)
history_ffnn = ffnn_model.fit(
    xtrain_scaled,
    ytrain,
    # The test split doubles as validation data, so val_loss is the test loss.
    validation_data=(xtest_scaled, ytest),
    batch_size=32,
    epochs=15,
    verbose=2,
)

Epoch 1/15



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



120/120 - 2s - 14ms/step - loss: 102793.4219 - val_loss: 53536.5156
Epoch 2/15
120/120 - 0s - 3ms/step - loss: 21654.0488 - val_loss: 4330.4326
Epoch 3/15
120/120 - 1s - 5ms/step - loss: 856.2687 - val_loss: 3588.5073
Epoch 4/15
120/120 - 1s - 5ms/step - loss: 771.6940 - val_loss: 3105.4165
Epoch 5/15
120/120 - 0s - 3ms/step - loss: 757.9871 - val_loss: 2656.9243
Epoch 6/15
120/120 - 1s - 5ms/step - loss: 745.5925 - val_loss: 2213.1953
Epoch 7/15
120/120 - 1s - 5ms/step - loss: 733.1081 - val_loss: 1772.9346
Epoch 8/15
120/120 - 0s - 2ms/step - loss: 722.5684 - val_loss: 1582.7125
Epoch 9/15
120/120 - 0s - 2ms/step - loss: 714.9907 - val_loss: 1340.6699
Epoch 10/15
120/120 - 0s - 3ms/step - loss: 708.0319 - val_loss: 1202.4883
Epoch 11/15
120/120 - 0s - 2ms/step - loss: 706.9667 - val_loss: 1115.2988
Epoch 12/15
120/120 - 0s - 2ms/step - loss: 700.1397 - val_loss: 1032.2463
Epoch 13/15
120/120 - 0s - 2ms/step - loss: 698.9444 - val_loss: 983.1191
Epoch 14/15
120/120 - 0s - 3ms/step - l

In [None]:
# Destination `.keras` files for the three trained networks.
lstm_model_path = '/content/drive/MyDrive/models/phase1.0.lstm.keras'
gru_model_path =  '/content/drive/MyDrive/models/phase1.0.gru.keras'
ffnn_model_path = '/content/drive/MyDrive/models/phase1.0.ffnn.keras'

# Write each network to its own file, in LSTM, GRU, FFNN order.
for trained_model, destination in (
    (lstm_model, lstm_model_path),
    (gru_model, gru_model_path),
    (ffnn_model, ffnn_model_path),
):
    trained_model.save(destination)

  saving_api.save_model(


In [None]:
from keras.models import load_model

# Reload the three networks from disk; the in-memory models are rebound to the loaded copies.
lstm_model, gru_model, ffnn_model = (
    load_model(saved_path)
    for saved_path in (lstm_model_path, gru_model_path, ffnn_model_path)
)

In [None]:
# Actual test values per horizon: column i of ytest, as a NumPy array, stored
# under the matching day label.
_day_labels = ('Day 1', 'Day 7', 'Day 14', 'Day 21', 'Day 30')
ytest_dict = {
    label: ytest.iloc[:, position].values
    for position, label in enumerate(_day_labels)
}

def evaluate_deep_learning_model(model, xtest, ytest, name):
    """Predict with a five-output network and print overall and per-day metrics.

    Parameters
    ----------
    model : object exposing ``predict``
        Fitted network whose prediction has at least five columns.
    xtest : array-like
        Test inputs, in whatever shape the network expects.
    ytest : array-like with five columns
        True targets in Day 1, 7, 14, 21, 30 column order. The per-day metrics
        are computed from this argument, not from any notebook-level variable.
    name : str
        Label printed in front of the overall metrics.

    Returns
    -------
    dict
        Maps 'Day 1', 'Day 7', 'Day 14', 'Day 21' and 'Day 30' to the matching
        column of the prediction.
    """
    day_labels = ['Day 1', 'Day 7', 'Day 14', 'Day 21', 'Day 30']

    # Predict values
    ypred = model.predict(xtest)
    # Positional column access works for DataFrames and arrays alike once converted.
    ytrue = np.asarray(ytest)

    # Split predictions into individual day predictions
    ypred_dict = {label: ypred[:, col] for col, label in enumerate(day_labels)}

    # Metrics over all five targets together (MAPE is reported in percent).
    mse = mean_squared_error(ytest, ypred)
    mape = mean_absolute_percentage_error(ytest, ypred) * 100
    r2 = r2_score(ytest, ypred)

    print(f"{name} - MSE: {mse}, MAPE: {mape}, R2: {r2}")

    # Metrics per forecast horizon.
    for col, label in enumerate(day_labels):
        day_mse = mean_squared_error(ytrue[:, col], ypred_dict[label])
        day_mape = mean_absolute_percentage_error(ytrue[:, col], ypred_dict[label]) * 100
        day_r2 = r2_score(ytrue[:, col], ypred_dict[label])
        print(f"  {label} - MSE: {day_mse}, MAPE: {day_mape}, R2: {day_r2}")

    return ypred_dict

# Store predictions for each model
# The recurrent models receive the 3D inputs (xtest_nn); the feedforward
# network receives the flat scaled features (xtest_scaled).
pred_lstm = evaluate_deep_learning_model(lstm_model, xtest_nn, ytest, name="LSTM")
pred_gru = evaluate_deep_learning_model(gru_model, xtest_nn, ytest, name="GRU")
pred_ffnn = evaluate_deep_learning_model(ffnn_model, xtest_scaled, ytest, name="FFNN")


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
LSTM - MSE: 964.5618737143525, MAPE: 8.706481566862855, R2: 0.9497296629085717
  Day 1 - MSE: 105.78637241773971, MAPE: 4.041615855507613, R2: 0.9945133199288655
  Day 7 - MSE: 467.27648905838026, MAPE: 6.055296203086413, R2: 0.9757171588672918
  Day 14 - MSE: 875.9021807413652, MAPE: 8.410507880628343, R2: 0.9544295244455385
  Day 21 - MSE: 1343.1712789114893, MAPE: 10.903736639946118, R2: 0.9299346912783963
  Day 30 - MSE: 2030.6730474427995, MAPE: 14.121251255145795, R2: 0.8940536200227662
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step
GRU - MSE: 1033.6414877709128, MAPE: 9.859074280150057, R2: 0.9461354868038919
  Day 1 - MSE: 212.63710236315364, MAPE: 7.289609892141511, R2: 0.9889714362516124
  Day 7 - MSE: 518.1780616249883, MAPE: 7.521688297423143, R2: 0.9730719695008618
  Day 14 - MSE: 916.4069459886468, MAPE: 9.272730671546125, R2: 0.952322187056587
  Day 21 - MSE: 1379.26249674

In [None]:
# Debug check: the value printed is the type of ytest, so the label says "Type".
print("Type of ytest:", type(ytest))


Shape of ytest: <class 'pandas.core.frame.DataFrame'>


In [None]:
# All network predictions in one dictionary for plotting: model name -> the
# per-day prediction dict returned by the evaluation step.
pred_dict_all_models = {
    'LSTM': pred_lstm,
    'GRU': pred_gru,
    'FFNN': pred_ffnn,
}

# Single-model dictionaries with the same layout, one per network.
pred_dict_lstm, pred_dict_gru, pred_dict_ffnn = (
    {model_name: day_preds} for model_name, day_preds in pred_dict_all_models.items()
)

import plotly.graph_objs as go
import plotly.offline as pyo

# Create interactive plot for a single day (all models)
def plot_day_predictions(day, ytest, pred_dict, title):
    """Show an interactive plotly line chart of actual values and model predictions.

    Parameters
    ----------
    day : hashable
        Horizon identifier (e.g. 'Day 1'). It is used as the lookup key when an
        entry of ``pred_dict`` is a dict of per-day predictions.
    ytest : sequence
        Actual values, drawn in blue as the 'Actual' trace.
    pred_dict : dict
        Maps a model name to either a dict keyed by ``day`` that holds the
        prediction series, or the prediction series itself.
    title : str
        Figure title.
    """
    fig = go.Figure()

    # Add actual data
    fig.add_trace(go.Scatter(x=list(range(len(ytest))), y=ytest,
                             mode='lines', name='Actual', line=dict(color='blue')))

    # Add predictions from each model
    for model_name, pred in pred_dict.items():
        # Accept both layouts: a {day: series} mapping, or a plain series.
        series = pred[day] if isinstance(pred, dict) else pred
        fig.add_trace(go.Scatter(x=list(range(len(series))), y=series,
                                 mode='lines', name=model_name))

    # Layout configuration
    fig.update_layout(title=title, xaxis_title='Sample Index', yaxis_title='Stock Value',
                      legend_title="Models", hovermode="x unified")

    pyo.iplot(fig)

# One interactive figure per horizon, comparing all three networks with the actual values.
for day_label in ('Day 1', 'Day 7', 'Day 14', 'Day 21', 'Day 30'):
    plot_day_predictions(day_label, ytest_dict[day_label], pred_dict_all_models,
                         f"{day_label} Predictions (All Models)")


### **Results**

```
LR   - MSE: 1.41969     R2 Score: 0.99987

SVM  - MSE: 6503.25374  R2 Score: 0.44050

KNN  - MSE: 1564.34046  R2 Score: 0.86541

RF   - MSE: 2.71287     R2 Score: 0.99976

LSTM - MSE: 461.63977   R2 Score: 0.96028

GRU  - MSE: 150.09972   R2 Score: 0.98708

FFNN - MSE: 226.46510   R2 Score: 0.98051
```





---

The strong performance of **Linear Regression (LR)** and **Random Forest (RF)** can be explained by the nature of the dataset and the way these models function:

### **1. Why Linear Regression (LR) is Working Best:**
   - **Simple Relationships in Data**: If the stock data has a strong linear trend or relatively simple relationships between the features and target variable (e.g., price), linear regression tends to perform well. Stocks often exhibit trends that can be well-approximated by linear relationships in the short term.
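   - **High R² Score**: A very high R² score (near 1) indicates that the model is explaining almost all of the variance in the data, suggesting a strong linear relationship between features and target. Note, however, that the current closing price is itself one of the features and is highly correlated with the closing price a few days later, so an R² near 1 on price levels is expected even from a model that simply carries today's price forward; on its own it is not evidence of predictive skill.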

### **2. Why Random Forest (RF) is Performing Well:**
   - **Non-linear Relationships**: Random Forest is a powerful ensemble model that can handle both linear and non-linear relationships. It works by building many decision trees and averaging their predictions, which can capture complex patterns in the stock data that LR might miss.
   - **Robustness**: Random Forests are robust to overfitting and work well with noisy data, which is common in stock price datasets. They also handle large numbers of features effectively, making them versatile for different data distributions.
   - **Feature Importance**: RF can capture interactions between features that other models, like SVM or KNN, may miss, leading to better performance.

### **Why Other Models Like SVM and KNN Perform Poorly:**
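   - **SVM**: Support Vector Regression (used here with its default hyperparameters) might struggle with larger datasets like stock data, and its fit depends heavily on the kernel, `C`, and `epsilon` being suited to the scale of the target; without tuning it may fail to capture the relationship between the features and the target.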
   - **KNN**: K-Nearest Neighbors can perform poorly if the data isn't well-clustered or if there are many features that don't contribute meaningfully to the prediction. KNN is also sensitive to the distance metric, which might not be optimal for the given data.

### **Why Deep Learning Models (LSTM, GRU, FFNN) are Not the Best in This Case:**
   - **Data Complexity and Volume**: Deep learning models like LSTM, GRU, and Feedforward Neural Networks (FFNN) excel when there are complex temporal dependencies or large volumes of data. However, if the stock data does not have long-term temporal patterns, simpler models like LR and RF might outperform them.
   - **Overfitting**: Deep learning models can overfit the training data if not enough regularization is applied, or if the data volume is not large enough for the model to generalize well.

### **Why Compare with Feature Engineering Later:**
   - **Impact of Feature Engineering**: By introducing engineered features that capture trends or important patterns (e.g., moving averages, relative strength index), the deep learning models might improve significantly. Feature engineering can help both traditional and deep learning models understand the data better and potentially outperform simple models like LR.

### **Key Insight**:
- **Linear Regression** works best because it’s sufficient for this dataset (probably due to strong linear trends).
- **Random Forest** works well because of its ability to capture both linear and non-linear patterns without much tuning.
- **Deep Learning models** (LSTM, GRU) might require more data or more complex engineered features to outperform simpler models.

Moving forward, introducing **feature engineering** can help test how much it improves the performance of deep learning and other models.