In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import normaltest
from statsmodels.tsa.stattools import acf,pacf
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf

# Importing Data Sets

In [2]:
df1 = pd.read_csv("./data/SPX_Real.csv", index_col=False)
df1.columns = ['Timestamp', 'Close']
df1.set_index("Timestamp", inplace=True)
df1.index = pd.to_datetime(df1.index, format='%d-%m-%Y')
df2 = pd.read_csv("./data/AAPL_Real.csv")
df2.columns = ['Timestamp', 'Close']
df2.set_index("Timestamp", inplace=True)
df2.index = pd.to_datetime(df2.index, format='%d-%m-%Y')
df3 = pd.read_csv("./data/TWSE_Real.csv")
df3.columns = ['Timestamp', 'Close']
df3.set_index("Timestamp", inplace=True)
df3.index = pd.to_datetime(df3.index, format='%m/%d/%Y')

In [3]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
def create_dataset(series, window_size, pred_length):
    X, y = [], []
    for i in range(len(series) - window_size - pred_length + 1):
        X.append(series[i : i + window_size])
        y.append(series[i + window_size : i + window_size + pred_length])
    return np.array(X), np.array(y)



In [None]:
def get_data_for_period(df, window_size = 20, pred_length = 1):

    
    val_start = int(len(df) * 0.8)
    test_start = val_start + int(len(df) * 0.1)
    train_data = df.iloc[:val_start]
    val_data   = df.iloc[val_start:test_start]
    test_data  = df.iloc[test_start:]

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_values = train_data['Close'].values.reshape(-1, 1)
    scaler.fit(train_values)
    
    train_scaled = scaler.transform(train_data['Close'].values.reshape(-1, 1)).flatten()
    val_scaled   = scaler.transform(val_data['Close'].values.reshape(-1, 1)).flatten()
    test_scaled  = scaler.transform(test_data['Close'].values.reshape(-1, 1)).flatten()
    
    X_train, y_train = create_dataset(train_scaled, window_size, pred_length)
    X_val, y_val     = create_dataset(val_scaled, window_size, pred_length)
    X_test, y_test   = create_dataset(test_scaled, window_size, pred_length)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_val   = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
    X_test  = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    
    return (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler

In [6]:
dfs = {'SPX': df1, 'AAPL': df2, 'TWSE': df3}

In [7]:
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense, Bidirectional
import keras_tuner as kt

def build_model(hp):
    model = Sequential()
    
    num_layers = hp.Int('num_layers', min_value=1, max_value=4, step=1)
    
    
    for i in range(num_layers):
        
        if i == 0:
            model.add(Bidirectional(LSTM(units=hp.Int(f'bilstm_units_{i}', min_value=30, max_value=100, step=10),
                                         activation='relu',
                                         return_sequences=(num_layers > 1)),  
                                         input_shape=(X_train.shape[1], 1)))
        else:
            
            return_seq = (i < num_layers - 1)
            model.add(Bidirectional(LSTM(units=hp.Int(f'bilstm_units_{i}', min_value=30, max_value=100, step=10),
                                         activation='relu',
                                         return_sequences=return_seq)))
        
        dropout_rate = hp.Float(f'dropout_rate_{i}', min_value=0.1, max_value=0.5, step=0.1)
        model.add(Dropout(dropout_rate))
    
    
    model.add(Dense(units=pred_length))
    
    
    lr = hp.Float('lr', min_value=1e-4, max_value=1e-2, sampling='LOG')
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss='mse', metrics=['mape'])
    return model

def search(X_train, y_train, X_val, y_val, name, pred_length):
    
    tuner = kt.RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=15,
        executions_per_trial=1,
        directory='final_Bilstm_tuner_dir',
        project_name=f'stock_forecasting_{name}_pred_length_{pred_length}'
    )

    
    tuner.search(X_train, y_train,
                epochs=50,
                batch_size=32,
                validation_data=(X_val, y_val),
                callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)])
                
    
    return tuner

In [9]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score, mean_absolute_error

In [14]:
# Window Size was 115
window_size = 55
res = []
for name in dfs:
    for pred_length in [1, 5, 10]:
        (X_train, y_train), (X_val, y_val), (X_test, y_test), scaler = get_data_for_period(dfs[name], window_size, pred_length)
        tuner = search(X_train, y_train, X_val, y_val, name, pred_length)

        real_y = scaler.inverse_transform(y_test.reshape(-1, 1))
        best_model = tuner.get_best_models(num_models=1)[0]
        pred_y = best_model.predict(X_test).flatten().reshape(-1, 1)
        adjusted_pred = scaler.inverse_transform(pred_y)

        test_dates = dfs[name].index[-len(y_test):]

        mape = mean_absolute_percentage_error(real_y, adjusted_pred)
        mse = mean_squared_error(real_y, adjusted_pred)
        r2 = r2_score(real_y, adjusted_pred)
        mae = mean_absolute_error(real_y, adjusted_pred)
        print(f"{name} MAPE: {mape:.7f}%")
        print(f"{name} MSE: {mse:.5f}")
        print(f"{name} R2: {r2:.5f}")
        print(f"{name} MAE: {mae:.5f}")

        res.append({
            "DataFrame": name,
            "Prediction_Size": pred_length,
            "MAE": mae,
            "MSE": mse,
            "MAPE": mape,
            "r2": r2
        })
    
    # Plotting
#     plt.figure(figsize=(12,8))
#     plt.plot(test_dates, real_y, label='Value')
#     plt.plot(test_dates, adjusted_pred, label='Prediction')
#     plt.title(f"Return Prediction for {name}")
#     plt.xlabel('Date')
#     plt.ylabel('Return ($)')
#     handles, labels = plt.gca().get_legend_handles_labels()
#     by_label = dict(zip(labels, handles))
#     plt.legend(by_label.values(), by_label.keys())
# #    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
#     plt.show()
#     plt.close()

results_df = pd.DataFrame(res)
results_df.to_csv("BiLSTM_errors.csv", index=False)
    


Trial 19 Complete [00h 04m 29s]
val_loss: 0.005948642734438181

Best val_loss So Far: 0.000554748170543462
Total elapsed time: 13h 06m 26s


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
SPX MAPE: 0.0559972%
SPX MSE: 138623.66658
SPX R2: 0.67835
SPX MAE: 279.52907
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_SPX_pred_length_5/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
SPX MAPE: 0.0607763%
SPX MSE: 116841.65295
SPX R2: 0.72680
SPX MAE: 288.49762
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_SPX_pred_length_10/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
SPX MAPE: 58.0411026%
SPX MSE: 1275903986451.40479
SPX R2: -3006606.14683
SPX MAE: 340157.53777
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_AAPL_pred_length_1/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step
AAPL MAPE: 0.0293740%
AAPL MSE: 85.71351
AAPL R2: 0.90098
AAPL MAE: 5.70277
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_AAPL_pred_length_5/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
AAPL MAPE: 0.5161816%
AAPL MSE: 64585.66817
AAPL R2: -73.94128
AAPL MAE: 114.38087
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_AAPL_pred_length_10/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
AAPL MAPE: 0.0506202%
AAPL MSE: 129.45896
AAPL R2: 0.84866
AAPL MAE: 8.90766
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_TWSE_pred_length_1/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
TWSE MAPE: 0.0099916%
TWSE MSE: 56915.37834
TWSE R2: 0.99291
TWSE MAE: 177.93915
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_TWSE_pred_length_5/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
TWSE MAPE: 0.0205818%
TWSE MSE: 284561.86748
TWSE R2: 0.96438
TWSE MAE: 380.66218
Reloading Tuner from final_Bilstm_tuner_dir/stock_forecasting_TWSE_pred_length_10/tuner0.json


  super().__init__(**kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step
TWSE MAPE: 0.0495397%
TWSE MSE: 1089934.20356
TWSE R2: 0.86301
TWSE MAE: 905.28916
