In [1]:
# Importing packages
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Download the complete price history of the Straits Times Index (^STI) from
# Yahoo Finance and discard the Dividends and Stock Splits columns, leaving
# Open, High, Low, Close and Volume.
sti = yf.Ticker("^sti")
sti_df = sti.history(period="max").drop(columns=["Dividends", "Stock Splits"])
sti_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1987-12-28,824.400024,824.400024,824.400024,824.400024,0
1987-12-29,810.900024,810.900024,810.900024,810.900024,0
1987-12-30,823.200012,823.200012,823.200012,823.200012,0
1988-01-04,833.599976,833.599976,833.599976,833.599976,0
1988-01-05,879.299988,879.299988,879.299988,879.299988,0
...,...,...,...,...,...
2022-12-14,3280.439941,3293.469971,3274.959961,3278.570068,271342400
2022-12-15,3280.949951,3287.530029,3256.790039,3273.750000,252158300
2022-12-16,3253.669922,3272.120117,3237.790039,3240.810059,387112800
2022-12-19,3239.500000,3273.350098,3239.500000,3256.610107,172547000


In [3]:
# Chronological split on the Close price: rows dated before 2021 form the
# training set, rows from 2021 onwards form the test set.
# Selecting with a one-element column list keeps the result two-dimensional,
# i.e. an array of shape (n_rows, 1).
training_set = sti_df.loc[sti_df.index.year < 2021, ["Close"]].to_numpy()
testing_set = sti_df.loc[sti_df.index.year >= 2021, ["Close"]].to_numpy()

In [4]:
# Scale the prices with a min-max transform. The scaler is fitted on the
# training data only, which therefore maps onto [0, 1]; the test data reuses
# the same fitted minimum/maximum and is never used for fitting.
sc = MinMaxScaler()
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)
testing_set_scaled = sc.transform(testing_set)

In [5]:
# Frame the series as supervised samples with a 25-step look-back
# (25 rows, roughly one month of trading days): every input row holds the 25
# scaled closes that precede its target, and the target is the next close.
X_train_25 = np.array(
    [training_set_scaled[stop - 25:stop, 0] for stop in range(25, len(training_set_scaled))]
)
y_train_25 = np.array(
    [training_set_scaled[stop, 0] for stop in range(25, len(training_set_scaled))]
)

# Prepend the final 25 training rows to the test data so that the very first
# test target already has a complete 25-step look-back window.
testing_set_scaled_25 = np.vstack((training_set_scaled[-25:], testing_set_scaled))
X_test_25 = np.array(
    [testing_set_scaled_25[stop - 25:stop, 0] for stop in range(25, len(testing_set_scaled_25))]
)
y_test_25 = np.array(
    [testing_set_scaled_25[stop, 0] for stop in range(25, len(testing_set_scaled_25))]
)

In [6]:
# Frame the series as supervised samples with a 50-step look-back
# (50 rows, a little over two months of trading days): every input row holds
# the 50 scaled closes that precede its target, and the target is the next close.
X_train_50 = np.array(
    [training_set_scaled[stop - 50:stop, 0] for stop in range(50, len(training_set_scaled))]
)
y_train_50 = np.array(
    [training_set_scaled[stop, 0] for stop in range(50, len(training_set_scaled))]
)

# Prepend the final 50 training rows to the test data so that the very first
# test target already has a complete 50-step look-back window.
testing_set_scaled_50 = np.vstack((training_set_scaled[-50:], testing_set_scaled))
X_test_50 = np.array(
    [testing_set_scaled_50[stop - 50:stop, 0] for stop in range(50, len(testing_set_scaled_50))]
)
y_test_50 = np.array(
    [testing_set_scaled_50[stop, 0] for stop in range(50, len(testing_set_scaled_50))]
)

In [7]:
# Frame the series as supervised samples with a 100-step look-back
# (100 rows, roughly five months of trading days): every input row holds the
# 100 scaled closes that precede its target, and the target is the next close.
X_train_100 = np.array(
    [training_set_scaled[stop - 100:stop, 0] for stop in range(100, len(training_set_scaled))]
)
y_train_100 = np.array(
    [training_set_scaled[stop, 0] for stop in range(100, len(training_set_scaled))]
)

# Prepend the final 100 training rows to the test data so that the very first
# test target already has a complete 100-step look-back window.
testing_set_scaled_100 = np.vstack((training_set_scaled[-100:], testing_set_scaled))
X_test_100 = np.array(
    [testing_set_scaled_100[stop - 100:stop, 0] for stop in range(100, len(testing_set_scaled_100))]
)
y_test_100 = np.array(
    [testing_set_scaled_100[stop, 0] for stop in range(100, len(testing_set_scaled_100))]
)

In [8]:
# Frame the series as supervised samples with a 200-step look-back
# (200 rows, most of a year of trading days): every input row holds the 200
# scaled closes that precede its target, and the target is the next close.
X_train_200 = np.array(
    [training_set_scaled[stop - 200:stop, 0] for stop in range(200, len(training_set_scaled))]
)
y_train_200 = np.array(
    [training_set_scaled[stop, 0] for stop in range(200, len(training_set_scaled))]
)

# Prepend the final 200 training rows to the test data so that the very first
# test target already has a complete 200-step look-back window.
testing_set_scaled_200 = np.vstack((training_set_scaled[-200:], testing_set_scaled))
X_test_200 = np.array(
    [testing_set_scaled_200[stop - 200:stop, 0] for stop in range(200, len(testing_set_scaled_200))]
)
y_test_200 = np.array(
    [testing_set_scaled_200[stop, 0] for stop in range(200, len(testing_set_scaled_200))]
)

In [9]:
# Recurrent layers take 3-D input of shape
# (number of records, number of steps, number of features); there is a single
# feature here (the scaled close), hence the trailing 1.
# Each statement is safe to re-run: reshaping an already 3-D array with the
# same target shape leaves it unchanged.
X_train_25 = np.reshape(X_train_25, (X_train_25.shape[0], X_train_25.shape[1], 1))
X_train_50 = np.reshape(X_train_50, (X_train_50.shape[0], X_train_50.shape[1], 1))
X_train_100 = np.reshape(X_train_100, (X_train_100.shape[0], X_train_100.shape[1], 1))
X_train_200 = np.reshape(X_train_200, (X_train_200.shape[0], X_train_200.shape[1], 1))

# The test inputs are passed to the same network for prediction, so give them
# the same explicit 3-D layout instead of relying on the framework to adjust
# the rank of a 2-D array implicitly.
X_test_25 = np.reshape(X_test_25, (X_test_25.shape[0], X_test_25.shape[1], 1))
X_test_50 = np.reshape(X_test_50, (X_test_50.shape[0], X_test_50.shape[1], 1))
X_test_100 = np.reshape(X_test_100, (X_test_100.shape[0], X_test_100.shape[1], 1))
X_test_200 = np.reshape(X_test_200, (X_test_200.shape[0], X_test_200.shape[1], 1))

In [10]:
# Import libraries
from sklearn import metrics
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout # Stopping a certain amount of neurons to prevent overfitting

In [11]:
# Parallel lists, one entry per look-back length (25, 50, 100, 200 steps).
# Position i of every list belongs to the same look-back length, so the lists
# can be zipped together to iterate over complete train/test bundles.
x_train_list = [
    X_train_25,
    X_train_50,
    X_train_100,
    X_train_200,
]
y_train_list = [
    y_train_25,
    y_train_50,
    y_train_100,
    y_train_200,
]
x_test_list = [
    X_test_25,
    X_test_50,
    X_test_100,
    X_test_200,
]
y_test_list = [
    y_test_25,
    y_test_50,
    y_test_100,
    y_test_200,
]

# Trying out different parameters to find the best model
# NOTE: It takes more than 10 hours to run

In [12]:
# Train one stacked-LSTM regressor per look-back length and score each one on
# the held-out test data.
#
# Loop variables: j = X_train, k = y_train, l = X_test, m = y_test for one
# look-back length. The short names are kept because they (like
# LSTM_regressor, history, prediction and mse) remain in the notebook
# namespace after the loop, holding the values of the last iteration.
mse_by_steps = {}  # look-back length -> test MSE, kept so the runs can be compared afterwards
for j, k, l, m in zip(x_train_list, y_train_list, x_test_list, y_test_list):
    n_steps = j.shape[1]  # number of time steps in every input window

    # A fresh model is built for every look-back length so that no weights are
    # shared between runs.
    LSTM_regressor = Sequential()

    # First LSTM layer + dropout.
    #   units:            number of neurons in the hidden layer
    #   return_sequences: True -> emit one output per time step, which the next
    #                     LSTM layer needs as its input sequence
    #   input_shape:      (time steps, features); only given on the first layer
    LSTM_regressor.add(LSTM(units=50, return_sequences=True, input_shape=(n_steps, 1)))
    LSTM_regressor.add(Dropout(0.2))

    # Second LSTM layer + dropout; still returns the full sequence for the layer below.
    LSTM_regressor.add(LSTM(units=50, return_sequences=True))
    LSTM_regressor.add(Dropout(0.2))

    # Third LSTM layer + dropout; return_sequences is left at its default, so
    # only the output of the final time step is passed on to the Dense layer.
    LSTM_regressor.add(LSTM(units=50))
    LSTM_regressor.add(Dropout(0.2))

    # Output layer: a single value, the predicted (scaled) next close.
    LSTM_regressor.add(Dense(units=1))

    # Compile the model
    LSTM_regressor.compile(optimizer="adam", loss="mean_squared_error")

    # Fit on the training windows, holding out 20% of them for validation.
    history = LSTM_regressor.fit(j, k, validation_split=0.2, epochs=100, batch_size=32)

    prediction = LSTM_regressor.predict(l)

    # Test error on the min-max scaled values (not in index points).
    # Arguments follow the documented (y_true, y_pred) order.
    mse = metrics.mean_squared_error(m, prediction)
    mse_by_steps[n_steps] = mse

    # Report the look-back length rather than dumping the whole training array.
    print(f"For {n_steps} time steps, the test MSE is {mse}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78