In [1]:
# Install necesary libraries
!pip install keras==2.14.0
!pip install tensorflow
!pip install scikeras

Collecting keras<2.16,>=2.15.0 (from tensorflow)
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: keras
  Attempting uninstall: keras
    Found existing installation: keras 2.14.0
    Uninstalling keras-2.14.0:
      Successfully uninstalled keras-2.14.0
Successfully installed keras-2.15.0


In [2]:
# Import libraries
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from keras.layers import Dropout

from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize

In [3]:
# Import the data
dataframe = pd.read_csv('minute_week_data.csv')

# drop first column (to fit normalisation code)
dataframe = dataframe.drop(dataframe.columns[0], axis=1)

display(dataframe)

Unnamed: 0,CSCO,ENPH,MCHP,ORCL,INTC,TXN,FTNT,WDC,MSFT,HPQ,AMAT,AMD,NXPI,NVDA,HPE,ON,QCOM,AAPL,MU,CRM
0,48.10,267.50,70.70,82.02,26.76,166.00,49.92,31.9,242.27,26.97,98.43,65.620,160.00,148.44,16.05,63.40,111.08,130.89,50.28,134.04
1,48.10,268.50,70.70,82.11,26.74,166.00,49.92,31.9,242.42,27.15,98.80,65.630,160.00,148.93,16.05,63.40,110.92,131.10,50.42,133.54
2,48.20,268.50,70.70,82.11,26.79,166.00,49.92,31.9,242.80,27.15,98.80,65.700,160.00,148.85,16.05,63.40,111.10,131.19,50.78,134.24
3,48.20,268.50,70.70,82.11,26.73,166.00,49.92,31.9,242.68,27.15,98.80,65.630,160.00,148.73,16.05,63.40,111.10,131.28,50.88,134.24
4,48.20,267.54,70.70,82.11,26.79,166.00,49.92,31.9,242.80,27.15,98.86,65.880,160.00,148.95,16.05,63.40,111.10,131.40,50.88,134.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5755,48.73,230.10,75.67,86.55,29.50,179.12,48.80,38.2,229.20,28.81,107.28,68.175,166.32,159.85,17.05,64.01,116.91,130.91,57.29,147.98
5756,48.83,230.10,75.67,86.55,29.48,178.53,48.80,38.2,229.18,28.81,107.39,68.180,166.32,159.85,17.05,64.01,116.92,130.92,57.29,147.98
5757,48.83,230.10,75.67,86.55,29.51,178.53,48.80,38.2,229.20,28.81,107.39,68.200,166.32,159.85,17.05,64.01,116.81,130.95,57.29,147.98
5758,48.83,230.10,75.67,86.55,29.51,178.53,48.80,38.2,229.15,28.81,107.28,68.180,166.32,159.87,17.05,64.01,116.80,130.98,57.25,147.98


In [4]:
dataframe.shape

(5760, 20)

In [5]:
# Divide the training set and the test set
# (first 80% of time interval as training set and last 20% as test set)
train_ratio = 0.8
training_set_size = int(dataframe.shape[0] * train_ratio)
train_data = dataframe[ :training_set_size]
test_data = dataframe[training_set_size :]

# Normalisation process
scaler = MinMaxScaler(feature_range=(0, 1))
normalize_train_data = scaler.fit_transform(train_data)
normalize_test_data = scaler.fit_transform(test_data)

In [6]:
print(normalize_test_data.shape)
print(normalize_train_data.shape)
type(normalize_train_data)

(1152, 20)
(4608, 20)


numpy.ndarray

In [7]:
# Function to create x train and y train window
def create_dataset(data,n_predictions,nth):
    train_X,train_y = [],[]
    for i in range(data.shape[0]-n_predictions-nth-1):
        a = data[i:(i+n_predictions),:]
        train_X.append(a)
        b = data[i+n_predictions+nth,:]
        train_y.append(b)
    train_X = np.array(train_X,dtype='float64')
    train_y = np.array(train_y,dtype='float64')

    return train_X, train_y

In [8]:
# Generate train_X,train_y,test_X,test_y
# Using past 120 minutes to predict 10th minute in the future
train_X,train_y = create_dataset(normalize_train_data,120,10)
test_X,test_y = create_dataset(normalize_test_data,120,10)

In [9]:
print(train_X.shape)
print(train_y.shape)
print(test_X.shape)
print(test_y.shape)

(4477, 120, 20)
(4477, 20)
(1021, 120, 20)
(1021, 20)


In [10]:
# Construct LSTM model with temporary hyperparameters
def LSTM_model(hidden_neurons=40,dropout=0.2,activation='tanh',optimizer='adam'):
  X = train_X
  y = train_y
  LSTM_model = Sequential()
  # Hidden layer 1: LSTM
  LSTM_model.add(LSTM(hidden_neurons, input_shape=(X.shape[1], X.shape[2]), activation=activation))
  LSTM_model.add(Dropout(dropout))
  # Hidden layer 2: Dense
  # Run grid search once with this layer, and once without (comment out)
  LSTM_model.add(Dense(hidden_neurons, activation=activation))

  # Output layer: Dense
  LSTM_model.add(Dense(y.shape[1]))
  # Configure the model
  LSTM_model.compile(optimizer ,loss='mean_squared_error')
  return LSTM_model


In [11]:
# Define our alternative hyperparameters dictionary
parameters_grid = { 'model__hidden_neurons': [40,60,80],
           'model__activation': ['relu', 'tanh', 'sigmoid'],
           'model__optimizer':['adam', 'rmsprop','sgd']
}

In [12]:
 # Grid search model
model = KerasRegressor(model=LSTM_model, epochs=10, batch_size=32, verbose=1)
grid = GridSearchCV(estimator=model, param_grid = parameters_grid, scoring='neg_mean_squared_error',cv=3, error_score='raise')

grid_result = grid.fit(train_X, train_y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [14]:
# Save Grid Search results as a CSV
results_df = pd.DataFrame(grid.cv_results_)
display(results_df)
results_df.to_csv('results.csv')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_model__activation,param_model__hidden_neurons,param_model__optimizer,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,69.886356,20.316975,1.642721,0.120553,relu,40,adam,"{'model__activation': 'relu', 'model__hidden_n...",-0.036254,-0.008002,-0.034469,-0.026242,0.012918,11
1,41.537875,0.612963,1.27743,0.272445,relu,40,rmsprop,"{'model__activation': 'relu', 'model__hidden_n...",-0.031885,-0.007808,-0.04005,-0.026581,0.013687,12
2,69.895781,19.85,1.403749,0.169228,relu,40,sgd,"{'model__activation': 'relu', 'model__hidden_n...",-0.048709,-0.019831,-0.087225,-0.051922,0.027607,22
3,72.781859,15.530264,1.197544,0.214477,relu,60,adam,"{'model__activation': 'relu', 'model__hidden_n...",-0.028899,-0.006285,-0.043108,-0.026097,0.015163,9
4,83.697792,0.235549,1.520185,0.017619,relu,60,rmsprop,"{'model__activation': 'relu', 'model__hidden_n...",-0.034545,-0.005403,-0.038501,-0.02615,0.014759,10
5,63.54361,14.157027,1.49877,0.022352,relu,60,sgd,"{'model__activation': 'relu', 'model__hidden_n...",-0.041818,-0.017888,-0.040028,-0.033244,0.010883,16
6,77.719075,8.638578,1.512225,0.012629,relu,80,adam,"{'model__activation': 'relu', 'model__hidden_n...",-0.021438,-0.005935,-0.032675,-0.020016,0.010963,7
7,68.293709,10.744876,1.895588,0.699447,relu,80,rmsprop,"{'model__activation': 'relu', 'model__hidden_n...",-0.033608,-0.005644,-0.036065,-0.025105,0.013798,8
8,63.789164,1.156553,1.459324,0.124891,relu,80,sgd,"{'model__activation': 'relu', 'model__hidden_n...",-0.04449,-0.014215,-0.096242,-0.051649,0.033868,21
9,72.528837,17.56848,1.463104,0.275306,tanh,40,adam,"{'model__activation': 'tanh', 'model__hidden_n...",-0.015549,-0.00459,-0.03726,-0.019133,0.013576,6


In [None]:
# Run the model on the full data with the obtained hyperparameters

# Import the data
dataframe = pd.read_csv('minute_full_data.csv')

# Drop first (datetime) column
dataframe = dataframe.drop(dataframe.columns[0], axis=1)

display(dataframe)

In [None]:
# Divide the training set and the test set
# (first 80% of time interval as training set and last 20% as test set)
train_ratio = 0.8
training_set_size = int(dataframe.shape[0] * train_ratio)
train_data = dataframe[ :training_set_size]
test_data = dataframe[training_set_size :]


# Normalisation process
scaler = MinMaxScaler(feature_range=(0, 1))
normalize_train_data = scaler.fit_transform(train_data)
normalize_test_data = scaler.fit_transform(test_data)

In [None]:
# Function to create x train and y train window
def create_dataset(data,n_predictions,nth):
    train_X,train_y = [],[]
    for i in range(data.shape[0]-n_predictions-nth-1):
        a = data[i:(i+n_predictions),:]
        train_X.append(a)
        b = data[i+n_predictions+nth,:]
        train_y.append(b)
    train_X = np.array(train_X,dtype='float64')
    train_y = np.array(train_y,dtype='float64')

    return train_X, train_y

In [None]:
# Generate train_X,train_y,test_X,test_y
# Using past 120 minutes to predict 10th minute in the future
train_X,train_y = create_dataset(normalize_train_data,120,10)
test_X,test_y = create_dataset(normalize_test_data,120,10)

In [None]:
# Train LSTM model
def train_model(X,y,hidden_neurons,dropout,activation,optimizer):
  LSTM_model = Sequential()
  # Hidden layer 1: LSTM
  LSTM_model.add(LSTM(hidden_neurons, input_shape=(X.shape[1], X.shape[2]), activation=activation))
  LSTM_model.add(Dropout(dropout))
  # Hidden layer 2: Dense
  LSTM_model.add(Dense(hidden_neurons, activation=activation))

  # Output layer: Dense
  LSTM_model.add(Dense(y.shape[1]))
  # Configure the model
  LSTM_model.compile(optimizer ,loss='mean_squared_error')
  return LSTM_model

# Enter Hyperparameters from Grid Search
our_model = train_model(X=train_X,y=train_y,hidden_neurons=60,dropout=0.2,activation='tanh',optimizer='adam')
history = our_model.fit(train_X, train_y, epochs=10, batch_size=32, validation_split=(0.1),verbose=1)

In [None]:
# Plot history loss graph
import matplotlib.pyplot as plt
plt.plot(history.history['loss'])
plt.title('Model Loss Decline')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.show()

In [None]:
# Predict y_hat values using model
y_hat = our_model.predict(test_X)
print(y_hat.shape)
# Inverse normalisation transformation to get orignial scale
original_y_hat = scaler.inverse_transform(y_hat)
original_y_test = scaler.inverse_transform(test_y)

In [None]:
# Choose stock to display and visualise predicted and real prices
stock=1
actual_prices = original_y_test[:,stock]
predicted_prices = original_y_hat[:,stock]

plt.plot(actual_prices,label='Actual Price')
plt.plot(predicted_prices,label='Predicted Price')
plt.legend()
plt.title('Comparison of Predicted and Actual Stock Prices')
plt.xlabel('Time (Minutes)')
plt.ylabel('Price (USD)')
plt.show()

In [None]:
# Overall MSE measure
test_MSE = mean_squared_error(original_y_test, original_y_hat)
print(test_MSE)

In [None]:
# Predictive reliabillity assessment

# Initialise correct prediction count
correct_predictions = 0

# Iterate over prices to compare predicted vs actual
for i in range(len(predicted_prices)-1):
    # Is the predicted price in the next period higher?
    predicted_increase = predicted_prices[i+1] > predicted_prices[i]
    # Is the actual price in the next period higher?
    actual_increase = actual_prices[i+1] > actual_prices[i]
    if predicted_increase == actual_increase:
        # Count as correct prediction if boolean is the same
        correct_predictions += 1

# Calculate accuracy
accuracy = (correct_predictions / (len(predicted_prices)-1)) * 100

print(accuracy)