## Predictive model - machine learning

In [4]:
# Import org data
import pandas as pd
from pandas_datareader import data as web
import datetime
start = datetime.datetime(2016,1,1)
end = datetime.date.today()
org = web.DataReader("ORG.AX", 'yahoo', start, end)
org

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-03,4.930,4.710,4.71,4.900,5524711.0,4.150702
2016-01-04,4.880,4.730,4.87,4.830,5718988.0,4.091407
2016-01-05,4.790,4.580,4.79,4.710,6846296.0,3.989757
2016-01-06,4.645,4.290,4.56,4.350,10624829.0,3.684807
2016-01-07,4.460,4.260,4.26,4.440,10001706.0,3.761045
...,...,...,...,...,...,...
2022-08-12,6.120,5.965,6.09,6.000,4434183.0,6.000000
2022-08-15,6.090,6.000,6.03,6.060,3060034.0,6.060000
2022-08-16,6.125,6.020,6.10,6.040,3333978.0,6.040000
2022-08-17,6.090,5.980,6.06,6.070,3201553.0,6.070000


1.Predicting Stock price by time-series data

2. Predicting stock price with Moving Average (MA)

3. LSTMs (Long Short-term Memory) for the time-series data

3.1 Data set up for neural network

In [6]:
# Create new column called 'label' to apply shifted close price 10 days before the current day 
# set predict days as 10 days
pre_days = 10
org['label'] = org['Close'].shift(-pre_days)
org.head(20)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-03,4.93,4.71,4.71,4.9,5524711.0,4.150702,3.82
2016-01-04,4.88,4.73,4.87,4.83,5718988.0,4.091407,3.82
2016-01-05,4.79,4.58,4.79,4.71,6846296.0,3.989757,3.46
2016-01-06,4.645,4.29,4.56,4.35,10624829.0,3.684807,3.69
2016-01-07,4.46,4.26,4.26,4.44,10001706.0,3.761045,3.84
2016-01-10,4.44,4.1,4.33,4.25,9808906.0,3.600099,4.05
2016-01-11,4.23,4.025,4.23,4.05,10981544.0,3.430683,3.88
2016-01-12,4.2,4.01,4.05,4.18,10477083.0,3.540803,3.87
2016-01-13,4.09,3.97,4.0,4.05,9203850.0,3.430683,4.1
2016-01-14,4.26,4.0,4.16,4.05,7640144.0,3.430683,4.21


In [12]:
# To standardize the data
# StandardScaler removes the mean and scales each feature/variable to unit variance
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
sca_X = scaler.fit_transform(org.iloc[:,:-1])

In [21]:
# Set memory days for LSTM function
mem_his_days = 5

# Create a deque to record values as per set up memory days
from collections import deque
deq = deque(maxlen=mem_his_days)

X = []
for i in sca_X:
    deq.append(list(i))
    if len(deq) == mem_his_days:
        X.append(list(deq))
# The length of X so far is 1674, but need to remove 10 days from end because pre_days = 10
X = X[:-pre_days]     
print(len(X))

1664


In [23]:
# Set up y values, remove memory data and pre_day data. It should have same length as X
y = org['label'][mem_his_days-1:-pre_days]
print(len(y))

1664


In [29]:
import numpy as np
# change vars to array
X = np.array(X)
Y = np.array(y)

In [None]:
3.2 NN built up

In [39]:
# Split arrays into random train and test subsets
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1)

In [37]:
# Tensorflow was installed for set up LSTM model learning
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
model = Sequential()
# set up 10 nerual units, relevant inputs and other features, create 3 layers of NNs 
model.add(LSTM(10, input_shape=X.shape[1:],activation='relu',return_sequences=True))
model.add(Dropout(0.1))

model.add(LSTM(10,activation='relu',return_sequences=True))
model.add(Dropout(0.1))

model.add(LSTM(10,activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(10,activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(1))
# set loss as meansquarederror, metrics as meanabsolutepercentageerror for now...
model.compile(optimizer = 'adam', loss='mse',metrics=['mape'])


In [40]:
# Train model 1
model.fit(X_train,y_train,batch_size=32,epochs=50,validation_data=(X_test,y_test))
# batch_size=32,epochs=50   -----> loss: 1.3348 - mape: 13.4132 - val_loss: 0.2849 - val_mape: 6.6363

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2be7da853d0>

In [41]:
# Train model 2
model.fit(X_train,y_train,batch_size=32,epochs=10,validation_data=(X_test,y_test))
# batch_size=32,epochs=10   -----> loss: 1.1414 - mape: 12.6336 - val_loss: 0.2962 - val_mape: 6.7104, not showing big difference by changing epochs values

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2be093afd60>