# Ensemble Time Series

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import timeit

import warnings
warnings.filterwarnings("ignore")

# List of Stocks and ETFs
Provided by Thomas Choi.

In [4]:
# Load the stock and ETF listings from CSV files addressed relative to the
# notebook's working directory.
stock_list = pd.read_csv("../stocks_and_etfs/stock_list.csv")
etf_list = pd.read_csv("../stocks_and_etfs/etf_list.csv")

In [5]:
import random
# Ticker analysed in the rest of the notebook. It is pinned to one symbol;
# the commented-out line below would instead draw a random entry from the
# first column of stock_list.
#stock_symbol = random.choice(stock_list.iloc[:,0])
stock_symbol = "FCX"
print(stock_symbol)

FCX


## MySQL connection
Query a single stock from the SQL database to keep the query time short.

In [6]:
import os
import sys
import getpass
import mysql.connector

# Connection settings are taken from the environment so that no secret is
# stored in the notebook. Host, port and user fall back to the project
# defaults; the password is never hardcoded and is prompted for if the
# MYSQL_PASSWORD variable is not set.
HOST = os.environ.get("MYSQL_HOST", "143.244.188.157")
PORT = os.environ.get("MYSQL_PORT", "3306")
USER = os.environ.get("MYSQL_USER", "patrick-finProj")
PASSWORD = os.environ.get("MYSQL_PASSWORD") or getpass.getpass(f"MySQL password for {USER}: ")

# The symbol is bound by the driver (%s placeholder) instead of being
# interpolated into the SQL text.
query = "SELECT Date, Exchange, Close, Open, High, Low, Volume from histdailyprice3 WHERE Symbol=%s;"

# Stays None when connect() itself fails, so the cleanup below cannot raise
# a NameError that would hide the real connection error.
conn = None
try:
    conn = mysql.connector.connect(
        host=HOST,
        port=PORT,
        user=USER,
        password=PASSWORD,
        database="GlobalMarketData"
    )
    histdailyprice3 = pd.read_sql(query, conn, params=(stock_symbol,))
except Exception as e:
    # Best effort, as before: report the problem and let the notebook continue.
    print(str(e))
finally:
    # Close the connection on both the success and the failure path.
    if conn is not None:
        conn.close()

# Data

In [7]:
# Work on a copy so the frame returned by the SQL query stays untouched.
df = histdailyprice3.copy()
df.head()

Unnamed: 0,Date,Exchange,Close,Open,High,Low,Volume
0,1998-01-01,NYSE,7.875,7.875,7.875,7.875,0
1,1998-01-02,NYSE,7.875,8.0,7.875,7.97,279300
2,1998-01-05,NYSE,7.97,8.0,7.845,7.875,455300
3,1998-01-06,NYSE,7.845,7.875,7.75,7.75,476200
4,1998-01-07,NYSE,7.72,7.875,7.685,7.845,464100


In [8]:
# Exchange recorded on the first row of the price history
df.Exchange.iloc[0]

'NYSE'

# Normalize

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Scale the closing prices with a MinMaxScaler targeting the (0, 1) range.
# The scaler is fitted only on the leading 65% of the series (the same
# fraction the train/test split uses) so that minima and maxima from the
# held-out period do not leak into training. Later rows are transformed with
# those training statistics and may therefore fall outside [0, 1].
close_values = df["Close"].values.reshape(-1, 1)
n_fit_rows = int(len(close_values) * 0.65)

scaler = MinMaxScaler(feature_range = (0,1))
scaler.fit(close_values[:n_fit_rows])
scaled_data = scaler.transform(close_values)

# Train-Test Split

In [10]:
def create_dataset(dataset, time_step=1):
    """Turn a 2-D series into sliding-window samples and next-step targets.

    Parameters
    ----------
    dataset : 2-D array whose first column holds the series values.
    time_step : number of consecutive values that make up one input window.

    Returns
    -------
    (x, y) : two numpy arrays. Row ``k`` of ``x`` is
        ``dataset[k:k + time_step, 0]`` and ``y[k]`` is the value right after
        that window, ``dataset[k + time_step, 0]``.

    Exactly ``len(dataset) - time_step - 1`` windows are produced (none when
    that number is not positive).
    """
    window_starts = range(len(dataset) - time_step - 1)
    windows = [dataset[start:start + time_step, 0] for start in window_starts]
    targets = [dataset[start + time_step, 0] for start in window_starts]
    return np.array(windows), np.array(targets)

In [11]:
# Chronological split: the first 65% of the scaled series is used for
# training and the remaining rows are held out for testing.
training_size = int(len(scaled_data)*0.65)
test_size = len(scaled_data) - training_size
train_data = scaled_data[:training_size, :]
test_data = scaled_data[training_size:, :1]

In [12]:
# Each sample uses the previous 100 scaled closing values to predict the next one.
time_step = 100

# Windows are built separately for each split, so no window spans the
# train/test boundary.
X_train, y_train = create_dataset(train_data, time_step)
X_test, ytest = create_dataset(test_data, time_step)

In [13]:
# Append a trailing axis of length 1: (samples, time steps) becomes
# (samples, time steps, 1), the layout the first LSTM layer declares through
# input_shape=(100,1).
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

# Build LSTM Model

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,LSTM

def build_model(hp):
    """Build and compile a stacked-LSTM regressor for one tuner trial.

    Parameters
    ----------
    hp : hyperparameter container handed in by the tuner. Its ``Choice`` and
        ``Int`` methods are used to sample the width of the first LSTM layer,
        the number and widths of the intermediate LSTM layers, the width of
        the last LSTM layer and the dropout rate.

    Returns
    -------
    A ``Sequential`` model ending in a single-unit ``Dense`` layer, compiled
    with mean-squared-error loss and the Adam optimizer.
    """
    model = Sequential()
    # The input shape is fixed to windows of 100 time steps with one feature.
    model.add(LSTM(units = hp.Choice('layer1_units', [10,20,30,40,50,60,70,80,90,100]),return_sequences=True,input_shape=(100,1)))

    # Intermediate layers keep return_sequences=True so that each one passes
    # a full sequence on to the next LSTM layer.
    for i in range(hp.Int('num_layers', 2, 15)):
        model.add(LSTM(units =  hp.Int('units' + str(i), min_value=10, max_value=150, step=10), return_sequences=True))

    # The last LSTM layer is created without return_sequences and feeds the
    # dropout and dense layers.
    model.add(LSTM(units = hp.Choice('last_lstm_units', [50, 100, 150])))
    model.add(Dropout(rate = hp.Choice('rate', [0.3, 0.4, 0.5, 0.6, 0.7])))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',optimizer='adam' )
    return model

## Tuning

In [None]:
import keras_tuner
from tensorflow.keras import layers
# Random search over the space defined in build_model: 5 trials, each
# executed 3 times, ranked by validation loss. Results are written under
# the 'tuner' directory in a project named after the stock symbol.
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials= 5,
    executions_per_trial=3,
    directory='tuner', project_name = f'{stock_symbol}')

tuner.search_space_summary()

# NOTE(review): the test split doubles as validation data here, so the
# selected hyperparameters have already seen the held-out period.
tuner.search(X_train, y_train,
             epochs= 5,
             validation_data=(X_test, ytest))

2022-02-26 15:23:01.314669: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Search space summary
Default search space size: 6
layer1_units (Choice)
{'default': 10, 'conditions': [], 'values': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], 'ordered': True}
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 15, 'step': 1, 'sampling': None}
units0 (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 150, 'step': 10, 'sampling': None}
units1 (Int)
{'default': None, 'conditions': [], 'min_value': 10, 'max_value': 150, 'step': 10, 'sampling': None}
last_lstm_units (Choice)
{'default': 50, 'conditions': [], 'values': [50, 100, 150], 'ordered': True}
rate (Choice)
{'default': 0.3, 'conditions': [], 'values': [0.3, 0.4, 0.5, 0.6, 0.7], 'ordered': True}

Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
layer1_units      |80                |?                 
num_layers        |14                |?                 
units0            |60                |?                 
units1            |60

In [None]:
# Report the outcome of the hyperparameter search.
tuner.results_summary()

# Callbacks
**EarlyStopping:** Stops training when the monitored loss stops improving, which helps prevent overfitting. We allow at most 10 epochs without improvement before training is stopped. <br>
**ReduceLROnPlateau:** Reduces the learning rate. If the loss has not improved for 3 epochs, the learning rate is lowered to help training converge. <br>
**ModelCheckpoint:** Saves the model weights only when the loss has improved.

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Training callbacks, collected in the order they are handed to model.fit.
callbacks = []

# Stop once 10 epochs in a row bring no improvement.
callbacks.append(EarlyStopping(patience=10, verbose=1))

# After 3 epochs without improvement, multiply the learning rate by 0.1,
# never going below 1e-5.
callbacks.append(
    ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1)
)

# Write the weights (not the whole model) to disk only when the result is
# the best seen so far.
callbacks.append(
    ModelCheckpoint('weights_best.hdf5', verbose=2, save_best_only=True,
                    save_weights_only=True)
)

# Train LSTM Model

In [None]:
# Take the top-ranked model from the hyperparameter search and show its layers.
model = tuner.get_best_models(num_models=1)[0]
model.summary()

In [None]:
# Time the full training run with a wall-clock timer.
start = timeit.default_timer()

# Train for up to 100 epochs; the callbacks may end the run earlier.
# NOTE(review): the test split is also the validation set that drives the
# callbacks, so it is not an untouched hold-out.
#model.fit(x_train, y_train, epochs=25, batch_size=32, callbacks=callbacks)
model_history = model.fit(X_train,y_train, epochs=100, validation_data=(X_test,ytest), callbacks=callbacks)

stop = timeit.default_timer()
print('Time: ', stop - start)

# Loss

In [None]:
# Per-epoch loss values recorded by model.fit.
loss = model_history.history['loss']
validation_loss = model_history.history['val_loss']

# Explicit figure/axes objects instead of the implicit pyplot state, with
# axis labels so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(17, 7))
ax.plot(loss, label='Training Loss')
ax.plot(validation_loss, label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (mean squared error)')
ax.set_title('Loss : Training Vs Validation ')
ax.legend(loc='upper left');

# Prediction

In [None]:
# Model outputs for both splits; they are on the scaler's scale until they
# are inverse-transformed.
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)

In [None]:
# Map the predictions back to price units with the scaler fitted earlier.
# NOTE: this overwrites its own inputs, so running the cell a second time
# would apply the inverse transform twice.
train_predict=scaler.inverse_transform(train_predict)
test_predict=scaler.inverse_transform(test_predict)

In [None]:
# Window length used to build the samples (must equal time_step).
look_back=100

# One float column per prediction series, aligned with the rows of df and
# filled with NaN where no prediction exists. Using an explicit (n, 1) shape
# avoids inheriting df's full width and object dtype, which would draw every
# prediction once per DataFrame column.
n_rows = len(df)

# Training predictions start after the first look_back rows.
trainPredictPlot = np.full((n_rows, 1), np.nan)
trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict

# Shift test predictions for plotting: skip the training windows, the
# window-end row dropped by create_dataset, and the first test window.
testPredictPlot = np.full((n_rows, 1), np.nan)
testPredictPlot[len(train_predict) + (look_back*2) + 1: n_rows - 1, :] = test_predict

plt.plot(df["Close"], label="Actual close")
plt.plot(trainPredictPlot, label="Train prediction")
plt.plot(testPredictPlot, label="Test prediction")
plt.xlabel("Row index")
plt.ylabel("Close")
plt.legend(loc="upper left")
plt.title('Train Vs Test predictions');
plt.show()

# Evaluation

In [None]:
from sklearn.metrics import mean_squared_error

# The predictions were inverse-transformed to price units, while y_train and
# ytest are still on the scaler's scale. Bring the targets back to price
# units so that both sides of the RMSE use the same scale.
y_train_price = scaler.inverse_transform(y_train.reshape(-1, 1))
ytest_price = scaler.inverse_transform(ytest.reshape(-1, 1))

# np.sqrt is used because the notebook imports numpy but never imports math.
print("Train RMSE: ", float(np.sqrt(mean_squared_error(y_train_price, train_predict))))
print("Test RMSE: ", float(np.sqrt(mean_squared_error(ytest_price, test_predict))))