# Stock Price Movement

In [1]:
import pickle
import time
import timeit
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

# List of Stocks and ETFs

In [2]:
# Load the reference lists of stock and ETF symbols, in that order.
stock_list, etf_list = [
    pd.read_csv(csv_path)
    for csv_path in ("../stocks_and_etfs/stock_list.csv",
                     "../stocks_and_etfs/etf_list.csv")
]

In [3]:
import random

# Symbol analysed in the rest of the notebook. It is pinned to a fixed ticker;
# the commented-out line below picks a random one from `stock_list` instead.
#stock_symbol = random.choice(stock_list.iloc[:,0])
stock_symbol = "FCX"

print(stock_symbol)

FCX


## MySQL connection
Query the price history of a single stock only, which keeps the query time short.

In [5]:
import getpass
import os
import sys
import mysql.connector

# Connection settings are read from the environment so that no secret is
# stored in the notebook. Host, port and user fall back to the values this
# notebook has always used; the password has no fallback and is prompted for
# when MYSQL_PASSWORD is not set.
HOST = os.environ.get("MYSQL_HOST", "143.244.188.157")
PORT = os.environ.get("MYSQL_PORT", "3306")
USER = os.environ.get("MYSQL_USER", "patrick-finProj")
PASSWORD = os.environ.get("MYSQL_PASSWORD") or getpass.getpass("MySQL password: ")

# The symbol is passed as a bound parameter (`%s` placeholder) instead of being
# formatted into the SQL text. ORDER BY Date makes the row order explicit,
# because the later train/test split (shuffle=False) relies on chronological order.
query = (
    "SELECT Date, Close, Open, High, Low, Volume "
    "FROM histdailyprice3 WHERE Symbol = %s ORDER BY Date;"
)

# Initialised before the try block so the cleanup below is safe even when
# connect() itself fails and never assigns a connection.
conn = None
try:
    conn = mysql.connector.connect(
        host=HOST,
        port=PORT,
        user=USER,
        password=PASSWORD,
        database="GlobalMarketData"
    )
    histdailyprice3 = pd.read_sql(query, conn, params=(stock_symbol,))
except Exception as e:
    print(str(e))
    # Re-raise: every later cell needs `histdailyprice3`, so continuing would
    # only fail later (or silently reuse a stale frame from a previous run).
    raise
finally:
    if conn is not None:
        conn.close()

# Data

In [6]:
# Work on an independent copy so the frame returned by the query stays untouched.
df = histdailyprice3.copy(deep=True)
df.head(5)

Unnamed: 0,Date,Close,Open,High,Low,Volume
0,1998-01-01,7.875,7.875,7.875,7.875,0
1,1998-01-02,7.875,8.0,7.875,7.97,279300
2,1998-01-05,7.97,8.0,7.845,7.875,455300
3,1998-01-06,7.845,7.875,7.75,7.75,476200
4,1998-01-07,7.72,7.875,7.685,7.845,464100


# Test Train Set

In [8]:
from sklearn.model_selection import train_test_split

# Columns used as model features.
train_cols = ["Open","High","Low","Close","Volume"]

# shuffle=False keeps the row order: the leading 90% of the rows become the
# training set and the trailing 10% the test set.
df_train, df_test = train_test_split(
    df,
    shuffle=False,
    test_size=0.1,
    train_size=0.9,
)

print("Training data set size -", len(df_train))
print("Test data set size -", len(df_test))

Training data set size - 5559
Test data set size - 618


# Normalization

In [10]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only, then apply the same scaling to the
# test rows so no information from the test set enters the fit.
x = df_train[train_cols].to_numpy()
min_max_scaler = MinMaxScaler().fit(x)
x_train = min_max_scaler.transform(x)
x_test = min_max_scaler.transform(df_test[train_cols])

In [11]:
# Set environment parameters.
# NOTE(review): TensorFlow documents level '2' of TF_CPP_MIN_LOG_LEVEL as
# hiding INFO and WARNING messages (errors are still shown), and the variable
# is presumably only honoured if set before TensorFlow is imported - confirm.
os.environ.update(TF_CPP_MIN_LOG_LEVEL='2')

# Parameters

In [12]:
#set parameters for model
params = {
    "BATCH_SIZE": 50,
    "EPOCHS": 50,
    "LR": 0.00010000,
    "TIME_STEPS": 60
    }

In [16]:
# Module-level shortcuts for the two parameters the helper functions read.
TIME_STEPS, BATCH_SIZE = params['TIME_STEPS'], params['BATCH_SIZE']

In [14]:
def print_time(text, stime):
    """Print the wall-clock time elapsed since ``stime``.

    Args:
        text: Label printed in front of the elapsed time.
        stime: Start timestamp in seconds, on the same clock as ``time.time()``.

    The elapsed time is printed as whole minutes followed by the remaining
    seconds rounded to the nearest second. Nothing is returned.
    """
    # `time` comes from the notebook's import cell; it was previously never
    # imported, so calling this function raised NameError.
    minutes, seconds = divmod(time.time() - stime, 60)
    print(text, minutes, "minutes : ", np.round(seconds), "seconds")

def trim_dataset(mat, batch_size):
    """Drop trailing rows so the row count is a multiple of ``batch_size``.

    Args:
        mat: Array-like with a ``shape`` attribute, sliced along its first axis.
        batch_size: Batch size the number of rows has to be divisible by.

    Returns:
        ``mat`` without its last ``mat.shape[0] % batch_size`` rows, or ``mat``
        itself when nothing has to be dropped.
    """
    leftover = mat.shape[0] % batch_size
    return mat[:-leftover] if leftover > 0 else mat

In [15]:
def build_timeseries(mat, y_col_index, time_steps=None):
    """Turn a 2-D matrix of rows into sliding-window samples and targets.

    Sample ``i`` is the block of rows ``mat[i : i + time_steps]`` and its target
    is the value in column ``y_col_index`` of the row right after that block.

    Args:
        mat: 2-D array of shape ``(rows, features)``.
        y_col_index: Column of ``mat`` used as the prediction target.
        time_steps: Number of rows per window. Defaults to the notebook-level
            ``TIME_STEPS`` when omitted, which is the original behaviour.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape
        ``(rows - time_steps, time_steps, features)`` and ``y`` has shape
        ``(rows - time_steps,)``.

    Raises:
        ValueError: If ``mat`` has fewer rows than ``time_steps``.
    """
    if time_steps is None:
        time_steps = TIME_STEPS

    dim_0 = mat.shape[0] - time_steps
    dim_1 = mat.shape[1]

    # Fail with a readable message instead of numpy's generic
    # "negative dimensions are not allowed" coming out of np.zeros below.
    if dim_0 < 0:
        raise ValueError(
            f"build_timeseries needs at least {time_steps} rows, got {mat.shape[0]}"
        )

    x = np.zeros((dim_0, time_steps, dim_1))
    y = np.zeros((dim_0,))

    print("Length of inputs", dim_0)

    for i in range(dim_0):
        window_end = time_steps + i
        x[i] = mat[i:window_end]
        # The target is taken from the first row after the window.
        y[i] = mat[window_end, y_col_index]

    print("length of time-series - inputs", x.shape)
    print("length of time-series - outputs", y.shape)

    return x, y

In [17]:
# Window the scaled training rows (target column 3 is "Close" in `train_cols`)
# and trim both arrays so their length is a multiple of BATCH_SIZE.
x_t, y_t = (
    trim_dataset(windowed, BATCH_SIZE)
    for windowed in build_timeseries(x_train, 3)
)
print("Batch trimmed size", x_t.shape, y_t.shape)

Length of inputs 5499
length of time-series - inputs (5499, 60, 5)
length of time-series - outputs (5499,)
Batch trimmed size (5450, 60, 5) (5450,)


In [18]:
def custom_loss(y_true, y_pred):
    """Squared-error loss that is weighted up where the predicted direction is wrong.

    The up/down movement of both tensors is taken as the difference between
    consecutive entries along the first axis. Wherever the movement of
    ``y_true`` and ``y_pred`` disagrees, the squared error of the later of the
    two entries is multiplied by ``alpha`` (1000); all other entries keep a
    weight of 1. The weighted squared error is then averaged over the last axis.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        The weighted squared error averaged over the last axis.

    NOTE(review): `tf` and `K` are used here but are not imported in any cell
    visible in this notebook - presumably TensorFlow and the Keras backend;
    confirm they are in scope before this function is called.
    NOTE(review): assumes the first axis orders the samples in time, so that
    neighbouring entries are consecutive days - confirm against the batching.
    """
    
    #extract the "next day's price" of tensor
    y_true_next = y_true[1:]
    y_pred_next = y_pred[1:]
    
    #extract the "today's price" of tensor
    y_true_tdy = y_true[:-1]
    y_pred_tdy = y_pred[:-1]
    
    # debug output: static shape of the "today" slice of the predictions
    print('Shape of y_pred_back -', y_pred_tdy.get_shape())
    
    #subtract to get up/down movement of the two tensors
    y_true_diff = tf.subtract(y_true_next, y_true_tdy)
    y_pred_diff = tf.subtract(y_pred_next, y_pred_tdy)
        
    #create a standard tensor with zero value for comparison
    standard = tf.zeros_like(y_pred_diff)
    
    #compare with the standard; if true, UP; else DOWN
    #(greater_equal: a zero difference counts as UP)
    y_true_move = tf.greater_equal(y_true_diff, standard)
    y_pred_move = tf.greater_equal(y_pred_diff, standard)
    #flatten both movement tensors to 1-D before comparing them
    y_true_move = tf.reshape(y_true_move, [-1])
    y_pred_move = tf.reshape(y_pred_move, [-1])
    

    #find indices where the directions are not the same
    condition = tf.not_equal(y_true_move, y_pred_move)
    indices = tf.where(condition)

    #move one position later: difference i was computed from entries i and i+1,
    #so index i+1 addresses the "next day" entry of the original tensors
    ones = tf.ones_like(indices)
    indices = tf.add(indices, ones)
    indices = K.cast(indices, dtype='int32')
    
    
    #create a tensor to store directional loss and put it into custom loss output
    #(starts as all ones; the mismatching positions are overwritten with alpha)
    # NOTE(review): a tf.Variable is created on every call, and
    # tf.scatter_nd_update looks like the TensorFlow 1.x API - confirm the
    # TensorFlow version this notebook targets.
    direction_loss = tf.Variable(tf.ones_like(y_pred), dtype='float32')
    updates = K.cast(tf.ones_like(indices), dtype='float32')
    # weight applied to the squared error where the direction is wrong
    alpha = 1000
    direction_loss = tf.scatter_nd_update(direction_loss, indices, alpha*updates)
    
    #weighted squared error, averaged over the last axis
    custom_loss = K.mean(tf.multiply(K.square(y_true - y_pred), direction_loss), axis=-1)
    
    return custom_loss

In [19]:
def create_lstm_model():
    """Build and compile the stacked LSTM network used in this notebook.

    The network is, in order: a stateful LSTM with 100 units that returns the
    full sequence, an LSTM with 60 units, a 20-unit ReLU Dense layer and a
    single sigmoid output unit. The batch size is fixed to ``BATCH_SIZE`` via
    ``batch_input_shape``; the window length and feature count come from
    ``TIME_STEPS`` and ``x_t``. Dropout is set to 0.0 on both LSTM layers.

    Returns:
        The model, compiled with ``custom_loss`` and an Adam optimizer whose
        learning rate is ``params["LR"]``.
    """
    input_shape = (BATCH_SIZE, TIME_STEPS, x_t.shape[2])

    # Layers are constructed in the same order in which they are stacked.
    stacked_layers = [
        LSTM(
            100,
            batch_input_shape=input_shape,
            dropout=0.0,
            recurrent_dropout=0.0,
            stateful=True,
            return_sequences=True,
            kernel_initializer='random_uniform',
        ),
        LSTM(60, dropout=0.0),
        Dense(20, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    lstm_model = Sequential()
    for layer in stacked_layers:
        lstm_model.add(layer)

    # Compile with the direction-aware loss defined in this notebook.
    lstm_model.compile(
        loss=custom_loss,
        optimizer=optimizers.Adam(lr=params["LR"]),
    )
    return lstm_model

# No trained model is available yet; the training cell checks this flag.
model = None

In [None]:
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint

# Window the scaled test rows, trim them to a multiple of BATCH_SIZE and split
# the result in two halves: the first for validation, the second for testing.
x_temp, y_temp = build_timeseries(x_test, 3)
x_val, x_test_t = np.array_split(trim_dataset(x_temp, BATCH_SIZE), 2)
y_val, y_test_t = np.array_split(trim_dataset(y_temp, BATCH_SIZE), 2)
print("Test size", x_test_t.shape, y_test_t.shape, x_val.shape, y_val.shape)


# Set to True to rebuild and retrain even when `model` is already set.
is_update_model = False


if model is None or is_update_model:

    print("Building model...")

    lstm_model = create_lstm_model()
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (that added a stray "None" line to the output).
    lstm_model.summary()

    # Keep the weights of the epoch with the lowest validation loss on disk.
    # NOTE(review): OUTPUT_PATH is not defined in any cell visible in this
    # notebook - confirm it is set (and the directory exists) before running.
    mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH, "best_lstm_model.h5"), monitor='val_loss', verbose=2, save_best_only=True, save_weights_only=False, mode='min', period=1)

    # Step 14 - Fit model
    # shuffle=False keeps the samples in their original order. The validation
    # halves are trimmed again because the model has a fixed batch size and
    # splitting in two may leave a length that is not a multiple of it.
    history_lstm = lstm_model.fit(x_t, y_t, epochs=params["EPOCHS"], verbose=1, batch_size=BATCH_SIZE,
                      shuffle=False, validation_data=(trim_dataset(x_val, BATCH_SIZE),
                      trim_dataset(y_val, BATCH_SIZE)), callbacks=[mcp])

    print("saving model...")

    # Use a context manager so the file handle is flushed and closed.
    # NOTE(review): pickling a Keras model may not work on every Keras version;
    # the checkpoint written above already holds the best model in .h5 format.
    with open("lstm_model", "wb") as model_file:
        pickle.dump(lstm_model, model_file)