# LSTM with sliding window

## Import Modules

In [None]:
#Import important libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import time
import math
import itertools
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(7)

## Helper Functions

In [None]:
# Define helper functions
def transform_data(data, look_back):
    """Turn a 2-D series into sliding-window samples.

    Each sample is `look_back` consecutive values taken from column 0 of
    `data`; its target is the column-0 value that immediately follows the
    window.

    Args:
        data: 2-D array indexed as data[row, column]; only column 0 is read.
        look_back: number of consecutive rows in each input window.

    Returns:
        Tuple (X, y) of NumPy arrays, where X[i] is the window starting at
        row i and y[i] is the value at row i + look_back.
    """
    # The last usable window starts at row len(data) - look_back - 1, so
    # there are len(data) - look_back samples. The earlier bound subtracted
    # one more and silently discarded the final window/target pair.
    n_samples = len(data) - look_back
    X = [data[i:i + look_back, 0] for i in range(n_samples)]
    y = [data[i + look_back, 0] for i in range(n_samples)]
    return np.array(X), np.array(y)

def divisorGenerator(n):
    """Yield every positive divisor of `n` in ascending order.

    Divisors up to sqrt(n) are found by trial division and yielded as they
    are found; each one's cofactor (n // i) is remembered and yielded
    afterwards, largest-last.

    Args:
        n: non-negative integer to factor (math.sqrt rejects negatives).

    Yields:
        int: each divisor of `n`, smallest first.
    """
    large_divisors = []
    for i in range(1, int(math.sqrt(n) + 1)):
        if n % i == 0:
            yield i
            if i * i != n:
                # Floor division keeps the cofactor an int; `/` would give
                # a float in Python 3.
                large_divisors.append(n // i)
    # A `return <list>` inside a generator is not seen by code iterating it,
    # so the cofactors have to be yielded explicitly. They were collected in
    # descending order, hence the reversal.
    yield from reversed(large_divisors)

def get_common_divisor(num1,num2):
    """Return the values divisorGenerator yields for both numbers, ascending.

    Args:
        num1: first number passed to divisorGenerator.
        num2: second number passed to divisorGenerator.

    Returns:
        list: the common values sorted from smallest to largest, so the
        last element is always the greatest one.
    """
    shared = set(divisorGenerator(num1)) & set(divisorGenerator(num2))
    # A set has no defined iteration order; sort so callers that index the
    # result (e.g. [-1] for the largest) get a deterministic answer.
    return sorted(shared)


In [None]:
def load_file(filename,stock_parameter):
    """Read one column of ./<filename>.csv and return it with rows reversed.

    Args:
        filename: CSV file name without the '.csv' extension, looked up in
            the current working directory.
        stock_parameter: name of the single column to load.

    Returns:
        pandas.DataFrame holding only that column, in reverse row order.
    """
    csv_path = './' + filename + '.csv'
    frame = pd.read_csv(csv_path, usecols=[stock_parameter])
    return frame[::-1]

In [None]:
def create_dataset(data,look_back):
    """Split `data` 80/20 into train/test sets and window both for the LSTM.

    Args:
        data: 2-D array indexed as data[row, column].
        look_back: window length forwarded to transform_data.

    Returns:
        Tuple (X_train, y_train, X_test, y_test, batch_size). The X arrays
        are reshaped to (samples, 1, window length); batch_size is the
        largest value get_common_divisor reports for the two sample counts.
    """
    # The first 80% of the rows train the model, the remainder tests it.
    train_size = int(len(data) * 0.8)
    train_set = data[:train_size, :]
    test_set = data[train_size:, :]

    X_train, y_train = transform_data(train_set, look_back)
    X_test, y_test = transform_data(test_set, look_back)

    # reshape input to be [samples, time steps, features]
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

    # Take the maximum explicitly: the list comes from a set intersection,
    # so its last element is not guaranteed to be the largest divisor.
    batch_size = max(get_common_divisor(X_train.shape[0], X_test.shape[0]))

    return X_train, y_train, X_test, y_test, batch_size

## Define the LSTM model

In [None]:
# Function to build, train and evaluate the LSTM for a given look-back value, then log the scores
def lstm_time_steps(data,X_train,y_train,X_test,y_test,batch_size,epoch,look_back,no_of_days,filename,stock_parameter,scaler):
    """Train a small LSTM, score it on train and test data, and log the run.

    The model is Sequential: LSTM(4) with input_shape (1, look_back)
    followed by Dense(1), compiled with mean-squared-error loss and the
    Adam optimizer. Predictions and targets are mapped back through
    `scaler.inverse_transform` before the RMSE is computed, and one line
    describing the run is appended to ./logs_sliding.csv.

    Args:
        data: not used by this function; kept so existing callers work.
        X_train: training inputs passed to model.fit and model.predict.
        y_train: 1-D training targets.
        X_test: test inputs passed to model.predict.
        y_test: 1-D test targets.
        batch_size: batch size passed to model.fit.
        epoch: number of training epochs.
        look_back: window length; sets the model's input shape.
        no_of_days: value recorded in the log line.
        filename: value recorded in the log line.
        stock_parameter: value recorded in the log line.
        scaler: object whose inverse_transform undoes the scaling of
            predictions and targets.

    Returns:
        None. The result is the line appended to the log file.
    """
    # creating the LSTM
    algorithm = "lstm_lookback_window"

    # create model
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # train the model
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    model.fit(X_train,
              y_train,
              epochs=epoch,
              batch_size=batch_size,
              verbose=2)
    time_taken = int(time.perf_counter() - start_time)

    # do predictions
    predicted_train = model.predict(X_train)
    # This name was previously misspelled, so the inverse_transform below
    # raised NameError before any score could be logged.
    predicted_test = model.predict(X_test)

    # Undo the scaling. The 1-D targets are wrapped in a list so that
    # inverse_transform receives 2-D input; row 0 is read back below.
    predicted_train = scaler.inverse_transform(predicted_train)
    y_train_org = scaler.inverse_transform([y_train])

    predicted_test = scaler.inverse_transform(predicted_test)
    y_test_org = scaler.inverse_transform([y_test])

    # Root-mean-squared error in the original units, rounded to 2 decimals.
    train_score = round(math.sqrt(mean_squared_error(y_train_org[0], predicted_train[:, 0])), 2)
    test_score = round(math.sqrt(mean_squared_error(y_test_org[0], predicted_test[:, 0])), 2)

    log_fields = [algorithm, no_of_days, X_test.shape[0], look_back, epoch,
                  batch_size, train_score, test_score, time_taken, filename,
                  stock_parameter]
    with open("./logs_sliding.csv", "a") as myfile:
        # Each entry begins with a newline, matching the existing log format.
        myfile.write('\n' + ','.join(str(field) for field in log_fields))

## Grid Search over LSTM

In [None]:
def main():
    """Grid-search the LSTM over epochs, look-back and history length.

    Loads the "close" column of ./BABA.csv once, then for every combination
    of epoch count, look-back window and number of trailing rows: scales
    the data to [0, 1], builds the train/test windows and runs
    lstm_time_steps, which appends a line to ./logs_sliding.csv. Finally
    prints the log file as an aligned table.
    """
    epoch_array = [10,20,30,40]
    look_back_array = [3,7,14,28]
    no_of_days_array = [300,600,900]

    filename = "BABA"
    stock_parameter = "close"

    file_loaded = load_file(filename, stock_parameter)

    # Conduct Grid Search over the LSTM model and save the results to log file
    for combo in itertools.product(epoch_array, look_back_array, no_of_days_array):
        epoch, look_back, no_of_days = combo
        print("---------------------------",combo,"--------------------------")

        # Only the last `no_of_days` rows of the loaded frame are used.
        df = file_loaded.iloc[-no_of_days:]
        data = df.values.astype('float32')

        # Apply min max scaling over the data
        scaler = MinMaxScaler(feature_range=(0,1))
        data = scaler.fit_transform(data)

        # Create custom dataset
        X_train, y_train, X_test, y_test, batch_size = create_dataset(data, look_back)

        lstm_time_steps(data,
                        X_train,
                        y_train,
                        X_test,
                        y_test,
                        batch_size,
                        epoch,
                        look_back,
                        no_of_days,
                        filename,
                        stock_parameter,
                        scaler)

    row_format = '{:<25} {:<10} {:<10} {:<8} {:<7} {:<10} {:<10} {:<10} {:<10} {:<9} {:<7}'
    # Use a context manager so the log file is closed after reading.
    with open("./logs_sliding.csv", "r", newline="") as f:
        for row in csv.reader(f):
            # Log entries are written with a leading newline, which can leave
            # a blank line in the file; csv.reader returns [] for it and
            # format(*row) would raise IndexError, so skip such rows.
            if not row:
                continue
            print(row_format.format(*row))

In [None]:
# Run the grid search defined in main().
main()