In [None]:
def import_libs():
    """Import every library the notebook uses and expose each name globally.

    The imports execute inside this function, so every imported name is
    declared ``global`` first; without that the names would stay local to
    the call and be invisible to the other cells.
    """
    global np, plt, pd, math
    global MinMaxScaler, mean_squared_error
    global Sequential, Dense, LSTM, Dropout, GRU, Bidirectional, SGD

    # Numerics, plotting (with the 'fivethirtyeight' style) and data frames
    import numpy as np
    import matplotlib.pyplot as plt
    plt.style.use('fivethirtyeight')
    import pandas as pd

    # Feature scaling
    from sklearn.preprocessing import MinMaxScaler

    # Keras model container, layers and optimizer
    from keras.models import Sequential
    from keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
    from keras.optimizers import SGD

    # Error metric helpers
    import math
    from sklearn.metrics import mean_squared_error
# Run the imports once so np, plt, pd, math and the sklearn/keras names become module-level globals
import_libs()
# rcParams (placeholder only: no matplotlib rcParams are configured here)

In [None]:
# Some functions to help out with
def plot_predictions(test, predicted, stock_name='IBM'):
    """Plot the real and the predicted price series on the current pyplot figure.

    Parameters
    ----------
    test : array-like
        Real prices, drawn in red.
    predicted : array-like
        Predicted prices, drawn in blue.
    stock_name : str, optional
        Name inserted into the legend labels, the title and the y-axis
        label. Defaults to ``'IBM'``, which reproduces the texts that were
        previously hard-coded.

    Returns
    -------
    None
        The figure is displayed through ``plt.show()``.
    """
    plt.plot(test, color='red', label='Real {} Stock Price'.format(stock_name))
    plt.plot(predicted, color='blue', label='Predicted {} Stock Price'.format(stock_name))
    plt.title('{} Stock Price Prediction'.format(stock_name))
    plt.xlabel('Time')
    plt.ylabel('{} Stock Price'.format(stock_name))
    plt.legend()
    plt.show()

def return_rmse(test, predicted):
    """Compute, print and return the root mean squared error.

    Parameters
    ----------
    test : array-like
        Reference values.
    predicted : array-like
        Predicted values, compared against ``test`` by ``mean_squared_error``.

    Returns
    -------
    float
        The RMSE. The function previously only printed it and returned
        ``None`` despite its name; the printed message is unchanged.
    """
    rmse = math.sqrt(mean_squared_error(test, predicted))
    print("The root mean squared error is {}.".format(rmse))
    return rmse


# Load the price history; the 'Date' column is parsed as dates and used as the index
dataset = pd.read_csv(
    '../input/IBM_2006-01-01_to_2018-01-01.csv',
    parse_dates=['Date'],
    index_col='Date',
)
# Show the last rows as a quick sanity check of what was loaded
print(
    'Tail of the dataset is: \n\n {}:'.format(dataset.tail())
)

In [None]:
# Split the data by date: rows up to and including 2016 form the training set,
# rows from 2017 onwards form the test set. `.iloc[:,1:2]` keeps only the column
# at position 1 as a 2-D (n, 1) array (assumed to be 'High' - TODO confirm against the CSV column order).
# NOTE(review): no missing-value check is actually performed here; the commented-out
# isnull() line below is the only trace of one.
training_set = dataset[:'2016'].iloc[:,1:2].values
test_set = dataset['2017':].iloc[:,1:2].values
# dataset[:'2016'].iloc[:,0:2].isnull().sum()
# len(dataset[:'2016'].iloc[:,1:2].values)

# We have chosen the 'High' attribute for prices. Plot both periods on the same axes to see what it looks like.
dataset["High"][:'2016'].plot(figsize=(16,4),legend=True)
dataset["High"]['2017':].plot(figsize=(16,4),legend=True)
# Replace the default legend entries with descriptive labels (same order as the two plot calls above)
plt.legend(['Training set (Before 2017)','Test set (2017 and beyond)'])
plt.title('IBM stock price');plt.show()

In [None]:
# Fit a min-max scaler on the training set only and map its values into [0, 1];
# the same fitted scaler `sc` is reused later for the test inputs and predictions.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
# print(training_set_scaled[:20], end='\n\n'); print(training_set[:20])


def X_y_split(lookback):
    """Cut the scaled training series into sliding windows and next-step targets.

    Reads the module-level ``training_set_scaled`` (column 0) and uses
    ``len(training_set)`` as the upper bound. For every position ``i`` from
    ``lookback`` to ``len(training_set) - 1`` the sample is the ``lookback``
    values preceding ``i`` and the target is the value at ``i``.

    Parameters
    ----------
    lookback : int
        Number of previous time steps in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train)``: the windows and their matching targets.
    """
    window_ends = range(lookback, len(training_set))
    windows = np.array(
        [training_set_scaled[end - lookback:end, 0] for end in window_ends]
    )
    targets = np.array([training_set_scaled[end, 0] for end in window_ends])
    return windows, targets

# Build the training windows with a 60-step lookback (the test inputs further down use the same 60)
X_train, y_train = X_y_split(lookback=60)


def X_y_reshape(single_y_size, x, y):
    """Regroup samples so that each one is paired with ``single_y_size`` consecutive targets.

    Every ``single_y_size``-th row of ``x`` is kept and paired with the
    ``single_y_size`` targets starting at the same position. A trailing
    remainder of ``len(y) % single_y_size`` targets is dropped so that every
    target row has full length.

    Parameters
    ----------
    single_y_size : int or integral float
        Number of targets per sample. Integral floats such as ``2.0`` are
        accepted and converted to ``int``.
    x : sequence
        Input samples, indexable by position.
    y : sequence
        Targets, sliceable by position.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` regrouped as described. If ``single_y_size`` is not
        integral, ``'Need an integer!'`` is printed and the inputs are
        returned as arrays without regrouping.
    """
    if float(single_y_size).is_integer():
        # The guard above lets integral floats (e.g. 2.0) through, but range()
        # only accepts true integers, so normalise before using it as a step.
        step = int(single_y_size)
        usable = len(y) - len(y) % step
        starts = range(0, usable, step)
        x = [x[i] for i in starts]
        y = [y[i:i + step] for i in starts]
    else:
        print('Need an integer!')
    return np.array(x), np.array(y)

# Regroup with a single target per sample
X_train, y_train = X_y_reshape(single_y_size=1, x=X_train, y=y_train)

# The LSTM input must be 3-D, so append a trailing axis of length 1:
#   axis 0 - number of samples
#   axis 1 - number of time steps in each sequence
#   axis 2 - number of features per time step (1 here: only the price)
# With the original data this gives 2709 x 60 x 1.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
# X_train.shape  # X_train[:][:][0]
print(X_train.shape)
print(y_train.shape)

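* Num_params_1st_layer = 4 x units x (units + input_features + 1) = 4 x 50 x (50 + 1 + 1) = 10400, where units is the number of outputs of this layer and input_features = 1 (the price).
* Num_params_2nd_layer = 4 x units x (units + output_from_1st_layer + 1) = 4 x 60 x (60 + 50 + 1) = 26640, because the 2nd LSTM layer has 60 units and receives the 50 outputs of the 1st layer.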

In [None]:
def build(X, y, batch_size, epoch):
    """Assemble, compile and fit the stacked-LSTM regressor.

    The network is four LSTM layers (50, 60, 50 and 50 units), each followed
    by ``Dropout(0.2)``, and a single-unit ``Dense`` output. It is compiled
    with the ``rmsprop`` optimizer and mean-squared-error loss, its summary
    is printed, and it is then fitted on ``X`` / ``y``.

    Parameters
    ----------
    X : array-like
        Training inputs; ``X.shape[1]`` is used as the number of time steps
        and one feature per step is declared via ``input_shape``.
    y : array-like
        Training targets.
    batch_size : int
        Batch size passed to ``fit``.
    epoch : int
        Number of epochs passed to ``fit``.

    Returns
    -------
    Sequential
        The fitted model.
    """
    regressor = Sequential()

    # Keyword arguments of each LSTM layer, in stacking order.
    # - `input_shape` (rather than `batch_input_shape`) leaves the batch size
    #   open so that it can be chosen later in fit().
    # - All but the last layer return the full sequence so the next LSTM
    #   receives one vector per time step; the last keeps the default and
    #   returns only its final output.
    lstm_specs = [
        dict(units=50, return_sequences=True, input_shape=(X.shape[1], 1)),
        dict(units=60, return_sequences=True),
        dict(units=50, return_sequences=True),
        dict(units=50),
    ]
    for spec in lstm_specs:
        regressor.add(LSTM(**spec))
        regressor.add(Dropout(0.2))

    # The output layer
    regressor.add(Dense(units=1))

    # Compile, report the architecture, then train
    regressor.compile(loss='mean_squared_error', optimizer='rmsprop')
    regressor.summary()
    regressor.fit(X, y, batch_size=batch_size, epochs=epoch)
    return regressor

# Build and train the model in one step; build() calls fit() itself,
# so no separate regressor.fit(...) call is needed in this cell.
regressor = build(X_train, y_train, epoch=30, batch_size=50)


In [None]:
# Now to get the test set ready in a similar way as the training set.
# The first 60 entries of the test set each need 60 previous values, which lie before 2017,
# so the windows are cut from the whole 'High' attribute rather than from the test rows alone.
dataset_total = pd.concat((dataset["High"][:'2016'],dataset["High"]['2017':]),axis=0)
# Keep the tail of the series: the 60-value history followed by every test row.
# The 60 here must match the lookback passed to test_prep further down.
inputs = dataset_total[len(dataset_total)-len(test_set) - 60:].values # 251+60 =311
# print(len(dataset_total[len(dataset_total)-len(test_set) - 60:].values)) # =311
# len(test_set) # = 251
# len(dataset_total) # = 3020

# Reshape into a single column, as indicated by the '1'; the row count has to be compatible with the original array
inputs = inputs.reshape(-1,1); # -1 marks an unknown dimension that numpy infers from the data
# print(inputs) # gives ONE column of numbers
# Scale with the scaler already fitted on the training set (transform only, no refit)
inputs  = sc.transform(inputs)
# print(inputs.shape) # (311, 1)
# print(len(test_set)) # =251

In [None]:
def test_prep(lookback,data):
    # Preparing X_test and predicting the prices
    X_test = [data[i-lookback:i,0] for i in range(lookback,lookback+len(test_set))]
    X_test = np.array(X_test); X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
#     print(X_test.shape) # (251, 60, 1)
    return X_test

# Build the test windows with the same 60-step lookback that was used for training
X_test = test_prep(lookback=60, data=inputs)  # (251, 60, 1)

# Predict in the scaled space, then undo the scaling with the fitted scaler
predicted_stock_price = sc.inverse_transform(regressor.predict(X_test))

# Visualizing the results for LSTM
plot_predictions(test=test_set, predicted=predicted_stock_price)

In [None]:
# Report the RMSE between the real test prices and the model's predictions
return_rmse(test=test_set, predicted=predicted_stock_price)

In [None]:
# `LSTM` is the layer class, so `LSTM.units` is looked up on the class itself and
# cannot show the unit count of any configured layer. Report the unit count of
# every LSTM layer in the trained model instead.
print([layer.units for layer in regressor.layers if isinstance(layer, LSTM)])