# **LSTM/RNN Neural Network Framework**

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import math
from sklearn.metrics import accuracy_score

In [None]:
# Load the S&P 500 workbook (first sheet) into the main DataFrame.
excel_path = 'S&P 500 .xlsx'
df = pd.read_excel(excel_path)

In [None]:

# Show the loaded DataFrame as the cell's rich output.
df

In [None]:
# Summary statistics of the numeric columns, rounded to four decimals.
df.describe().round(decimals=4)

In [None]:
# Inspect the ten most recent rows.
df.tail(n=10)

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Line chart of the S&P 500 return column, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('Return on the S&P 500 index')
ax.plot(df['Return on the S&P 500 Index'], color='purple')
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('Daily return USD ($)', fontsize=18)

In [None]:
# Line chart of the S&P 500 index level, drawn through the explicit Axes API.
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('S&P 500 index close price')
ax.plot(df['Level of the S&P 500 Index'], color='blue')
ax.set_xlabel('Date', fontsize=18)
ax.set_ylabel('Close Price USD ($)', fontsize=18)

In [None]:
# Model inputs. The S&P 500 return is kept as the LAST column because the
# windowing code reads the prediction target from column index -1.
feature_columns = [
    'Value-Weighted Return-incl. dividends',
    'Value-Weighted Return-excl. dividends',
    'Equal-Weighted Return-incl. dividends',
    'Equal-Weighted Return-excl. dividends',
    'Return on the S&P 500 Index',
]
data = df[feature_columns]

In [None]:
# Show the selected feature/target columns as the cell's rich output.
data

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
import sklearn.metrics as sm
from sklearn.preprocessing import MinMaxScaler
# kf = KFold(n_splits=5, shuffle=True, random_state=42)



In [None]:
def get_data(window=30, train_fraction=0.8):
  """Scale the module-level ``data`` frame and cut it into sliding windows.

  The last column of ``data`` is the prediction target. Each sample consists of
  ``window`` consecutive scaled rows (all columns); its label is the scaled
  target value of the row immediately after the window.

  The MinMaxScaler is fitted on the training rows only, so the minimum and
  maximum of the test period do not leak into the training features. Scaled
  test values may therefore fall slightly outside [0, 1].

  Args:
    window: Number of consecutive rows per sample (default 30).
    train_fraction: Share of rows, rounded up, used for training (default 0.8).

  Returns:
    Tuple ``(x_train, y_train, x_test, y_test, scaler, scaled_data,
    training_data_len)`` where ``x_*`` have shape (samples, window, n_columns),
    ``y_*`` are 1-D arrays of scaled targets, ``scaler`` is the fitted
    MinMaxScaler, ``scaled_data`` is the full scaled array and
    ``training_data_len`` is the number of training rows.

  Raises:
    ValueError: If ``window`` is smaller than 1 or the training split does not
      contain more than ``window`` rows (no training sample could be built).
  """
  if window < 1:
    raise ValueError("window must be at least 1, got %r" % (window,))

  # Number of rows to train the model on (rounded up).
  training_data_len = int(np.ceil(len(data) * train_fraction))
  if training_data_len <= window:
    raise ValueError(
        "training split has %d rows; need more than window=%d"
        % (training_data_len, window))
  print(training_data_len)

  # Fit the scaler on the training rows only, then apply it to every row.
  scaler = MinMaxScaler(feature_range=(0, 1))
  scaler.fit(data.iloc[:training_data_len])
  scaled_data = scaler.transform(data)
  print(scaled_data.shape)

  # Training windows: rows [i-window, i) predict the target at row i.
  train_data = scaled_data[:training_data_len, :]
  x_train = np.array(
      [train_data[i - window:i, :] for i in range(window, len(train_data))])
  y_train = train_data[window:, -1].copy()
  print(x_train.shape, y_train.shape)

  # Test windows: start `window` rows before the split so that the first test
  # target (row `training_data_len`) has a full history.
  test_data = scaled_data[training_data_len - window:, :]
  x_test = np.array(
      [test_data[i - window:i, :] for i in range(window, len(test_data))])
  # Copy so that later in-place writes to `scaled_data` cannot change y_test
  # (a plain slice would be a view onto the same memory).
  y_test = scaled_data[training_data_len:, -1].copy()

  print(x_test.shape, y_test.shape)
  return x_train,y_train,x_test,y_test,scaler,scaled_data,training_data_len

In [None]:
# Build the scaled sliding-window train/test arrays from `data`, and keep the
# fitted scaler, the full scaled array and the train/test split index for the
# later inverse-transform and plotting steps.
x_train,y_train,x_test,y_test,scaler,scaled_data,training_data_len = get_data()

In [None]:
def get_predictions(model):
    """Predict the test targets with ``model`` and return them in original units.

    Uses the module-level ``x_test``, ``y_test``, ``scaled_data``,
    ``training_data_len`` and ``scaler``. The value returned by
    ``model.evaluate(x_test, y_test)`` is printed, along with a few array shapes.

    Args:
        model: Object exposing ``predict(x)`` and ``evaluate(x, y)``.

    Returns:
        1-D array with one inverse-scaled prediction per test row.
    """
    # Predictions are on the scaled target (last column of `scaled_data`).
    predictions = model.predict(x_test)
    print(model.evaluate(x_test,y_test))

    # Work on a COPY of the test rows: basic slicing returns a view, so writing
    # the predictions into it would overwrite `scaled_data` and anything that
    # aliases it, such as a `y_test` taken as a slice of the same array.
    tmp = scaled_data[training_data_len: , :].copy()
    print(tmp.shape,predictions.shape)

    # The scaler was fitted on all columns, so place the predictions in the
    # target column and inverse-transform the whole block.
    tmp[:,-1] = predictions.reshape(tmp.shape[0])
    tmpss = scaler.inverse_transform(tmp)
    print(tmpss.shape)

    return tmpss[:,-1]
    
def draw(pred,training_data_len):
  """Plot the training series, the held-out series and the model predictions.

  Reads the module-level ``df`` and splits it at ``training_data_len``.

  Args:
    pred: One prediction per row of ``df[training_data_len:]``, in the
      original (unscaled) units of the target column.
    training_data_len: Row index where the validation part of ``df`` begins.
  """
  target_col = 'Return on the S&P 500 Index'

  train = df[:training_data_len]
  # Copy before adding a column: assigning on a slice of `df` triggers
  # SettingWithCopyWarning and may not behave as intended.
  valid = df[training_data_len:].copy()
  valid['Predictions'] = pred

  # Visualize the data
  plt.figure(figsize=(18,8))
  plt.title('Model Prediction')
  plt.xlabel('Date', fontsize=18)
  # The plotted column is the index return, not a close price.
  plt.ylabel(target_col, fontsize=18)
  plt.plot(train[target_col])
  plt.plot(valid[[target_col, 'Predictions']])
  plt.legend(['Train', 'Val', 'Predictions'], loc='best')
  plt.show()

**Linear Regression**

In [None]:
# Baseline: ordinary least squares on the flattened windows
# (samples, window, columns) -> (samples, window * columns).
model = LinearRegression()
x_train_flat = x_train.reshape((x_train.shape[0], -1))
x_test_flat = x_test.reshape((x_test.shape[0], -1))
model.fit(x_train_flat, y_train)

predicts_scaled = model.predict(x_test_flat)
print(sm.mean_squared_error(y_test, predicts_scaled))

# Copy the test rows before writing into them: a plain slice is a view, so the
# assignment below would otherwise overwrite `scaled_data` (and a `y_test`
# that aliases it) with the predictions.
tmp = scaled_data[training_data_len: , :].copy()
tmp[:,-1] = predicts_scaled.reshape(tmp.shape[0])
tmpss = scaler.inverse_transform(tmp)
predicts = tmpss[:,-1]
draw(predicts,training_data_len)

**LSTM**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by two dense layers; the final Dense(1)
# emits a single scaled target value per input window.
lstm_model = Sequential([
    LSTM(65, return_sequences=True, input_shape=x_train.shape[1:]),
    LSTM(32, return_sequences=False),
    Dense(16),
    Dense(1),
])

lstm_model.summary()

In [None]:
lstm_model.compile(loss='mean_squared_error', optimizer='adam')

# Regenerate the windowed arrays and the scaler so training starts from
# freshly scaled data.
(x_train, y_train, x_test, y_test,
 scaler, scaled_data, training_data_len) = get_data()

# Fit for 5 epochs with mini-batches of 16 windows.
print("Model training ...")
lstm_model.fit(x=x_train, y=y_train, epochs=5, batch_size=16)
print("Finished!")

In [None]:
# Predict the test windows with the LSTM; get_predictions returns the values
# inverse-transformed to the original units of the target column.
lstm_pred = get_predictions(lstm_model)

In [None]:
# Plot train / validation / LSTM predictions for the target column.
draw(lstm_pred,training_data_len)

**RNN (GRU)**

In [None]:
from keras.layers import Dense, GRU

# Two stacked GRU layers followed by two dense layers; relies on `Sequential`
# having been imported in an earlier cell.
rnn_model = Sequential([
    GRU(64, return_sequences=True, input_shape=x_train.shape[1:]),
    GRU(32, return_sequences=False),
    Dense(16),
    Dense(1),
])
rnn_model.summary()

In [None]:
rnn_model.compile(loss='mean_squared_error', optimizer='adam')

# Regenerate the windowed arrays and the scaler so training starts from
# freshly scaled data.
(x_train, y_train, x_test, y_test,
 scaler, scaled_data, training_data_len) = get_data()

# Fit for 5 epochs with mini-batches of 16 windows.
print("Model training ...")
rnn_model.fit(x=x_train, y=y_train, epochs=5, batch_size=16)
print("Finished!")

In [None]:
# Predict the test windows with the GRU model (values come back in the
# original units of the target column) and plot them against the actual series.
rnn_pred = get_predictions(rnn_model)
draw(rnn_pred,training_data_len)