In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM,Dense,Dropout
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from math import sqrt

In [None]:
# Load the split-adjusted NYSE price table (one row per symbol per date).
df = pd.read_csv('../input/nyse/prices-split-adjusted.csv')

# Keep only the Netflix rows, drop the now-constant ticker column and index by date.
# Built as a non-mutating chain: `drop(..., 1, inplace=True)` relied on a positional
# `axis` argument that pandas 2.0 no longer accepts, and mutating a filtered slice
# in place triggers SettingWithCopyWarning.
df2 = (
    df[df.symbol == 'NFLX']
    .drop(columns=['symbol'])
    .set_index('date')
)
df2

**Normalize Data**

In [None]:
# Rescale every column of df2 independently into [0, 1].
# NOTE(review): the scaler is fit on the whole series, i.e. before the
# train/test split further down — the test rows influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
sc_X = scaler.fit_transform(df2.to_numpy())
print(sc_X)

In [None]:
# Window length: the previous 7 rows are used to predict the value of the next row.
seq_len = 7


def dataset(data, seq_len):
    """Slice a 2-D array into sliding windows and next-step targets.

    Only column 0 of ``data`` is used, both for the windows and the targets.
    NOTE(review): callers that want a specific feature (e.g. the close price)
    as the target must make sure it is the first column of ``data``.

    Parameters
    ----------
    data : 2-D numpy array, rows in chronological order.
    seq_len : int, number of consecutive rows in each input window.

    Returns
    -------
    (X, y) : X has one row per window (``seq_len`` values each); y holds the
        column-0 value of the row immediately following each window. Both are
        empty arrays when ``data`` has no more than ``seq_len`` rows.
    """
    n_windows = len(data) - seq_len
    windows = [data[start:start + seq_len, 0] for start in range(n_windows)]
    targets = [data[start + seq_len, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

**Create training and test sets**

In [None]:
# Chronological 80/20 split: no shuffling, so every test row comes after the training rows.
train_size = int(len(sc_X) * 0.8)
test_size = len(sc_X) - train_size

train, test = sc_X[:train_size, :], sc_X[train_size:, :]

# dataset() always windows column 0 of the array it receives, so hand it only the
# scaled 'close' column. Passing the full feature matrix would train the model on
# whichever feature happens to be first in df2, while the predictions are later
# denormalized with a scaler that was fit on 'close'.
close_col = df2.columns.get_loc('close')
train_X, train_y = dataset(train[:, [close_col]], seq_len)
test_X, test_y = dataset(test[:, [close_col]], seq_len)

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1),
# matching the input_shape=(seq_len, 1) the LSTM is declared with.
train_X = np.reshape(train_X, (train_X.shape[0], train_X.shape[1], 1))
test_X = np.reshape(test_X, (test_X.shape[0], test_X.shape[1], 1))
print(train_y)

In [None]:
# Sanity check of the training tensor: (number of windows, window length, 1 feature per step).
train_X.shape

**Build the model**

In [None]:

# Two stacked LSTM layers followed by a single linear output unit.
# The first LSTM returns the full sequence so the second one can consume it;
# the second returns only its last state, which feeds the regression head.
model = Sequential([
    LSTM(256, input_shape=(seq_len, 1), return_sequences=True),
    LSTM(128, return_sequences=False),
    Dense(1, activation='linear'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Train on the training windows only; no validation data is passed here.
history = model.fit(
    train_X,
    train_y,
    batch_size=512,
    epochs=90,
    verbose=1,
)
# (disabled) score = model.evaluate(test_X, test_y, batch_size=64, verbose=1)

In [None]:
# Print the layer-by-layer summary of the most recently built `model`.
model.summary()

**Make predictions and denormalize the predicted values**

In [None]:
# A second scaler fit on the 'close' column alone, used only to map scaled
# values back to price units.
# NOTE(review): this inverse mapping is only meaningful if the model's target
# is the scaled close price — confirm against what dataset() was given.
scaler2 = MinMaxScaler()
close = df2['close'].to_numpy().reshape(-1, 1)
close_denorm = scaler2.fit_transform(close)  # despite the name, this holds the *scaled* close

# Predict on the test windows, then undo the scaling for predictions and targets alike.
test_pred_y = scaler2.inverse_transform(model.predict(test_X).reshape(-1, 1))
test_y_denorm = scaler2.inverse_transform(test_y.reshape(-1, 1))


In [None]:
# Overlay the denormalized predictions on the denormalized test targets.
# Title and axis labels are set so the figure can be read on its own.
plt.plot(test_pred_y, color='red', label='Prediction')
plt.plot(test_y_denorm, color='blue', label='Actual')
plt.title('NFLX test set: predicted vs actual price')
plt.xlabel('Test sample (chronological order)')
plt.ylabel('Price (denormalized)')
plt.legend(loc='best')
plt.show()

**Loss graph**

In [None]:
# Training loss per epoch. The legend is built from per-line labels so it can
# never list a curve that was not drawn (the fit above records no validation
# loss, so a hard-coded ['train', 'test'] legend would be misleading).
plt.plot(history.history['loss'], label='train')
if 'val_loss' in history.history:
    # Only present when fit() was given validation data.
    plt.plot(history.history['val_loss'], label='test')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.show()

In [None]:
import math
def model_score(model, X_train, y_train, X_test, y_test):
    """Evaluate ``model`` on the train and test sets and print the losses.

    The loss is reported as MSE (and its square root as RMSE) because the
    models in this notebook are compiled with ``loss='mse'``.

    Parameters
    ----------
    model : object exposing ``evaluate(X, y, verbose=0)``.
    X_train, y_train : training inputs and targets.
    X_test, y_test : test inputs and targets.

    Returns
    -------
    (train_loss, test_loss) : the loss value on each set.
    """
    def _loss(score):
        # evaluate() yields [loss, *metrics] when the model was compiled with
        # metrics, but a bare scalar loss when it was compiled without any.
        return score[0] if isinstance(score, (list, tuple)) else score

    train_loss = _loss(model.evaluate(X_train, y_train, verbose=0))
    print('Train Score: %.5f MSE (%.2f RMSE)' % (train_loss, math.sqrt(train_loss)))
    test_loss = _loss(model.evaluate(X_test, y_test, verbose=0))
    print('Test Score: %.5f MSE (%.2f RMSE)' % (test_loss, math.sqrt(test_loss)))
    return train_loss, test_loss

# Report train/test loss for the current model (result is also the cell output).
model_score(
    model,
    X_train=train_X,
    y_train=train_y,
    X_test=test_X,
    y_test=test_y,
)

In [None]:
# Same two-LSTM stack as before, but with a ReLU on the output unit, which
# clamps predictions to be non-negative. Rebinding `model` and `history`
# replaces the previously trained network.
model = Sequential([
    LSTM(256, input_shape=(seq_len, 1), return_sequences=True),
    LSTM(128, return_sequences=False),
    Dense(1, activation='relu'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
history = model.fit(
    train_X,
    train_y,
    batch_size=512,
    epochs=90,
    verbose=1,
)

In [None]:
# NOTE(review): this cell repeats the configuration of the cell directly above
# (2 LSTM layers + ReLU output); it retrains from scratch and overwrites
# `model` and `history` again.
layers = (
    LSTM(256, input_shape=(seq_len, 1), return_sequences=True),
    LSTM(128, return_sequences=False),
    Dense(1, activation='relu'),
)
model = Sequential()
for layer in layers:
    model.add(layer)
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
history = model.fit(train_X, train_y, batch_size=512, epochs=90, verbose=1)

**Hyperparameter grid search**

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

# Search grid. The look-back list is named `mem_days` (the name the loop below
# iterates over) instead of re-binding `seq_len`, so the scalar window length
# used by the earlier cells is not clobbered.
time_stamp = 1  # forecast horizon in days; dataset() always predicts exactly one step ahead
mem_days = [5, 10, 22]  # look-back window lengths (days) to try
lstm_layers = [1, 2, 3]  # number of extra stacked LSTM layers
dense_layers = [1, 2, 3]  # number of hidden Dense layers
node_num = [64, 128, 256]  # units per layer

# dataset() windows column 0 of a NumPy array, so give it only the scaled close
# column (not the DataFrame, and not the full feature matrix).
close_scaled = sc_X[:, [df2.columns.get_loc('close')]]

for md in mem_days:
    for ll in lstm_layers:
        for dl in dense_layers:
            for nn in node_num:
                # '{val_mape}' / '{epoch}' are filled in by Keras at save time; the
                # rest identifies the configuration. Current Keras requires a
                # '.keras' suffix when saving the full model.
                filepath = './models/{val_mape:.2f}_{epoch:02d}_' + f'mem_{md}_lstm{ll}_dense{dl}_node{nn}.keras'
                checkpoint = ModelCheckpoint(
                    filepath=filepath,
                    save_weights_only=False,
                    monitor='val_mape',
                    mode='min',
                    save_best_only=True)

                # Build windows of length `md` and add the feature axis the LSTM expects.
                X, y = dataset(close_scaled, md)
                X = X.reshape(X.shape[0], X.shape[1], 1)
                # shuffle=False keeps the split chronological (last 10% is validation).
                X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.1)

                model = Sequential()
                model.add(LSTM(nn, input_shape=X.shape[1:], activation='relu', return_sequences=True))
                model.add(Dropout(0.1))  # avoid overfitting

                for i in range(ll):
                    model.add(LSTM(nn, activation='relu', return_sequences=True))
                    model.add(Dropout(0.1))

                # The last recurrent layer must collapse the time axis; with
                # return_sequences=True the network would emit one value per
                # timestep and its output would not match the shape of `y`.
                model.add(LSTM(nn, activation='relu', return_sequences=False))
                model.add(Dropout(0.1))

                for i in range(dl):
                    model.add(Dense(nn, activation='relu'))
                    model.add(Dropout(0.1))

                model.add(Dense(1))  # output layer: single linear unit for regression
                model.compile(optimizer='adam', loss='mse', metrics=['mape'])

                model.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_test, y_test), callbacks=[checkpoint])