### Import Libraries

In [78]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Load the Data

In [79]:
# Location of the training CSV. The original local path stays the default so
# existing runs are unaffected; set the GOOGLE_STOCK_TRAIN_CSV environment
# variable to point the notebook at a copy of the file stored elsewhere.
train_csv_path = os.environ.get(
    'GOOGLE_STOCK_TRAIN_CSV',
    '/Users/kyotun/Desktop/ML/stock-price/csv/Google_Stock_Price_Train.csv',
)
data_set_train = pd.read_csv(train_csv_path)
data_set_train

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800
...,...,...,...,...,...,...
1253,12/23/2016,790.90,792.74,787.28,789.91,623400
1254,12/27/2016,790.68,797.86,787.66,791.55,789100
1255,12/28/2016,793.70,794.23,783.20,785.05,1153800
1256,12/29/2016,783.33,785.93,778.92,782.79,744300


In [80]:
# Keep only the opening price, as a one-column DataFrame (the scaler is later
# fitted on this 2-D object). The column is selected by label rather than by
# position so a change in the CSV column order raises a KeyError instead of
# silently training on a different column.
training_set = data_set_train[['Open']]
training_set

Unnamed: 0,Open
0,325.25
1,331.27
2,329.83
3,328.34
4,322.04
...,...
1253,790.90
1254,790.68
1255,793.70
1256,783.33


### Feature Scaling 

In [81]:
# Two common feature-scaling approaches are normalization (min-max) and
# standardization; this notebook uses normalization.
from sklearn.preprocessing import MinMaxScaler

# Rescale each feature into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))

In [82]:
# Learn the scaling parameters from the training prices, then apply them
scaler.fit(training_set)
scaled_train_set = scaler.transform(training_set)
scaled_train_set

array([[8.581e-02],
       [9.701e-02],
       [9.433e-02],
       [9.156e-02],
       [7.984e-02],
       [6.433e-02],
       [5.854e-02],
       [6.569e-02],
       [6.109e-02],
       [6.639e-02],
       [6.143e-02],
       [7.475e-02],
       [2.798e-02],
       [2.379e-02],
       [2.409e-02],
       [1.592e-02],
       [1.079e-02],
       [9.673e-03],
       [1.643e-02],
       [2.100e-02],
       [2.281e-02],
       [2.273e-02],
       [2.811e-02],
       [3.213e-02],
       [4.338e-02],
       [4.476e-02],
       [4.790e-02],
       [4.407e-02],
       [4.649e-02],
       [4.746e-02],
       [4.874e-02],
       [3.936e-02],
       [4.137e-02],
       [4.035e-02],
       [4.785e-02],
       [4.325e-02],
       [4.357e-02],
       [4.286e-02],
       [4.602e-02],
       [5.398e-02],
       [5.739e-02],
       [5.715e-02],
       [5.570e-02],
       [4.422e-02],
       [4.515e-02],
       [4.606e-02],
       [4.413e-02],
       [3.676e-02],
       [4.487e-02],
       [5.065e-02],


### Create A Data Structure

In [83]:
# Window length: how many consecutive scaled values form one training sample
size_of_box = 60
# Total number of scaled observations available
size_of_data = scaled_train_set.shape[0]

# Containers for the input windows and their targets
X_train, y_train = [], []

# Global NumPy display options: 3 decimals, and threshold=np.inf turns off
# summarization so arrays are printed in full
np.set_printoptions(threshold=np.inf, precision=3)

In [84]:
# Build the sliding-window samples. Both lists are rebuilt from scratch here,
# so re-running this cell cannot append duplicate samples (and it no longer
# fails if X_train / y_train were already converted to arrays).
window_ends = range(size_of_box, size_of_data)
# Each X_train row holds the size_of_box scaled values that precede index `end`
X_train = [scaled_train_set[end - size_of_box:end, 0] for end in window_ends]
# The matching y_train entry is the scaled value at index `end` itself
y_train = [scaled_train_set[end, 0] for end in window_ends]

In [85]:
# Convert the collected windows and targets into NumPy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)

### Reshaping

In [86]:
# Add a trailing axis of length 1 so X_train becomes 3-D:
# (number of samples, window length, 1). The final axis is the per-timestep
# feature dimension that the LSTM input_shape below declares as 1.
n_samples, n_timesteps = X_train.shape[0], X_train.shape[1]
X_train = X_train.reshape(n_samples, n_timesteps, 1)

### Create the RNN

In [87]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [88]:
# Number of units used by each LSTM layer below
hidden_layer_size = 50

# The target is a continuous value, so the network is a regressor; start from
# an empty Sequential model and stack layers onto it
regressor = Sequential()

In [90]:
# First LSTM layer. It declares the input shape (window length, 1 feature) and
# returns the full output sequence so another LSTM layer can be stacked on top.
first_lstm = LSTM(
    units=hidden_layer_size,
    return_sequences=True,
    input_shape=(X_train.shape[1], 1),
)
regressor.add(first_lstm)

# Dropout regularization: during training, 20% of this layer's outputs are
# randomly zeroed on each update
regressor.add(Dropout(rate=0.2))

In [91]:
# Second LSTM layer. input_shape is not repeated here: it was already given on
# the first layer.
second_lstm = LSTM(units=hidden_layer_size, return_sequences=True)
regressor.add(second_lstm)
regressor.add(Dropout(rate=0.2))

In [92]:
# Third LSTM layer, still returning sequences for the LSTM layer that follows
third_lstm = LSTM(units=hidden_layer_size, return_sequences=True)
regressor.add(third_lstm)
regressor.add(Dropout(rate=0.2))

In [93]:
# Last LSTM layer: the output layer comes next, so return_sequences is False
# and only the final output of the sequence is passed on
last_lstm = LSTM(units=hidden_layer_size, return_sequences=False)
regressor.add(last_lstm)
regressor.add(Dropout(rate=0.2))

In [94]:
# Output layer: a single unit producing the predicted (scaled) stock price
output_layer = Dense(units=1)
regressor.add(output_layer)

In [95]:
# Compile the RNN: Adam optimizer (RMSprop being the alternative considered),
# minimizing mean squared error
regressor.compile(loss='mean_squared_error', optimizer='adam')