### Import Libraries

In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Load the Data

In [79]:
# Read the training CSV into a DataFrame and display it.
# NOTE(review): the path below is an absolute, machine-specific location.
train_csv_path = '/Users/kyotun/Desktop/ML/stock-price/csv/Google_Stock_Price_Train.csv'
data_set_train = pd.read_csv(filepath_or_buffer=train_csv_path)
data_set_train

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2012,325.25,332.83,324.97,663.59,7380500
1,1/4/2012,331.27,333.87,329.08,666.45,5749400
2,1/5/2012,329.83,330.75,326.89,657.21,6590300
3,1/6/2012,328.34,328.77,323.68,648.24,5405900
4,1/9/2012,322.04,322.29,309.46,620.76,11688800
...,...,...,...,...,...,...
1253,12/23/2016,790.90,792.74,787.28,789.91,623400
1254,12/27/2016,790.68,797.86,787.66,791.55,789100
1255,12/28/2016,793.70,794.23,783.20,785.05,1153800
1256,12/29/2016,783.33,785.93,778.92,782.79,744300


In [80]:
# Keep only the 'Open' price column as a single-column DataFrame, so the
# data stays 2-D for the scaling step. Selecting the column by name (instead
# of by position) matches how 'Open' is referenced later in the notebook and
# does not depend on the column order of the CSV.
training_set = data_set_train[['Open']]
training_set

Unnamed: 0,Open
0,325.25
1,331.27
2,329.83
3,328.34
4,322.04
...,...
1253,790.90
1254,790.68
1255,793.70
1256,783.33


### Feature Scaling 

In [81]:
# Two common feature-scaling approaches are normalization (min-max) and
# standardization; this example uses normalization.
from sklearn.preprocessing import MinMaxScaler

# Rescale values into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

In [82]:
# Fit the scaler on the training prices and scale them in the same step.
scaled_train_set = scaler.fit_transform(X=training_set)
scaled_train_set

array([[8.581e-02],
       [9.701e-02],
       [9.433e-02],
       [9.156e-02],
       [7.984e-02],
       [6.433e-02],
       [5.854e-02],
       [6.569e-02],
       [6.109e-02],
       [6.639e-02],
       [6.143e-02],
       [7.475e-02],
       [2.798e-02],
       [2.379e-02],
       [2.409e-02],
       [1.592e-02],
       [1.079e-02],
       [9.673e-03],
       [1.643e-02],
       [2.100e-02],
       [2.281e-02],
       [2.273e-02],
       [2.811e-02],
       [3.213e-02],
       [4.338e-02],
       [4.476e-02],
       [4.790e-02],
       [4.407e-02],
       [4.649e-02],
       [4.746e-02],
       [4.874e-02],
       [3.936e-02],
       [4.137e-02],
       [4.035e-02],
       [4.785e-02],
       [4.325e-02],
       [4.357e-02],
       [4.286e-02],
       [4.602e-02],
       [5.398e-02],
       [5.739e-02],
       [5.715e-02],
       [5.570e-02],
       [4.422e-02],
       [4.515e-02],
       [4.606e-02],
       [4.413e-02],
       [3.676e-02],
       [4.487e-02],
       [5.065e-02],


### Create A Data Structure

In [83]:
# Sliding-window configuration and empty containers for the samples.
size_of_box = 60                        # number of past values per sample
size_of_data = len(scaled_train_set)    # total number of scaled rows
X_train, y_train = [], []

# Print arrays with 3 decimals and without truncating long arrays.
np.set_printoptions(threshold=np.inf, precision=3)

In [84]:
# Build the supervised samples with a sliding window: each X_train row holds
# the `size_of_box` scaled values preceding position i, and the matching
# y_train entry is the scaled value at position i (the value to predict).
# The lists are rebuilt from scratch here, so re-running this cell does not
# append duplicate samples to lists filled by a previous run.
window_ends = range(size_of_box, size_of_data)
X_train = [scaled_train_set[i - size_of_box:i, 0] for i in window_ends]
y_train = [scaled_train_set[i, 0] for i in window_ends]

In [85]:
# Turn the Python lists of samples and targets into NumPy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

### Reshaping

In [86]:
# Append a trailing axis of length 1 so X_train becomes 3-D:
# (number of samples, window length, 1 value per step).
n_samples, n_steps = X_train.shape[0], X_train.shape[1]
X_train = X_train.reshape(n_samples, n_steps, 1)

### Create the RNN

In [87]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [88]:
# The target is a continuous value, so the model is a regressor.
hidden_layer_size = 50    # number of units used by each LSTM layer
regressor = Sequential()

In [90]:
# First LSTM layer: it declares the input shape (window length, 1) and
# returns the full sequence so that another LSTM layer can follow.
regressor.add(
    LSTM(
        units=hidden_layer_size,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    )
)

# Dropout: 20% of the LSTM layer's neurons are ignored during training
# (in both forward and back propagation).
regressor.add(Dropout(rate=0.2))

In [91]:
# Add another LSTM layer. input_shape is not needed any more because it
# was already specified on the first hidden layer.
regressor.add(LSTM(units=hidden_layer_size, return_sequences=True))
regressor.add(Dropout(rate=0.2))

In [92]:
# Another stacked LSTM layer (still returning sequences), followed by dropout.
regressor.add(LSTM(units=hidden_layer_size, return_sequences=True))
regressor.add(Dropout(rate=0.2))

In [93]:
# Last LSTM layer: the output layer comes right after it, so it does not
# return sequences (return_sequences=False).
regressor.add(LSTM(units=hidden_layer_size, return_sequences=False))
regressor.add(Dropout(rate=0.2))

In [94]:
# Output layer: a single unit for the predicted stock price.
regressor.add(Dense(1))

In [95]:
# Compile the RNN: choose the optimizer and the loss function.
# Adam is used here; RMSprop is the alternative that was considered.
regressor.compile(loss='mean_squared_error', optimizer='adam')

### Fit the RNN to the Training Set

In [97]:
# Train on the windowed training data: 100 epochs, batches of 32 samples.
regressor.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x28f4ff8d0>

### Get the Test Data

In [100]:
# Read the test CSV into a DataFrame and display it.
# NOTE(review): the path below is an absolute, machine-specific location.
test_csv_path = '/Users/kyotun/Desktop/ML/stock-price/csv/Google_Stock_Price_Test.csv'
data_set_test = pd.read_csv(filepath_or_buffer=test_csv_path)
data_set_test

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1/3/2017,778.81,789.63,775.8,786.14,1657300
1,1/4/2017,788.36,791.34,783.16,786.9,1073000
2,1/5/2017,786.08,794.48,785.02,794.02,1335200
3,1/6/2017,795.26,807.9,792.2,806.15,1640200
4,1/9/2017,806.4,809.97,802.83,806.65,1272400
5,1/10/2017,807.86,809.13,803.51,804.79,1176800
6,1/11/2017,805.0,808.15,801.37,807.91,1065900
7,1/12/2017,807.14,807.39,799.17,806.36,1353100
8,1/13/2017,807.48,811.22,806.69,807.88,1099200
9,1/17/2017,807.08,807.14,800.37,804.61,1362100


In [101]:
# Actual 'Open' prices of the test set as a single-column 2-D array.
# These real values are never scaled.
# The column is selected by name (instead of by position) so the selection
# matches the `data_set_test['Open']` access used elsewhere in the notebook
# and does not depend on the column order of the CSV.
real_stock_price = data_set_test[['Open']].values
real_stock_price

array([[778.81],
       [788.36],
       [786.08],
       [795.26],
       [806.4 ],
       [807.86],
       [805.  ],
       [807.14],
       [807.48],
       [807.08],
       [805.81],
       [805.12],
       [806.91],
       [807.25],
       [822.3 ],
       [829.62],
       [837.81],
       [834.71],
       [814.66],
       [796.86]])

### Prepare the Data for Predictions

In [115]:
# Concatenate the 'Open' prices of the training and test sets (train first).
# Care is needed here: only the model inputs taken from this series are
# scaled, while the actual test values must stay unchanged.
dataset_total = pd.concat(
    [data_set_train['Open'], data_set_test['Open']],
    axis=0,
)
dataset_total

0     325.25
1     331.27
2     329.83
3     328.34
4     322.04
       ...  
15    829.62
16    837.81
17    834.71
18    814.66
19    796.86
Name: Open, Length: 1278, dtype: float64

In [120]:
# Take the test rows plus the `size_of_box` values just before them, so the
# first test row also has a full look-back window.
# `size_of_box` replaces the hard-coded 60 so this slice stays in sync with
# the window size used to build the training samples, and `.iloc` makes the
# positional slicing explicit.
first_input_pos = len(dataset_total) - len(data_set_test) - size_of_box
inputs = dataset_total.iloc[first_input_pos:].values
inputs

array([779.  , 779.66, 777.71, 786.66, 783.76, 781.22, 781.65, 779.8 ,
       787.85, 798.24, 803.3 , 795.  , 804.9 , 816.68, 806.34, 801.  ,
       808.35, 795.47, 782.89, 778.2 , 767.25, 750.66, 774.5 , 783.4 ,
       779.94, 791.17, 756.54, 755.6 , 746.97, 755.2 , 766.92, 771.37,
       762.61, 772.63, 767.73, 764.26, 760.  , 771.53, 770.07, 757.44,
       744.59, 757.71, 764.73, 761.  , 772.48, 780.  , 785.04, 793.9 ,
       797.4 , 797.34, 800.4 , 790.22, 796.76, 795.84, 792.36, 790.9 ,
       790.68, 793.7 , 783.33, 782.75, 778.81, 788.36, 786.08, 795.26,
       806.4 , 807.86, 805.  , 807.14, 807.48, 807.08, 805.81, 805.12,
       806.91, 807.25, 822.3 , 829.62, 837.81, 834.71, 814.66, 796.86])

In [121]:
inputs = inputs.reshape(-1,1) # turn the 1-D inputs into a single-column 2-D array (not 3-D)

In [122]:
# Only `transform` is called here (no fit), so the existing `scaler` is reused as is.
# NOTE: this overwrites `inputs`, so running the cell a second time would
# scale the already scaled values again.
inputs = scaler.transform(inputs) # scale the inputs



In [123]:
# Window bounds for the test samples: one window per test row, starting
# right after the first `size_of_box` input values.
lower_end = size_of_box
higher_end = lower_end + len(data_set_test)
X_test = []

In [124]:

# Each test sample is the `lower_end` scaled values preceding position i
# (with lower_end = 60: positions 0-59 for the first sample, 1-60 for the
# second, and so on).
# The list is rebuilt from scratch here, so re-running this cell does not
# append duplicate windows to a list filled by a previous run.
X_test = [inputs[i - lower_end:i, 0] for i in range(lower_end, higher_end)]

In [125]:
# Stack the windows into an array, then add a trailing axis of length 1 so
# X_test is 3-D: (number of samples, window length, 1).
X_test = np.array(X_test)
n_test_samples, n_test_steps = X_test.shape[0], X_test.shape[1]
X_test = X_test.reshape(n_test_samples, n_test_steps, 1)

In [126]:
# The model predicts one output per input window (inputs 0-59, 1-60, 2-61, ...).
scaled_predictions = regressor.predict(X_test)
# Invert the scaling so the predictions are expressed as real price values.
predicted_stock_price = scaler.inverse_transform(scaled_predictions)



NameError: name 'sc' is not defined