# Part 1 - Data Preprocessing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

Importing the training set

In [2]:
# Switch the working directory to the folder that holds the CSV files so the
# relative file name below resolves.
import os
os.chdir('/Users/shiliu/Downloads')

# Read the training data and keep column 1 only. The 1:2 slice (rather than a
# plain 1) keeps the column axis, so `.values` yields a 2-D NumPy array of
# shape (rows, 1).
# NOTE(review): column 1 is presumably the 'Open' price, which the prediction
# step selects by name — confirm against the CSV header.
dataset_train = pd.read_csv('Google_Stock_Price_Train.csv')
training_set = dataset_train.iloc[:, 1:2].values

# Only the last expression of a cell is rendered, so this is the single
# display statement of the cell.
pd.DataFrame(training_set)

Unnamed: 0,0
0,325.25
1,331.27
2,329.83
3,328.34
4,322.04
5,313.70
6,310.59
7,314.43
8,311.96
9,314.81


Feature Scaling

In [3]:
from sklearn.preprocessing import MinMaxScaler

# Fit a min-max scaler with target range (0, 1) on the training prices, then
# apply it to those same prices. `sc` is kept because later cells reuse the
# fitted scaler.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(training_set)
training_set_scaled = sc.transform(training_set)

Creating a data structure with 60 timesteps and 1 output

In [4]:
# Build sliding windows over the scaled training prices: each sample holds the
# 60 values that precede row i, and its target is the value at row i.
X_train = []
y_train = []
# Iterate up to the actual number of rows instead of a hard-coded 1258, so the
# cell neither raises IndexError on a shorter file nor silently ignores the
# tail of a longer one.
for i in range(60, len(training_set_scaled)):
    X_train.append(training_set_scaled[i-60:i, 0])  # the 60 previous prices
    y_train.append(training_set_scaled[i, 0])       # the price to predict
X_train, y_train = np.array(X_train), np.array(y_train)

Reshaping

In [5]:
# Add a trailing axis of length 1 so X_train becomes 3-dimensional:
# (samples, timesteps) -> (samples, timesteps, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_train

array([[[0.08581368],
        [0.09701243],
        [0.09433366],
        ...,
        [0.07846566],
        [0.08034452],
        [0.08497656]],

       [[0.09701243],
        [0.09433366],
        [0.09156187],
        ...,
        [0.08034452],
        [0.08497656],
        [0.08627874]],

       [[0.09433366],
        [0.09156187],
        [0.07984225],
        ...,
        [0.08497656],
        [0.08627874],
        [0.08471612]],

       ...,

       [[0.92106928],
        [0.92438053],
        [0.93048218],
        ...,
        [0.95475854],
        [0.95204256],
        [0.95163331]],

       [[0.92438053],
        [0.93048218],
        [0.9299055 ],
        ...,
        [0.95204256],
        [0.95163331],
        [0.95725128]],

       [[0.93048218],
        [0.9299055 ],
        [0.93113327],
        ...,
        [0.95163331],
        [0.95725128],
        [0.93796041]]])

# Part 2 - Building the RNN

In [6]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

Using TensorFlow backend.


Initialising the RNN

In [7]:
# Create an empty Sequential model; the following cells add layers to it one
# at a time.
regressor = Sequential()

Adding the first LSTM layer and also some Dropout regularisation

In [8]:
# First LSTM layer: 50 units, returning the full output sequence. The
# per-sample input shape is (number of timesteps in X_train, 1).
regressor.add(
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1))
)
# Dropout with rate 0.2 after the layer.
regressor.add(Dropout(rate=0.2))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Adding the second LSTM layer and also some Dropout regularisation

In [None]:
# Second LSTM layer: 50 units, returning the full output sequence, followed
# by dropout with rate 0.2.
regressor.add(LSTM(50, return_sequences=True))
regressor.add(Dropout(rate=0.2))

Adding the third LSTM layer and also some Dropout regularisation

In [None]:
# Third LSTM layer: 50 units, returning the full output sequence, followed
# by dropout with rate 0.2.
regressor.add(LSTM(50, return_sequences=True))
regressor.add(Dropout(rate=0.2))

Adding the fourth LSTM layer and also some Dropout regularisation

In [None]:
# Fourth LSTM layer: 50 units with return_sequences left at its default,
# followed by dropout with rate 0.2.
regressor.add(LSTM(50))
regressor.add(Dropout(rate=0.2))

Adding the output layer

In [None]:
# Output layer: a single dense unit.
regressor.add(Dense(1))

Compiling the RNN

In [None]:
# Configure training: mean squared error as the loss, Adam as the optimizer.
regressor.compile(loss='mean_squared_error', optimizer='adam')

Fitting the RNN to the training set

In [None]:
# Train on the windowed training data for 100 epochs in batches of 32.
regressor.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

# Part 3 - Making the predictions and visualising the results

Getting the real stock price of 2017

In [None]:
# Switch the working directory to the folder that holds the CSV files so the
# relative file name below resolves even when this cell is run on its own.
import os
os.chdir('/Users/shiliu/Downloads')

# Read the test data and keep column 1 only. The 1:2 slice keeps the column
# axis, so `.values` yields a 2-D NumPy array of shape (rows, 1).
# NOTE(review): column 1 is presumably the 'Open' price, which the prediction
# step selects by name — confirm against the CSV header.
dataset_test = pd.read_csv('Google_Stock_Price_Test.csv')
real_stock_price = dataset_test.iloc[:, 1:2].values
real_stock_price

Getting the predicted stock price of 2017

In [None]:
# Stack the 'Open' columns of the training and test sets, then keep the test
# rows plus the 60 rows immediately before them: every test row needs its 60
# preceding prices as input.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']),
                          axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
# Make it a single-column 2-D array and scale it with the scaler fitted on the
# training set (transform only, no re-fit).
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)

# ---- Creating a data structure with timesteps and 1 output ---- #
# One 60-step window per test row. The upper bound follows the real length of
# `inputs` (60 + number of test rows) instead of a hard-coded 80, so the cell
# keeps working when the test file does not have exactly 20 rows.
X_test = []
for i in range(60, len(inputs)):
    X_test.append(inputs[i-60:i, 0])  # the 60 previous prices
X_test = np.array(X_test)

# ---- Reshape ---- #
# (samples, timesteps) -> (samples, timesteps, 1)
X_test = np.reshape(X_test, (X_test.shape[0],
                             X_test.shape[1],
                             1))

# Predict in scaled space, then map the predictions back to price units.
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
pd.DataFrame(predicted_stock_price)

Visualising the results

In [None]:
# Draw the real and the predicted price series on one set of axes.
fig, ax = plt.subplots()
ax.plot(real_stock_price, color='red', label='Real Google Stock Price')
ax.plot(predicted_stock_price, color='blue',
        label='Predicted Google Stock Price')
ax.set_title('Google Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Google Stock Price')
ax.legend()
plt.show()