In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Lift the pandas column/row display limits and disable NumPy's array
# summarisation so that frames and arrays are always printed in full.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)
np.set_printoptions(threshold=np.inf)

### 1) Loading data

In [3]:
# Location of the training CSV, resolved against the current working directory.
# os.path.join is used instead of string concatenation so the separator is
# handled by the standard library rather than hard-coded.
train_data_path = os.path.join(os.getcwd(), "data", "google_stock_price_Train.csv")

In [4]:
# Read the training CSV and keep only the column at position 1 as a NumPy array.
# Slicing with 1:2 (rather than indexing with 1) keeps the result 2-D, i.e.
# shape (n_rows, 1), which is the layout the scaler further down is fitted on.
# NOTE(review): column 1 is presumably the price series of interest - confirm
# against the CSV header.
raw_data_train = pd.read_csv(train_data_path)
data_train = raw_data_train.iloc[:, 1:2].values
print(data_train.shape)

(1258, 1)


### 2) Feature Scaling using Normalization

In [5]:
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Create a min-max scaler targeting the [0, 1] range and fit it on the training
# column in one step (fit() returns the fitted scaler itself).
sc = MinMaxScaler(feature_range=(0, 1)).fit(data_train)

# Report the extremes the scaler learned; index 0 selects the single feature.
print("max of observed values:", sc.data_max_[0])
print("min of observed values:", sc.data_min_[0])


max of observed values: 816.68
min of observed values: 279.12


In [7]:
# The scaler has already been fitted on data_train in the previous cell, so
# only apply the transformation here instead of fitting it a second time.
data_train_scaled = sc.transform(data_train)

In [8]:
# Display the first six rows of the scaled array.
data_train_scaled[:6]

array([[0.08581368],
       [0.09701243],
       [0.09433366],
       [0.09156187],
       [0.07984225],
       [0.0643277 ]])

### 3) Data structure with 60 timesteps and 1 output

#### i. The RNN learns from the 60 previous stock-price timesteps to find patterns/trends and, based on its understanding, predicts the next output, i.e. the stock price at time t+1

In [9]:
# Accumulators for the sliding windows: X_train collects the 60 preceding
# scaled prices of each sample, y_train the price that follows them.
X_train, y_train = [], []

In [10]:
# Build the supervised samples: each input is the previous `n_timesteps`
# scaled prices and its target is the scaled price that immediately follows.
# The upper bound comes from the data instead of a hard-coded row count, and
# the arrays are built from scratch here (not appended to lists created in
# another cell), so this cell can be re-run without errors.
n_timesteps = 60
n_rows = len(data_train_scaled)
window_ends = range(n_timesteps, n_rows)

X_train = np.array([data_train_scaled[end - n_timesteps:end, 0] for end in window_ends])
y_train = np.array([data_train_scaled[end, 0] for end in window_ends])

In [11]:
# Preview the first three input windows with their targets, then both shapes.
# y_train holds the training targets (the value following each window), so it
# is labelled as such instead of "Test data".
print("Training data", X_train[:3])
print("Training targets", y_train[:3])
print(X_train.shape)
print(y_train.shape)

Training data [[0.08581368 0.09701243 0.09433366 0.09156187 0.07984225 0.0643277
  0.0585423  0.06568569 0.06109085 0.06639259 0.0614257  0.07474514
  0.02797827 0.02379269 0.02409033 0.0159238  0.01078949 0.00967334
  0.01642607 0.02100231 0.02280676 0.02273235 0.02810849 0.03212665
  0.0433812  0.04475779 0.04790163 0.0440695  0.04648783 0.04745517
  0.04873875 0.03936305 0.04137213 0.04034898 0.04784582 0.04325099
  0.04356723 0.04286033 0.04602277 0.05398467 0.05738894 0.05714711
  0.05569611 0.04421832 0.04514845 0.04605997 0.04412531 0.03675869
  0.04486941 0.05065481 0.05214302 0.05612397 0.05818885 0.06540665
  0.06882953 0.07243843 0.07993526 0.07846566 0.08034452 0.08497656]
 [0.09701243 0.09433366 0.09156187 0.07984225 0.0643277  0.0585423
  0.06568569 0.06109085 0.06639259 0.0614257  0.07474514 0.02797827
  0.02379269 0.02409033 0.0159238  0.01078949 0.00967334 0.01642607
  0.02100231 0.02280676 0.02273235 0.02810849 0.03212665 0.0433812
  0.04475779 0.04790163 0.0440695  0

In [13]:
# Add a trailing axis of length 1 so X_train goes from 2-D to 3-D:
# (number of entries, number of timesteps, number of indicators/predictors).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
print(X_train.shape)

(1198, 60, 1)
