In [26]:
import pandas as pd
import numpy as np

from sklearn import preprocessing   # Scaling

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae

from statsmodels.tsa.stattools import pacf

## Neural Network

from keras.models import Sequential    # For linear stacking of layers

from keras.optimizers import SGD    # Optimization: sgd stands for stochastic gradient descent

from keras.layers.core import Dense, Activation    # An Activation fuction and a Dense layer for output layer.

from keras.layers.recurrent import SimpleRNN    # A fully connected RNN where the output is to be fed to input layer.

In [6]:
temp = pd.read_csv('google.csv')

In [7]:
temp.head()

Unnamed: 0,date,open,high,low,volume,close,direction
0,03-01-2007,231.494354,236.789917,229.065155,15513200.0,232.28421,0
1,04-01-2007,232.984665,240.411362,232.661758,15877700.0,240.068588,1
2,05-01-2007,239.69104,242.174881,237.510223,13833500.0,242.020889,1
3,08-01-2007,242.269272,243.352234,239.542007,9570600.0,240.227554,0
4,09-01-2007,241.156509,242.54747,239.045242,10832700.0,241.181351,1


In [8]:
temp.tail()

Unnamed: 0,date,open,high,low,volume,close,direction
2825,23-03-2018,1047.030029,1063.359985,1021.219971,2156700.0,1021.570007,0
2826,26-03-2018,1046.0,1055.630005,1008.400024,2665100.0,1053.209961,1
2827,27-03-2018,1063.0,1064.838989,996.919983,3095300.0,1005.099976,0
2828,28-03-2018,998.0,1024.22998,980.640015,3369300.0,1004.559998,0
2829,29-03-2018,1011.630005,1043.0,1002.900024,2702200.0,1031.790039,1


In [9]:
## Dropping Date Column
data = temp.drop(temp.columns[[0]], axis=1)

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2830 entries, 0 to 2829
Data columns (total 6 columns):
open         2830 non-null float64
high         2830 non-null float64
low          2830 non-null float64
volume       2830 non-null float64
close        2830 non-null float64
direction    2830 non-null int64
dtypes: float64(5), int64(1)
memory usage: 132.7 KB


In [11]:
target = data['close']

In [12]:
x = data.drop(data.columns[[4,5]], axis=1)

x.head()

Unnamed: 0,open,high,low,volume
0,231.494354,236.789917,229.065155,15513200.0
1,232.984665,240.411362,232.661758,15877700.0
2,239.69104,242.174881,237.510223,13833500.0
3,242.269272,243.352234,239.542007,9570600.0
4,241.156509,242.54747,239.045242,10832700.0


# Data Preprocessing

## Transformations

- Step 1: Apply Log transformation
- Step 2: Reshape Array as a 2-D so that we can apply scaler function
- Step 3: Reshape Array to 1-D

In [13]:
## Step 1:
x = x.apply(np.log)

x = pd.concat([x, data['direction']], axis=1)

In [15]:
scaler_x = preprocessing.MinMaxScaler(feature_range=(0,1))    # Rescaling the parameters

scaler_y = preprocessing.MinMaxScaler(feature_range=(0,1))    # Rescaling the parameters

In [16]:
### Reshape

# In order to use fit_transform we need to reshape 'x' and 'y' to 2-D Array

x = np.array(x).reshape((len(x), 5))

x = scaler_x.fit_transform(x)

y = np.array(target).reshape((len(target),1))

y = np.log(y)

y = scaler_y.fit_transform(y)

## Assessing Partial Auto-Correlation

- A Partial Auto-Correlation is the amount of Linear Correlation between an observation x<sub>t</sub> and a lag of itself (say x<sub>t-k</sub> ) that is **not explained by correlations of the obervations in between.**

In [None]:
x_pacf = pacf(pacf1, nlags=6, method='ols')

print(x_pacf)

#### Train- Test Division

In [59]:
end =  len(x) -1 
learn_end = int(end*0.95)

x_train = x[0:learn_end - 1,]

x_test = x[learn_end:end-1, ]

In [65]:
y_train = y[1:learn_end]

y_test = y[learn_end+1:end]

In [70]:
### Reshape for Keras

x_train = x_train.reshape(x_train.shape + (1,))

x_test = x_test.reshape(x_test.shape + (1,))

### Modeling

In [82]:
### Model Structure

seed = 666
np.random.seed(seed)

fit1 = Sequential()

### Keras 2 API: output_dim is changed to units
fit1.add(SimpleRNN(units =5, activation = "tanh", input_shape = (5,1)))
fit1.add(Dense(units=1, activation = "linear"))

In [83]:
#### Choosing the momentum

sgd = SGD(lr=0.0001, momentum=0.8, nesterov=True)

fit1.compile(loss="mean_squared_error", optimizer=sgd)

### Model Training

In [84]:
fit1.fit(x_train, y_train, batch_size=10, epochs=500)

Instructions for updating:
Use tf.cast instead.
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500

Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 

Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 

<keras.callbacks.History at 0x1fc6117ca90>

In [85]:
score_train = fit1.evaluate(x_train, y_train, batch_size=10)



In [86]:
score_test = fit1.evaluate(x_test, y_test, batch_size=10)



### Prediction

In [90]:
pred1 =  fit1.predict(x_test)

pred1 = scaler_y.inverse_transform(np.array(pred1).reshape((len(pred1),1)))

In [91]:
pred1 = np.exp(pred1)

In [101]:
## Transforming to original value
y_true = scaler_y.inverse_transform(np.array(y_test).reshape((len(y_test),1)))

y_true = np.exp(y_true)

In [106]:
### Model Evaluation
np.sqrt(mse(y_true, pred1))

141.279618336662