# For this workshop we will be using Keras to quickly prototype a working stock prediction model!

In [None]:
import math
import time
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler, StandardScaler

np.set_printoptions(precision=4)

ticker = 'AAPL'

# Download the daily price history for `ticker` as CSV text.
# - `params=` lets requests build and URL-encode the query string, so tickers
#   containing special characters are handled correctly.
# - `timeout=` stops the cell from hanging forever on a stalled connection.
# - `raise_for_status()` fails loudly on an HTTP error instead of silently
#   handing an HTML error page to `pd.read_csv`.
# NOTE(review): confirm this endpoint is still served; if it is not, the
# status check below is what will surface the problem.
r = requests.get(
    "https://finance.google.com/finance/historical",
    params={"q": ticker, "startdate": "01-Jan-2008", "output": "csv"},
    timeout=30,
)
r.raise_for_status()
stock = pd.read_csv(StringIO(r.text))

stock.head()

# The Date Feature in time series serves as indexing

## For style purposes the column we are trying to predict is always the last one(s)

### Although here we are only predicting the closing price, recurrent neural networks are capable of predicting all the features in the dataset

* Unlike a traditional classification problem, the target column we are trying to predict is itself one of the input columns of the training set
* In the LSTM model, we use previous input sequences to predict future output sequences

In [None]:
# The date only indexes the rows; it is not a model feature.
# Rebinding (instead of `inplace=True`) together with `errors='ignore'` keeps
# this cell re-runnable: a second execution no longer raises KeyError because
# 'Date' is already gone.
stock = stock.drop('Date', axis=1, errors='ignore')

# Put the prediction target ('Close') in the last position by rotating the
# trailing column to the front. The rotation is skipped when 'Close' is already
# last, so re-running the cell does not shuffle the columns a second time.
cols = stock.columns.tolist()
if cols[-1] != 'Close':
    cols = cols[-1:] + cols[:-1]
stock = stock[cols]

stock.head()

# When using any algorithm that uses an activation function, your data must be normalized to values within the output range of that activation function

## When using the ReLU function, use MinMaxScaler (values between 0 and 1)

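## When using the Tanh function, scale to values between -1 and 1, e.g. MinMaxScaler(feature_range=(-1, 1)); note that StandardScaler gives zero mean and unit variance, not a bounded range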

In [None]:
# Normalizing data.
# `scale` rescales every column to [0, 1]; swap in StandardScaler() here to
# standardize (zero mean, unit variance) instead.
scale = MinMaxScaler(feature_range=(0,1)) # or StandardScaler
#scale = StandardScaler()

# `price` is fitted on the raw 'Close' column only, so that scaled closing
# prices can later be mapped back to real prices with `price.inverse_transform`.
# `Series.reshape` is not available in current pandas, so reshape the
# underlying NumPy array into the (n_samples, 1) layout scikit-learn expects.
price = MinMaxScaler(feature_range=(0,1))
price.fit(stock['Close'].values.reshape(-1, 1))

# Scale all columns, keeping the frame's own labels and index instead of a
# hard-coded list that would silently mislabel columns if their order changed.
# NOTE: this overwrites `stock` with scaled values; re-running the cell without
# re-loading the raw data would fit both scalers on already-scaled numbers.
stock = pd.DataFrame(scale.fit_transform(stock),
                     columns=stock.columns,
                     index=stock.index)

# Representing our data as a sequential model

## If we have data like

Volume | Open | High | Low | Close
--- | --- | --- | --- | ---
0.3106 | 0.1019 | 0.1004 | 0.0978 | 0.0991
0.2393 | 0.0987 | 0.0979 | 0.0979 | 0.0992
0.4237 | 0.0953 | 0.0942 | 0.0861 | 0.0866
0.3547 | 0.0436 | 0.0416 | 0.0388 | 0.0397

## suppose we choose to learn in sequences of 2 then our data will be represented like this

[0.3106 | 0.1019 | 0.1004 | 0.0978 | 0.0991

0.2393 | 0.0987 | 0.0979 | 0.0979 | 0.0992],

[0.2393 | 0.0987 | 0.0979 | 0.0979 | 0.0992

0.4237 | 0.0953 | 0.0942 | 0.0861 | 0.0866],

[0.4237 | 0.0953 | 0.0942 | 0.0861 | 0.0866

0.3547 | 0.0436 | 0.0416 | 0.0388 | 0.0397]

We'll create a new array of shape 3x2x5: [amount_of_sequences, sequence_length, amount_of_features].


In [None]:
# Two independent, initially empty module-level dictionaries.
# NOTE(review): nothing in the visible cells reads or fills them — confirm
# whether a later cell still needs them.
scalers, prices = {}, {}

def load_data(stock, seq_len, split):
    """Cut a feature table into overlapping windows and split them into train/test.

    Every window spans ``seq_len + 1`` consecutive rows: the first ``seq_len``
    rows (all columns) are the model input, and the last column of the final
    row is the value to predict.

    Parameters
    ----------
    stock : pandas.DataFrame or 2-D array-like
        One row per time step, one column per feature. The prediction target
        must be the last column.
    seq_len : int
        Number of time steps in each input sequence.
    split : float
        Fraction of the windows (taken from the start) used for training;
        the remaining windows form the test set.

    Returns
    -------
    list
        ``[x_train, y_train, x_test, y_test]`` where the ``x_*`` arrays have
        shape ``(n_windows, seq_len, n_features)`` and the ``y_*`` arrays have
        shape ``(n_windows,)``.

    Raises
    ------
    ValueError
        If the input is not 2-D or has fewer than ``seq_len + 1`` rows, i.e.
        not even one complete window can be built.
    """
    # np.asarray works for DataFrames and plain arrays alike
    # (`DataFrame.as_matrix()` was removed in pandas 1.0).
    data = np.asarray(stock)
    if data.ndim != 2:
        raise ValueError("stock must be 2-D (rows = time steps, columns = features)")

    sequence_length = seq_len + 1  # seq_len input rows + 1 target row

    # `+ 1` so that the window ending on the very last row is included;
    # without it the most recent sample would be silently dropped.
    n_windows = len(data) - sequence_length + 1
    if n_windows < 1:
        raise ValueError(
            "need at least %d rows to build one window, got %d"
            % (sequence_length, len(data))
        )

    result = np.array([data[start:start + sequence_length]
                       for start in range(n_windows)])

    # Chronological split: the first `split` share of windows is for training.
    row = int(len(result) * split)

    x_train = result[:row, :-1]
    y_train = result[:row, -1, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1, -1]

    return [x_train, y_train, x_test, y_test]

# What is a Recurrent Neural Network?

![alt text](recurrent_cell.PNG "Title")

Unlike regular feed-forward networks, the output of a layer in a recurrent neural network feeds back into the layer itself

![alt text](recurrent_network.PNG "Title")

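The problem with recurrent neural networks, however, is that the gradient used for the weight update is a product of many factors, one per time step, so it shrinks (or grows) exponentially with the length of the sequence; when it shrinks, the weights barely update, which is known as the vanishing gradient problem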

![alt text](vanashing_RNN.PNG "Title")

The popularity of Recurrent Neural Networks has been resurging thanks to the invention of this new architecture

![alt text](LSTM_cell.PNG "Title")

LSTM mitigates the issue of vanishing gradients by making only linear (additive, gated) changes to the cell state C_t, so that information and gradients can be carried across many time steps inside the cell

In [None]:
def build_model(layers):
    """Build and compile a two-layer stacked LSTM regression model.

    Parameters
    ----------
    layers : sequence of int
        ``layers[0]`` — number of features per time step (input dimension of
        the first LSTM layer);
        ``layers[1]`` — number of units in the first LSTM layer;
        ``layers[2]`` — number of units in the second LSTM layer and in the
        final Dense output layer.

    Returns
    -------
    keras.models.Sequential
        The compiled model (MSE loss, RMSprop optimizer). Because a metric is
        configured, ``model.evaluate`` returns ``[loss, mae]``.
    """
    model = Sequential()

    # First recurrent layer returns the full sequence so that it can feed the
    # second recurrent layer. The unit count is passed positionally, which is
    # accepted by both the Keras 1 (`output_dim`) and Keras 2 (`units`) APIs.
    model.add(LSTM(layers[1], input_dim=layers[0], return_sequences=True))
    model.add(Dropout(0.2))

    # Second recurrent layer keeps only its final output.
    model.add(LSTM(layers[2], return_sequences=False))
    model.add(Dropout(0.2))

    # NOTE: a ReLU output cannot be negative, which suits targets scaled to
    # [0, 1]; it would clip negative targets if the data were standardized.
    model.add(Dense(layers[2]))
    model.add(Activation("relu"))

    start = time.time()
    # Mean absolute error replaces 'accuracy', which is meaningless for a
    # continuous regression target. Keeping one metric preserves the
    # list-shaped return value of `model.evaluate`.
    model.compile(loss="mse", optimizer="rmsprop", metrics=['mae'])
    print("Compilation Time : ", time.time() - start)
    return model

In [None]:
# Layer sizes passed to build_model: [features per time step, units in the
# first LSTM layer, output size]. The feature count is read from `stock`
# (the same source load_data uses) instead of being hard-coded to 5.
# NOTE(review): `window` is not defined in any cell shown above — it must be
# set (together with the load_data call) before this cell runs.
model = build_model([len(stock.columns), window, 1])

In [None]:
# Score the model on both splits. The model is compiled with loss="mse" plus a
# metric, so `evaluate` returns a list whose first entry is the MSE loss.
trainScore = model.evaluate(X_train, y_train, verbose=0)
train_mse = trainScore[0]
print('Train Score: %.2f MSE (%.2f RMSE)' % (train_mse, math.sqrt(train_mse)))

testScore = model.evaluate(X_test, y_test, verbose=0)
test_mse = testScore[0]
print('Test Score: %.2f MSE (%.2f RMSE)' % (test_mse, math.sqrt(test_mse)))

In [None]:
# Run `model` over every window in X_test; `pred` is compared with y_test in
# the plotting cell below.
pred = model.predict(X_test)

In [None]:
# The model works on min-max scaled values. Map predictions and targets back
# to real prices with the `price` scaler (fitted on the raw 'Close' column) so
# that the y-axis really shows a price per stock.
# NOTE: this assumes `scale` and `price` apply the same transform to 'Close',
# which holds while both are MinMaxScaler(feature_range=(0, 1)).
pred_price = price.inverse_transform(np.asarray(pred).reshape(-1, 1))
real_price = price.inverse_transform(np.asarray(y_test).reshape(-1, 1))

fig, ax = plt.subplots(figsize=(15, 4), dpi=100)

ax.plot(pred_price, color='red', label='predicted price')
ax.plot(real_price, color='black', label='real price')

# Index 0 is the first sample of the test split, not the first day of the data.
ax.set_xlabel('test-set sample index')
ax.set_ylabel('price per stock')
ax.set_title('Predicted vs. real closing price (test set)')
ax.legend(loc='upper left')
plt.show()