## Imports

In [15]:
#  IMPORTING LIBRARIES
import datetime as dt
import numpy as np
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')

import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

#  Applying settings to plotly.offline for visualization
init_notebook_mode(connected=True)

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
import sklearn.metrics

import sklearn
from sklearn import preprocessing

from statsmodels.tools.eval_measures import rmse
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

from keras.preprocessing.sequence import TimeseriesGenerator

## Import Data

In [16]:
#  Load the raw data set from a CSV file (relative path, resolved against the working directory).
#  Example import, needs to be updated accordingly...
#  The cells below expect this frame to provide a 'DateTime' column (used as the index)
#  and a 'Close' column (used to build the forecast label).
df = pd.read_csv("MockData.csv")

## Functions

In [17]:
def calculate_Pi(df):
    """Flag, row by row, whether the forecast moved in the same direction as the expected output.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Expected_Output' and 'Forecast'.

    Returns
    -------
    pandas.Series
        Float series on the index of ``df``: 1.0 (true) where the change from the
        previous row has the same sign for 'Forecast' and 'Expected_Output'
        (i.e. the product of both changes is strictly positive), 0.0 (false) otherwise.

    Notes
    -----
    Rows whose change is undefined - the first row, or any row adjacent to a
    missing value - are scored 0.0. A comparison against NaN is always False,
    so chaining ``Series.where`` calls on the raw product would silently turn
    those rows into hits and inflate the hit ratio.
    """
    expected_change = df['Expected_Output'].diff()
    forecast_change = df['Forecast'].diff()

    # NaN > 0 evaluates to False, so undefined rows end up as 0.0 (no hit).
    same_direction = (expected_change * forecast_change) > 0
    return same_direction.astype(float)

def calculate_hit_ratio(Pi):
    """Return the hit ratio of the prediction series Pi: the sum of Pi divided by its length."""

    hits = Pi.sum()
    observations = len(Pi)
    return hits / observations

def calculate_rmse(prediction, labels):
    """Calculate the Root Mean Square Error between predictions and labels.

    Parameters
    ----------
    prediction, labels : array-like
        Predicted and true values. Both are flattened and compared
        position by position, so a column vector of shape (n, 1) can be
        scored against a flat vector of shape (n,) without NumPy broadcasting
        the difference to an (n, n) matrix.

    Returns
    -------
    numpy.float64
        sqrt(mean((prediction - labels) ** 2)).
    """
    predicted = np.asarray(prediction, dtype=float).ravel()
    actual = np.asarray(labels, dtype=float).ravel()

    # Square the individual errors BEFORE averaging; squaring the mean error
    # instead lets positive and negative errors cancel out.
    squared_errors = (predicted - actual) ** 2
    return np.sqrt(np.mean(squared_errors))

def calculate_r_squared(prediction, labels):
    """Return the squared Pearson correlation between predictions and labels.

    Note: this is the square of the correlation coefficient taken from
    ``np.corrcoef``; it is not computed via ``sklearn.metrics.r2_score``.
    """

    # Off-diagonal entry of the 2x2 correlation matrix = correlation(prediction, labels).
    pearson_r = np.corrcoef(prediction, labels)[0, 1]
    return pearson_r**2

## Prediction Models
### Linear Regression Model

In [18]:
def calculate_lrm_stats(_days, _df):
    """Fit a linear regression that forecasts 'Close' `_days` rows ahead and score it.

    Parameters
    ----------
    _days : int
        Forecast horizon: the label of each row is the 'Close' value
        `_days` rows later.
    _df : pandas.DataFrame
        Input data containing a 'Close' column; every column is used as a
        feature. The frame is NOT modified - the label column is added to,
        and incomplete rows are dropped from, an internal copy only.

    Returns
    -------
    tuple
        (forecast_days, hit_ratio, rmse, train_r2, test_r2, results, Pi) where
        `results` is a DataFrame indexed by 'DateTime' with the columns
        'Forecast' and 'Expected_Output' for the test rows, and `Pi` is the
        per-row direction-hit series computed from `results`.
    """

    forecast_days = _days

    # Work on a derived frame so the caller's DataFrame keeps all of its rows
    # and does not gain a 'Label' column as a side effect.
    data = _df.assign(Label=_df['Close'].shift(-forecast_days)).dropna(how="any")
    X = np.array(data.drop(columns=['Label']))
    y = np.array(data['Label'])

    # Only the final TimeSeriesSplit fold is used: it trains on the earliest
    # rows and tests on the most recent block.
    train_index, test_index = list(TimeSeriesSplit(n_splits=5).split(X))[-1]
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = LinearRegression()
    model.fit(X_train, y_train)

    forecast = model.predict(X_test)
    n = len(forecast)
    # The last fold's test rows are the final n rows of `data`, so their index
    # labels are the last n index entries.
    df_b_r = pd.DataFrame({
        'DateTime': data.index[-n:],
        'Forecast': forecast,
        'Expected_Output': y_test})
    df_b_r.set_index('DateTime', inplace=True)

    Pi = calculate_Pi(df_b_r)
    hit_r = calculate_hit_ratio(Pi)
    rmse_value = calculate_rmse(df_b_r.Forecast, df_b_r.Expected_Output)
    train_r2 = model.score(X_train, y_train)
    test_r2 = model.score(X_test, y_test)

    return (forecast_days, hit_r, rmse_value, train_r2, test_r2, df_b_r, Pi)

### LSTM Model

In [19]:
def calculate_lstm_stats(_days, _df, _epochs, _n_features, _batch_size=16, _n_input=14):
    """Train an LSTM that forecasts 'Close' `_days` rows ahead and score it.

    Parameters
    ----------
    _days : int
        Forecast horizon: the label of each row is the 'Close' value
        `_days` rows later.
    _df : pandas.DataFrame
        Input data containing a 'Close' column; every column is used as a
        feature. The frame is NOT modified - labelling and row dropping
        happen on an internal copy.
    _epochs : int
        Number of training epochs.
    _n_features : int
        Number of feature columns fed to the network; must equal the number
        of columns in the feature matrix built from `_df`.
    _batch_size : int, optional
        Batch size of the sliding-window generators (default 16).
    _n_input : int, optional
        Length of each input window in rows (default 14).

    Returns
    -------
    tuple
        (forecast_days, hit_ratio, rmse, train_r2, test_r2, results, Pi) where
        `results` is a DataFrame indexed by 'DateTime' with the columns
        'Forecast' and 'Expected_Output' for the predicted test rows, and
        `Pi` is the per-row direction-hit series computed from `results`.

    Raises
    ------
    ValueError
        If `_n_features` does not match the number of feature columns.
    """

    forecast_days = _days

    # Work on a derived frame so the caller's DataFrame keeps all of its rows
    # and does not gain a 'Label' column as a side effect.
    data = _df.assign(Label=_df['Close'].shift(-forecast_days)).dropna(how="any")
    X = np.array(data.drop(columns=['Label']))
    y = np.array(data['Label'])

    if X.shape[1] != _n_features:
        raise ValueError(
            "_n_features=%d does not match the %d feature columns in the data"
            % (_n_features, X.shape[1]))

    # Only the final TimeSeriesSplit fold is used: it trains on the earliest
    # rows and tests on the most recent block.
    train_index, test_index = list(TimeSeriesSplit(n_splits=5).split(X))[-1]
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    n_input = _n_input
    n_features = _n_features

    generator_train = TimeseriesGenerator(X_train, y_train, length=n_input, batch_size=_batch_size)
    generator_test = TimeseriesGenerator(X_test, y_test, length=n_input, batch_size=_batch_size)

    model = Sequential()
    model.add(LSTM(120, activation='tanh', input_shape=(n_input, n_features)))
    model.add(Dropout(0.15)) # prevents overfitting
    model.add(Dense(50))
    model.add(Dense(40))
    model.add(Dense(30))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # `fit` accepts the generator directly (as `predict` does below);
    # `fit_generator` is deprecated.
    model.fit(generator_train, epochs=_epochs)

    # The network emits one column per sample, shape (n, 1). Flatten once so
    # every metric compares it element-wise with the 1-D label vectors instead
    # of broadcasting to an (n, n) matrix.
    lstm_prediction_test = model.predict(generator_test).reshape(-1)
    lstm_prediction_train = model.predict(generator_train).reshape(-1)

    n = len(lstm_prediction_test)
    m = len(lstm_prediction_train)

    # Each generator skips the first `n_input` rows of its split, so the
    # predictions line up with the LAST n (resp. m) labels and index entries.
    df_lstm = pd.DataFrame(
        {'DateTime': data.index[-n:],
         'Forecast': lstm_prediction_test,
         'Expected_Output': y_test[-n:]})
    df_lstm.set_index('DateTime', inplace=True)

    Pi = calculate_Pi(df_lstm)
    hit_r = calculate_hit_ratio(Pi)
    rmse_value = calculate_rmse(lstm_prediction_test, y_test[-n:])
    train_r2 = calculate_r_squared(lstm_prediction_train, y_train[-m:])
    test_r2 = calculate_r_squared(lstm_prediction_test, y_test[-n:])

    return (forecast_days, hit_r, rmse_value, train_r2, test_r2, df_lstm, Pi)

## Decision Models

## Implementation

In [20]:
#  Index the data by 'DateTime'. The guard keeps this cell re-runnable:
#  once 'DateTime' is the index it is no longer a column, and a second
#  set_index call would raise a KeyError.
if df.index.name != 'DateTime':
    df = df.set_index('DateTime')

#  Give every model its own copy so that one model's preprocessing can never
#  alter the data seen by the other (plain assignment would only alias `df`).
df_lstm = df.copy()
df_lrm = df.copy()

In [21]:
#  Linear Regression Model, forecast 1 day forward
#  LRM_1 is the tuple (forecast_days, hit_ratio, rmse, train_r2, test_r2, results_df, Pi).
LRM_1 = calculate_lrm_stats(1, df_lrm)

In [22]:
#  Second-to-last tuple element: DataFrame of 'Forecast' vs. 'Expected_Output' for the test rows.
LRM_1[-2]

Unnamed: 0_level_0,Forecast,Expected_Output
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-07,0.347359,0.331828
2018-05-08,0.332759,0.327243
2018-05-09,0.328671,0.342300
2018-05-10,0.343218,0.348099
2018-05-11,0.348874,0.350998
...,...,...
2019-12-25,0.157710,0.159700
2019-12-26,0.158769,0.175454
2019-12-27,0.174245,0.177701
2019-12-29,0.176497,0.182196


In [None]:
#  LSTM Model, forecast 1 day forward
#  _n_features is used as the feature dimension of the network's input shape, so it
#  has to match the number of feature columns in df_lstm.
LSTM_1 = calculate_lstm_stats(_days=1, _df=df_lstm, _epochs=5, _n_features=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
#  Second-to-last tuple element: DataFrame of 'Forecast' vs. 'Expected_Output' for the predicted test rows.
LSTM_1[-2]