## Project – Creating an automated trading system

#### Christian Karvonen IA-15 & Wilhelm Kinos IA-15

#### Imports:

In [1]:
import datetime as dt
import numpy as np
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader.data import DataReader
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.linear_model import LinearRegression, ElasticNetCV, Ridge, BayesianRidge, LassoCV, LassoLarsCV
from sklearn.neural_network import MLPRegressor
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import confusion_matrix
init_notebook_mode(connected=True)

### All definitions/functions:

#### Rolling average definition:

In [2]:
def rolling_average(values, window):
    """Simple moving average, left-padded with NaN to the length of `values`.

    Parameters
    ----------
    values : array-like
        One-dimensional sequence of numbers (list, ndarray or pandas Series).
    window : int
        Number of consecutive observations averaged per output element.

    Returns
    -------
    numpy.ndarray
        Array with ``len(values)`` entries. The first ``window - 1`` entries
        are NaN; entry ``i`` afterwards is the mean of
        ``values[i - window + 1 : i + 1]``.

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    values = np.asarray(values, dtype=float)
    n = len(values)

    # With fewer observations than the window no average is defined. Without
    # this guard np.convolve(..., 'valid') swaps its operands and the result
    # would no longer have len(values) entries.
    if window > n:
        return np.full(n, np.nan)

    weights = np.repeat(1.0, window) / window
    sma = np.convolve(values, weights, 'valid')
    return np.concatenate((np.full(window - 1, np.nan), sma))

#### MASE

In [3]:
def mean_absolute_scaled_error(y_test, forecast_prediction):
    """Mean absolute error of the forecast, scaled by the naive one-step error.

    The scale is the mean absolute difference between consecutive rows of
    `y_test` itself (differences taken along axis 0).

    Parameters
    ----------
    y_test : array-like
        Observed values, either flat ``(n,)`` or a column ``(n, 1)``.
    forecast_prediction : array-like
        Predicted values with the same number of elements as `y_test`;
        flat and column shapes are both accepted.

    Returns
    -------
    float
        Mean absolute forecast error divided by the naive scale.
    """
    y_test = np.asarray(y_test)
    # Flatten the forecast as well: subtracting an (n, 1) column from the
    # flattened truth would otherwise broadcast into an (n, n) matrix.
    forecast = np.asarray(forecast_prediction).ravel()

    n = len(y_test)
    d = np.abs(np.diff(y_test, axis=0)).sum() / (n - 1)
    errors = np.abs(y_test.ravel() - forecast)
    return errors.mean() / d

#### SMAPE

In [4]:
def SMAPE(y_test, forecast_prediction):
    """Symmetric mean absolute percentage error, as a fraction.

    Each term is ``|F - A| / ((|A| + |F|) / 2)``; the result is the mean of
    those terms over all observations.

    Parameters
    ----------
    y_test : array-like
        Observed values, flat ``(n,)`` or column ``(n, 1)``.
    forecast_prediction : array-like
        Predicted values with the same number of elements as `y_test`.

    Returns
    -------
    float
        Mean symmetric relative error (0 for a perfect forecast).
    """
    # Flatten both inputs so a column-shaped array cannot broadcast against a
    # flat one into an (n, n) matrix.
    actual = np.asarray(y_test, dtype=float).ravel()
    forecast = np.asarray(forecast_prediction, dtype=float).ravel()

    # The parentheses matter: the denominator is the *mean* magnitude.
    # Writing `/ (a + b) / 2` divides by 2 instead and shrinks the score 4x.
    mean_magnitude = (np.abs(actual) + np.abs(forecast)) / 2
    return np.mean(np.abs(forecast - actual) / mean_magnitude)

#### Defining timeframe of stocks:

In [5]:
# Query window: the 5 * 365 days leading up to today.
end = dt.date.today()
start = end - dt.timedelta(days=5*365)

# ISO-formatted copy of the end date, echoed so the run date is visible.
endday = end.strftime('%Y-%m-%d')
print([endday])

['2018-10-31']


#### Use of DataReader to get MU stock data from IEX https://iextrading.com/developer/

In [6]:
# Fetch MU price rows for the [start, end] window from the 'iex' data source.
df = DataReader(name='MU', data_source='iex', start=start, end=end)
df.tail(n=10)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-17,43.33,43.39,42.11,42.35,27758963
2018-10-18,42.04,42.2125,41.14,41.3,33560822
2018-10-19,41.67,41.985,40.27,40.45,28641375
2018-10-22,40.56,40.7,38.64,39.76,34441221
2018-10-23,38.15,39.11,37.41,38.68,46364174
2018-10-24,38.0,38.3,35.4,35.43,53096450
2018-10-25,35.59,37.16,35.06,36.78,39625699
2018-10-26,35.28,35.91,34.3,35.4,41257357
2018-10-29,36.14,36.2,33.82,34.66,34176423
2018-10-30,34.64,36.05,34.495,36.01,35319497


### The 5 features:

#### Momentum: 

In [7]:
# Momentum: each close minus the close four rows earlier.
df['momentum'] = df['close'].diff(periods=4)
df[['momentum']].head(n=10)

Unnamed: 0_level_0,momentum
date,Unnamed: 1_level_1
2013-11-01,
2013-11-04,
2013-11-05,
2013-11-06,
2013-11-07,0.144
2013-11-08,0.215
2013-11-11,0.6
2013-11-12,1.13
2013-11-13,1.201
2013-11-14,1.085


#### A/D oscillator (accumulation/distribution oscillator), stored in column `A/O`:

In [8]:
# Distance from the previous row's close up to this row's high, expressed as a
# fraction of this row's high-low range.
df['A/O'] = df['high'].sub(df['close'].shift(1)).div(df['high'].sub(df['low']))
df[['A/O']].head(n=10)

Unnamed: 0_level_0,A/O
date,Unnamed: 1_level_1
2013-11-01,
2013-11-04,0.882979
2013-11-05,-0.333333
2013-11-06,0.348837
2013-11-07,1.13913
2013-11-08,0.786275
2013-11-11,0.460526
2013-11-12,0.727273
2013-11-13,0.631579
2013-11-14,0.933333


#### MA5:

In [9]:
# Five-row simple moving average of the close.
df['MA5'] = rolling_average(df['close'], window=5)
df[['MA5']].head(n=10)

Unnamed: 0_level_0,MA5
date,Unnamed: 1_level_1
2013-11-01,
2013-11-04,
2013-11-05,
2013-11-06,
2013-11-07,17.6708
2013-11-08,17.7768
2013-11-11,17.8528
2013-11-12,18.0448
2013-11-13,18.3288
2013-11-14,18.623


#### BIAS6:

In [10]:
# BIAS6: percentage deviation of the close from its six-row moving average.
df['MA6'] = rolling_average(df['close'], window=6)
df['BIAS6'] = df['close'].sub(df['MA6']).div(df['MA6']).mul(100)
df[['BIAS6']].head(n=10)

Unnamed: 0_level_0,BIAS6
date,Unnamed: 1_level_1
2013-11-01,
2013-11-04,
2013-11-05,
2013-11-06,
2013-11-07,
2013-11-08,2.039283
2013-11-11,2.301361
2013-11-12,3.601683
2013-11-13,4.009382
2013-11-14,3.885089


#### ROC (rate of change)

In [11]:
# Rate of change: percentage move of the close relative to three rows earlier.
df['ROC'] = df['close'].sub(df['close'].shift(3)).div(df['close'].shift(3)).mul(100)
df[['ROC']].head(n=10)

Unnamed: 0_level_0,ROC
date,Unnamed: 1_level_1
2013-11-01,
2013-11-04,
2013-11-05,
2013-11-06,-0.426743
2013-11-07,-0.955841
2013-11-08,2.4618
2013-11-11,4.4
2013-11-12,5.141374
2013-11-13,4.501519
2013-11-14,5.035577


### Moving window

In [12]:
def get_moving_window(data, window, frame=None):
    """Add lagged copies of `data` as ``Moving_Window_<lag>`` columns.

    Parameters
    ----------
    data : pandas.Series
        Series to lag; lag ``i`` is ``data.shift(i)``.
    window : int
        Number of lags to create (lags 1 through `window`, inclusive).
    frame : pandas.DataFrame, optional
        Frame that receives the new columns. When omitted the notebook-level
        ``df`` is modified, which is what existing two-argument calls rely on.

    Returns
    -------
    None
        The target frame is modified in place.
    """
    # Fall back to the global frame only when the caller did not name one, so
    # the helper can also be used on other frames.
    target = df if frame is None else frame
    for lag in range(1, window + 1):
        target["Moving_Window_" + str(lag)] = data.shift(lag)

In [13]:
# Attach the ten most recent prior closes to every row as lag columns.
get_moving_window(df['close'], window=10)
df.head(n=10)

Unnamed: 0_level_0,open,high,low,close,volume,momentum,A/O,MA5,MA6,BIAS6,...,Moving_Window_1,Moving_Window_2,Moving_Window_3,Moving_Window_4,Moving_Window_5,Moving_Window_6,Moving_Window_7,Moving_Window_8,Moving_Window_9,Moving_Window_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-11-01,17.75,17.91,17.43,17.575,41372306,,,,,,...,,,,,,,,,,
2013-11-04,17.62,17.99,17.52,17.89,37802791,,0.882979,,,,...,17.575,,,,,,,,,
2013-11-05,17.74,17.79,17.49,17.67,23800232,,-0.333333,,,,...,17.89,17.575,,,,,,,,
2013-11-06,17.77,17.82,17.39,17.5,21799539,,0.348837,,,,...,17.67,17.89,17.575,,,,,,,
2013-11-07,17.83,18.155,17.58,17.719,54196790,0.144,1.13913,17.6708,,,...,17.5,17.67,17.89,17.575,,,,,,
2013-11-08,17.7,18.12,17.61,18.105,33201332,0.215,0.786275,17.7768,17.743167,2.039283,...,17.719,17.5,17.67,17.89,17.575,,,,,
2013-11-11,18.04,18.28,17.9,18.27,36990366,0.6,0.460526,17.8528,17.859,2.301361,...,18.105,17.719,17.5,17.67,17.89,17.575,,,,
2013-11-12,18.17,18.67,18.12,18.63,32232323,1.13,0.727273,18.0448,17.982333,3.601683,...,18.27,18.105,17.719,17.5,17.67,17.89,17.575,,,
2013-11-13,18.47,18.99,18.42,18.92,38821218,1.201,0.631579,18.3288,18.190667,4.009382,...,18.63,18.27,18.105,17.719,17.5,17.67,17.89,17.575,,
2013-11-14,18.91,19.2,18.9,19.19,31417954,1.085,0.933333,18.623,18.472333,3.885089,...,18.92,18.63,18.27,18.105,17.719,17.5,17.67,17.89,17.575,


#### Drop rows with missing values (the leading rows where indicators and lags are undefined)

In [14]:
# Remove, in place, every row that still has a NaN in any column.
df.dropna(axis=0, how='any', inplace=True)
df.head(n=5)

Unnamed: 0_level_0,open,high,low,close,volume,momentum,A/O,MA5,MA6,BIAS6,...,Moving_Window_1,Moving_Window_2,Moving_Window_3,Moving_Window_4,Moving_Window_5,Moving_Window_6,Moving_Window_7,Moving_Window_8,Moving_Window_9,Moving_Window_10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-11-15,19.38,19.83,19.22,19.461,51592829,1.191,1.04918,18.8942,18.762667,3.72193,...,19.19,18.92,18.63,18.27,18.105,17.719,17.5,17.67,17.89,17.575
2013-11-18,19.58,19.88,19.09,19.29,47549892,0.66,0.53038,19.0982,18.960167,1.739612,...,19.461,19.19,18.92,18.63,18.27,18.105,17.719,17.5,17.67,17.89
2013-11-19,19.35,19.87,19.05,19.06,51853726,0.14,0.707317,19.1842,19.091833,-0.166738,...,19.29,19.461,19.19,18.92,18.63,18.27,18.105,17.719,17.5,17.67
2013-11-20,19.19,19.22,18.51,18.8,44815768,-0.39,0.225352,19.1602,19.120167,-1.674497,...,19.06,19.29,19.461,19.19,18.92,18.63,18.27,18.105,17.719,17.5
2013-11-21,19.04,20.115,18.96,19.99,93347344,0.529,1.138528,19.3202,19.2985,3.58318,...,18.8,19.06,19.29,19.461,19.19,18.92,18.63,18.27,18.105,17.719


#### Linear regression

In [15]:
def get_y_columns(data):
    """Return the names of the ``Moving_Window_*`` columns of `data`.

    The columns are selected by name rather than by position, so the result
    stays correct when feature columns are added to or removed from the frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column labels are inspected.

    Returns
    -------
    numpy.ndarray
        Object array of matching column names, in frame order (empty when the
        frame has no such columns).
    """
    prefix = "Moving_Window_"
    # str() guards against non-string column labels.
    matches = [col for col in data.columns if str(col).startswith(prefix)]
    return np.array(matches, dtype=object)
    
# Display which columns the selector returns for the current frame.
get_y_columns(df)

array(['Moving_Window_1', 'Moving_Window_2', 'Moving_Window_3',
       'Moving_Window_4', 'Moving_Window_5', 'Moving_Window_6',
       'Moving_Window_7', 'Moving_Window_8', 'Moving_Window_9',
       'Moving_Window_10'], dtype=object)

In [31]:
# x: every column except the ones returned by get_y_columns; y: those columns.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas 2.0 no longer accepts.
x = np.array(df.drop(columns=get_y_columns(df)))
y = np.array(df[get_y_columns(df)])

# NOTE(review): flattening the (rows, lag-columns) block into a single column
# makes `y` several times longer than `x`, so the row indices produced by
# tscv.split(x) below do not pair an x row with a target from the same date.
# Left unchanged because the intended single target column cannot be
# determined from this cell -- confirm the target and select it explicitly.
y = y.reshape(-1, 1)

# Each array gets its own scaler. A scaler fitted on the one-column `y` cannot
# be applied to the multi-column `x`: current scikit-learn rejects the feature
# count mismatch. `scaler` remains the y-scaler so the name keeps its meaning.
# NOTE(review): both scalers are fitted before the split, so test rows
# influence the scaling statistics.
x_scaler = preprocessing.StandardScaler().fit(x)
scaler = preprocessing.StandardScaler().fit(y)

x = x_scaler.transform(x)
y = scaler.transform(y)

tscv = TimeSeriesSplit(n_splits=5)

# The loop body only rebinds names, so after it finishes the train/test arrays
# hold the final split yielded by TimeSeriesSplit.
for train_index, test_index in tscv.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

In [32]:
# Ordinary least squares on the training split; the single-column target is
# flattened to one dimension before fitting.
regr = LinearRegression()
regr.fit(x_train, y_train.reshape(-1))

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [33]:
# Score the fitted model on the test rows left over from the last split above.
# scikit-learn regressors report R^2 here; a negative value means the model is
# worse than always predicting the mean of y_test.
regr.score(x_test, y_test)

-3.8841986600116689