## Project – Creating an automated trading system

#### Christian Karvonen IA-15 & Wilhelm Kinos IA-15

#### Imports:

In [56]:
import datetime as dt
import numpy as np
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader.data import DataReader
from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.linear_model import LinearRegression, ElasticNetCV, BayesianRidge, LassoLarsCV, ARDRegression, LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn import preprocessing, svm, model_selection
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib
from sklearn import utils
init_notebook_mode(connected=True)

### All definitions/functions:

#### Rolling average definition:

In [2]:
def rolling_average(values, window):
    nan = []
    nan = np.append(nan, np.repeat(np.nan, window-1))
    weights = np.repeat(1.0, window)/window
    sma = np.convolve(values,weights,'valid')
    sma = np.append(nan, sma)
    return sma

#### MASE

In [3]:
def MASE(y_test, forecast_prediction):
    y_test, forecast_prediction = np.array(y_test), np.array(forecast_prediction)
    n = len(y_test)
    d = np.abs(np.diff(y_test, axis=0)).sum()/(n-1) 
    errors = np.abs(y_test.flat - forecast_prediction)
    return errors.mean()/d

#### SMAPE

In [4]:
def SMAPE(y_test, forecast_prediction):
    y_test, forecast_prediction = np.array(y_test), np.array(forecast_prediction)
    return 1/len(y_test) * np.sum(np.abs(forecast_prediction - y_test.flat) / (np.abs(y_test) + np.abs(forecast_prediction))/2)

#### Defining timeframe of stocks:

In [5]:
end = dt.date.today()
endday = end
endday = endday.strftime('%Y-%m-%d')
print([endday])
start = end - dt.timedelta(days=5*365)

['2018-11-13']


#### Use of DataReader to get MU stock data from IEX https://iextrading.com/developer/

In [6]:
df = DataReader('MU', 'iex', start, end)
df.tail(10)

5y


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-30,34.64,36.05,34.495,36.01,35319497
2018-10-31,36.58,38.04,35.8,37.72,44055370
2018-11-01,37.98,40.2,37.52,40.12,43981410
2018-11-02,40.19,40.94,39.64,40.32,33645741
2018-11-05,40.1,40.24,38.77,39.92,22903972
2018-11-06,39.74,40.56,39.56,39.8,19316554
2018-11-07,40.32,41.05,39.96,40.93,22957221
2018-11-08,40.67,41.43,40.34,40.44,15609308
2018-11-09,39.89,39.99,38.41,39.11,26102786
2018-11-12,38.24,38.53,36.835,37.44,24822391


### The 5 features:

#### Momentum: 

In [7]:
df['momentum'] = df.close - df.close.shift(4)
df[['momentum']].head(10)

Unnamed: 0_level_0,momentum
date,Unnamed: 1_level_1
2013-11-14,
2013-11-15,
2013-11-18,
2013-11-19,
2013-11-20,-0.39
2013-11-21,0.529
2013-11-22,0.9
2013-11-25,0.8
2013-11-26,1.51
2013-11-27,1.18


#### A/O Oscillator (accumulation/distribution oscillator):

In [8]:
df['A/O'] = (df.high - df.close.shift(1))/(df.high - df.low)
df[['A/O']].head(10)

Unnamed: 0_level_0,A/O
date,Unnamed: 1_level_1
2013-11-14,
2013-11-15,1.04918
2013-11-18,0.53038
2013-11-19,0.707317
2013-11-20,0.225352
2013-11-21,1.138528
2013-11-22,0.5
2013-11-25,0.392604
2013-11-26,0.948276
2013-11-27,0.977778


#### MA5:

In [9]:
df['MA5'] = rolling_average(df.close, 5)
df[['MA5']].head(10)

Unnamed: 0_level_0,MA5
date,Unnamed: 1_level_1
2013-11-14,
2013-11-15,
2013-11-18,
2013-11-19,
2013-11-20,19.1602
2013-11-21,19.3202
2013-11-22,19.466
2013-11-25,19.58
2013-11-26,19.83
2013-11-27,20.304


#### BIAS6:

In [10]:
df['MA6'] = rolling_average(df.close, 6)
df['BIAS6'] = ((df.close-df.MA6)/(df.MA6))*100
df[['BIAS6']].head(10)

Unnamed: 0_level_0,BIAS6
date,Unnamed: 1_level_1
2013-11-14,
2013-11-15,
2013-11-18,
2013-11-19,
2013-11-20,
2013-11-21,3.58318
2013-11-22,3.723746
2013-11-25,1.681031
2013-11-26,3.087725
2013-11-27,5.568484


#### ROC (rate of change)

In [11]:
df['ROC'] = (df.close - df.close.shift(3)) / df.close.shift(3) * 100
df[['ROC']].head(10)

Unnamed: 0_level_0,ROC
date,Unnamed: 1_level_1
2013-11-14,
2013-11-15,
2013-11-18,
2013-11-19,-0.677436
2013-11-20,-3.396537
2013-11-21,3.628823
2013-11-22,5.928646
2013-11-25,5.638298
2013-11-26,1.6008
2013-11-27,4.853888


In [12]:
df.head(5)

Unnamed: 0_level_0,open,high,low,close,volume,momentum,A/O,MA5,MA6,BIAS6,ROC
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-11-14,18.91,19.2,18.9,19.19,31417954,,,,,,
2013-11-15,19.38,19.83,19.22,19.461,51592829,,1.04918,,,,
2013-11-18,19.58,19.88,19.09,19.29,47549892,,0.53038,,,,
2013-11-19,19.35,19.87,19.05,19.06,51853726,,0.707317,,,,-0.677436
2013-11-20,19.19,19.22,18.51,18.8,44815768,-0.39,0.225352,19.1602,,,-3.396537


### Moving window

In [13]:
def get_moving_window(data, window):
    for i in range(1, window + 1):
        df["Moving_Window_"+str(i)] = data.shift(i) 

In [14]:
#Label
label_int = int(1)
df['label'] = df['close'].shift(-label_int)

In [15]:
#df = df.drop(['open','high','low','volume','momentum','A/O','MA5','MA6','BIAS6','ROC'],1)

In [16]:
get_moving_window(df.close, 15)
df.head(10)

Unnamed: 0_level_0,open,high,low,close,volume,momentum,A/O,MA5,MA6,BIAS6,...,Moving_Window_6,Moving_Window_7,Moving_Window_8,Moving_Window_9,Moving_Window_10,Moving_Window_11,Moving_Window_12,Moving_Window_13,Moving_Window_14,Moving_Window_15
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-11-14,18.91,19.2,18.9,19.19,31417954,,,,,,...,,,,,,,,,,
2013-11-15,19.38,19.83,19.22,19.461,51592829,,1.04918,,,,...,,,,,,,,,,
2013-11-18,19.58,19.88,19.09,19.29,47549892,,0.53038,,,,...,,,,,,,,,,
2013-11-19,19.35,19.87,19.05,19.06,51853726,,0.707317,,,,...,,,,,,,,,,
2013-11-20,19.19,19.22,18.51,18.8,44815768,-0.39,0.225352,19.1602,,,...,,,,,,,,,,
2013-11-21,19.04,20.115,18.96,19.99,93347344,0.529,1.138528,19.3202,19.2985,3.58318,...,,,,,,,,,,
2013-11-22,19.91,20.32,19.66,20.19,40144827,0.9,0.5,19.466,19.465167,3.723746,...,19.19,,,,,,,,,
2013-11-25,20.44,20.5,19.7104,19.86,33875466,0.8,0.392604,19.58,19.531667,1.681031,...,19.461,19.19,,,,,,,,
2013-11-26,19.93,20.41,19.83,20.31,33585049,1.51,0.948276,19.83,19.701667,3.087725,...,19.29,19.461,19.19,,,,,,,
2013-11-27,20.41,21.19,20.29,21.17,40572662,1.18,0.977778,20.304,20.053333,5.568484,...,19.06,19.29,19.461,19.19,,,,,,


Dropna

In [17]:
df.dropna(inplace=True)
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,momentum,A/O,MA5,MA6,BIAS6,...,Moving_Window_6,Moving_Window_7,Moving_Window_8,Moving_Window_9,Moving_Window_10,Moving_Window_11,Moving_Window_12,Moving_Window_13,Moving_Window_14,Moving_Window_15
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-12-06,21.97,22.31,21.8,22.31,32237775,1.04,1.294118,21.7638,21.653167,3.033429,...,21.17,20.31,19.86,20.19,19.99,18.8,19.06,19.29,19.461,19.19
2013-12-09,22.66,23.67,22.6,23.12,65756702,1.29,1.271028,22.1338,21.989833,5.139496,...,21.1,21.17,20.31,19.86,20.19,19.99,18.8,19.06,19.29,19.461
2013-12-10,23.12,23.4385,22.8,23.14,41919491,1.381,0.498825,22.3958,22.3015,3.759837,...,21.27,21.1,21.17,20.31,19.86,20.19,19.99,18.8,19.06,19.29
2013-12-11,22.47,22.91,22.29,22.5,52447475,0.85,-0.370968,22.544,22.413167,0.387421,...,21.83,21.27,21.1,21.17,20.31,19.86,20.19,19.99,18.8,19.06
2013-12-12,22.68,23.09,22.355,22.54,52600226,0.23,0.802721,22.722,22.543333,-0.014786,...,21.759,21.83,21.27,21.1,21.17,20.31,19.86,20.19,19.99,18.8


Linear Regression

In [18]:
def get_y_columns(data):
    feature_col = 12
    return data.columns[feature_col:].values 
    
print(get_y_columns(df))
print(np.array(df[get_y_columns(df)]))

['Moving_Window_1' 'Moving_Window_2' 'Moving_Window_3' 'Moving_Window_4'
 'Moving_Window_5' 'Moving_Window_6' 'Moving_Window_7' 'Moving_Window_8'
 'Moving_Window_9' 'Moving_Window_10' 'Moving_Window_11'
 'Moving_Window_12' 'Moving_Window_13' 'Moving_Window_14'
 'Moving_Window_15']
[[21.65  21.759 21.83  ... 19.29  19.461 19.19 ]
 [22.31  21.65  21.759 ... 19.06  19.29  19.461]
 [23.12  22.31  21.65  ... 18.8   19.06  19.29 ]
 ...
 [39.8   39.92  40.32  ... 40.45  41.3   42.35 ]
 [40.93  39.8   39.92  ... 39.76  40.45  41.3  ]
 [40.44  40.93  39.8   ... 38.68  39.76  40.45 ]]


In [19]:
x = np.array(df.drop(['label'], 1))
y = np.array(df['label'])
print(x)
print(y)
y = y.reshape(-1, 1)

scaler = preprocessing.StandardScaler().fit(y)

x = scaler.transform(x)
y = scaler.transform(y)

tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

[[21.97   22.31   21.8    ... 19.29   19.461  19.19  ]
 [22.66   23.67   22.6    ... 19.06   19.29   19.461 ]
 [23.12   23.4385 22.8    ... 18.8    19.06   19.29  ]
 ...
 [40.32   41.05   39.96   ... 40.45   41.3    42.35  ]
 [40.67   41.43   40.34   ... 39.76   40.45   41.3   ]
 [39.89   39.99   38.41   ... 38.68   39.76   40.45  ]]
[23.12 23.14 22.5  ... 40.44 39.11 37.44]


In [20]:
regr = LinearRegression()
regr.fit(x_train, y_train.reshape(len(y_train)))

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [21]:
regr.score(x_test, y_test)
forecast = regr.predict(x_test)
#inverse_transform the result for future plotting
forecast_list = forecast.tolist()
new_forecast = scaler.inverse_transform(forecast_list)

In [22]:
MASE(y_test, forecast)

1.018745969664408

In [23]:
SMAPE(y_test, forecast)

3.00936454694048

In [24]:
dates = df.index.values
print([endday])
datespred = np.append(dates, [endday])
print(datespred)
lenofytest = len(y_test)
print(lenofytest)

['2018-11-13']
['2013-12-06' '2013-12-09' '2013-12-10' ... '2018-11-08' '2018-11-09'
 '2018-11-13']
207


In [25]:
df_cm = pd.DataFrame(index=datespred[-lenofytest:])
#actual
df_cm['actual'] = df['close'][-lenofytest:]
#label
df_cm['label'] = df['label'][-lenofytest:]
#pred
df_cm['pred'] = new_forecast
#Creating the buy and the sell decisions for label
df_cm['labelDecision'] = np.where(df_cm['label'] > df_cm['actual'], "Buy", "Sell")
#Creating the buy and the sell decisions for pred
df_cm['predDecision'] = np.where(df_cm['pred'] > df_cm['actual'], "Buy", "Sell")
#Determining the hits between labelDecision and predDecisions
df_cm['hitDecision'] = np.where(df_cm['labelDecision'] == df_cm['predDecision'], 1.0, 0.0)

#Finding confusion matrix values TN, FP, FN and TP
df_cm['TN'] = np.where((df_cm['labelDecision'] == "Sell") & (df_cm['predDecision'] == "Sell"), 1.0, 0.0)
df_cm['FP'] = np.where((df_cm['labelDecision'] == "Sell") & (df_cm['predDecision'] == "Buy"), 1.0, 0.0)
df_cm['FN'] = np.where((df_cm['labelDecision'] == "Buy") & (df_cm['predDecision'] == "Sell"), 1.0, 0.0)
df_cm['TP'] = np.where((df_cm['labelDecision'] == "Buy") & (df_cm['predDecision'] == "Buy"), 1.0, 0.0)
df_cm.head(10)

Unnamed: 0,actual,label,pred,labelDecision,predDecision,hitDecision,TN,FP,FN,TP
2018-01-19,42.75,42.88,43.400253,Buy,Buy,1.0,0.0,0.0,0.0,1.0
2018-01-22,42.88,43.95,42.681136,Buy,Sell,0.0,0.0,0.0,1.0,0.0
2018-01-23,43.95,43.08,42.432484,Sell,Sell,1.0,1.0,0.0,0.0,0.0
2018-01-24,43.08,43.01,44.4622,Sell,Buy,0.0,0.0,1.0,0.0,0.0
2018-01-25,43.01,43.67,43.136926,Buy,Buy,1.0,0.0,0.0,0.0,1.0
2018-01-26,43.67,43.29,43.557281,Sell,Sell,1.0,1.0,0.0,0.0,0.0
2018-01-29,43.29,41.67,43.703504,Sell,Buy,0.0,0.0,1.0,0.0,0.0
2018-01-30,41.67,43.72,43.33038,Buy,Buy,1.0,0.0,0.0,0.0,1.0
2018-01-31,43.72,42.49,41.352068,Sell,Sell,1.0,1.0,0.0,0.0,0.0
2018-02-01,42.49,40.82,43.95732,Sell,Buy,0.0,0.0,1.0,0.0,0.0


In [26]:
#printing out the confusion matrix values 
print(len(df_cm.loc[df_cm.TN == 1.0]))
print(len(df_cm.loc[df_cm.FP == 1.0]))
print(len(df_cm.loc[df_cm.FN == 1.0]))
print(len(df_cm.loc[df_cm.TP == 1.0]))


#checking if the values are correct with pd.crosstab
df_confusion = pd.crosstab(df_cm.labelDecision, df_cm.predDecision)
df_confusion

48
55
48
56


predDecision,Buy,Sell
labelDecision,Unnamed: 1_level_1,Unnamed: 2_level_1
Buy,56,48
Sell,55,48


In [27]:
#hitRatio 1
lenOfHits = len(df_cm.loc[df_cm.hitDecision == 1.0])
lenOfMisses = len(df_cm.loc[df_cm.hitDecision == 0.0])
lenOfDecisions = len(df_cm.hitDecision)

print(lenOfHits)
print(lenOfMisses)
print(lenOfDecisions)

hitRatio = ((lenOfHits) / (lenOfDecisions)) * 100
print(hitRatio)

104
103
207
50.24154589371981


#### Creating a pipeline

In [113]:
#Construct some pipelines
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import SGDRegressor, Ridge, RidgeCV, LassoCV, LassoLarsIC, Lars, LarsCV, PassiveAggressiveRegressor,OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, TheilSenRegressor, HuberRegressor, RANSACRegressor
from sklearn.svm import LinearSVR
pipe_lr = Pipeline([('scl', StandardScaler()),
            #'('pca', PCA()),
            ('clf', LinearRegression())])

pipe_br = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', BayesianRidge())])

pipe_llcv = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', LassoLarsCV())])

pipe_en = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', ElasticNetCV(max_iter=2000))])

pipe_ardr = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', ARDRegression())])

pipe_mlp = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', MLPRegressor())])

pipe_sgd = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', SGDRegressor(max_iter=2000))])

pipe_r = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', Ridge())])

pipe_llic = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', LassoLarsIC())])

pipe_lars = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', Lars())])

pipe_omp = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', OrthogonalMatchingPursuit())])

pipe_tsr = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', TheilSenRegressor())])

pipe_hr = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', HuberRegressor())])

pipe_ransac = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', RANSACRegressor())])

pipe_ompcv = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', OrthogonalMatchingPursuitCV())])

pipe_rcv = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', RidgeCV())])

pipe_pa = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', PassiveAggressiveRegressor())])

pipe_lcv = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', LarsCV())])

pipe_lscv = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', LassoCV())])

pipe_lsvr = Pipeline([('scl', StandardScaler()),
            #('pca', PCA()),
            ('clf', LinearSVR())])

#List of pipelines for ease of iteration
pipelines = [pipe_lr, pipe_br, pipe_llcv, pipe_en, pipe_ardr, pipe_mlp, pipe_sgd, pipe_r, pipe_llic, pipe_lars,
             pipe_omp, pipe_tsr, pipe_hr, pipe_ransac, pipe_ompcv, pipe_rcv, pipe_pa, pipe_lcv, pipe_lscv, pipe_lsvr]

#Dictionary of pipelines and classifier types for ease of reference
pipe_dict = {0: 'LinearRegression', 1: 'BayesianRidge', 2: 'LassoLarsCV', 3: 'ElasticNetCV', 4: 'ARDRegression', 5: 'MLPRegressor', 6: 'SGDRegressor'
             ,7: 'Ridge', 8: 'LassoLarsIC', 9: 'Lars', 10: 'OrthogonalMatchingPursuit', 11: 'TheilSenRegressor', 12: 'HuberRegressor'
            ,13: 'RANSAC', 14: 'OrthogonalMatchingPursuitCV', 15: 'RidgeCV', 16: 'PassiveAggressiveRegressor'
            ,17: 'LarsCV', 18: 'LassoCV', 19: 'LinearSVR'}

#Fit the pipelines
for pipe in pipelines:
    pipe.fit(x_train, y_train.flat)
      
#Compare accuracies
for idx, val in enumerate(pipelines):
    print('%s pipeline test accuracy: %.5f' % (pipe_dict[idx], val.score(x_test, y_test)))

#Identify the most accurate model on test data
best_acc = 0.0
best_clf = 0
best_pipe = ''
for idx, val in enumerate(pipelines):
    if val.score(x_test, y_test) > best_acc:
        best_acc = val.score(x_test, y_test)
        best_pipe = val
        best_clf = idx
print('Classifier with best accuracy: %s' % pipe_dict[best_clf])

#Predictions
#for idx, val in enumerate(pipelines):
#    print((pipe_dict[idx], val.predict(x_test)))
    
#Ensemble
ensemble = 0
all_x_pred = 0
ensemble_list = 0
new_ensemble = 0
for pipe in pipelines:
    all_x_pred += pipe.predict(x_test)
    
ensemble = all_x_pred/len(pipelines)
ensemble_list = ensemble.tolist()
new_ensemble = scaler.inverse_transform(ensemble_list)
print("Ensemble of 20 regression: ")
print(ensemble) 
print(new_ensemble) 
    
#Save pipeline to file
#joblib.dump(best_pipe, 'best_pipeline.pkl', compress=1)
#print('Saved %s pipeline to file' % pipe_dict[best_clf])


Regressors in active set degenerate. Dropping a regressor, after 25 iterations, i.e. alpha=2.651e-07, with an active set of 22 regressors, and the smallest cholesky pivot element being 2.107e-08. Reduce max_iter or increase eps parameters.


max_iter and tol parameters have been added in <class 'sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor'> in 0.19. If both are left unset, they default to max_iter=5 and tol=None. If tol is not None, max_iter defaults to max_iter=1000. From 0.21, default max_iter will be 1000, and default tol will be 1e-3.


Regressors in active set degenerate. Dropping a regressor, after 23 iterations, i.e. alpha=3.098e-06, with an active set of 20 regressors, and the smallest cholesky pivot element being 1.490e-08. Reduce max_iter or increase eps parameters.


Regressors in active set degenerate. Dropping a regressor, after 25 iterations, i.e. alpha=4.545e-07, with an active set of 22 regressors, and the smallest cholesky pivot element being 1.

LinearRegression pipeline test accuracy: 0.94548
BayesianRidge pipeline test accuracy: 0.94692
LassoLarsCV pipeline test accuracy: 0.94823
ElasticNetCV pipeline test accuracy: 0.94796
ARDRegression pipeline test accuracy: 0.94704
MLPRegressor pipeline test accuracy: 0.74158
SGDRegressor pipeline test accuracy: 0.94736
Ridge pipeline test accuracy: 0.94768
LassoLarsIC pipeline test accuracy: 0.92780
Lars pipeline test accuracy: 0.94369
OrthogonalMatchingPursuit pipeline test accuracy: 0.94899
TheilSenRegressor pipeline test accuracy: 0.94784
HuberRegressor pipeline test accuracy: 0.94759
RANSAC pipeline test accuracy: 0.94565
OrthogonalMatchingPursuitCV pipeline test accuracy: 0.94890
RidgeCV pipeline test accuracy: 0.94631
PassiveAggressiveRegressor pipeline test accuracy: 0.90954
LarsCV pipeline test accuracy: 0.94823
LassoCV pipeline test accuracy: 0.94742
LinearSVR pipeline test accuracy: 0.93810
Classifier with best accuracy: OrthogonalMatchingPursuit
Ensemble of 20 regression: 
[1

In [100]:
MASE(y_test, ensemble)

1.0128641455499263

In [99]:
SMAPE(y_test, ensemble)

2.998151842856457

In [118]:
p1 = Scatter(x=datespred[-lenofytest:], 
             y=df.close[-lenofytest:], 
             mode='lines',
             marker=dict(color='#3D5A80'),
             name='Close'
             )

p2 = Scatter(x=datespred[-lenofytest:], 
             y=new_ensemble,
             mode='lines',
             line=dict(color='#EFB509'),
             name='Ensemble'
             )

#p3 = Scatter(x=datespred[-lenofytest:], 
#             y=new_forecast,
#             mode='lines',
#             name='Linear Regression Forecast'
#             )

layout = Layout(title='Close compared to Ensemble of 20 regression',
                xaxis=dict(ticks='', showticklabels=True,
                zeroline=False),
                yaxis=dict(ticks='', showticklabels=True,
                zeroline=False),
                showlegend=True, hovermode='closest')

config = {'scrollZoom': True}

fig = Figure(data=[p1,p2], layout=layout)

iplot(fig, config=config)