In [2]:
## Run the following in a terminal (or in the notebook) if needed to get the packages working
# !conda create -n tensorflow
# !source activate tensorflow
# !pip install jupyter notebook
# !jupyter-notebook
# !which pip
# !pip install tensorflow
# !pip install keras
# !pip install scikit-learn pandas numpy seaborn

## data split custom tool
from timeseries_train_test_split import TimeseriesTestTrainSplit as ts
##standard tools
import tensorflow
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
##preprocessing
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
## model selection and building
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score as r2
from sklearn.metrics import explained_variance_score as evs
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import median_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from keras.wrappers.scikit_learn import KerasRegressor
##models
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso, ElasticNet, LassoLars, BayesianRidge, ARDRegression, Perceptron, PassiveAggressiveRegressor, TheilSenRegressor, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, AdaBoostRegressor, StackingRegressor, VotingRegressor

In [3]:
# Ticker symbol handed to the custom split helper.
stock_name = 'GSIT'
# The helper returns four objects, unpacked here as train/test features and targets.
(
    X_train,
    y_train,
    X_test,
    y_test,
) = ts.timeseries_test_train_split(stock_name)
print(X_test.shape)


FileNotFoundError: [Errno 2] No such file or directory: './data/stocks_df_2021-02-26.pickle'

In [None]:
# Min-max scaling and row normalisation side by side.
# NOTE: `transformers` is built here but is not plugged into `pipe` below.
transformers = FeatureUnion(
    transformer_list=[
        ('scaler', MinMaxScaler()),
        ('norm', Normalizer()),
    ]
)
# Baseline pipeline: standardise features, then fit a linear regression.
# The 'predictor' step name is what the parameter grids address.
pipe = Pipeline(
    steps=[
        ('preprocess', StandardScaler()),
        ('predictor', LinearRegression()),
    ]
)

In [None]:
def baseline_model():
    """Build and compile a small fully connected regression network.

    Returns:
        A compiled ``Sequential`` model: a 13-unit ReLU ``Dense`` layer
        followed by a single-unit linear output, trained with mean squared
        error and the Adam optimizer.
    """
    net = Sequential([
        Dense(13, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    ])
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net
def lstm_model():
    """Build and compile a single-layer LSTM regression network.

    Returns:
        A compiled ``Sequential`` model: a 13-unit ReLU ``LSTM`` layer
        followed by a single-unit linear ``Dense`` output, trained with mean
        squared error and the Adam optimizer.
    """
    # NOTE(review): Keras LSTM layers expect 3-D input (batch, timesteps, features);
    # confirm the data fed to this model is shaped accordingly.
    net = Sequential([
        LSTM(13, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal'),
    ])
    net.compile(optimizer='adam', loss='mean_squared_error')
    return net
# Scikit-learn compatible wrappers so the Keras networks can act as the
# 'predictor' step of the pipeline during the grid search.
# Keras 2 renamed the training-length argument from `nb_epoch` to `epochs`; the
# wrapper accepts the old spelling without forwarding it to `fit`, so the models
# were trained for the default single epoch instead of 100.
keras_base = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=1)
keras_lstm = KerasRegressor(build_fn=lstm_model, epochs=100, batch_size=5, verbose=1)

In [None]:
# One grid per candidate model. Every grid swaps the pipeline's 'predictor'
# step for a different estimator; 'predictor__<name>' keys tune that estimator.
lr_grid = {'predictor': [LinearRegression()]}
svr_grid = {
    'predictor': [SVR()],
    'predictor__kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}
# NOTE(review): 'mse' / 'mae' were renamed in later scikit-learn releases
# ('squared_error' / 'absolute_error') — confirm against the installed version.
d_tree_grid = {
    'predictor': [DecisionTreeRegressor()],
    'predictor__criterion': ['mse', 'friedman_mse', 'mae', 'poisson']
}
mlp_grid = {
    'predictor': [MLPRegressor()],
    'predictor__learning_rate_init': [0.001],
    'predictor__random_state': [1],
    'predictor__max_iter': [400],
    'predictor__activation': ['relu', 'logistic'],  ##'tanh',
    'predictor__hidden_layer_sizes': [(100,), (125,)],  ## ,(50,)
    'predictor__alpha': [10**-x for x in range(1, 7)]
}
rfr_grid = {'predictor': [RandomForestRegressor()]}
# NOTE(review): 'ls' / 'lad' were likewise renamed in later scikit-learn
# releases — confirm against the installed version.
gbr_grid = {
    'predictor': [GradientBoostingRegressor()],
    'predictor__loss': ['ls', 'lad', 'huber', 'quantile']
}
etr_grid = {'predictor': [ExtraTreesRegressor()]}
abr_grid = {'predictor': [AdaBoostRegressor()]}
sgdr_grid = {'predictor': [SGDRegressor()]}
ridge_grid = {'predictor': [Ridge()]}
lasso_grid = {'predictor': [Lasso()]}
enet_grid = {'predictor': [ElasticNet()]}
lars_lasso_grid = {'predictor': [LassoLars()]}
br_grid = {'predictor': [BayesianRidge()]}
adrr_grid = {'predictor': [ARDRegression()]}
# Perceptron is a classifier, which is the likely reason it is marked
# "not working" below when fitted on a regression target.
percep_grid = {'predictor': [Perceptron()]}
par_grid = {'predictor': [PassiveAggressiveRegressor()]}
tsr_grid = {'predictor': [TheilSenRegressor()]}
hbr_grid = {'predictor': [HuberRegressor()]}
keras_base_grid = {'predictor': [keras_base]}
keras_lstm_grid = {'predictor': [keras_lstm]}

# List of grids searched one after another by GridSearchCV.
# The last three entries previously lacked separating commas, which made the
# whole cell a SyntaxError.
param_grid = [
    lr_grid,
    mlp_grid,  ##takes a long time
    svr_grid,
    d_tree_grid,
    rfr_grid,
    gbr_grid,
    etr_grid,
    abr_grid,
    sgdr_grid,  ##near competitor to linreg
    ridge_grid,  ##near competitor to linreg
    lasso_grid,
    enet_grid,
    lars_lasso_grid,
    br_grid,
    adrr_grid,
    percep_grid,  ##not working
    par_grid,
    tsr_grid,  ##better than linear; and takes a while
    hbr_grid,
    keras_base_grid,
    keras_lstm_grid,
]

In [None]:
# Fit the plain StandardScaler + LinearRegression pipeline as the reference model.
pipe.fit(X_train, y_train)

# Search every candidate grid with 3-fold CV, ranking by negated median absolute error.
# NOTE(review): an integer `cv` uses unshuffled K-fold splits for regressors; if the
# rows are time-ordered, consider whether a time-series-aware splitter is appropriate.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='neg_median_absolute_error',
    cv=3,
    verbose=4,
)
grid.fit(X_train, y_train)

In [None]:

# Compare the baseline pipeline against the best estimator found by the search.
grid_est = grid.best_estimator_
y_pred = pipe.predict(X_test)
grid_pred = grid_est.predict(X_test)
print(grid.best_params_)

# GridSearchCV exposes its fitted scorer as the `scorer_` attribute (it has no
# `get_scorer()` method). Score both models with that same scorer so the numbers
# match the label printed next to them; `.score` on a pipeline would report R^2.
grid_score = grid.score(X_test, y_test)
linreg_score = grid.scorer_(pipe, X_test, y_test)
print('scores in ', grid.scorer_, ' : grid', grid_score, 'linreg', linreg_score)
print(grid.scorer_)

# Additional hold-out metrics; note `mae` is imported as median_absolute_error.
print('median_absolute_error as mae;','explained_variance_score as evs;','mean_absolute_percentage_error as mape;')
print('linreg', mae(y_test, y_pred), evs(y_test, y_pred), mape(y_test, y_pred))
print('grid', mae(y_test, grid_pred), evs(y_test, grid_pred), mape(y_test, grid_pred))

In [None]:
# Actual test targets (blue points) against the grid-search winner's predictions (red line).
index = list(range(len(y_test)))
# The third positional argument of scatter is the marker size `s`, so the colour
# must be passed by keyword; passing 'b' positionally raises an error.
plt.scatter(index, y_test, c='b', label='actual')
plt.plot(index, grid_pred, 'r', label='grid search prediction')
plt.xlabel('test sample index')
plt.ylabel('target')
plt.title('Grid search best estimator vs. actual')
plt.legend();


In [None]:
# Actual test targets (points) against the baseline pipeline's predictions (line).
# The x positions are computed here so the cell does not rely on a variable
# left behind by another cell.
positions = range(len(y_test))
plt.scatter(positions, y_test, label='actual')
plt.plot(positions, y_pred, label='linear regression prediction')
plt.xlabel('test sample index')
plt.ylabel('target')
plt.title('Baseline linear regression vs. actual')
plt.legend();