In [1]:
## run following in termminal (or in notebook) if needed to get packages to work
# !conda create -n tensorflow
# !source activate tensorflow
# !pip install jupyter notebook
# !jupyter-notebook
# !which pip
# !pip install tensorflow
# !pip install keras
# !pip install sklearn pandas numpy seaborn

## data split custom tool
from timeseries_train_test_split import TimeseriesTestTrainSplit as ts
##standard tools
import tensorflow
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
##preprocessing
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
## model selection and building
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score as r2
from sklearn.metrics import explained_variance_score as evs
from sklearn.metrics import mean_absolute_percentage_error as mape
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
##models
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso, ElasticNet, LassoLars, BayesianRidge, ARDRegression, Perceptron, PassiveAggressiveRegressor, TheilSenRegressor, HuberRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor, AdaBoostRegressor, StackingRegressor, VotingRegressor

In [5]:
# def run():
X_train, y_train, X_test, y_test = ts.timeseries_test_train_split()

In [6]:
transformers = FeatureUnion([('scaler', MinMaxScaler()), ('norm', Normalizer())])
pipe = Pipeline([('preprocess', StandardScaler()), ('predictor', LinearRegression())])
keras_reg = KerasRegressor()

AttributeError: 'KerasRegressor' object has no attribute '__call__'

In [7]:
lr_grid = {'predictor': [LinearRegression()]}
svr_grid = {
    'predictor': [SVR()],
    'predictor__kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}
d_tree_grid = {
    'predictor': [DecisionTreeRegressor()],
    'predictor__criterion': ['mse', 'friedman_mse', 'mae', 'poisson']  
}
mlp_grid = {
    'predictor': [MLPRegressor()],
    'predictor__learning_rate_init': [0.001],
    'predictor__random_state': [1],
    'predictor__max_iter': [400],
    'predictor__activation': ['relu','logistic'], ##'tanh',
    'predictor__hidden_layer_sizes': [(100,),(125,)], ## ,(50,)
    'predictor__alpha': [10**-x for x in range(1, 7)]
}
rfr_grid = {'predictor': [RandomForestRegressor()]  }
gbr_grid = {
    'predictor': [GradientBoostingRegressor()],
    'predictor__loss': ['ls', 'lad', 'huber', 'quantile']
}
etr_grid = {'predictor': [ExtraTreesRegressor()]}
abr_grid = {'predictor': [AdaBoostRegressor()]}
sgdr_grid = {'predictor': [SGDRegressor()]}
ridge_grid = {'predictor': [Ridge()]}
lasso_grid = {'predictor': [Lasso()]}
enet_grid = {'predictor': [ElasticNet()]}
lars_lasso_grid = {'predictor': [LassoLars()]}
br_grid = {'predictor': [BayesianRidge()]}
adrr_grid = {'predictor': [ARDRegression()]}
percep_grid = {'predictor': [Perceptron()]}
par_grid = {'predictor': [PassiveAggressiveRegressor()]}
tsr_grid = {'predictor': [TheilSenRegressor()]}
hbr_grid = {'predictor': [HuberRegressor()]}

param_grid = [
    {**lr_grid},
#     {**mlp_grid}, ##takes a long time
#     {**svr_grid}
#     {**d_tree_grid}
#     {**rfr_grid}
#     {**gbr_grid}
#     {**etr_grid}
#     {**abr_grid}
    {**sgdr_grid}, ##near competitor to linreg
    {**ridge_grid}, ##near competitor to linreg
#     {**lasso_grid}
#     {**enet_grid}
#     {**lars_lasso_grid}
#     {**br_grid}
#     {**adrr_grid}
#     {**percep_grid} ##not working
#     {**par_grid}
    {**tsr_grid} ##better than linear; and takes a while
#     {**hbr_grid}
]

In [None]:
pipe.fit(X_train, y_train)
grid = GridSearchCV(pipe, param_grid, verbose=4, cv = 3, scoring='r2')
grid.fit(X_train, y_train)

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV 1/3] END ...................predictor=LinearRegression(); total time=   0.0s
[CV 2/3] END ...................predictor=LinearRegression(); total time=   0.0s
[CV 3/3] END ...................predictor=LinearRegression(); total time=   0.0s
[CV 1/3] END .......................predictor=SGDRegressor(); total time=   0.0s
[CV 2/3] END .......................predictor=SGDRegressor(); total time=   0.0s
[CV 3/3] END .......................predictor=SGDRegressor(); total time=   0.0s
[CV 1/3] END ..............................predictor=Ridge(); total time=   0.0s
[CV 2/3] END ..............................predictor=Ridge(); total time=   0.0s
[CV 3/3] END ..............................predictor=Ridge(); total time=   0.0s


In [None]:
grid_est = grid.best_estimator_
y_pred = pipe.predict(X_test)
grid_pred = grid_est.predict(X_test)
grid.score(X_test, y_test)
print(grid.best_params_)
print('scores: grid', grid_est.score(X_test, y_test), 'linreg', pipe.score(X_test, y_test))
print(grid.scorer_)
print('r2_score as r2;','explained_variance_score as evs;','mean_absolute_percentage_error as mape;')
print('linreg', r2(y_test, y_pred), evs(y_test, y_pred), mape(y_test, y_pred))
print('grid', r2(y_test, grid_pred), evs(y_test, grid_pred), mape(y_test, grid_pred))

In [None]:
# .get_params()