In [1]:
from DataGenerator import TimeSeriesGenerator, suddenDrift, incrementalDrift
from conditions import scenarios
from sklearn.preprocessing import StandardScaler
from grouped_series import SeriesGrouper, ExperimentTracker, Experiment
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.pipeline import Pipeline
from TimeSeriesGradientBoosting import TimeSeriesGradientBoosting
from xgboost import XGBRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression

In [2]:
# Positional indices handed to the hybrid model as `model1_variables`:
# the first five positions (0..4) followed by the last three (-3, -2, -1).
# NOTE(review): presumably these index feature columns -- confirm against
# TimeSeriesGradientBoosting.
hybrid_vars = list(range(5)) + [-3, -2, -1]

In [3]:
# Hybrid estimator: LinearRegression is passed as `model1`, XGBRegressor as
# `model2`, and `hybrid_vars` as the variables for model1.
hybrid_model = TimeSeriesGradientBoosting(
    model1=LinearRegression(),
    model2=XGBRegressor(),
    model1_variables=hybrid_vars,
)

In [4]:
def _make_preprocessor():
    """Return a new, unfitted preprocessing step.

    Object-dtype columns are one-hot encoded (first category dropped) and
    every remaining column is standardised with ``StandardScaler``.

    A fresh instance is built on every call on purpose: ``Pipeline`` does not
    clone its steps when fitting (unless ``memory`` caching is enabled), so a
    single transformer object placed in several pipelines would be refit in
    place whenever any one of them is fit, silently changing the
    preprocessing the other pipelines use at predict time.
    """
    return ColumnTransformer(
        [
            (
                'one_hot_encoder',
                OneHotEncoder(drop='first'),
                make_column_selector(dtype_include=object),
            )
        ],
        remainder=StandardScaler(),
    )


# Kept so that any cell referring to `onehot_cols` keeps working; the
# pipelines below each own a separate instance instead of sharing this one.
onehot_cols = _make_preprocessor()

xgb_pipe = Pipeline([
    ('preprocessor', _make_preprocessor()),
    ('regressor', XGBRegressor())
])

hybrid_xgb_pipe = Pipeline([
    ('preprocessor', _make_preprocessor()),
    ('regressor', hybrid_model)
])

sgd_pipe = Pipeline([
    ('preprocessor', _make_preprocessor()),
    ('regressor', SGDRegressor())
])

In [5]:
def full_run(n_series=2, size=365):
    """Generate synthetic series and run the experiment on them.

    Builds ``n_series`` product generators (7 variables each) and the same
    number of customer generators (3 variables each), passes them together
    with ``scenarios()`` to an ``ExperimentTracker``, and runs the experiment
    with ``hybrid_xgb_pipe`` and ``sgd_pipe`` (``online=False`` for both).

    Parameters
    ----------
    n_series : int, default 2
        How many product generators and how many customer generators to
        create.
    size : int, default 365
        Value forwarded as ``size`` to every ``TimeSeriesGenerator``.

    Returns
    -------
    ExperimentTracker
        The tracker after ``runExperiment`` has been called on it.
    """
    products = []
    customers = []
    # Generators are created as interleaved product/customer pairs; the order
    # is kept in case TimeSeriesGenerator draws from shared random state.
    for _ in range(n_series):
        products.append(TimeSeriesGenerator(size=size, amountOfVariables=7))
        customers.append(TimeSeriesGenerator(size=size, amountOfVariables=3))

    thesis = ExperimentTracker(products, customers, scenarios())
    # NOTE(review): the labels below are attached to hybrid_xgb_pipe and
    # sgd_pipe respectively -- confirm they describe those models as intended.
    thesis.runExperiment(
        algorithms=[hybrid_xgb_pipe, sgd_pipe],
        algorithm_name=["Gradient boosted decision tree", "Linear regression"],
        online=[False, False],
    )
    return thesis


In [6]:
# Run the whole experiment and keep the tracker for the result cells below.
# NOTE(review): the saved output of this cell is
# "ValueError: Found input variables with inconsistent numbers of samples: [8, 1096]".
# 8 equals len(hybrid_vars), so `model1_variables` may be selecting rows
# instead of columns inside TimeSeriesGradientBoosting -- TODO confirm.
thesis = full_run()

ValueError: Found input variables with inconsistent numbers of samples: [8, 1096]

In [None]:
# Collect the tracker's results via resultsToDF(); the cells below treat the
# returned object as a pandas DataFrame (column selection, groupby, sort_values).
results = thesis.resultsToDF()

In [None]:
# SMAPE summary (mean, std, min, max) for every combination of algorithm,
# drift type, drift time and drift magnitude.
(
    results[["Algorithm", "SMAPE", "Drift type", "Drift time", "Drift magnitude"]]
    .groupby(["Algorithm", "Drift type", "Drift time", "Drift magnitude"])
    .agg(["mean", "std", "min", "max"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SMAPE,SMAPE,SMAPE,SMAPE
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,min,max
Algorithm,Drift type,Drift time,Drift magnitude,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Gradient boosted decision tree,Incremental Drift,Fully observed,Large,3.555,0.284237,3.17,3.91
Gradient boosted decision tree,Incremental Drift,Fully observed,Small,3.85,0.151598,3.54,4.05
Gradient boosted decision tree,Incremental Drift,Half observed,Large,14.9125,7.688757,4.51,22.24
Gradient boosted decision tree,Incremental Drift,Half observed,Small,6.245833,1.548768,4.04,7.99
Gradient boosted decision tree,Incremental Drift,Unobserved,Large,12.6525,6.056767,4.42,18.42
Gradient boosted decision tree,Incremental Drift,Unobserved,Small,5.828333,1.258273,4.03,7.26
Gradient boosted decision tree,No Drift,No Drift,No Drift,3.9825,0.131244,3.82,4.14
Gradient boosted decision tree,Sudden Drift,Fully observed,Large,3.5,0.301541,3.22,3.99
Gradient boosted decision tree,Sudden Drift,Fully observed,Small,3.785,0.147741,3.6,4.05
Gradient boosted decision tree,Sudden Drift,Half observed,Large,4.765833,1.24427,3.84,8.23


In [None]:
# Show the runs ordered from highest to lowest SMAPE.
results.sort_values(by="SMAPE", ascending=False)

Unnamed: 0,Algorithm,Dropped variable,Drift type,Drift magnitude,Variable importance,Drift time,MSE,SMAPE
236,Linear regression,0.0,Incremental Drift,Large,Medium,Half observed,1967.599593,30.33
219,Linear regression,0.0,Sudden Drift,Large,Medium,Unobserved,1096.013194,27.81
318,Linear regression,6.0,Incremental Drift,Large,Medium,Half observed,1382.909703,26.54
301,Linear regression,6.0,Sudden Drift,Large,Medium,Unobserved,866.403390,25.27
52,Gradient boosted decision tree,0.0,Sudden Drift,Large,Important,Unobserved,1022.348231,25.01
...,...,...,...,...,...,...,...,...
50,Gradient boosted decision tree,0.0,Sudden Drift,Large,Important,Fully observed,18.500273,3.23
132,Gradient boosted decision tree,6.0,Sudden Drift,Large,Important,Fully observed,17.548693,3.22
296,Linear regression,6.0,Sudden Drift,Large,Important,Fully observed,18.718341,3.21
109,Gradient boosted decision tree,3.0,Incremental Drift,Large,Important,Fully observed,17.959962,3.18
