In [1]:
from DataGenerator import TimeSeriesGenerator, suddenDrift, incrementalDrift
from conditions import scenarios
from sklearn.preprocessing import StandardScaler
from grouped_series import SeriesGrouper, ExperimentTracker, Experiment
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.pipeline import Pipeline
from TimeSeriesGradientBoosting import TimeSeriesGradientBoosting
from xgboost import XGBRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression

In [16]:
# Column positions supplied to the hybrid model as `model1_variables`:
# positions 0 through 7 plus the last three positions.
# NOTE(review): presumably these index the preprocessed feature matrix — confirm.
hybrid_vars = [*range(8), -3, -2, -1]

In [17]:
# Hybrid regressor: LinearRegression as `model1` (given the `hybrid_vars`
# column subset) and XGBRegressor as `model2`.
# NOTE(review): how the two models are combined is decided inside
# TimeSeriesGradientBoosting, which is not visible from this notebook.
hybrid_model = TimeSeriesGradientBoosting(
    model1=LinearRegression(),
    model2=XGBRegressor(),
    model1_variables=hybrid_vars,
)

In [18]:
def make_preprocessor():
    """Build a fresh, unfitted preprocessing step.

    Object-dtype columns are one-hot encoded; every remaining column is
    standardised with ``StandardScaler``.

    A new instance is returned on every call because ``Pipeline.fit`` fits its
    steps in place when no ``memory`` cache is configured. Sharing a single
    transformer object between several pipelines would let the most recently
    fitted pipeline overwrite the preprocessing state that the other pipelines
    rely on when they predict.

    Returns:
        ColumnTransformer: an unfitted transformer.
    """
    return ColumnTransformer(
        [('one_hot_encoder', OneHotEncoder(),
          make_column_selector(dtype_include=object))],
        remainder=StandardScaler(),
    )


# Kept under its original name for any cell that still refers to it; the
# pipelines below each own a separate instance instead of sharing this one.
onehot_cols = make_preprocessor()

# Plain XGBoost regressor on the preprocessed features.
xgb_pipe = Pipeline([
    ('preprocessor', make_preprocessor()),
    ('regressor', XGBRegressor())
])

# Hybrid model (see `hybrid_model`) on the preprocessed features.
hybrid_xgb_pipe = Pipeline([
    ('preprocessor', make_preprocessor()),
    ('regressor', hybrid_model)
])

# Linear model trained with stochastic gradient descent.
sgd_pipe = Pipeline([
    ('preprocessor', make_preprocessor()),
    ('regressor', SGDRegressor())
])

In [21]:
def full_run(n_series=2, series_length=365, n_product_variables=7,
             n_customer_variables=3):
    """Generate synthetic series, run every scenario, and return the tracker.

    The defaults reproduce the original hard-coded setup (2 product/customer
    pairs, 365 points each, 7 product variables, 3 customer variables).

    Args:
        n_series: number of product/customer generator pairs to create.
        series_length: value passed as ``size`` to each ``TimeSeriesGenerator``.
        n_product_variables: ``amountOfVariables`` for product generators.
        n_customer_variables: ``amountOfVariables`` for customer generators.

    Returns:
        The ``ExperimentTracker`` after ``runExperiment`` has been called on it.
    """
    products = []
    customers = []
    for _ in range(n_series):
        # Product is created before customer within each iteration, as in the
        # original loop, in case the generators draw from shared random state.
        product = TimeSeriesGenerator(size=series_length,
                                      amountOfVariables=n_product_variables)
        customer = TimeSeriesGenerator(size=series_length,
                                       amountOfVariables=n_customer_variables)
        products.append(product)
        customers.append(customer)

    thesis = ExperimentTracker(products, customers, scenarios())

    # NOTE(review): hybrid_xgb_pipe wraps the LinearRegression + XGBRegressor
    # hybrid and sgd_pipe wraps SGDRegressor; the display labels below are kept
    # exactly as they were so existing result tables stay comparable.
    algorithms = [hybrid_xgb_pipe, sgd_pipe]
    thesis.runExperiment(
        algorithms=algorithms,
        algorithm_name=["Gradient boosted decision tree", "Linear regression"],
        # One flag per algorithm; derived so the lists cannot drift apart.
        online=[False] * len(algorithms),
    )
    return thesis


In [22]:
# Run the experiments and keep the returned tracker; later cells read the
# results from it.
thesis = full_run()

Finished experiment! Elapsed time: 0.5059776306152344, total Elapsed time: 0.5059776306152344, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Fully observed, importance: Important
Finished experiment! Elapsed time: 0.5464060306549072, total Elapsed time: 1.0533816814422607, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Half observed, importance: Important
Finished experiment! Elapsed time: 0.5224804878234863, total Elapsed time: 1.575862169265747, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Unobserved, importance: Important
Finished experiment! Elapsed time: 0.5257503986358643, total Elapsed time: 2.102609872817993, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Fully observed, importance: Medium
Finished e

In [23]:
# Export the tracker's results as a table. The output displayed further down
# shows columns such as Algorithm, Drift type, Drift time, MSE and SMAPE.
results = thesis.resultsToDF()

In [24]:
# SMAPE summary (mean / std / min / max) for each combination of algorithm,
# drift type, drift time and drift magnitude.
(
    results[["Algorithm", "SMAPE", "Drift type", "Drift time", "Drift magnitude"]]
    .groupby(["Algorithm", "Drift type", "Drift time", "Drift magnitude"])
    .agg(["mean", "std", "min", "max"])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SMAPE,SMAPE,SMAPE,SMAPE
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,min,max
Algorithm,Drift type,Drift time,Drift magnitude,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Gradient boosted decision tree,Incremental Drift,Fully observed,Large,5.358333,3.079078,2.17,10.19
Gradient boosted decision tree,Incremental Drift,Fully observed,Small,2.69,0.567498,2.12,3.56
Gradient boosted decision tree,Incremental Drift,Half observed,Large,10.680833,8.076255,2.23,21.75
Gradient boosted decision tree,Incremental Drift,Half observed,Small,3.324167,1.538113,2.14,6.17
Gradient boosted decision tree,Incremental Drift,Unobserved,Large,8.833333,6.307863,2.16,17.43
Gradient boosted decision tree,Incremental Drift,Unobserved,Small,3.0475,1.166768,2.13,5.18
Gradient boosted decision tree,No Drift,No Drift,No Drift,2.15,0.040825,2.11,2.19
Gradient boosted decision tree,Sudden Drift,Fully observed,Large,4.6725,2.259087,2.17,7.74
Gradient boosted decision tree,Sudden Drift,Fully observed,Small,2.635,0.466096,2.08,3.45
Gradient boosted decision tree,Sudden Drift,Half observed,Large,4.0325,2.386058,2.14,9.92


In [10]:
# List the runs from highest to lowest SMAPE.
results.sort_values(by="SMAPE", ascending=False)

Unnamed: 0,Algorithm,Dropped variable,Drift type,Drift magnitude,Variable importance,Drift time,MSE,SMAPE
298,Linear regression,6.0,Sudden Drift,Large,Important,Unobserved,23994.038881,127.58
175,Linear regression,,Sudden Drift,Large,Important,Unobserved,13951.542613,114.69
257,Linear regression,3.0,Sudden Drift,Large,Important,Unobserved,13699.533142,113.60
315,Linear regression,6.0,Incremental Drift,Large,Important,Half observed,24929.315174,109.27
274,Linear regression,3.0,Incremental Drift,Large,Important,Half observed,14701.285495,99.51
...,...,...,...,...,...,...,...,...
47,Gradient boosted decision tree,0.0,Sudden Drift,Small,Unimportant,Fully observed,11.920706,2.13
88,Gradient boosted decision tree,3.0,Sudden Drift,Small,Unimportant,Fully observed,11.576972,2.13
36,Gradient boosted decision tree,,No Drift,No Drift,No Drift,No Drift,11.239483,2.10
77,Gradient boosted decision tree,0.0,No Drift,No Drift,No Drift,No Drift,11.268753,2.09
