In [1]:
from DataGenerator import TimeSeriesGenerator
from conditions import scenarios
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from grouped_series import ExperimentTracker
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.pipeline import Pipeline
from TimeSeriesGradientBoosting import TimeSeriesGradientBoosting
from xgboost import XGBRegressor
from sklearn.linear_model import SGDRegressor, LinearRegression

In [2]:
onehot1 = ColumnTransformer([('one_hot_encoder', OneHotEncoder(
), make_column_selector(dtype_include=object))], remainder=StandardScaler())

onehot2 = ColumnTransformer([('one_hot_encoder', OneHotEncoder(
), make_column_selector(dtype_include=object))], remainder=StandardScaler())

hybrid_vars = [0,1, 2, 3, 4, 5, 6, 7, -3, -2, -1]
hybrid_model = TimeSeriesGradientBoosting(model1 = LinearRegression(), model2 = XGBRegressor(), model1_variables = hybrid_vars)

hybrid_xgb_pipe = Pipeline([
    ('preprocessor', onehot1),
    ('regressor', hybrid_model)
])

sgd_pipe = Pipeline([
    ('preprocessor', onehot2),
    ('regressor', SGDRegressor())
])

In [3]:
def full_run():
    products = []
    customers = []
    for i in range(2):
        product = TimeSeriesGenerator(size=365, amountOfVariables=7)
        customer = TimeSeriesGenerator(size=365, amountOfVariables=3)
        products.append(product)
        customers.append(customer)
    thesis = ExperimentTracker(products, customers, scenarios())
    thesis.runExperiment(algorithms=[hybrid_xgb_pipe, sgd_pipe], algorithm_name=["Gradient boosted decision tree", "Linear regression"], online=[False, False])
    return thesis


In [4]:
thesis = full_run()

Finished experiment! Elapsed time: 0.7166211605072021, total Elapsed time: 0.7176151275634766, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Fully observed, importance: Important
Finished experiment! Elapsed time: 0.6088011264801025, total Elapsed time: 1.3274121284484863, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Half observed, importance: Important
Finished experiment! Elapsed time: 0.5100016593933105, total Elapsed time: 1.8374137878417969, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Unobserved, importance: Important
Finished experiment! Elapsed time: 0.513495922088623, total Elapsed time: 2.35090970993042, Algorithm: Gradient boosted decision tree Type: Sudden Drift, Dropped variables: None, magnitude: Small, Drift time: Fully observed, importance: Medium
Finished ex

In [5]:
results = thesis.resultsToDF()

In [6]:
results[["Algorithm","SMAPE", "Drift type", "Drift time", "Drift magnitude"]].groupby(["Algorithm","Drift type", "Drift time", "Drift magnitude"]).aggregate(["mean", "std", "min", "max"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,SMAPE,SMAPE,SMAPE,SMAPE
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,min,max
Algorithm,Drift type,Drift time,Drift magnitude,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Gradient boosted decision tree,Incremental Drift,Fully observed,Large,5.678333,2.793251,3.23,9.62
Gradient boosted decision tree,Incremental Drift,Fully observed,Small,3.496667,0.350229,2.91,4.0
Gradient boosted decision tree,Incremental Drift,Half observed,Large,14.508333,10.518592,3.13,28.29
Gradient boosted decision tree,Incremental Drift,Half observed,Small,5.795,2.566251,3.12,9.66
Gradient boosted decision tree,Incremental Drift,Unobserved,Large,12.180833,8.311933,3.19,23.07
Gradient boosted decision tree,Incremental Drift,Unobserved,Small,5.328333,2.07041,3.17,8.43
Gradient boosted decision tree,No Drift,No Drift,No Drift,3.38,0.195107,3.19,3.59
Gradient boosted decision tree,Sudden Drift,Fully observed,Large,5.3825,2.477011,3.08,8.83
Gradient boosted decision tree,Sudden Drift,Fully observed,Small,3.120833,0.253106,2.68,3.44
Gradient boosted decision tree,Sudden Drift,Half observed,Large,5.45,2.084021,3.23,8.36


In [7]:
results.sort_values("SMAPE", ascending=False)   

Unnamed: 0,Algorithm,Dropped variable,Drift type,Drift magnitude,Variable importance,Drift time,MSE,SMAPE
52,Gradient boosted decision tree,0.0,Sudden Drift,Large,Important,Unobserved,2084.583217,32.31
11,Gradient boosted decision tree,,Sudden Drift,Large,Important,Unobserved,2050.824207,32.15
134,Gradient boosted decision tree,6.0,Sudden Drift,Large,Important,Unobserved,1961.504375,31.16
93,Gradient boosted decision tree,3.0,Sudden Drift,Large,Important,Unobserved,1919.339326,30.74
216,Linear regression,0.0,Sudden Drift,Large,Important,Unobserved,1894.468609,29.64
...,...,...,...,...,...,...,...,...
314,Linear regression,6.0,Incremental Drift,Large,Important,Fully observed,30.650648,2.82
255,Linear regression,3.0,Sudden Drift,Large,Important,Fully observed,30.319409,2.82
296,Linear regression,6.0,Sudden Drift,Large,Important,Fully observed,30.282155,2.81
3,Gradient boosted decision tree,,Sudden Drift,Small,Medium,Fully observed,16.530015,2.78


Next up is changepoint/drift detection!
Let's start with doing it on a single series and then ramp up complexity to multivariate cases

In [8]:
from kats.detectors.bocpd import BOCPDetector, BOCPDModelType, TrendChangeParameters
from kats.consts import TimeSeriesData

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
test_prod = TimeSeriesGenerator(size=365, amountOfVariables=10).toDataFrame()


In [10]:
# Add a time column to test_prod
test_prod["time"] = test_prod.index
test_prod = TimeSeriesData(test_prod)


In [11]:
det = BOCPDetector(test_prod)
changepoints = det.detector(model=BOCPDModelType.NORMAL_KNOWN_MODEL)







TypeError: ufunc 'add' output (typecode 'O') could not be coerced to provided output parameter (typecode 'd') according to the casting rule ''same_kind''