In [20]:
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor

from ts_package.change_point import ChangePointDetector

In [24]:
import warnings


# Suppress all warnings for the rest of the kernel session so library warnings
# do not clutter cell output.
# NOTE(review): this is a blanket filter; it also hides deprecation and
# numerical warnings. Consider narrowing it by category or module.
warnings.filterwarnings('ignore')

# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell runs, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Restore the trained regressor from its serialized .cbm artifact.
model = CatBoostRegressor()
model.load_model('../data/artifacts/catboost_regressor.cbm')

# Show the parameters stored with the loaded model (rendered as the cell output).
model.get_params()

{'min_data_in_leaf': 82,
 'random_strength': 0.5023749544,
 'depth': 4,
 'random_seed': 777,
 'loss_function': 'RMSE',
 'iterations': 1460,
 'verbose': 100}

In [38]:
# All three CSV files are read the same way: the 'Date' column is parsed as
# datetime and used as the index.
date_index_options = {'parse_dates': ['Date'], 'index_col': 'Date'}

# Training features, training target (the 'Balance' column) and test features.
X_train = pd.read_csv('../data/train.csv', **date_index_options)
target_frame = pd.read_csv('../data/train_y.csv', **date_index_options)
y_train = target_frame['Balance']
X_test = pd.read_csv('../data/test.csv', **date_index_options)

# Preview the first rows of the training features.
X_train.head()

Unnamed: 0_level_0,Налог на прибыль (при уплате только ежеквартальных авансовых платежей),Зарплаты и авансы (не позднее 15 и 30 числа каждого месяца),Foreign Exchange Reserves: actual,Retail Sales YoY: forecastCorrection,Markit Services PMI: actual,Business Confidence: forecastCorrection,BZ=F,IMOEX.ME,Key_rate,Euro,Dollar,Gold,Balance lag 1,Balance lag 2,Balance lag 3,Balance lag 7,Balance lag 20,Balance lag 28,weekday,quarter,is_weekend
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-09,0.0,0.6,390.6,-1.1,58.4,-4.0,54.939999,2211.25,10.0,63.0176,59.8495,2264.82,,,,,,,0,1,0
2017-01-10,0.0,0.666667,390.6,-1.1,58.4,-4.0,53.639999,2237.48999,10.0,63.5227,59.9672,2280.67,-0.155904,,,,,,1,1,0
2017-01-11,0.0,0.733333,390.6,-1.1,58.4,-4.0,55.099998,2218.610107,10.0,63.3733,60.143,2297.0,-0.125572,-0.155904,,,,,2,1,0
2017-01-12,0.0,0.8,390.6,-1.1,58.4,-4.0,56.009998,2212.0,10.0,63.1847,59.3784,2308.2,0.007767,-0.125572,-0.155904,,,,3,1,0
2017-01-13,0.0,0.866667,390.6,-1.1,58.4,-4.0,55.450001,2195.189941,10.0,63.1565,59.3602,2283.58,0.800391,0.007767,-0.125572,,,,4,1,0


Симулируем работу на новых данных:

In [44]:
# Get the last state reported by the change-point detector on the training target.
# NOTE: the 'currunt' spelling is kept on purpose: it is the keyword name that
# ChangePointDetector is called with in this notebook.
currunt_state_observations_size = 21
probable_change_point_area_size = 7
# Step (in rows of X_test) by which the forecast horizon grows on each iteration.
window_size = currunt_state_observations_size + probable_change_point_area_size

detector = ChangePointDetector(
    currunt_state_observations_size=currunt_state_observations_size,
    probable_change_point_area_size=probable_change_point_area_size,
    threshold_coef=3
)
detected_train_y = detector.detect_periods(y_train.values)
# The last element of the detector output is treated as the current state.
detected_state = detected_train_y[-1]
# Fix: the original printed an undefined name (`last_detected_state`), which
# raises NameError on a fresh kernel; the value lives in `detected_state`.
print('Last detected state of change point detector:', detected_state)

# Simulate operation on the out-of-time test sample.
# Horizons grow by `window_size` rows (28, 56, ...); the final entry is the
# whole test set, labelled 'FULL'. This replaces the original `for ... else`
# (whose `else` always ran because the loop has no `break`) and the duplicated
# loop body that came with it.
horizon_steps = np.arange(0, X_test.shape[0], window_size)[1:]
horizon_slices = [(step, X_test.iloc[:step, :]) for step in horizon_steps]
horizon_slices.append(('FULL', X_test))

for horizon_label, forecast_horizon_data in horizon_slices:
    # Forecast the horizon and append it to the observed training target so the
    # detector sees one continuous series.
    forecast = pd.Series(model.predict(forecast_horizon_data), index=forecast_horizon_data.index)
    full_time_series = pd.concat(
        (
            y_train,
            forecast
        ),
        ignore_index=False
    )
    detected_full_time_series = detector.detect_periods(full_time_series.values)
    # Compare the newest state with the one from the previous (shorter) horizon.
    previous_detected_state = detected_state
    detected_state = detected_full_time_series[-1]
    print(f'For forecast horizon = {horizon_label} detected state is {detected_state}')
    if detected_state != previous_detected_state:
        print('Detected change point!')
        print('Suggest to make recalibration!')

Last detected state of change point detector: 1.0
For forecast horizon = 28 detected state is 1.0
For forecast horizon = 56 detected state is 1.0
For forecast horizon = 84 detected state is 1.0
For forecast horizon = FULL detected state is 1.0
