In [1]:
import warnings

import numpy as np
import pandas as pd
from darts.models.forecasting.nhits import NHiTSModel
from darts import TimeSeries
import torch
from typing import Callable
from tqdm import tqdm

from helpers import predict, load_agent, quality, clip
from preprocess import preprocess_stats
from rl.sim_enviroment import SimulatedCustomEnv

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.options import DataDriftOptions

The `LightGBM` module could not be imported. To enable LightGBM support in Darts, follow the detailed instructions in the installation guide: https://github.com/unit8co/darts/blob/master/INSTALL.md
The `Prophet` module could not be imported. To enable Prophet support in Darts, follow the detailed instructions in the installation guide: https://github.com/unit8co/darts/blob/master/INSTALL.md
The `CatBoost` module could not be imported. To enable CatBoost support in Darts, follow the detailed instructions in the installation guide: https://github.com/unit8co/darts/blob/master/INSTALL.md


In [4]:
df = pd.read_csv('drift_scores_rewards_all.csv', index_col=0)

In [5]:
df[df.mom_reward_max >= 0]

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
7,"(13312,)",1.0,0.827806,0.683533,1.014011,0.051522,-1488446.0,5680,1681490.0,-9521.92953,-24235,155,7988.393527
8,"(13311,)",1.0,0.766681,0.637207,1.072541,0.050251,-523652.9,15,449387.8,-2489.77349,-4475,10,1283.988759
15,"(22975,)",0.875,0.85106,0.708378,0.965337,0.028175,-560112.9,760,516105.7,-2929.211864,-6100,65,1796.577395
43,"(24233,)",1.0,0.779225,0.680422,1.3894,0.045879,-2962463.0,95,2776694.0,-15662.177966,-32200,15,9789.926311
102,"(9737,)",0.875,0.836526,0.688055,1.260287,0.039131,-472372.7,310,458104.5,-2602.398305,-5740,35,1746.029977
192,"(42857,)",0.875,0.901327,0.631919,1.170445,0.036514,-1261592.0,15,1248601.0,-7061.771186,-15505,10,4867.344108
221,"(8916,)",0.875,0.86885,0.690507,0.966925,0.03412,-1782831.0,30,1786634.0,-10402.563667,-24855,15,7679.013739
248,"(42856,)",0.875,0.925625,0.667989,1.060651,0.031276,-2937212.0,5,2752871.0,-15626.655348,-32880,5,9852.050424
249,"(42855,)",0.875,0.889259,0.685598,1.073196,0.036912,-1147484.0,0,1169676.0,-6754.558574,-16105,0,5021.314465
262,"(41872,)",0.875,0.802087,0.709027,1.287416,0.05567,-3419002.0,5,3045949.0,-17262.640068,-33845,5,9851.629123


In [24]:
leaned_cells = [int(i.split(',')[0][1:]) for i in df[df.mom_reward_max >= 0].cell_id]

In [19]:
data = pd.read_csv('data/dataset_full.csv', index_col=0)


In [25]:
data[data['Cell ID'].isin(leaned_cells)]

Unnamed: 0,Cell ID,DATA,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
36,755,2020-10-02,1.88,97.31,1.16,16.21,12,20
38,756,2020-10-02,3.88,92.37,0.00,19.04,12,20
48,13311,2020-10-02,6.00,98.46,0.13,34.06,85,97
49,13312,2020-10-02,6.00,85.87,0.00,26.85,5,35
87,723,2020-10-02,6.12,95.03,0.03,49.65,86,97
...,...,...,...,...,...,...,...,...
896,51357,2022-05-22,20.00,100.00,0.00,11.73,70,90
915,7905,2022-05-22,13.00,25.00,0.00,4.92,12,21
917,7906,2022-05-22,13.00,57.00,0.00,5.09,12,21
919,7907,2022-05-22,20.00,54.00,0.00,10.77,12,21


In [29]:
data.columns

Index(['Cell ID', 'DATA', 'Number of Available\nTCH', 'HR Usage Rate',
       'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'Param 1', 'Param 2'],
      dtype='object')

In [30]:
cols = ['Number of Available\nTCH', 'HR Usage Rate',
       'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'Param 1', 'Param 2']
reff = data[data['Cell ID'].isin(leaned_cells)][cols]

In [31]:
reff

Unnamed: 0,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
36,1.88,97.31,1.16,16.21,12,20
38,3.88,92.37,0.00,19.04,12,20
48,6.00,98.46,0.13,34.06,85,97
49,6.00,85.87,0.00,26.85,5,35
87,6.12,95.03,0.03,49.65,86,97
...,...,...,...,...,...,...
896,20.00,100.00,0.00,11.73,70,90
915,13.00,25.00,0.00,4.92,12,21
917,13.00,57.00,0.00,5.09,12,21
919,20.00,54.00,0.00,10.77,12,21


In [41]:
drift_scores = []

for cell in tqdm(data['Cell ID'].unique()):
    cell_data = data[data['Cell ID'] == cell][cols]

    data_drift_report = Report(metrics=[
        DataDriftPreset(),
    ])

    n = min(len(cell_data), len(reff), 1000)

    data_drift_report.run(reference_data=reff.sample(n=n), current_data=cell_data.sample(n=n),)
    drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

    drift_scores.append((cell, drift))

100%|██████████| 1043/1043 [03:00<00:00,  5.77it/s]


In [39]:
drift_scores = pd.DataFrame(drift_scores)
drift_scores

Unnamed: 0,0,1
0,25771,0.833333
1,25772,0.833333
2,25773,0.833333
3,3361,1.000000
4,3363,0.833333
...,...,...
1038,8521,1.000000
1039,8522,1.000000
1040,8523,0.833333
1041,13322,0.833333


In [38]:
drift_scores.describe()

Unnamed: 0,0,1
count,1043.0,1043.0
mean,14699.149569,0.849952
std,12028.075917,0.052051
min,701.0,0.666667
25%,5627.0,0.833333
50%,10942.0,0.833333
75%,24232.5,0.833333
max,51357.0,1.0


In [42]:
drift_scores = pd.DataFrame(drift_scores)
drift_scores

Unnamed: 0,0,1
0,25771,1.000000
1,25772,0.833333
2,25773,0.833333
3,3361,1.000000
4,3363,1.000000
...,...,...
1038,8521,1.000000
1039,8522,0.833333
1040,8523,0.833333
1041,13322,0.833333


In [43]:
drift_scores.describe()

Unnamed: 0,0,1
count,1043.0,1043.0
mean,14699.149569,0.948386
std,12028.075917,0.077786
min,701.0,0.666667
25%,5627.0,0.833333
50%,10942.0,1.0
75%,24232.5,1.0
max,51357.0,1.0


In [47]:
data_drift_report.as_pandas()['DatasetDriftMetric']

Unnamed: 0,type,drift_share,number_of_columns,number_of_drifted_columns,share_of_drifted_columns,dataset_drift
0,evidently.metrics.data_drift.dataset_drift_met...,0.5,6,5,0.833333,True
