In [98]:
import warnings

import numpy as np
import pandas as pd
from darts.models.forecasting.nhits import NHiTSModel
from darts import TimeSeries
import torch
from typing import Callable, List, Optional, Dict, Union

from evidently.test_preset import DataDriftTestPreset
from evidently.test_suite import TestSuite
from tqdm import tqdm

from helpers import predict, load_agent, quality, clip
from preprocess import preprocess_stats
from rl.sim_enviroment import SimulatedCustomEnv

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.options import DataDriftOptions

# Drift

In [90]:
# Identifiers of the statistical tests to evaluate; each one is passed as
# `num_stattest` to a separate DataDriftPreset by DriftCalculator.
# NOTE(review): the "<= 1000" / "> 1000" remarks presumably refer to the sample
# sizes at which Evidently picks that test by default — confirm against the docs.
stat_tests = [
    'ks', # <= 1000 Kolmogorov–Smirnov
    'wasserstein', # > 1000 Wasserstein distance (normed)
    'kl_div', # Kullback-Leibler divergence
    'psi', # Population Stability Index
    'jensenshannon',  #  > 1000 Jensen-Shannon distance
    # 'anderson', # Anderson-Darling test
    'cramer_von_mises', # Cramer-Von-Mises test
    'hellinger', # Hellinger Distance (normed)
    'mannw', # Mann-Whitney U-rank test
    'ed', # Energy distance
    # 'es', # Epps-Singleton test
    't_test', # T-Test
    'emperical_mmd', # Empirical MMD (identifier spelling kept exactly as it ran in this notebook)
]

In [99]:
class DriftCalculator:
    """Run a set of Evidently data-drift reports and collect one score per report.

    One ``Report`` wrapping ``DataDriftPreset(num_stattest=...)`` is created for
    every entry of ``drift_metrics``.  The reports are rebuilt after every call
    to ``get_drift`` so each call starts from fresh report objects.
    """

    # Upper bound on the number of rows drawn from each frame when ``sample=True``.
    MAX_SAMPLE_SIZE = 1000

    def __init__(self, drift_metrics: List[Union[str, Callable]], report_names: List[str]):
        """Create one drift report per metric.

        Args:
            drift_metrics: Values handed to ``DataDriftPreset`` as ``num_stattest``,
                one report per entry.
            report_names: Keys used in the dict returned by ``get_drift``; paired
                positionally with ``drift_metrics``.

        Raises:
            ValueError: If the two lists have different lengths (``zip`` would
                otherwise silently drop the surplus entries).
        """
        if len(drift_metrics) != len(report_names):
            raise ValueError(
                f"drift_metrics and report_names must have the same length, "
                f"got {len(drift_metrics)} and {len(report_names)}"
            )

        self.drift_stat_tests = drift_metrics
        self.reports: List[Report] = []
        self.reports_names = report_names
        self.reference = pd.DataFrame()
        self.fi: Optional[Dict[str, float]] = None

        self._renew_reports()

    def _renew_reports(self):
        """Replace ``self.reports`` with freshly constructed, not-yet-run reports."""
        self.reports = [
            Report(metrics=[DataDriftPreset(num_stattest=stat_test)])
            for stat_test in self.drift_stat_tests
        ]

    def set_reference(self, new_ref: pd.DataFrame) -> None:
        """Store the default reference frame used when ``get_drift`` gets none."""
        self.reference = new_ref

    def set_fi(self, fi: Dict[str, float]):
        """Store per-column weights (feature importances) for weighted scoring."""
        self.fi = fi

    def get_drift(self,
                  current_data: pd.DataFrame,
                  reference_data: Optional[pd.DataFrame] = None,
                  sample: bool = False,
                  weighted: bool = False,
                  random_state: Optional[int] = None) -> Dict[str, float]:
        """Run every report and return ``{report_name: drift_score}``.

        Args:
            current_data: Frame to check for drift.
            reference_data: Frame to compare against; falls back to the frame
                stored via ``set_reference`` when ``None``.
            sample: If true, both frames are down-sampled to the same size,
                ``min(len(current), len(reference), MAX_SAMPLE_SIZE)``.
                Otherwise all rows of both frames are used (in shuffled order).
            weighted: If false, the score is the report's
                ``share_of_drifted_columns``.  If true, it is the sum of the
                ``fi`` weights of the drifted columns divided by the sum of all
                ``fi`` weights.
            random_state: Optional seed forwarded to ``DataFrame.sample`` for
                reproducible row selection; ``None`` keeps the draws random.

        Returns:
            Mapping from report name to drift score.

        Raises:
            ValueError: If ``weighted`` is true but no (or only zero-sum)
                feature importances have been set.
            KeyError: If ``weighted`` is true and a reported column has no
                entry in ``fi``.
        """
        reff = self.reference if reference_data is None else reference_data

        total_weight = 0.0
        if weighted:
            # Validate before running any report so a misconfiguration fails fast.
            if not self.fi:
                raise ValueError("weighted=True requires feature importances; call set_fi() first")
            total_weight = sum(self.fi.values())
            if total_weight == 0:
                raise ValueError("feature importances sum to zero; weighted drift is undefined")

        # Sample sizes do not depend on the report, so compute them once.
        n_ref, n_cur = len(reff), len(current_data)
        if sample:
            n_ref = n_cur = min(n_cur, n_ref, self.MAX_SAMPLE_SIZE)

        scores: Dict[str, float] = {}
        try:
            for report, report_name in zip(self.reports, self.reports_names):
                # A new draw is taken for every report, as before.
                report.run(
                    reference_data=reff.sample(n=n_ref, random_state=random_state),
                    current_data=current_data.sample(n=n_cur, random_state=random_state),
                )
                metrics = report.as_dict()['metrics']  # serialise once per report

                if weighted:
                    by_column = metrics[1]['result']['drift_by_columns']
                    drifted_weight = sum(
                        int(info['drift_detected']) * self.fi[column]
                        for column, info in by_column.items()
                    )
                    scores[report_name] = drifted_weight / total_weight
                else:
                    scores[report_name] = metrics[0]['result']['share_of_drifted_columns']
        finally:
            # Always leave fresh reports behind, even if a run failed midway.
            self._renew_reports()

        return scores

## Data

In [26]:
def preprocess(data: pd.DataFrame):
    """Return a cleaned copy of ``data`` restricted to the six model feature columns.

    'Param 1' / 'Param 2' are renamed to 'Lower_limit' / 'Upper_limit', the
    columns are put in a fixed order, the columns-axis name is cleared and the
    row index is reset to 0..n-1.  The input frame is left unmodified.
    """
    lower_col, upper_col = 'Lower_limit', 'Upper_limit'
    feature_cols = [
        'HR Usage Rate',
        'TCH Blocking Rate, BH',
        'Number of Available\nTCH',
        'TCH Traffic (Erl), BH',
        lower_col,
        upper_col,
    ]

    renamed = (
        data
        .drop(columns='DATA', errors='ignore')
        .rename(columns={'Param 1': lower_col, 'Param 2': upper_col})
    )
    selected = renamed[feature_cols].drop(columns=['Cell ID', 'LAC'], errors='ignore')

    return selected.rename_axis(None, axis=1).reset_index(drop=True)

In [27]:
# Load the training and test tables; the first CSV column is used as the row index.
train_df = pd.read_csv('data/new_full_param_data.csv', index_col=0)
test_df = pd.read_csv('data/test_data.csv', index_col=0)

In [41]:
preprocess(train_df)

Unnamed: 0,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Lower_limit,Upper_limit
0,85.03,0.0,2.0,2.19,36.0,39.0
1,86.20,0.0,2.0,3.07,36.0,39.0
2,85.11,0.0,2.0,2.26,36.0,39.0
3,72.78,0.0,2.0,3.29,36.0,39.0
4,70.09,0.0,2.0,2.81,36.0,39.0
...,...,...,...,...,...,...
274869,44.00,0.0,13.0,2.53,26.0,43.0
274870,22.00,0.0,13.0,1.86,26.0,43.0
274871,24.00,0.0,13.0,1.68,26.0,43.0
274872,24.00,0.0,13.0,1.68,26.0,43.0


In [42]:
test_df

Unnamed: 0,Cell ID,DATA,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
0,25771,2023-01-02,13,96.0,0.00,5.47,46,54
1,25772,2023-01-02,13,75.0,0.00,5.34,25,43
2,25773,2023-01-02,21,72.0,0.00,7.71,27,47
3,3361,2023-01-02,21,49.0,0.00,9.02,20,30
4,3362,2023-01-02,6,92.0,0.00,1.38,11,21
...,...,...,...,...,...,...,...,...
193066,12746,2023-03-28,12,52.0,0.00,1.26,8,16
193067,12747,2023-03-28,12,50.0,0.00,1.37,41,49
193068,12781,2023-03-28,12,100.0,0.00,3.70,40,60
193069,12782,2023-03-28,12,99.0,0.00,4.92,40,60


In [None]:
{'col'}

## Test

In [100]:
# reports = [Report(metrics=[DataDriftPreset(num_stattest=t),]) for t in stat_tests]

# The stat-test identifiers double as report names, so the keys of the result
# dict returned by get_drift() are the test names themselves.
drift_calc = DriftCalculator(stat_tests, stat_tests)

In [34]:
# Run without sampling: the current window (rows 5,000-9,999) is a subset of
# the reference window (rows 0-9,999).
drift_calc.get_drift(current_data=train_df.iloc[5_000:10_000], reference_data=train_df.iloc[:10_000])


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001


p-value floored: true value smaller than 0.001



{'ks': 1.0,
 'wasserstein': 1.0,
 'kl_div': 1.0,
 'psi': 1.0,
 'jensenshannon': 1.0,
 'anderson': 1.0,
 'cramer_von_mises': 1.0,
 'hellinger': 1.0,
 'mannw': 1.0,
 'ed': 1.0,
 'es': 1.0,
 't_test': 1.0,
 'emperical_mmd': 1.0}

In [102]:
%%time
# Reference window: the first 5,000 training rows; current window: rows 8,000-9,999.
ref = train_df.iloc[:5_000]
cur = train_df.iloc[8_000:10_000]
# Pad `cur` with rows sampled from `ref` (the baseline distribution) so that
# both frames end up with the same number of rows.
cur = pd.concat([ref.sample(n=len(ref) - len(cur)), cur])
# Suppress all warnings raised while the reports run.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    print(drift_calc.get_drift(current_data=cur, reference_data=ref, sample=True))

{'ks': 0.8333333333333334, 'wasserstein': 0.5, 'kl_div': 0.3333333333333333, 'psi': 0.6666666666666666, 'jensenshannon': 0.5, 'cramer_von_mises': 0.8333333333333334, 'hellinger': 0.16666666666666666, 'mannw': 0.0, 'ed': 0.8333333333333334, 't_test': 0.8333333333333334, 'emperical_mmd': 0.8333333333333334}
CPU times: user 1min 11s, sys: 10.6 s, total: 1min 22s
Wall time: 1min 3s


In [96]:
{'ks': 0.5, 'wasserstein': 0.16666666666666666, 'kl_div': 0.0, 'psi': 0.16666666666666666, 'jensenshannon': 0.3333333333333333, 'cramer_von_mises': 0.5, 'hellinger': 0.0, 'mannw': 0.16666666666666666, 'ed': 0.8333333333333334, 't_test': 0.0, 'emperical_mmd': 0.3333333333333333}

Help on method run in module evidently.report.report:

run(*, reference_data: Optional[pandas.core.frame.DataFrame], current_data: pandas.core.frame.DataFrame, column_mapping: Optional[evidently.pipeline.column_mapping.ColumnMapping] = None) -> None method of evidently.report.report.Report instance



In [53]:
train_df

Unnamed: 0,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Param 1,Param 2
0,85.03,0.0,2.0,2.19,36.0,39.0
1,86.20,0.0,2.0,3.07,36.0,39.0
2,85.11,0.0,2.0,2.26,36.0,39.0
3,72.78,0.0,2.0,3.29,36.0,39.0
4,70.09,0.0,2.0,2.81,36.0,39.0
...,...,...,...,...,...,...
274869,44.00,0.0,13.0,2.53,26.0,43.0
274870,22.00,0.0,13.0,1.86,26.0,43.0
274871,24.00,0.0,13.0,1.68,26.0,43.0
274872,24.00,0.0,13.0,1.68,26.0,43.0


# Recreation

In [2]:
import pandas as pd  # NOTE(review): duplicates the import in the notebook's first cell

# Table of per-cell drift scores with quality/reward statistics (per the columns
# displayed below); the first CSV column is used as the row index.
df_drift = pd.read_csv('data/generated/drift_scores_rewards_new_agent_data.csv', index_col=0)

In [20]:
df_drift.iloc[:, 1:].corr()

Unnamed: 0,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
drift_score,1.0,-0.47164,-0.708299,-0.067496,0.596059,-0.499947,-0.843222,0.156023,-0.474723,-0.690755,-0.024553,0.632465
quality_avg,-0.47164,1.0,0.724102,0.412364,-0.889875,0.884463,0.646852,-0.798368,0.955415,0.687823,0.155274,-0.899913
quality_min,-0.708299,0.724102,1.0,0.062546,-0.904919,0.704572,0.774727,-0.480986,0.678135,0.946917,0.028832,-0.896215
quality_max,-0.067496,0.412364,0.062546,1.0,-0.083369,0.057209,0.092393,-0.026939,0.426063,0.050618,0.328023,-0.150275
quality_std,0.596059,-0.889875,-0.904919,-0.083369,1.0,-0.862622,-0.735547,0.722047,-0.829046,-0.857725,-0.060532,0.97921
cum_reward_avg,-0.499947,0.884463,0.704572,0.057209,-0.862622,1.0,0.70596,-0.897436,0.901052,0.692539,0.006831,-0.875675
cum_reward_max,-0.843222,0.646852,0.774727,0.092393,-0.735547,0.70596,1.0,-0.334839,0.662403,0.761788,0.046269,-0.787753
cum_reward_std,0.156023,-0.798368,-0.480986,-0.026939,0.722047,-0.897436,-0.334839,1.0,-0.803681,-0.470792,0.01377,0.704857
mom_reward_avg,-0.474723,0.955415,0.678135,0.426063,-0.829046,0.901052,0.662403,-0.803681,1.0,0.666113,0.192112,-0.868089
mom_reward_min,-0.690755,0.687823,0.946917,0.050618,-0.857725,0.692539,0.761788,-0.470792,0.666113,1.0,0.020013,-0.876983


In [21]:
df_drift

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
0,"(12433,)",0.875,0.986810,0.815285,0.994834,0.021982,459.305773,1326.846882,471.739451,7.172145,-209.578655,10.0,22.465573
1,"(12432,)",1.000,0.968164,0.611951,0.994660,0.055229,-53.044192,430.000000,292.859884,-3.600677,-221.913229,10.0,48.416792
2,"(12431,)",1.000,0.982890,0.778722,0.994640,0.030055,556.500134,973.029266,283.586535,4.202350,-210.752173,10.0,28.206538
3,"(701,)",1.000,0.973765,0.784269,0.991803,0.022313,62.500349,290.000000,113.271976,-0.513965,-214.569595,10.0,21.089389
4,"(12745,)",0.875,0.986498,0.944354,0.997024,0.007626,798.967391,1600.000000,450.224618,8.695652,-20.000000,10.0,6.134626
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018,"(7922,)",1.000,0.969121,0.448388,0.995826,0.070234,43.527431,350.000000,143.378173,-0.168758,-281.494361,10.0,33.542313
1019,"(26932,)",0.875,0.955532,0.947500,0.977140,0.010024,45.000000,80.000000,24.494897,10.000000,10.000000,10.0,0.000000
1020,"(26934,)",1.000,0.466225,0.319652,0.685985,0.157964,-884.914643,-71.152060,654.797701,-236.773705,-421.764061,10.0,154.537568
1021,"(26931,)",1.000,0.921952,0.893379,0.951869,0.021630,22.500000,50.000000,19.086270,-1.250000,-20.000000,10.0,15.526475


In [2]:
# NOTE(review): this reads the same file that `train_df` is loaded from earlier
# in the notebook; the two names refer to equivalent, un-preprocessed frames.
train_data = pd.read_csv('data/new_full_param_data.csv', index_col=0)
train_data

Unnamed: 0,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Param 1,Param 2
0,85.03,0.0,2.0,2.19,36.0,39.0
1,86.20,0.0,2.0,3.07,36.0,39.0
2,85.11,0.0,2.0,2.26,36.0,39.0
3,72.78,0.0,2.0,3.29,36.0,39.0
4,70.09,0.0,2.0,2.81,36.0,39.0
...,...,...,...,...,...,...
274869,44.00,0.0,13.0,2.53,26.0,43.0
274870,22.00,0.0,13.0,1.86,26.0,43.0
274871,24.00,0.0,13.0,1.68,26.0,43.0
274872,24.00,0.0,13.0,1.68,26.0,43.0


In [None]:
# NOTE(review): 'data/' is a directory path, not a CSV file — the file name
# appears to be missing, so this cell cannot run as written. The next cell
# expects `data` to contain a 'Cell ID' column.
data = pd.read_csv('data/')

In [None]:
# Per-cell drift: every cell's rows in `data` are compared against the training
# reference with Evidently's default DataDriftPreset.
reff = train_data

# `cols` was never defined at notebook level (it only exists as a local inside
# preprocess()), so the original cell raised NameError.  Compare on the
# reference's own columns so both frames share the same schema.
# NOTE(review): assumes `data` carries the same column names as `train_data` — confirm.
cols = list(reff.columns)

drift_scores = []

# One groupby pass instead of re-filtering the whole frame for every cell;
# sort=False keeps cells in order of first appearance, like unique() did.
for cell, cell_rows in tqdm(data.groupby('Cell ID', sort=False)):
    cell_data = cell_rows[cols]

    data_drift_report = Report(metrics=[
        DataDriftPreset(),
    ])

    # Use equally sized samples from both frames, capped at 1000 rows.
    n = min(len(cell_data), len(reff), 1000)

    data_drift_report.run(reference_data=reff.sample(n=n), current_data=cell_data.sample(n=n),)
    drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

    drift_scores.append((cell, drift))

In [3]:

# Default-preset drift report: the first 10,000 training rows are checked
# against the full training set.
data_drift_report = Report(metrics=[DataDriftPreset()])
data_drift_report.run(
    reference_data=train_data,
    current_data=train_data.iloc[:10_000],
)

In [22]:
# Number of columns the report flagged as drifted.
sum(
    column_result['drift_detected']
    for column_result in data_drift_report.as_dict()['metrics'][1]['result']['drift_by_columns'].values()
)

6

In [5]:
# Same comparison as the report above, expressed as a pass/fail test suite.
data_drift_test_suite = TestSuite(tests=[DataDriftTestPreset()])
data_drift_test_suite.run(
    reference_data=train_data,
    current_data=train_data.iloc[:10_000],
)
data_drift_test_suite

In [14]:
data_drift_test_suite.as_dict()

{'tests': [{'name': 'Share of Drifted Columns',
   'description': 'The drift is detected for 100% features (6 out of 6). The test threshold is lt=0.3',
   'status': 'FAIL',
   'group': 'data_drift',
   'parameters': {'condition': {'lt': 0.3},
    'features': {'HR Usage Rate': {'stattest': 'Wasserstein distance (normed)',
      'score': 0.873,
      'threshold': 0.1,
      'detected': True},
     'Number of Available\nTCH': {'stattest': 'Wasserstein distance (normed)',
      'score': 0.195,
      'threshold': 0.1,
      'detected': True},
     'Param 1': {'stattest': 'Wasserstein distance (normed)',
      'score': 0.33,
      'threshold': 0.1,
      'detected': True},
     'Param 2': {'stattest': 'Wasserstein distance (normed)',
      'score': 0.375,
      'threshold': 0.1,
      'detected': True},
     'TCH Blocking Rate, BH': {'stattest': 'Wasserstein distance (normed)',
      'score': 0.135,
      'threshold': 0.1,
      'detected': True},
     'TCH Traffic (Erl), BH': {'stattest': '

In [15]:
data_drift_report.show()

# Review cell agent rewards

In [40]:
# Per-cell drift/reward tables; the first CSV column is used as the row index.
# NOTE(review): judging by the file names these hold train-vs-test and
# train-vs-train comparisons computed without sampling — confirm how the files
# were generated.
ds_test = pd.read_csv('data/generated/drift_scores_rewards_new_agent_train-test_no_sample.csv', index_col=0)
ds_train = pd.read_csv('data/generated/drift_scores_rewards_new_agent_train-train_no_sample.csv', index_col=0)

## Test

In [41]:
ds_test

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
0,12083,1.0,0.938013,0.788178,0.966074,0.028063,608.948202,1207.645230,316.178354,5.853255,-129.222951,10.0,18.248177
1,12086,1.0,0.959537,0.664859,0.994064,0.050416,-264.134989,350.000000,393.319951,-5.468896,-217.747710,10.0,44.819021
2,12472,1.0,0.920239,0.495793,0.991855,0.097394,-1305.232005,-20.000000,962.661219,-17.577496,-269.207217,10.0,63.255099
3,12471,1.0,0.902759,0.321151,0.996716,0.143830,-1963.455994,50.000000,1389.198569,-26.061547,-398.833829,10.0,83.780603
4,12097,1.0,0.992244,0.888126,0.996904,0.011857,857.826087,1720.000000,497.177445,9.347826,-20.000000,10.0,4.386853
...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,41798,1.0,0.992314,0.982041,0.997012,0.003670,915.000000,1820.000000,526.830143,10.000000,10.000000,10.0,0.000000
93,24461,1.0,0.912222,0.408314,0.995005,0.127392,-1132.487031,105.930617,903.092628,-15.175110,-273.822333,10.0,69.428609
94,752,1.0,0.989371,0.962648,0.994787,0.003382,871.381215,1730.000000,500.035303,9.502762,-20.000000,10.0,3.840753
95,782,1.0,0.982556,0.842098,0.995773,0.027042,413.955080,746.376656,178.660681,3.435510,-199.673491,10.0,32.090957


In [42]:
ds_test.describe()

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
count,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0,97.0
mean,12245.371134,0.974227,0.946243,0.681222,0.992024,0.058301,-367.937847,829.286493,737.95376,-6.197041,-180.101472,9.690722,36.265108
std,10224.8214,0.060573,0.057266,0.239723,0.013812,0.059298,1752.617846,725.729413,842.644393,23.035985,145.2073,3.046038,34.514361
min,721.0,0.833333,0.722807,0.266383,0.870611,0.002085,-6725.652643,-218.856386,75.625585,-116.493282,-491.849328,-20.0,0.0
25%,3371.0,1.0,0.933964,0.462082,0.991906,0.010453,-1049.433023,40.0,317.050025,-14.94257,-273.822333,10.0,7.131916
50%,12084.0,1.0,0.971406,0.741239,0.994964,0.028063,413.95508,773.463507,477.866658,3.767686,-217.782818,10.0,23.564447
75%,13315.0,1.0,0.983661,0.915992,0.996341,0.097272,836.630435,1600.0,683.52915,8.695652,-20.0,10.0,62.160016
max,41798.0,1.0,0.992974,0.984201,0.997855,0.22484,925.0,1840.0,4200.012443,10.0,10.0,10.0,126.791858


In [44]:
ds_test[ds_test.columns[1:]].corr()

Unnamed: 0,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
drift_score,1.0,-0.167015,0.311934,-0.085447,-0.015961,-0.159057,0.07025,0.248586,-0.141625,0.271059,-0.043652,-0.002695
quality_avg,-0.167015,1.0,0.703766,0.433769,-0.894567,0.885869,0.70763,-0.80771,0.967619,0.689232,0.400284,-0.908476
quality_min,0.311934,0.703766,1.0,0.050639,-0.905704,0.71316,0.807121,-0.525844,0.675935,0.960497,0.078483,-0.889497
quality_max,-0.085447,0.433769,0.050639,1.0,-0.141585,0.03187,0.125896,0.002635,0.466241,0.023087,0.901825,-0.208966
quality_std,-0.015961,-0.894567,-0.905704,-0.141585,1.0,-0.884414,-0.796498,0.760906,-0.856382,-0.877951,-0.154389,0.979108
cum_reward_avg,-0.159057,0.885869,0.71316,0.03187,-0.884414,1.0,0.736423,-0.928909,0.871856,0.724348,0.007354,-0.89237
cum_reward_max,0.07025,0.70763,0.807121,0.125896,-0.796498,0.736423,1.0,-0.444502,0.71307,0.780141,0.14817,-0.840019
cum_reward_std,0.248586,-0.80771,-0.525844,0.002635,0.760906,-0.928909,-0.444502,1.0,-0.785478,-0.55658,0.052773,0.74513
mom_reward_avg,-0.141625,0.967619,0.675935,0.466241,-0.856382,0.871856,0.71307,-0.785478,1.0,0.674518,0.491212,-0.888613
mom_reward_min,0.271059,0.689232,0.960497,0.023087,-0.877951,0.724348,0.780141,-0.55658,0.674518,1.0,0.053859,-0.87977


## Train

In [45]:
ds_train

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
0,23001,1.000,0.991352,0.938395,0.996186,0.003894,3904.891165,7720.000000,2247.875782,9.884763,-20.000000,10.0,1.856944
1,12035,1.000,0.979946,0.957076,0.997497,0.005402,3811.280410,7630.000000,2183.794721,9.769526,-20.000000,10.0,2.621048
2,41885,1.000,0.982089,0.961500,0.997387,0.003781,3756.734955,7510.000000,2138.960979,9.615877,-20.000000,10.0,3.375014
3,24924,1.000,0.951347,0.693796,0.997023,0.017881,2987.833345,5841.081703,1715.843416,7.478978,-207.459785,10.0,12.933655
4,26443,1.000,0.988465,0.971459,0.995463,0.002975,3910.000000,7810.000000,2255.995715,10.000000,10.000000,10.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,14001,1.000,0.994984,0.986135,0.997998,0.002073,795.000000,1580.000000,457.547812,10.000000,10.000000,10.0,0.000000
932,7922,0.875,0.969371,0.448388,0.995826,0.070248,45.410949,350.000000,142.031835,-0.144611,-281.494361,10.0,33.472770
933,26931,1.000,0.921972,0.893379,0.952390,0.020155,33.750000,60.000000,16.850180,2.500000,-20.000000,10.0,13.887301
934,26932,1.000,0.955071,0.947810,0.977140,0.009811,45.000000,80.000000,24.494897,10.000000,10.000000,10.0,0.000000


In [46]:
ds_train.describe()

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
count,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0,936.0
mean,14881.276709,0.982105,0.957216,0.705592,0.992353,0.047059,127.590007,1292.144373,716.82963,-2.140731,-171.900969,10.0,30.356915
std,12190.306158,0.043803,0.044418,0.218857,0.011448,0.049107,1718.587085,1829.446195,759.542727,17.58114,127.163743,0.0,28.22419
min,701.0,0.875,0.465959,0.24561,0.683136,0.000419,-12431.032016,-275.430973,16.85018,-238.241392,-549.568747,10.0,0.0
25%,5701.75,1.0,0.947116,0.495885,0.991136,0.010146,-464.108877,80.0,286.518314,-6.126009,-259.272352,10.0,8.434877
50%,10921.5,1.0,0.974498,0.766357,0.994176,0.026532,438.641304,881.281271,456.032381,4.255046,-214.581604,10.0,20.881504
75%,24321.25,1.0,0.984593,0.917097,0.995959,0.073211,822.445652,1630.0,736.035923,8.405728,-20.0,10.0,47.417075
max,51357.0,1.0,0.994984,0.991635,0.999542,0.258031,3910.0,7810.0,7420.583521,10.0,10.0,10.0,154.67415


In [47]:
ds_train[ds_train.columns[1:]].corr()

Unnamed: 0,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
drift_score,1.0,-0.06334,0.310739,-0.032366,-0.086441,-0.017363,0.136341,0.235475,-0.063576,0.267073,,-0.085072
quality_avg,-0.06334,1.0,0.715694,0.427813,-0.883483,0.794856,0.39854,-0.554112,0.951554,0.674919,,-0.895727
quality_min,0.310739,0.715694,1.0,0.072163,-0.899004,0.624013,0.415884,-0.299903,0.666306,0.946219,,-0.89004
quality_max,-0.032366,0.427813,0.072163,1.0,-0.087027,0.115405,0.14119,0.04255,0.436829,0.062312,,-0.15508
quality_std,-0.086441,-0.883483,-0.899004,-0.087027,1.0,-0.772757,-0.432215,0.484711,-0.817623,-0.848623,,0.979277
cum_reward_avg,-0.017363,0.794856,0.624013,0.115405,-0.772757,1.0,0.767955,-0.270453,0.8098,0.608627,,-0.800894
cum_reward_max,0.136341,0.39854,0.415884,0.14119,-0.432215,0.767955,1.0,0.402596,0.399566,0.410317,,-0.479946
cum_reward_std,0.235475,-0.554112,-0.299903,0.04255,0.484711,-0.270453,0.402596,1.0,-0.5671,-0.286097,,0.451921
mom_reward_avg,-0.063576,0.951554,0.666306,0.436829,-0.817623,0.8098,0.399566,-0.5671,1.0,0.650554,,-0.860034
mom_reward_min,0.267073,0.674919,0.946219,0.062312,-0.848623,0.608627,0.410317,-0.286097,0.650554,1.0,,-0.86746


## Recreate