In [202]:
from functools import partial
from pathlib import Path
from typing import List, Dict, Callable

import pandas as pd
import numpy as np
import json

In [204]:
from functools import wraps, WRAPPER_ASSIGNMENTS

try:
    wraps(partial(wraps))(wraps)
except AttributeError:
    @wraps(wraps)
    def wraps(obj, attr_names=WRAPPER_ASSIGNMENTS, wraps=wraps):
        return wraps(obj, assigned=(name for name in attr_names if hasattr(obj, name)))

In [3]:
def get_total_score(x: str) -> float:
    js = json.loads(x)
    size = len(js.items())

    return sum([js[f]['drift_detected'] for f,v in js.items()]) / size

def get_regress_score(x: str) -> float:
    js = json.loads(x)

    # TODO: use different aggregation methods
    return sum([js[f]['drift_score'] for f,v in js.items()])

In [78]:
def get_weighted_aggregation(x: str, fi: Dict, method: Callable = sum):
    js = json.loads(x)

    return method([js[f]['drift_score'] * fi.get(f, 1) for f,v in js.items()])


In [87]:
def get_weighted_thresh_agg(x: str, fi: Dict, method: Callable = sum):
    js = json.loads(x)

    return method([js[f]['drift_score'] * fi.get(f, 1) // js[f]['stattest_threshold'] for f,v in js.items()])


In [220]:
fi = pd.read_csv('data/generated/fi_2020-2023.csv', index_col=0)
fi.rename(index={'Lower_limit': 'Param 1', 'Upper_limit': 'Param 2'}, inplace=True)
fi = fi / fi.max()
fi = fi.to_dict()['importance']
fi

{'HR Usage Rate': 1.0,
 'TCH Blocking Rate, BH': 0.05306688095919128,
 'Number of Available\nTCH': 0.9145249190288962,
 'TCH Traffic (Erl), BH': 0.48950080102867455,
 'Param 1': 0.934843836886342,
 'Param 2': 0.8525678849265464}

In [229]:
get_weighted_thresh_sum = partial(get_weighted_thresh_agg, fi=fi, method=np.average)
get_weighted_thresh_sum.__name__ = 'get_weighted_thresh_sum'

get_thresh_sum = partial(get_weighted_thresh_agg, fi={}, method=np.average)
get_thresh_sum.__name__ = 'get_thresh_sum'

get_weighted_mean = partial(get_weighted_aggregation, fi=fi, method=np.sum)
get_weighted_mean.__name__ = 'get_weighted_mean'

In [216]:
get_weighted_thresh_sum.__name__

'get_weighted_thresh_sum'

In [4]:
rewards = pd.read_csv('data/generated/drift/by_cell_agent/drift_scores_rewards_new_agent_train-test_no_sample20-23.csv', index_col=0)
rewards

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
0,10932,0.875,0.991378,0.973693,0.994549,0.002269,1040.000000,2070.0,598.999165,10.000000,10.0,10.0,0.000000
1,12762,0.875,0.986723,0.790479,0.992018,0.019664,1010.000000,2040.0,598.999165,9.855072,-20.0,10.0,2.085144
2,12781,0.875,0.987593,0.949088,0.992366,0.004489,1040.000000,2070.0,598.999165,10.000000,10.0,10.0,0.000000
3,12782,0.875,0.986304,0.972973,0.992206,0.003253,1040.000000,2070.0,598.999165,10.000000,10.0,10.0,0.000000
4,12783,1.000,0.949899,0.779899,0.991859,0.049240,925.797101,1920.0,558.289010,9.275362,-20.0,10.0,4.617035
...,...,...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.875,0.992266,0.967622,0.993875,0.002159,800.000000,1590.0,460.434577,10.000000,10.0,10.0,0.000000
990,5682,0.875,0.991784,0.980978,0.994072,0.001335,800.000000,1590.0,460.434577,10.000000,10.0,10.0,0.000000
991,12771,0.875,0.990560,0.978193,0.994569,0.002849,570.000000,1130.0,327.643099,10.000000,10.0,10.0,0.000000
992,12772,1.000,0.968540,0.751103,0.986172,0.028634,550.884956,1100.0,315.678288,9.734513,-20.0,10.0,2.822163


In [154]:
rewards.describe()

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
count,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0
mean,14601.775654,0.909079,0.985404,0.87405,0.993143,0.015381,982.392422,1989.825568,584.887036,9.735159,-3.908601,10.0,1.624021
std,11994.634963,0.055692,0.010261,0.141991,0.001913,0.020892,144.25108,182.222544,35.048051,0.850586,28.322576,0.0,3.75817
min,701.0,0.875,0.874052,0.379827,0.969809,0.000385,-765.904569,-152.168731,283.425739,-3.487866,-172.640971,10.0,0.0
25%,5071.25,0.875,0.983476,0.820343,0.992224,0.002218,965.126812,1980.0,588.948638,9.855072,-20.0,10.0,0.0
50%,10931.5,0.875,0.988649,0.938276,0.99346,0.006363,1040.0,2070.0,598.999165,10.0,10.0,10.0,0.0
75%,24234.5,1.0,0.991324,0.976942,0.994365,0.019126,1040.0,2070.0,598.999165,10.0,10.0,10.0,2.085144
max,42857.0,1.0,0.993943,0.992224,0.996884,0.156021,1040.0,2070.0,598.999165,10.0,10.0,10.0,39.482828


In [434]:
rows = [c for c in rewards.columns if c not in ['cell_id', 'drift_score']]

# NOT Sampled | no ref window

In [435]:
orig_df = pd.read_csv('data/generated/drift/by_cell_agent/run_7/by_train_regressive__sampled-drift-None_no-window_.csv', index_col=0)
orig_df

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test,cell_id
0,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",25771
1,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",22944
2,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26335
3,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26332
4,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26336
...,...,...,...,...,...,...,...,...,...,...,...
989,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5683
990,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5682
991,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12772
992,"{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12771


In [436]:
stat_tests = [c for c in orig_df.columns if c not in ['cell_id']]
cols = [c for c in orig_df.columns if c not in rows + ['cell_id', 'drift_score']]

drift_scores_df = orig_df[['cell_id']].copy()

drift_scores_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_total_score, x)))
drift_scores_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.833333
1,22944,0.833333,1.000000,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.833333
2,26335,1.000000,1.000000,1.000000,0.833333,0.833333,0.833333,1.0,0.833333,1.000000,0.666667
3,26332,1.000000,1.000000,1.000000,0.833333,0.833333,0.833333,1.0,0.833333,1.000000,0.833333
4,26336,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.833333
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.833333
990,5682,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.666667
991,12772,1.000000,1.000000,1.000000,0.833333,0.833333,0.833333,1.0,0.833333,1.000000,0.833333
992,12771,0.833333,1.000000,0.833333,0.833333,0.833333,0.833333,1.0,0.833333,0.833333,0.833333


In [450]:
merged = rewards.merge(drift_scores_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.637122,-0.237932,-0.637122,-0.490077,-0.601974,-0.594971,,-0.568248,-0.562093,-0.305755
quality_min,-0.609597,-0.056428,-0.609597,-0.164067,-0.547614,-0.547616,,-0.564737,-0.50225,-0.325514
quality_max,-0.289934,-0.168453,-0.289934,-0.067899,-0.125177,-0.119252,,-0.115056,-0.309426,-0.129502
quality_std,0.642976,0.09376,0.642976,0.266944,0.676731,0.676923,,0.680283,0.531415,0.357839
cum_reward_avg,-0.395991,-0.090844,-0.395991,-0.337963,-0.650346,-0.641243,,-0.646223,-0.318352,-0.293259
cum_reward_max,-0.362477,-0.079346,-0.362477,-0.35704,-0.622673,-0.614825,,-0.607506,-0.291314,-0.26986
cum_reward_std,-0.190325,-0.045423,-0.190325,-0.389688,-0.401941,-0.398805,,-0.366093,-0.15487,-0.135084
mom_reward_avg,-0.412066,-0.100554,-0.412066,-0.373832,-0.685072,-0.677233,,-0.669658,-0.335622,-0.292552
mom_reward_min,-0.507615,-0.092995,-0.507615,-0.148054,-0.58546,-0.574075,,-0.597115,-0.414229,-0.353414
mom_reward_max,,,,,,,,,,


## Regress

In [232]:
drift_regress_df = orig_df[['cell_id']].copy()

drift_regress_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_regress_score, x)))
drift_regress_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,4.624301,7.704001e-02,4.624301,22.958075,35.822121,3.235612,0.000232,3.521493,18.980594,0.492405
1,22944,8.513001,9.403760e-03,8.513001,27.952476,40.319391,3.606562,0.000232,3.914418,30.671887,0.227054
2,26335,4.499932,1.413220e-29,4.499932,21.171273,32.038256,3.149558,0.000232,3.409220,18.491783,0.834270
3,26332,4.831547,1.235722e-11,4.831547,22.517829,34.787749,3.141581,0.000232,3.376812,19.523089,0.133723
4,26336,5.083292,6.001640e-01,5.083292,24.718289,36.286952,3.366172,0.000232,3.548588,20.341524,0.349846
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,4.143555,1.532900e-01,4.143555,19.642791,27.837917,3.156555,0.000266,3.152360,16.366687,0.319351
990,5682,3.914246,7.217293e-02,3.914246,22.653205,31.603491,3.302213,0.000266,3.133916,14.968536,0.711898
991,12772,5.306576,1.792384e-23,5.306576,26.507881,38.908958,3.585255,0.000317,3.772797,21.300469,0.275323
992,12771,4.427045,3.724132e-02,4.427045,20.072339,27.145024,3.041050,0.000317,2.924274,17.377473,0.493994


In [451]:
merged = rewards.merge(drift_regress_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.471126,0.471126,-0.312807,-0.195999,-0.194386,-0.251328,0.376411,-0.320963,-0.255732,0.165047
quality_min,0.274305,0.274305,-0.124886,-0.079814,-0.078085,-0.12982,0.172852,-0.151544,-0.09004,0.140211
quality_max,0.321323,0.321323,-0.443729,-0.279935,-0.273698,-0.304984,0.258963,-0.511766,-0.434838,0.110777
quality_std,-0.290633,-0.290633,0.140916,0.068604,0.066929,0.124389,-0.19539,0.151355,0.085725,-0.17568
cum_reward_avg,0.119125,0.119125,-0.121683,0.011818,0.001602,-0.032761,0.096436,-0.069121,-0.021238,0.185554
cum_reward_max,0.086665,0.086665,-0.07602,0.055559,0.036465,0.01744,0.068441,-0.024708,0.018937,0.16092
cum_reward_std,0.007445,0.007445,0.023873,0.123184,0.08937,0.109717,0.007924,0.056699,0.08664,0.0601
mom_reward_avg,0.147492,0.147492,-0.185318,-0.047611,-0.050396,-0.09567,0.115788,-0.133507,-0.076902,0.198603
mom_reward_min,0.207099,0.207099,-0.172969,-0.091985,-0.090711,-0.142737,0.149511,-0.164839,-0.105582,0.2079
mom_reward_max,,,,,,,,,,


In [452]:
merged.corr(method='kendall').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.296715,0.296715,-0.199532,-0.18029,-0.174925,-0.208958,0.218392,-0.261365,-0.178807,0.070504
quality_min,0.139238,0.139238,-0.075237,-0.066629,-0.067513,-0.088481,0.060397,-0.103037,-0.067302,0.044353
quality_max,0.201837,0.201837,-0.260684,-0.153941,-0.144636,-0.169429,0.15112,-0.322452,-0.242196,0.095321
quality_std,-0.171504,-0.171504,0.075253,0.074078,0.075115,0.096213,-0.084712,0.102907,0.067594,-0.040973
cum_reward_avg,0.06737,0.06737,-0.032906,0.016998,0.006211,-0.013203,0.059845,-0.029208,-0.001634,0.097937
cum_reward_max,0.062335,0.062335,-0.02983,0.02287,0.011399,-0.007001,0.054914,-0.024409,0.001754,0.095863
cum_reward_std,0.049447,0.049447,-0.017572,0.040077,0.026756,0.014783,0.041833,-0.008585,0.010553,0.075573
mom_reward_avg,0.142624,0.142624,-0.121708,-0.076962,-0.078869,-0.107476,0.108329,-0.138401,-0.086325,0.123304
mom_reward_min,0.137326,0.137326,-0.120682,-0.080947,-0.082915,-0.110658,0.104568,-0.140271,-0.087483,0.117375
mom_reward_max,,,,,,,,,,


In [448]:
merged.corr(method='spearman').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.442206,0.442206,-0.308746,-0.266359,-0.25815,-0.309366,0.333467,-0.399266,-0.279376,0.102846
quality_min,0.213028,0.213028,-0.113352,-0.100923,-0.101402,-0.132812,0.101972,-0.156933,-0.101539,0.066528
quality_max,0.306087,0.306087,-0.399548,-0.224842,-0.211464,-0.249401,0.226621,-0.475049,-0.373939,0.149633
quality_std,-0.26157,-0.26157,0.112873,0.113084,0.113887,0.145749,-0.141753,0.156776,0.101579,-0.060014
cum_reward_avg,0.093485,0.093485,-0.04342,0.022577,0.00799,-0.018156,0.082193,-0.040746,-0.001528,0.130301
cum_reward_max,0.085431,0.085431,-0.038677,0.030676,0.015317,-0.009432,0.074844,-0.032689,0.002849,0.126445
cum_reward_std,0.065849,0.065849,-0.022843,0.054104,0.036872,0.019841,0.056788,-0.008502,0.014264,0.100502
mom_reward_avg,0.188594,0.188594,-0.158871,-0.101068,-0.10359,-0.141259,0.142331,-0.181779,-0.113113,0.158887
mom_reward_min,0.174437,0.174437,-0.151795,-0.102666,-0.105229,-0.139787,0.132048,-0.177872,-0.109935,0.146979
mom_reward_max,,,,,,,,,,


## Weighted

In [453]:
df_weighted = orig_df[['cell_id']].copy()

df_weighted[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_mean, x)))
df_weighted

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,3.977416,4.088273e-03,3.977416,20.452360,32.004431,2.794267,0.000054,3.022653,17.056337,0.026398
1,22944,7.044659,4.990282e-04,7.044659,23.046348,33.491390,2.987221,0.000054,3.242961,26.542095,0.012049
2,26335,3.799549,6.917724e-30,3.799549,18.993445,28.726259,2.727743,0.000054,2.943685,16.232136,0.194804
3,26332,4.064677,6.557590e-13,4.064677,19.940895,30.928673,2.684200,0.000054,2.882820,17.064314,0.009713
4,26336,4.288557,3.184883e-02,4.288557,21.115263,31.305614,2.845763,0.000054,3.024424,17.891876,0.018692
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,3.488157,8.134623e-03,3.488157,17.234414,24.606040,2.685131,0.000062,2.688601,14.475203,0.017136
990,5682,3.272016,3.829992e-03,3.272016,19.933170,27.829984,2.782681,0.000062,2.672843,13.133915,0.399837
991,12772,4.296801,1.639180e-23,4.296801,22.625169,33.255036,2.972496,0.000073,3.164523,18.203419,0.018272
992,12771,3.711831,1.976281e-03,3.711831,17.890696,23.978053,2.554521,0.000073,2.487802,15.204722,0.026221


In [454]:
merged = rewards.merge(df_weighted, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.218367,0.177167,-0.218367,-0.192197,-0.209108,-0.237964,0.02321,-0.187263,-0.224754,-0.040953
quality_min,-0.055198,0.017541,-0.055198,-0.007003,-0.049867,-0.060698,0.050217,-0.027469,-0.074103,-0.038077
quality_max,-0.440871,0.161293,-0.440871,-0.495184,-0.394346,-0.484783,-0.043007,-0.435226,-0.426809,0.009177
quality_std,0.051539,-0.053727,0.051539,0.002137,0.048255,0.049537,-0.036818,0.018146,0.06914,0.038016
cum_reward_avg,-0.071043,0.078705,-0.071043,-0.024641,-0.04487,-0.041092,-0.229756,0.000779,-0.056655,-0.011181
cum_reward_max,-0.069228,0.08003,-0.069228,-0.024867,-0.044238,-0.036326,-0.420374,0.003255,-0.055142,-0.004983
cum_reward_std,-0.058445,0.067664,-0.058445,-0.029643,-0.037538,-0.024048,-0.699903,-0.004574,-0.04513,0.000916
mom_reward_avg,-0.072226,0.081552,-0.072226,-0.009314,-0.042213,-0.025731,0.029132,0.001893,-0.058037,-0.027096
mom_reward_min,-0.09253,0.066422,-0.09253,-0.035813,-0.073279,-0.074978,0.042849,-0.041277,-0.090398,-0.007448
mom_reward_max,,,,,,,,,,


## Adaptive threshold

In [455]:
df_thresh = orig_df[['cell_id']].copy()

df_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_thresh_sum, x)))
df_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,7.000000,0.166667,7.000000,37.833333,59.333333,5.000000,0.0,5.333333,31.166667,1.500000
1,22944,13.500000,0.000000,13.500000,46.000000,66.666667,5.500000,0.0,6.166667,50.666667,0.666667
2,26335,7.000000,0.000000,7.000000,34.833333,52.833333,4.666667,0.0,5.166667,30.500000,2.666667
3,26332,7.666667,0.000000,7.666667,37.000000,57.500000,4.833333,0.0,5.166667,32.000000,0.333333
4,26336,8.000000,2.000000,8.000000,40.666667,60.000000,5.166667,0.0,5.333333,33.500000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,6.166667,0.500000,6.166667,32.333333,46.000000,4.833333,0.0,4.666667,26.666667,1.000000
990,5682,6.000000,0.166667,6.000000,37.333333,52.333333,5.000000,0.0,4.666667,24.500000,2.166667
991,12772,8.333333,0.000000,8.333333,43.666667,64.333333,5.500000,0.0,5.666667,35.166667,0.833333
992,12771,6.833333,0.000000,6.833333,33.000000,44.666667,4.500000,0.0,4.333333,28.333333,1.500000


In [456]:
merged_thresh = rewards.merge(df_thresh, left_on='cell_id', right_on='cell_id', how='inner')
merged_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.357426,0.160971,-0.357426,-0.158185,-0.19687,-0.28778,,-0.205309,-0.256556,0.185042
quality_min,-0.159107,0.001548,-0.159107,0.030098,-0.026508,-0.103673,,-0.035293,-0.085222,0.132811
quality_max,-0.464991,0.148022,-0.464991,-0.458375,-0.385747,-0.473477,,-0.419358,-0.439653,0.024481
quality_std,0.183933,-0.038463,0.183933,-0.034921,0.026324,0.111122,,0.035314,0.083577,-0.185098
cum_reward_avg,-0.21808,0.070949,-0.21808,-0.002802,-0.036455,-0.109175,,-0.026757,-0.072041,0.202109
cum_reward_max,-0.211665,0.073039,-0.211665,-0.01096,-0.041082,-0.113546,,-0.025675,-0.072635,0.181275
cum_reward_std,-0.155647,0.063993,-0.155647,-0.036306,-0.047657,-0.098319,,-0.029484,-0.063955,0.082754
mom_reward_avg,-0.231018,0.073735,-0.231018,0.014274,-0.032292,-0.097886,,-0.028652,-0.074152,0.202957
mom_reward_min,-0.213339,0.054464,-0.213339,-0.007558,-0.057478,-0.125871,,-0.064911,-0.102805,0.210486
mom_reward_max,,,,,,,,,,


## Adaptive weighted threshold

In [457]:
df_weighted_thresh = orig_df[['cell_id']].copy()

df_weighted_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_sum, x)))
df_weighted_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,6.000000,0.0,6.000000,33.666667,52.833333,4.000000,0.0,4.500000,27.833333,0.000000
1,22944,11.500000,0.0,11.500000,37.833333,55.333333,4.666667,0.0,5.000000,43.833333,0.000000
2,26335,6.000000,0.0,6.000000,31.166667,47.500000,4.000000,0.0,4.500000,26.666667,0.500000
3,26332,6.500000,0.0,6.500000,32.666667,51.166667,4.000000,0.0,4.333333,28.000000,0.000000
4,26336,6.500000,0.0,6.500000,34.833333,51.666667,4.333333,0.0,4.666667,29.500000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,5.500000,0.0,5.500000,28.166667,40.666667,4.000000,0.0,4.000000,23.833333,0.000000
990,5682,5.000000,0.0,5.000000,32.833333,46.166667,4.333333,0.0,4.000000,21.500000,1.166667
991,12772,6.833333,0.0,6.833333,37.166667,55.000000,4.666667,0.0,4.833333,29.666667,0.000000
992,12771,5.833333,0.0,5.833333,29.500000,39.333333,4.000000,0.0,3.500000,24.833333,0.000000


In [458]:
df_weighted_thresh = rewards.merge(df_weighted_thresh, left_on='cell_id', right_on='cell_id', how='inner')
df_weighted_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.213458,0.077264,-0.213458,-0.193965,-0.210069,-0.222015,,-0.18709,-0.221811,-0.056543
quality_min,-0.052668,0.022701,-0.052668,-0.007369,-0.050906,-0.053816,,-0.021935,-0.073065,-0.052806
quality_max,-0.436506,0.060829,-0.436506,-0.497782,-0.391744,-0.507905,,-0.421709,-0.426321,0.005736
quality_std,0.046358,-0.037293,0.046358,0.002474,0.049185,0.04008,,0.0157,0.067652,0.056855
cum_reward_avg,-0.061274,0.055352,-0.061274,-0.025487,-0.044342,-0.036235,,0.001267,-0.055313,-0.030251
cum_reward_max,-0.060003,0.064245,-0.060003,-0.0263,-0.043403,-0.035758,,-0.001113,-0.054025,-0.022117
cum_reward_std,-0.053672,0.071255,-0.053672,-0.032329,-0.036246,-0.0333,,-0.018949,-0.044433,-0.006861
mom_reward_avg,-0.063323,0.044378,-0.063323,-0.009913,-0.042437,-0.011417,,0.001451,-0.056466,-0.046564
mom_reward_min,-0.087439,0.047013,-0.087439,-0.035922,-0.074186,-0.068434,,-0.036868,-0.089294,-0.027219
mom_reward_max,,,,,,,,,,


# Sampled | no ref window

In [459]:
orig_df = pd.read_csv('data/generated/drift/by_cell_agent/run_8/by_train_regressive__sampled-drift-1000_no-window_.csv', index_col=0)
orig_df

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test,cell_id
0,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",25771
1,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",22944
2,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26335
3,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26332
4,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26336
...,...,...,...,...,...,...,...,...,...,...,...
989,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5683
990,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5682
991,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12772
992,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12771


In [460]:
stat_tests = [c for c in orig_df.columns if c not in ['cell_id']]

drift_scores_df = orig_df[['cell_id']].copy()

drift_scores_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_total_score, x)))
drift_scores_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.833333,0.833333,1.000000,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333
1,22944,0.833333,0.833333,1.000000,0.833333,0.833333,0.833333,1.000000,0.833333,0.833333,1.000000
2,26335,1.000000,1.000000,1.000000,0.833333,0.833333,0.833333,1.000000,0.833333,1.000000,0.833333
3,26332,1.000000,1.000000,0.833333,0.833333,0.833333,0.833333,1.000000,0.833333,1.000000,0.833333
4,26336,0.833333,0.833333,1.000000,0.833333,0.833333,0.833333,0.833333,0.833333,1.000000,0.833333
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.833333,0.833333,1.000000,1.000000,1.000000,1.000000,0.833333,0.833333,0.833333,0.833333
990,5682,0.833333,0.833333,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.833333,0.666667
991,12772,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.833333,1.000000,0.833333
992,12771,0.833333,0.833333,1.000000,1.000000,1.000000,1.000000,0.833333,0.833333,1.000000,0.833333


## Regress

In [461]:
drift_regress_df = orig_df[['cell_id']].copy()

drift_regress_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_regress_score, x)))
drift_regress_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,3.079131e-01,3.079131e-01,4.722307,24.082304,33.420538,3.240035,5.997323e-01,3.647950,19.233543,0.300340
1,22944,5.118422e-02,5.118422e-02,8.025195,27.685188,37.685185,3.444991,4.313094e-03,3.791377,28.990798,0.017917
2,26335,6.851186e-19,6.851186e-19,4.638261,22.144719,30.847093,3.038247,1.376431e-08,3.435655,19.269641,0.626102
3,26332,1.557643e-05,1.557643e-05,4.843760,24.054648,34.649028,3.128291,1.881595e-05,3.137929,19.579952,0.588086
4,26336,9.408428e-01,9.408428e-01,5.134385,27.714290,40.759644,3.496689,4.612117e-01,3.714086,20.356294,0.069908
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,5.978521e-01,5.978521e-01,4.372890,19.661666,26.520277,3.149742,1.485134e-01,3.354724,17.385946,0.079615
990,5682,4.246977e-01,4.246977e-01,4.022376,21.141692,29.058497,3.275769,8.441780e-02,3.436627,15.521926,0.657484
991,12772,4.561161e-13,4.561161e-13,5.345316,28.000094,41.591981,3.941070,6.541882e-09,3.871215,22.074685,0.478839
992,12771,3.889566e-01,3.889566e-01,4.186046,19.951571,27.358941,3.183168,1.133992e-01,3.019522,15.583456,0.192811


In [462]:
merged = rewards.merge(drift_scores_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.424538,-0.424538,-0.190444,-0.039181,-0.199857,-0.191157,-0.203164,-0.377565,-0.414596,-0.164997
quality_min,-0.255248,-0.255248,-0.102831,0.0516,-0.157566,-0.126014,-0.052265,-0.319663,-0.338615,-0.068507
quality_max,-0.279873,-0.279873,-0.08749,-0.031366,-0.08325,-0.081799,-0.159179,-0.16585,-0.234196,-0.135416
quality_std,0.273979,0.273979,0.144991,-0.031526,0.190877,0.168346,0.066317,0.37815,0.373403,0.110883
cum_reward_avg,-0.134304,-0.134304,-0.126914,-0.017974,-0.176619,-0.178549,-0.040758,-0.297126,-0.248986,-0.123675
cum_reward_max,-0.11323,-0.11323,-0.119712,-0.036196,-0.185314,-0.187395,-0.033857,-0.287874,-0.234487,-0.117879
cum_reward_std,-0.044786,-0.044786,-0.067688,-0.067855,-0.152582,-0.159116,-0.022443,-0.191684,-0.139756,-0.063917
mom_reward_avg,-0.151409,-0.151409,-0.129724,0.00475,-0.163469,-0.161461,-0.032783,-0.303016,-0.244599,-0.132228
mom_reward_min,-0.201955,-0.201955,-0.118787,0.026007,-0.157962,-0.132619,-0.053819,-0.30087,-0.288348,-0.112795
mom_reward_max,,,,,,,,,,


In [463]:
merged = rewards.merge(drift_regress_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.297615,0.297615,-0.589596,-0.210046,-0.266939,-0.351282,0.141016,-0.273001,-0.256067,0.056579
quality_min,0.120718,0.120718,-0.380062,-0.009968,-0.060353,-0.115577,-0.004668,-0.037538,-0.091151,-0.052659
quality_max,0.218051,0.218051,-0.409225,-0.52989,-0.517721,-0.508303,0.113757,-0.580612,-0.435715,0.036375
quality_std,-0.152431,-0.152431,0.45315,-0.004484,0.050613,0.110168,-0.010642,0.035832,0.089215,-0.000251
cum_reward_avg,0.11375,0.11375,-0.429257,-0.002089,-0.038938,-0.077639,0.021199,-0.037631,-0.071612,0.061397
cum_reward_max,0.111734,0.111734,-0.421586,-0.015745,-0.051952,-0.089582,0.030078,-0.045912,-0.071239,0.053611
cum_reward_std,0.087194,0.087194,-0.305814,-0.051637,-0.076613,-0.099376,0.051823,-0.068963,-0.060995,0.020542
mom_reward_avg,0.114607,0.114607,-0.487808,0.0139,-0.026984,-0.057711,0.004255,-0.022134,-0.075014,0.070669
mom_reward_min,0.120834,0.120834,-0.403051,-0.024133,-0.066102,-0.106339,0.011949,-0.049155,-0.106702,0.026121
mom_reward_max,,,,,,,,,,


In [464]:
merged.corr(method='kendall').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.27384,0.27384,-0.297157,-0.187901,-0.224657,-0.255878,0.204455,-0.222574,-0.18168,-0.033832
quality_min,0.12161,0.12161,-0.184185,-0.030303,-0.069855,-0.100362,0.070832,-0.050857,-0.07965,-0.1292
quality_max,0.176551,0.176551,-0.275994,-0.327502,-0.317889,-0.308495,0.130392,-0.371269,-0.24323,0.028471
quality_std,-0.151766,-0.151766,0.18861,0.0304,0.073405,0.105817,-0.093558,0.050808,0.079755,0.12736
cum_reward_avg,0.113295,0.113295,-0.199545,-0.058977,-0.082897,-0.103541,0.09185,-0.073061,-0.077319,0.007944
cum_reward_max,0.11015,0.11015,-0.197882,-0.061501,-0.084249,-0.104761,0.087798,-0.074061,-0.079145,0.005789
cum_reward_std,0.101535,0.101535,-0.174431,-0.067928,-0.084504,-0.101291,0.078713,-0.074318,-0.078822,-0.007226
mom_reward_avg,0.135238,0.135238,-0.233973,-0.026533,-0.061537,-0.089096,0.107102,-0.063611,-0.085264,0.011011
mom_reward_min,0.130957,0.130957,-0.232921,-0.030235,-0.06525,-0.09131,0.102776,-0.062915,-0.087501,-0.001798
mom_reward_max,,,,,,,,,,


In [465]:
merged.corr(method='spearman').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.410713,0.410713,-0.44175,-0.284429,-0.33549,-0.375792,0.31689,-0.34201,-0.282388,-0.038906
quality_min,0.19011,0.19011,-0.273402,-0.046382,-0.105522,-0.149445,0.115255,-0.07713,-0.120462,-0.173799
quality_max,0.261356,0.261356,-0.422805,-0.472391,-0.457898,-0.446872,0.196947,-0.534808,-0.374099,0.043577
quality_std,-0.236429,-0.236429,0.279101,0.046161,0.109935,0.157597,-0.151867,0.077976,0.120525,0.167438
cum_reward_avg,0.152651,0.152651,-0.266344,-0.080133,-0.11255,-0.139261,0.123983,-0.09835,-0.104972,0.012883
cum_reward_max,0.148062,0.148062,-0.262781,-0.083831,-0.114186,-0.140844,0.11822,-0.098987,-0.106768,0.009579
cum_reward_std,0.138362,0.138362,-0.233986,-0.091818,-0.113866,-0.136484,0.10684,-0.099007,-0.1056,-0.010218
mom_reward_avg,0.177786,0.177786,-0.303065,-0.03473,-0.081096,-0.117266,0.140338,-0.083662,-0.112239,0.018796
mom_reward_min,0.165986,0.165986,-0.292194,-0.038838,-0.083087,-0.115828,0.129763,-0.080209,-0.110417,0.000938
mom_reward_max,,,,,,,,,,


## Weighted

In [466]:
df_weighted = orig_df[['cell_id']].copy()

df_weighted[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_mean, x)))
df_weighted

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,1.633999e-02,1.633999e-02,4.029667,21.366584,29.552923,2.718403,3.182593e-02,3.065037,17.248688,0.016212
1,22944,2.716187e-03,2.716187e-03,6.593849,22.859572,31.166296,2.805924,2.289159e-04,3.083434,25.013443,0.000951
2,26335,8.154165e-20,8.154165e-20,3.910988,19.834605,27.534005,2.592289,1.197789e-08,2.935333,16.899078,0.052183
3,26332,8.266015e-07,8.266015e-07,4.095031,21.032704,30.419708,2.660322,1.015362e-06,2.669777,17.083524,0.032158
4,26336,4.992759e-02,4.992759e-02,4.271981,23.624190,35.096746,2.922867,2.447508e-02,3.073695,17.843151,0.007580
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,3.172614e-02,3.172614e-02,3.635384,16.284754,22.056833,2.491108,7.881150e-03,2.776852,15.421736,0.004227
990,5682,2.253738e-02,2.253738e-02,3.284668,18.007659,24.716205,2.602252,4.479797e-03,2.732143,13.632681,0.554719
991,12772,4.171296e-13,4.171296e-13,4.385296,23.473197,32.708054,2.884853,3.776105e-09,3.215354,18.857023,0.026845
992,12771,2.064072e-02,2.064072e-02,3.438170,17.236805,22.756305,2.427005,6.017887e-03,2.475873,13.466465,0.010275


In [467]:
merged = rewards.merge(df_weighted, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.278472,0.278472,-0.266748,-0.189598,-0.191926,-0.213463,0.136583,-0.21457,-0.224669,-0.042652
quality_min,0.112469,0.112469,-0.096508,-0.025115,-0.040586,-0.05648,0.001258,-0.030177,-0.08158,-0.053588
quality_max,0.210485,0.210485,-0.448952,-0.523476,-0.480063,-0.489761,0.115349,-0.568401,-0.42251,0.011734
quality_std,-0.140424,-0.140424,0.101305,0.008204,0.025178,0.034297,-0.01445,0.013288,0.075933,0.050872
cum_reward_avg,0.100654,0.100654,-0.109548,-0.007912,-0.021556,-0.01047,0.018686,-0.010261,-0.055916,-0.039789
cum_reward_max,0.095485,0.095485,-0.105529,-0.013221,-0.027122,-0.012417,0.021222,-0.012169,-0.052914,-0.030187
cum_reward_std,0.067984,0.067984,-0.081489,-0.031874,-0.042173,-0.020472,0.030562,-0.028387,-0.040359,-0.004637
mom_reward_avg,0.106141,0.106141,-0.119219,0.00789,-0.007752,0.006137,0.011461,0.008299,-0.058802,-0.045852
mom_reward_min,0.117241,0.117241,-0.131796,-0.035848,-0.050999,-0.06065,0.022397,-0.038854,-0.095314,-0.032467
mom_reward_max,,,,,,,,,,


## Adaptive threshold

In [468]:
df_thresh = orig_df[['cell_id']].copy()

df_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_thresh_sum, x)))
df_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,1.000000,1.000000,7.500000,39.666667,55.166667,4.666667,0.833333,5.500000,31.666667,1.000000
1,22944,0.166667,0.166667,12.666667,45.500000,62.166667,5.000000,0.000000,5.833333,47.666667,0.000000
2,26335,0.000000,0.000000,7.333333,36.500000,51.000000,4.500000,0.000000,5.166667,31.666667,2.000000
3,26332,0.000000,0.000000,7.833333,39.666667,57.166667,4.833333,0.000000,4.666667,32.333333,1.833333
4,26336,3.000000,3.000000,8.166667,45.666667,67.333333,5.333333,0.666667,5.500000,33.500000,0.166667
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,1.833333,1.833333,6.666667,32.333333,43.833333,4.666667,0.166667,5.000000,28.333333,0.166667
990,5682,1.333333,1.333333,6.166667,34.666667,48.000000,5.000000,0.000000,5.000000,25.333333,2.000000
991,12772,0.000000,0.000000,8.333333,46.000000,68.833333,5.833333,0.000000,6.000000,36.500000,1.500000
992,12771,1.166667,1.166667,6.500000,32.833333,45.166667,5.000000,0.166667,4.500000,25.666667,0.500000


In [469]:
merged_thresh = rewards.merge(df_thresh, left_on='cell_id', right_on='cell_id', how='inner')
merged_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.29349,0.29349,-0.588523,-0.20976,-0.267157,-0.363177,0.119967,-0.285686,-0.256315,0.050682
quality_min,0.118232,0.118232,-0.377821,-0.009785,-0.060268,-0.132654,-0.018863,-0.054121,-0.092284,-0.055041
quality_max,0.214027,0.214027,-0.40972,-0.530101,-0.517892,-0.518138,0.100457,-0.572099,-0.43489,0.029091
quality_std,-0.150232,-0.150232,0.450706,-0.004815,0.050455,0.128499,0.0052,0.053163,0.090172,0.002443
cum_reward_avg,0.113538,0.113538,-0.427551,-0.002278,-0.038644,-0.099169,0.012202,-0.04938,-0.071984,0.05927
cum_reward_max,0.111846,0.111846,-0.419613,-0.015782,-0.051915,-0.110932,0.021806,-0.059747,-0.071709,0.050915
cum_reward_std,0.088076,0.088076,-0.304137,-0.051261,-0.077131,-0.114161,0.046524,-0.081009,-0.061508,0.017378
mom_reward_avg,0.114543,0.114543,-0.485821,0.013538,-0.026719,-0.077027,-0.005658,-0.034334,-0.07512,0.068321
mom_reward_min,0.119515,0.119515,-0.401106,-0.024204,-0.065986,-0.123972,0.000666,-0.062682,-0.107464,0.023367
mom_reward_max,,,,,,,,,,


## Adaptive weighted threshold

In [470]:
df_weighted_thresh = orig_df[['cell_id']].copy()

df_weighted_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_sum, x)))
df_weighted_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.0,0.0,6.333333,35.333333,48.666667,4.166667,0.0,4.666667,28.333333,0.000000
1,22944,0.0,0.0,10.333333,37.666667,51.500000,4.333333,0.0,4.833333,41.166667,0.000000
2,26335,0.0,0.0,6.166667,32.500000,45.666667,4.000000,0.0,4.333333,27.666667,0.000000
3,26332,0.0,0.0,6.500000,34.833333,50.166667,4.000000,0.0,4.166667,28.000000,0.000000
4,26336,0.0,0.0,6.666667,39.000000,58.000000,4.500000,0.0,4.666667,29.333333,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.0,0.0,5.666667,26.500000,36.333333,3.500000,0.0,4.333333,25.333333,0.000000
990,5682,0.0,0.0,5.166667,29.666667,40.666667,3.833333,0.0,4.166667,22.333333,1.666667
991,12772,0.0,0.0,7.000000,38.500000,54.000000,4.333333,0.0,5.000000,31.000000,0.000000
992,12771,0.0,0.0,5.333333,28.166667,37.333333,3.666667,0.0,3.500000,22.000000,0.000000


In [471]:
df_weighted_thresh = rewards.merge(df_weighted_thresh, left_on='cell_id', right_on='cell_id', how='inner')
df_weighted_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.087895,0.087895,-0.250424,-0.187382,-0.190637,-0.183898,0.021747,-0.202746,-0.2214,-0.050375
quality_min,-0.031732,-0.031732,-0.081777,-0.024214,-0.039608,-0.038832,0.017092,-0.016232,-0.079592,-0.0511
quality_max,0.107047,0.107047,-0.441658,-0.522064,-0.479536,-0.474315,0.023944,-0.545168,-0.420563,0.007736
quality_std,0.002214,0.002214,0.084516,0.007102,0.024126,0.017084,-0.015103,0.00111,0.073686,0.052876
cum_reward_avg,0.033324,0.033324,-0.094681,-0.007129,-0.020945,0.005801,-0.000913,0.006238,-0.054556,-0.049623
cum_reward_max,0.037709,0.037709,-0.092755,-0.012134,-0.026519,0.002138,-0.012031,0.000742,-0.051641,-0.038482
cum_reward_std,0.04064,0.04064,-0.07797,-0.030275,-0.041605,-0.013199,-0.032244,-0.028164,-0.039612,-0.007622
mom_reward_avg,0.03632,0.03632,-0.10318,0.008387,-0.007252,0.022251,0.01714,0.023843,-0.057073,-0.056039
mom_reward_min,0.009308,0.009308,-0.114712,-0.035106,-0.050038,-0.044556,0.027033,-0.021977,-0.093905,-0.037104
mom_reward_max,,,,,,,,,,


# NOT Sampled | ref window 1000

In [472]:
orig_df = pd.read_csv('data/generated/drift/by_cell_agent/run_9/by_train_regressive_sampled_ref_sampled_drift_None_window_1k_.csv', index_col=0)
orig_df

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test,cell_id
0,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",25771
1,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",22944
2,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26335
3,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26332
4,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26336
...,...,...,...,...,...,...,...,...,...,...,...
989,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5683
990,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5682
991,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12772
992,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12771


In [473]:
stat_tests = [c for c in orig_df.columns if c not in ['cell_id']]

drift_scores_df = orig_df[['cell_id']].copy()

drift_scores_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_total_score, x)))
drift_scores_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.833333,0.833333,0.833333,0.333333,0.333333,0.666667,0.833333,0.666667,0.833333,0.333333
1,22944,0.833333,0.833333,0.833333,0.666667,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333
2,26335,1.000000,1.000000,0.833333,0.333333,0.666667,0.666667,1.000000,0.666667,0.833333,0.500000
3,26332,0.833333,0.833333,0.833333,0.333333,0.333333,0.500000,1.000000,0.500000,0.833333,0.333333
4,26336,0.833333,0.833333,0.833333,0.333333,0.500000,0.666667,0.833333,0.666667,0.833333,0.666667
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.833333,0.833333,0.833333,0.000000,0.166667,0.333333,0.833333,0.166667,0.833333,0.500000
990,5682,0.833333,0.833333,0.833333,0.000000,0.166667,0.333333,0.833333,0.166667,0.833333,0.333333
991,12772,0.833333,0.833333,0.833333,0.000000,0.333333,0.333333,1.000000,0.333333,0.833333,0.333333
992,12771,0.666667,0.666667,0.500000,0.000000,0.000000,0.000000,0.833333,0.000000,0.833333,0.500000


In [474]:
merged = rewards.merge(drift_scores_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.363035,-0.363035,-0.167696,-0.139181,-0.227776,-0.210051,-0.401887,-0.167724,-0.658532,-0.067503
quality_min,-0.172905,-0.172905,-0.026208,-0.047355,-0.078718,-0.06249,-0.224097,-0.048254,-0.318144,-0.02386
quality_max,-0.288841,-0.288841,-0.155968,-0.221176,-0.400287,-0.387029,-0.27613,-0.339965,-0.256183,-0.0985
quality_std,0.182195,0.182195,0.057885,0.048518,0.071516,0.045824,0.228868,0.015388,0.404745,0.044666
cum_reward_avg,-0.039938,-0.039938,-0.084559,-0.013991,-0.002224,0.030284,-0.079679,0.085593,-0.319357,-0.074955
cum_reward_max,0.002123,0.002123,-0.046364,0.0017,0.042286,0.093565,-0.045885,0.164037,-0.32364,-0.066569
cum_reward_std,0.076139,0.076139,0.044128,0.029085,0.117629,0.189696,0.030012,0.271259,-0.263117,-0.034213
mom_reward_avg,-0.083481,-0.083481,-0.126632,-0.045192,-0.042301,-0.016681,-0.095366,0.027903,-0.374736,-0.083465
mom_reward_min,-0.144777,-0.144777,-0.100193,-0.06152,-0.079756,-0.064085,-0.168136,-0.029686,-0.301267,-0.074351
mom_reward_max,,,,,,,,,,


## Regress

In [475]:
drift_regress_df = orig_df[['cell_id']].copy()

drift_regress_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_regress_score, x)))
drift_regress_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.982398,0.982398,0.960471,0.451576,1.207548,0.735160,0.964230,0.651383,4.016921,1.416430
1,22944,0.993703,0.993703,1.840785,0.564467,1.428007,0.890411,0.649128,0.863567,6.645657,0.928893
2,26335,0.002921,0.002921,1.030921,0.469831,1.236459,0.786129,0.001701,0.665784,4.065196,1.085661
3,26332,0.129271,0.129271,1.025962,0.439040,1.213825,0.731236,0.041973,0.611452,4.083087,1.338510
4,26336,0.999996,0.999996,1.092780,0.421420,1.013801,0.735792,1.000000,0.732338,4.397999,1.009215
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,1.030972,1.030972,0.660164,0.177187,0.383696,0.467604,0.963062,0.450402,2.648350,1.547301
990,5682,1.008528,1.008528,0.634895,0.211529,0.460843,0.530260,0.857598,0.499305,2.412234,2.479824
991,12772,0.165679,0.165679,0.639066,0.224386,0.567605,0.533942,0.029817,0.497937,2.487203,1.923277
992,12771,1.101117,1.101117,0.571814,0.097409,0.200283,0.368162,0.724677,0.344425,2.160948,1.311986


In [476]:
merged = rewards.merge(drift_regress_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.475799,0.475799,-0.317088,-0.195896,-0.193888,-0.252069,0.372561,-0.321274,-0.256465,0.170458
quality_min,0.28155,0.28155,-0.130931,-0.07829,-0.076862,-0.134011,0.173911,-0.158813,-0.089466,0.149854
quality_max,0.322711,0.322711,-0.435875,-0.277573,-0.271719,-0.302254,0.263727,-0.509997,-0.42289,0.075982
quality_std,-0.296754,-0.296754,0.146476,0.067245,0.065984,0.126664,-0.195504,0.156091,0.086583,-0.188803
cum_reward_avg,0.125468,0.125468,-0.130947,0.012455,0.001649,-0.034917,0.098385,-0.076119,-0.023618,0.194885
cum_reward_max,0.092153,0.092153,-0.084261,0.05704,0.03714,0.015838,0.068785,-0.030378,0.018833,0.174953
cum_reward_std,0.009733,0.009733,0.020613,0.125892,0.091248,0.110069,0.005212,0.056018,0.09109,0.081167
mom_reward_avg,0.151651,0.151651,-0.195205,-0.04764,-0.050832,-0.096609,0.11089,-0.139095,-0.081895,0.204265
mom_reward_min,0.217481,0.217481,-0.18219,-0.090793,-0.090151,-0.145259,0.151179,-0.170116,-0.109001,0.200604
mom_reward_max,,,,,,,,,,


In [477]:
merged.corr(method='kendall').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.307592,0.307592,-0.203969,-0.180935,-0.17531,-0.212042,0.213286,-0.26117,-0.178232,0.059306
quality_min,0.149305,0.149305,-0.087018,-0.068254,-0.068943,-0.09496,0.054747,-0.113087,-0.071962,0.057228
quality_max,0.206425,0.206425,-0.255938,-0.153669,-0.14477,-0.16808,0.155991,-0.315239,-0.238582,0.072769
quality_std,-0.183095,-0.183095,0.086855,0.076497,0.077121,0.103673,-0.081267,0.112844,0.071971,-0.055169
cum_reward_avg,0.065241,0.065241,-0.034446,0.019681,0.007898,-0.015784,0.058631,-0.033125,0.001593,0.089345
cum_reward_max,0.06078,0.06078,-0.031155,0.025943,0.013409,-0.009195,0.053897,-0.027166,0.004955,0.088534
cum_reward_std,0.04797,0.04797,-0.017639,0.044602,0.030764,0.015041,0.046022,-0.007577,0.015015,0.073827
mom_reward_avg,0.148213,0.148213,-0.126819,-0.075584,-0.078131,-0.111493,0.100049,-0.14392,-0.088601,0.093884
mom_reward_min,0.145546,0.145546,-0.127019,-0.079831,-0.082329,-0.115057,0.097139,-0.146874,-0.090664,0.088763
mom_reward_max,,,,,,,,,,


In [478]:
merged.corr(method='spearman').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.454545,0.454545,-0.3132,-0.266581,-0.25816,-0.313027,0.327054,-0.40081,-0.27757,0.08755
quality_min,0.227158,0.227158,-0.130537,-0.103464,-0.104137,-0.142117,0.096797,-0.173012,-0.108977,0.086253
quality_max,0.314429,0.314429,-0.395554,-0.223887,-0.210567,-0.247222,0.235795,-0.469802,-0.370153,0.114935
quality_std,-0.276935,-0.276935,0.130112,0.116869,0.117317,0.15628,-0.13714,0.172018,0.109195,-0.083149
cum_reward_avg,0.088617,0.088617,-0.046514,0.025773,0.010286,-0.021202,0.080069,-0.046304,0.001691,0.119265
cum_reward_max,0.08122,0.08122,-0.04161,0.034426,0.018101,-0.011771,0.072661,-0.037035,0.006704,0.117724
cum_reward_std,0.062547,0.062547,-0.023539,0.060488,0.042182,0.02069,0.061736,-0.00777,0.020283,0.09864
mom_reward_avg,0.19414,0.19414,-0.165766,-0.099459,-0.102601,-0.14612,0.131642,-0.189318,-0.116377,0.121931
mom_reward_min,0.184346,0.184346,-0.159875,-0.101288,-0.104409,-0.14536,0.122982,-0.186335,-0.114414,0.112027
mom_reward_max,,,,,,,,,,


## Weighted

In [479]:
df_weighted = orig_df[['cell_id']].copy()

df_weighted[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_mean, x)))
df_weighted

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.053242,0.053242,0.837810,0.406493,1.090307,0.643900,0.051703,0.579648,3.612372,0.405363
1,22944,0.052733,0.052733,1.540434,0.469304,1.193881,0.734600,0.034447,0.719573,5.758513,0.049293
2,26335,0.001397,0.001397,0.853264,0.414276,1.092814,0.663215,0.000833,0.559662,3.569114,0.680015
3,26332,0.034133,0.034133,0.860315,0.392744,1.091433,0.627425,0.017868,0.522639,3.564637,0.750837
4,26336,0.053067,0.053067,0.924606,0.370442,0.893458,0.628192,0.053067,0.630391,3.865623,0.159107
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.080664,0.080664,0.560986,0.160405,0.349551,0.404227,0.053368,0.389940,2.344384,0.574251
990,5682,0.060941,0.060941,0.531448,0.183708,0.404231,0.438665,0.047162,0.418950,2.110993,1.349273
991,12772,0.141562,0.141562,0.520843,0.192651,0.492185,0.447597,0.026094,0.411649,2.121407,1.106324
992,12771,0.144790,0.144790,0.478254,0.079410,0.163883,0.290477,0.086498,0.281411,1.886415,0.376215


In [480]:
merged = rewards.merge(df_weighted, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.11622,0.11622,-0.221922,-0.184348,-0.181266,-0.196638,0.130541,-0.249846,-0.225963,-0.019741
quality_min,0.016402,0.016402,-0.064799,-0.076243,-0.071693,-0.080497,0.030966,-0.079161,-0.079829,-0.029016
quality_max,0.153119,0.153119,-0.421709,-0.273153,-0.266342,-0.297268,0.142143,-0.515172,-0.410315,0.018571
quality_std,-0.008698,-0.008698,0.058726,0.06288,0.058198,0.065709,-0.032668,0.066975,0.073665,0.017174
cum_reward_avg,-0.07539,-0.07539,-0.023423,0.02112,0.012847,0.022843,-0.015538,0.005465,-0.010265,0.001988
cum_reward_max,-0.119091,-0.119091,0.021394,0.065781,0.048043,0.070835,-0.052695,0.045644,0.033461,-0.001386
cum_reward_std,-0.177528,-0.177528,0.096538,0.133957,0.099887,0.145122,-0.115157,0.098843,0.106294,-0.011912
mom_reward_avg,-0.039536,-0.039536,-0.079834,-0.038591,-0.038907,-0.036497,0.015322,-0.054514,-0.066426,0.003034
mom_reward_min,0.028101,0.028101,-0.100962,-0.085138,-0.081723,-0.088882,0.050694,-0.085355,-0.097893,0.009469
mom_reward_max,,,,,,,,,,


## Adaptive threshold

In [481]:
df_thresh = orig_df[['cell_id']].copy()

df_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_thresh_sum, x)))
df_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,3.166667,3.166667,1.166667,0.333333,1.500000,1.000000,1.500000,0.833333,6.166667,4.333333
1,22944,3.166667,3.166667,2.500000,0.666667,2.000000,1.000000,1.000000,1.000000,10.500000,3.000000
2,26335,0.000000,0.000000,1.166667,0.333333,1.833333,1.000000,0.000000,0.833333,6.166667,3.166667
3,26332,0.166667,0.166667,1.166667,0.333333,1.500000,0.833333,0.000000,0.666667,6.333333,4.166667
4,26336,3.166667,3.166667,1.500000,0.333333,1.333333,0.833333,1.500000,1.000000,6.833333,3.166667
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,3.166667,3.166667,0.833333,0.000000,0.166667,0.333333,1.500000,0.166667,4.000000,4.833333
990,5682,3.166667,3.166667,0.833333,0.000000,0.166667,0.333333,1.333333,0.166667,3.500000,7.833333
991,12772,0.333333,0.333333,0.833333,0.000000,0.500000,0.333333,0.000000,0.333333,3.666667,6.000000
992,12771,3.333333,3.333333,0.500000,0.000000,0.000000,0.000000,1.000000,0.000000,3.166667,4.000000


In [482]:
merged_thresh = rewards.merge(df_thresh, left_on='cell_id', right_on='cell_id', how='inner')
merged_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.473558,0.473558,-0.268451,-0.150925,-0.187368,-0.176784,0.368773,-0.224563,-0.241336,0.172924
quality_min,0.284462,0.284462,-0.075787,-0.048825,-0.066966,-0.072433,0.177382,-0.072122,-0.079582,0.154426
quality_max,0.315738,0.315738,-0.383537,-0.234667,-0.291747,-0.289404,0.254136,-0.457721,-0.414066,0.072927
quality_std,-0.299153,-0.299153,0.089697,0.048272,0.057092,0.057929,-0.19846,0.050718,0.07544,-0.194341
cum_reward_avg,0.127201,0.127201,-0.090904,-0.018189,0.003264,0.022585,0.100769,0.039307,-0.016139,0.199106
cum_reward_max,0.094785,0.094785,-0.059984,-0.001104,0.035001,0.079076,0.071944,0.09441,0.025133,0.179202
cum_reward_std,0.013499,0.013499,0.005497,0.029122,0.081323,0.166782,0.008658,0.164439,0.093427,0.08389
mom_reward_avg,0.152239,0.152239,-0.143061,-0.049427,-0.044676,-0.036879,0.1118,-0.02176,-0.073354,0.207599
mom_reward_min,0.219297,0.219297,-0.127054,-0.068102,-0.080243,-0.07659,0.15283,-0.060327,-0.09988,0.204073
mom_reward_max,,,,,,,,,,


## Adaptive weighted threshold

In [483]:
df_weighted_thresh = orig_df[['cell_id']].copy()

df_weighted_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_sum, x)))
df_weighted_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.166667,0.166667,1.000000,0.333333,1.333333,0.666667,0.0,0.666667,5.666667,1.166667
1,22944,0.166667,0.166667,2.166667,0.333333,1.500000,0.833333,0.0,0.833333,9.166667,0.000000
2,26335,0.000000,0.000000,1.000000,0.333333,1.666667,0.833333,0.0,0.666667,5.500000,2.000000
3,26332,0.000000,0.000000,1.000000,0.333333,1.333333,0.500000,0.0,0.333333,5.500000,2.166667
4,26336,0.166667,0.166667,1.000000,0.333333,1.166667,0.666667,0.0,0.833333,6.000000,0.333333
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.500000,1.500000
990,5682,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.000000,4.000000
991,12772,0.333333,0.333333,0.333333,0.000000,0.500000,0.333333,0.0,0.333333,3.166667,3.333333
992,12771,0.333333,0.333333,0.500000,0.000000,0.000000,0.000000,0.0,0.000000,2.833333,0.833333


In [484]:
df_weighted_thresh = rewards.merge(df_weighted_thresh, left_on='cell_id', right_on='cell_id', how='inner')
df_weighted_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.101956,0.101956,-0.217768,-0.122262,-0.18828,-0.228,0.027429,-0.180497,-0.220587,-0.033242
quality_min,0.010631,0.010631,-0.078541,-0.071222,-0.080241,-0.095728,-0.007168,-0.07397,-0.079388,-0.038467
quality_max,0.132239,0.132239,-0.381457,-0.176849,-0.304853,-0.419674,0.048797,-0.405248,-0.406023,0.008819
quality_std,-0.004047,-0.004047,0.068502,0.061309,0.068581,0.076744,0.012082,0.058713,0.071542,0.028198
cum_reward_avg,-0.068306,-0.068306,-0.033777,-0.003434,-0.007714,0.010029,-0.03557,0.026813,-0.00573,-0.009558
cum_reward_max,-0.111266,-0.111266,0.001046,0.002866,0.02349,0.060644,-0.064324,0.073759,0.038422,-0.011187
cum_reward_std,-0.170473,-0.170473,0.064682,0.01239,0.074891,0.139933,-0.111259,0.141258,0.111627,-0.016409
mom_reward_avg,-0.033025,-0.033025,-0.073425,-0.032567,-0.050572,-0.04018,-0.010817,-0.031248,-0.06187,-0.0099
mom_reward_min,0.030503,0.030503,-0.111172,-0.060619,-0.091442,-0.09139,0.016621,-0.062914,-0.096072,-0.001697
mom_reward_max,,,,,,,,,,


# Sampled | ref window 1000

In [644]:
orig_df = pd.read_csv('data/generated/drift/by_cell_agent/run_10/by_train_regressive_sampled_ref_sampled_drift_1000_window_1k_.csv', index_col=0)
orig_df

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test,cell_id
0,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",25771
1,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",22944
2,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26335
3,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26332
4,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26336
...,...,...,...,...,...,...,...,...,...,...,...
989,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5683
990,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5682
991,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12772
992,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12771


In [645]:
stat_tests = [c for c in orig_df.columns if c not in ['cell_id']]

drift_scores_df = orig_df[['cell_id']].copy()

drift_scores_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_total_score, x)))
drift_scores_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.833333,0.833333,0.833333,0.333333,0.666667,0.666667,0.833333,0.666667,0.833333,0.333333
1,22944,0.833333,0.833333,0.833333,0.666667,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333
2,26335,1.000000,1.000000,0.833333,0.333333,0.666667,0.666667,1.000000,0.666667,0.833333,0.333333
3,26332,0.833333,0.833333,0.833333,0.333333,0.500000,0.500000,1.000000,0.500000,0.833333,0.500000
4,26336,0.833333,0.833333,0.833333,0.333333,0.500000,0.500000,0.833333,0.500000,0.833333,0.666667
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.833333,0.833333,0.833333,0.000000,0.166667,0.333333,0.833333,0.166667,0.833333,0.500000
990,5682,0.833333,0.833333,0.833333,0.000000,0.166667,0.333333,0.833333,0.166667,0.833333,0.333333
991,12772,0.833333,0.833333,0.666667,0.000000,0.333333,0.333333,1.000000,0.333333,0.833333,0.333333
992,12771,0.500000,0.500000,0.500000,0.000000,0.000000,0.000000,0.833333,0.000000,0.833333,0.500000


In [646]:
merged = rewards.merge(drift_scores_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,-0.376859,-0.376859,-0.169591,-0.144724,-0.204794,-0.218141,-0.431804,-0.164857,-0.644308,-0.058966
quality_min,-0.169135,-0.169135,-0.030597,-0.058929,-0.086779,-0.09688,-0.242163,-0.04705,-0.307701,-0.020529
quality_max,-0.292821,-0.292821,-0.169905,-0.214416,-0.391022,-0.399785,-0.27739,-0.334296,-0.243065,-0.083813
quality_std,0.186516,0.186516,0.053594,0.058657,0.073291,0.07126,0.259379,0.016284,0.388794,0.039709
cum_reward_avg,-0.039843,-0.039843,-0.062911,-0.021165,-0.014133,0.023681,-0.114763,0.083357,-0.334514,-0.073656
cum_reward_max,0.000968,0.000968,-0.015782,-0.005276,0.031407,0.091598,-0.085073,0.151372,-0.322789,-0.069768
cum_reward_std,0.072364,0.072364,0.075513,0.023326,0.115067,0.201886,-0.012616,0.23875,-0.238053,-0.046022
mom_reward_avg,-0.088497,-0.088497,-0.120992,-0.052661,-0.049517,-0.025838,-0.149171,0.027287,-0.375475,-0.084462
mom_reward_min,-0.130615,-0.130615,-0.087005,-0.073064,-0.090329,-0.093317,-0.188588,-0.032592,-0.289662,-0.065963
mom_reward_max,,,,,,,,,,


## Regress

In [498]:
drift_regress_df = orig_df[['cell_id']].copy()

drift_regress_df[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_regress_score, x)))
drift_regress_df

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.956775,0.956775,1.000947,0.459818,1.227711,0.753709,0.869889,0.684726,4.118585,1.010199
1,22944,0.993703,0.993703,1.871576,0.565735,1.432259,0.881881,0.659963,0.867693,6.775952,0.933537
2,26335,0.008560,0.008560,0.972398,0.447312,1.197083,0.744173,0.002929,0.622257,4.027799,1.738669
3,26332,0.095651,0.095651,1.062993,0.450684,1.233759,0.744082,0.031452,0.616991,4.197333,1.067710
4,26336,0.999973,0.999973,1.080400,0.437691,1.056070,0.751227,1.000000,0.733859,4.406156,0.962531
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,1.008668,1.008668,0.712356,0.180921,0.392181,0.480816,0.980576,0.467823,2.736920,0.910720
990,5682,0.996578,0.996578,0.614831,0.204794,0.446266,0.511031,0.604538,0.496222,2.429843,2.640766
991,12772,0.207865,0.207865,0.661447,0.231554,0.584835,0.543354,0.036564,0.504612,2.703010,1.804978
992,12771,1.206066,1.206066,0.508996,0.082378,0.170140,0.322147,1.049649,0.311194,2.015124,1.422181


In [499]:
merged = rewards.merge(drift_regress_df, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.471126,0.471126,-0.312807,-0.195999,-0.194386,-0.251328,0.376411,-0.320963,-0.255732,0.165047
quality_min,0.274305,0.274305,-0.124886,-0.079814,-0.078085,-0.12982,0.172852,-0.151544,-0.09004,0.140211
quality_max,0.321323,0.321323,-0.443729,-0.279935,-0.273698,-0.304984,0.258963,-0.511766,-0.434838,0.110777
quality_std,-0.290633,-0.290633,0.140916,0.068604,0.066929,0.124389,-0.19539,0.151355,0.085725,-0.17568
cum_reward_avg,0.119125,0.119125,-0.121683,0.011818,0.001602,-0.032761,0.096436,-0.069121,-0.021238,0.185554
cum_reward_max,0.086665,0.086665,-0.07602,0.055559,0.036465,0.01744,0.068441,-0.024708,0.018937,0.16092
cum_reward_std,0.007445,0.007445,0.023873,0.123184,0.08937,0.109717,0.007924,0.056699,0.08664,0.0601
mom_reward_avg,0.147492,0.147492,-0.185318,-0.047611,-0.050396,-0.09567,0.115788,-0.133507,-0.076902,0.198603
mom_reward_min,0.207099,0.207099,-0.172969,-0.091985,-0.090711,-0.142737,0.149511,-0.164839,-0.105582,0.2079
mom_reward_max,,,,,,,,,,


In [500]:
merged.corr(method='kendall').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.296715,0.296715,-0.199532,-0.18029,-0.174925,-0.208958,0.218392,-0.261365,-0.178807,0.070504
quality_min,0.139238,0.139238,-0.075237,-0.066629,-0.067513,-0.088481,0.060397,-0.103037,-0.067302,0.044353
quality_max,0.201837,0.201837,-0.260684,-0.153941,-0.144636,-0.169429,0.15112,-0.322452,-0.242196,0.095321
quality_std,-0.171504,-0.171504,0.075253,0.074078,0.075115,0.096213,-0.084712,0.102907,0.067594,-0.040973
cum_reward_avg,0.06737,0.06737,-0.032906,0.016998,0.006211,-0.013203,0.059845,-0.029208,-0.001634,0.097937
cum_reward_max,0.062335,0.062335,-0.02983,0.02287,0.011399,-0.007001,0.054914,-0.024409,0.001754,0.095863
cum_reward_std,0.049447,0.049447,-0.017572,0.040077,0.026756,0.014783,0.041833,-0.008585,0.010553,0.075573
mom_reward_avg,0.142624,0.142624,-0.121708,-0.076962,-0.078869,-0.107476,0.108329,-0.138401,-0.086325,0.123304
mom_reward_min,0.137326,0.137326,-0.120682,-0.080947,-0.082915,-0.110658,0.104568,-0.140271,-0.087483,0.117375
mom_reward_max,,,,,,,,,,


In [501]:
merged.corr(method='spearman').loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.442206,0.442206,-0.308746,-0.266359,-0.25815,-0.309366,0.333467,-0.399266,-0.279376,0.102846
quality_min,0.213028,0.213028,-0.113352,-0.100923,-0.101402,-0.132812,0.101972,-0.156933,-0.101539,0.066528
quality_max,0.306087,0.306087,-0.399548,-0.224842,-0.211464,-0.249401,0.226621,-0.475049,-0.373939,0.149633
quality_std,-0.26157,-0.26157,0.112873,0.113084,0.113887,0.145749,-0.141753,0.156776,0.101579,-0.060014
cum_reward_avg,0.093485,0.093485,-0.04342,0.022577,0.00799,-0.018156,0.082193,-0.040746,-0.001528,0.130301
cum_reward_max,0.085431,0.085431,-0.038677,0.030676,0.015317,-0.009432,0.074844,-0.032689,0.002849,0.126445
cum_reward_std,0.065849,0.065849,-0.022843,0.054104,0.036872,0.019841,0.056788,-0.008502,0.014264,0.100502
mom_reward_avg,0.188594,0.188594,-0.158871,-0.101068,-0.10359,-0.141259,0.142331,-0.181779,-0.113113,0.158887
mom_reward_min,0.174437,0.174437,-0.151795,-0.102666,-0.105229,-0.139787,0.132048,-0.177872,-0.109935,0.146979
mom_reward_max,,,,,,,,,,


## Weighted

In [502]:
df_weighted = orig_df[['cell_id']].copy()

df_weighted[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_mean, x)))
df_weighted

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.051957,0.051957,0.853906,0.414207,1.110201,0.653979,0.046703,0.592298,3.708187,0.423733
1,22944,0.052733,0.052733,1.562574,0.469713,1.196860,0.733897,0.035022,0.721307,5.870352,0.049540
2,26335,0.004215,0.004215,0.830895,0.399285,1.064499,0.643096,0.001436,0.537781,3.547887,0.759306
3,26332,0.011661,0.011661,0.893978,0.402643,1.107987,0.638254,0.007173,0.527759,3.664790,0.394179
4,26336,0.053066,0.053066,0.921858,0.384771,0.930966,0.636741,0.053067,0.632632,3.883982,0.156101
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.060378,0.060378,0.586777,0.163779,0.358007,0.407592,0.053456,0.392734,2.423370,0.406949
990,5682,0.060061,0.060061,0.518345,0.178481,0.392277,0.431083,0.034257,0.416556,2.129213,1.435655
991,12772,0.183872,0.183872,0.535775,0.196814,0.502577,0.452802,0.032943,0.414797,2.299909,1.026943
992,12771,0.213819,0.213819,0.433903,0.069349,0.143001,0.270271,0.131244,0.264042,1.760948,0.358493


In [503]:
merged = rewards.merge(df_weighted, left_on='cell_id', right_on='cell_id', how='inner')
merged.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.111313,0.111313,-0.217276,-0.183549,-0.18103,-0.195419,0.128911,-0.247091,-0.224413,-0.011932
quality_min,0.019097,0.019097,-0.061715,-0.077037,-0.072327,-0.079798,0.031121,-0.074804,-0.080048,-0.012691
quality_max,0.143882,0.143882,-0.430963,-0.275066,-0.268045,-0.29884,0.131997,-0.520536,-0.423233,0.054123
quality_std,-0.014255,-0.014255,0.055408,0.063194,0.05838,0.065233,-0.035668,0.062384,0.072268,0.010462
cum_reward_avg,-0.071164,-0.071164,-0.018869,0.02052,0.012938,0.022321,-0.015756,0.011074,-0.008143,0.008945
cum_reward_max,-0.116757,-0.116757,0.025052,0.064119,0.0474,0.068904,-0.054562,0.049643,0.033408,-0.000886
cum_reward_std,-0.182143,-0.182143,0.097127,0.13078,0.097932,0.140952,-0.120771,0.098292,0.102074,-0.02626
mom_reward_avg,-0.031121,-0.031121,-0.073488,-0.038037,-0.038064,-0.036322,0.027264,-0.048523,-0.061438,0.007449
mom_reward_min,0.02593,0.02593,-0.094772,-0.085723,-0.081829,-0.088464,0.049258,-0.081276,-0.094488,0.033666
mom_reward_max,,,,,,,,,,


## Other funcs

In [504]:
orig_df

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test,cell_id
0,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",25771
1,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",22944
2,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26335
3,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26332
4,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",26336
...,...,...,...,...,...,...,...,...,...,...,...
989,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5683
990,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",5682
991,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12772
992,"{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.1, ...","{""HR Usage Rate"": {""stattest_threshold"": 0.05,...",12771


In [505]:
df_weighted = orig_df[['cell_id']].copy()

df_weighted[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_sum, x)))
rewards.merge(df_weighted, left_on='cell_id', right_on='cell_id', how='inner').corr()

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,...,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
cell_id,1.0,-0.042054,0.036162,0.018046,-0.001332,-0.023063,0.000967,-0.002974,-0.010015,-0.015267,...,-0.054343,-0.054343,0.038493,-0.021033,0.003415,0.01923,-0.050858,-0.016945,0.031267,-0.066421
drift_score,-0.042054,1.0,-0.637122,-0.609597,-0.289934,0.642976,-0.395991,-0.362477,-0.190325,-0.412066,...,-0.168455,-0.168455,0.099684,0.074052,0.091173,0.138272,-0.048019,0.116801,0.143216,0.052575
quality_avg,0.036162,-0.637122,1.0,0.689293,0.462465,-0.782348,0.615313,0.612029,0.471206,0.680864,...,0.092192,0.092192,-0.184896,-0.120235,-0.17135,-0.20487,0.031558,-0.199563,-0.220401,-0.02392
quality_min,0.018046,-0.609597,0.689293,1.0,0.197964,-0.955504,0.643323,0.588608,0.291974,0.660707,...,0.005724,0.005724,-0.049483,-0.06951,-0.074409,-0.09658,-0.011101,-0.108475,-0.079303,-0.022389
quality_max,-0.001332,-0.289934,0.462465,0.197964,1.0,-0.190559,0.1189,0.117325,0.090388,0.110648,...,0.121829,0.121829,-0.391141,-0.181834,-0.299173,-0.389244,0.044881,-0.40256,-0.420961,0.043639
quality_std,-0.023063,0.642976,-0.782348,-0.955504,-0.190559,1.0,-0.760596,-0.714535,-0.413002,-0.800001,...,-0.002345,-0.002345,0.034835,0.057188,0.060908,0.073123,0.009476,0.087751,0.070194,0.0219
cum_reward_avg,0.000967,-0.395991,0.615313,0.643323,0.1189,-0.760596,1.0,0.963393,0.626701,0.936032,...,-0.070348,-0.070348,0.002097,0.001236,-0.004789,0.016436,-0.019697,-0.024301,-0.006032,-0.001784
cum_reward_max,-0.002974,-0.362477,0.612029,0.588608,0.117325,-0.714535,0.963393,1.0,0.804616,0.884176,...,-0.113418,-0.113418,0.039685,0.009578,0.02433,0.067042,-0.046494,0.021993,0.035056,-0.011192
cum_reward_std,-0.010015,-0.190325,0.471206,0.291974,0.090388,-0.413002,0.626701,0.804616,1.0,0.53523,...,-0.174652,-0.174652,0.09863,0.021328,0.072157,0.144894,-0.09301,0.104316,0.103111,-0.032927
mom_reward_avg,-0.015267,-0.412066,0.680864,0.660707,0.110648,-0.800001,0.936032,0.884176,0.53523,1.0,...,-0.034209,-0.034209,-0.040757,-0.024502,-0.045051,-0.037606,0.013528,-0.064343,-0.057509,-0.003617


In [506]:
df_weighted

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.166667,0.166667,1.000000,0.333333,1.500000,0.833333,0.0,0.833333,6.000000,1.000000
1,22944,0.166667,0.166667,2.166667,0.333333,1.500000,0.833333,0.0,0.833333,9.500000,0.000000
2,26335,0.000000,0.000000,1.000000,0.333333,1.500000,0.833333,0.0,0.666667,5.500000,2.166667
3,26332,0.000000,0.000000,1.000000,0.333333,1.500000,0.666667,0.0,0.333333,5.833333,1.000000
4,26336,0.166667,0.166667,1.000000,0.333333,1.166667,0.500000,0.0,0.666667,6.000000,0.333333
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.500000,1.000000
990,5682,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.166667,4.166667
991,12772,0.500000,0.500000,0.333333,0.000000,0.500000,0.333333,0.0,0.333333,3.500000,3.000000
992,12771,0.500000,0.500000,0.166667,0.000000,0.000000,0.000000,0.0,0.000000,2.666667,1.000000


## Adaptive threshold

In [507]:
df_thresh = orig_df[['cell_id']].copy()

df_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_thresh_sum, x)))
df_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,3.166667,3.166667,1.166667,0.333333,1.833333,1.000000,1.333333,0.833333,6.333333,3.166667
1,22944,3.166667,3.166667,2.500000,0.666667,2.000000,1.000000,1.000000,1.000000,11.000000,3.000000
2,26335,0.000000,0.000000,1.166667,0.333333,1.833333,0.833333,0.000000,0.833333,6.166667,5.500000
3,26332,0.166667,0.166667,1.500000,0.333333,1.833333,0.833333,0.000000,0.666667,6.500000,3.166667
4,26336,3.166667,3.166667,1.500000,0.333333,1.333333,0.833333,1.500000,0.833333,6.833333,3.000000
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,3.166667,3.166667,0.833333,0.000000,0.166667,0.333333,1.500000,0.166667,4.000000,2.666667
990,5682,3.166667,3.166667,0.833333,0.000000,0.166667,0.333333,1.000000,0.166667,3.666667,8.333333
991,12772,0.500000,0.500000,0.666667,0.000000,0.500000,0.333333,0.000000,0.333333,4.166667,5.666667
992,12771,3.666667,3.666667,0.500000,0.000000,0.000000,0.000000,1.500000,0.000000,3.000000,4.333333


In [508]:
merged_thresh = rewards.merge(df_thresh, left_on='cell_id', right_on='cell_id', how='inner')
merged_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.468463,0.468463,-0.259121,-0.150117,-0.180873,-0.178103,0.372056,-0.220031,-0.243119,0.166351
quality_min,0.275802,0.275802,-0.086665,-0.057692,-0.073549,-0.080717,0.172363,-0.068739,-0.083771,0.142302
quality_max,0.313838,0.313838,-0.393508,-0.221369,-0.294765,-0.294992,0.251381,-0.453909,-0.431314,0.108123
quality_std,-0.291986,-0.291986,0.092411,0.055491,0.058775,0.064493,-0.19543,0.048431,0.07775,-0.178503
cum_reward_avg,0.120458,0.120458,-0.078567,-0.023672,0.005064,0.02898,0.100122,0.041435,-0.017151,0.187819
cum_reward_max,0.089253,0.089253,-0.041244,-0.006576,0.036672,0.085396,0.072952,0.090143,0.022588,0.162934
cum_reward_std,0.011943,0.011943,0.02788,0.023567,0.084054,0.172061,0.013392,0.149329,0.088998,0.060395
mom_reward_avg,0.147133,0.147133,-0.131456,-0.055309,-0.040133,-0.034952,0.118969,-0.018074,-0.071583,0.200328
mom_reward_min,0.206509,0.206509,-0.124126,-0.076257,-0.083539,-0.084982,0.148693,-0.059025,-0.100154,0.210528
mom_reward_max,,,,,,,,,,


## Adaptive weighted threshold

In [509]:
df_weighted_thresh = orig_df[['cell_id']].copy()

df_weighted_thresh[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_sum, x)))
df_weighted_thresh

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,0.166667,0.166667,1.000000,0.333333,1.500000,0.833333,0.0,0.833333,6.000000,1.000000
1,22944,0.166667,0.166667,2.166667,0.333333,1.500000,0.833333,0.0,0.833333,9.500000,0.000000
2,26335,0.000000,0.000000,1.000000,0.333333,1.500000,0.833333,0.0,0.666667,5.500000,2.166667
3,26332,0.000000,0.000000,1.000000,0.333333,1.500000,0.666667,0.0,0.333333,5.833333,1.000000
4,26336,0.166667,0.166667,1.000000,0.333333,1.166667,0.500000,0.0,0.666667,6.000000,0.333333
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.500000,1.000000
990,5682,0.166667,0.166667,0.666667,0.000000,0.166667,0.166667,0.0,0.166667,3.166667,4.166667
991,12772,0.500000,0.500000,0.333333,0.000000,0.500000,0.333333,0.0,0.333333,3.500000,3.000000
992,12771,0.500000,0.500000,0.166667,0.000000,0.000000,0.000000,0.0,0.000000,2.666667,1.000000


In [510]:
df_weighted_thresh = rewards.merge(df_weighted_thresh, left_on='cell_id', right_on='cell_id', how='inner')
df_weighted_thresh.corr().loc[rows, cols].style.background_gradient(cmap ='coolwarm', vmin=-1, vmax=1, axis=1)

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
quality_avg,0.092192,0.092192,-0.184896,-0.120235,-0.17135,-0.20487,0.031558,-0.199563,-0.220401,-0.02392
quality_min,0.005724,0.005724,-0.049483,-0.06951,-0.074409,-0.09658,-0.011101,-0.108475,-0.079303,-0.022389
quality_max,0.121829,0.121829,-0.391141,-0.181834,-0.299173,-0.389244,0.044881,-0.40256,-0.420961,0.043639
quality_std,-0.002345,-0.002345,0.034835,0.057188,0.060908,0.073123,0.009476,0.087751,0.070194,0.0219
cum_reward_avg,-0.070348,-0.070348,0.002097,0.001236,-0.004789,0.016436,-0.019697,-0.024301,-0.006032,-0.001784
cum_reward_max,-0.113418,-0.113418,0.039685,0.009578,0.02433,0.067042,-0.046494,0.021993,0.035056,-0.011192
cum_reward_std,-0.174652,-0.174652,0.09863,0.021328,0.072157,0.144894,-0.09301,0.104316,0.103111,-0.032927
mom_reward_avg,-0.034209,-0.034209,-0.040757,-0.024502,-0.045051,-0.037606,0.013528,-0.064343,-0.057509,-0.003617
mom_reward_min,0.020843,0.020843,-0.080335,-0.059394,-0.084429,-0.089299,0.016047,-0.11192,-0.091126,0.023451
mom_reward_max,,,,,,,,,,


# Get data for learning

In [185]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split
from joblib import dump, load
import os.path as osp
import numpy as np

In [153]:
def get_by_feature_score(x: str) -> Dict[str, any]:
    js = json.loads(x)

    return {f: js[f]['drift_score'] for f,v in js.items()}

In [368]:
def get_weighted_poly_aggregation(x: str, fi: Dict, method: Callable = lambda x: x) -> Dict:
    js = json.loads(x)

    return method({f: js[f]['drift_score'] * fi.get(f, 1) for f,v in js.items()})


In [369]:
def get_weighted_poly_thresh_agg(x: str, fi: Dict, method: Callable = lambda x: x) -> Dict:
    js = json.loads(x)

    return method({f: js[f]['drift_score'] * fi.get(f, 1) // js[f]['stattest_threshold'] for f,v in js.items()})


In [370]:
get_weighted_poly = partial(get_weighted_poly_aggregation, fi=fi, method=lambda x: x)
get_weighted_poly.__name__ = 'get_weighted_poly'

get_weighted_thresh_poly = partial(get_weighted_poly_thresh_agg, fi=fi, method=lambda x: x)
get_weighted_thresh_poly.__name__ = 'get_weighted_thresh_poly'

get_thresh_poly = partial(get_weighted_poly_thresh_agg, fi={}, method=lambda x: x)
get_thresh_poly.__name__ = 'get_thresh_poly'

In [371]:
def get_models_results(df: pd.DataFrame,
                       rewards_df: pd.DataFrame,
                       features_l: List[str],
                       target: str,
                       stat_ts: List[str],
                       save_path: str,
                       data_func: Callable) -> pd.DataFrame:
    df_4_train = df[['cell_id']].copy()
    df_4_train[stat_ts] = df[stat_ts].apply(lambda x: list(map(data_func, x)))

    results = []

    for st in stat_ts:
        train_df = pd.DataFrame(df_4_train[st].tolist(), index=df_4_train['cell_id'])
        train_df = train_df.merge(rewards_df[['cell_id', target]], left_on='cell_id', right_on='cell_id', how='inner')

        X_train, X_test, y_train, y_test = train_test_split(train_df[features_l], train_df[target], test_size=0.3, random_state=3407)

        # Linear Regression
        model_lr = LinearRegression()
        model_lr.fit(X_train, y_train)
        lr_train_score = model_lr.score(X_train, y_train)
        lr_test_score = model_lr.score(X_test, y_test)
        # dumping model
        lr_path = osp.join(save_path, f'lr_{st}.joblib')
        dump(model_lr, lr_path)

        # Polynomial
        transformer = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
        X_train_polynom = transformer.fit_transform(X_train)
        X_test_polynom = transformer.transform(X_test)
        model_pol_lr = LinearRegression().fit(X_train_polynom, y_train)
        pr_train_score = model_pol_lr.score(X_train_polynom, y_train)
        pr_test_score = model_pol_lr.score(X_test_polynom, y_test)
        # dumping model
        pol_lr_path = osp.join(save_path, f'pol_lr_{st}.joblib')
        trans_path = osp.join(save_path, f'transformer_{st}.joblib')
        dump(model_pol_lr, pol_lr_path)
        dump(transformer, trans_path)


        # save results
        results.append((st,
                        lr_train_score, lr_test_score,
                        pr_train_score, pr_test_score,
                        lr_path,
                        pol_lr_path,
                        trans_path))

    res_df = pd.DataFrame(results, columns=['stat_test',
                                            'lr_train_score', 'lr_test_score',
                                            'pol_lr_train_score', 'pol_lr_test_score',
                                            'lr_path',
                                            'pol_lr_path',
                                            'trans_path'])

    return res_df

In [159]:
features = list(fi.keys())
features

['HR Usage Rate',
 'TCH Blocking Rate, BH',
 'Number of Available\nTCH',
 'TCH Traffic (Erl), BH',
 'Param 1',
 'Param 2']

In [160]:
rewards.columns


Index(['cell_id', 'drift_score', 'quality_avg', 'quality_min', 'quality_max',
       'quality_std', 'cum_reward_avg', 'cum_reward_max', 'cum_reward_std',
       'mom_reward_avg', 'mom_reward_min', 'mom_reward_max', 'mom_reward_std'],
      dtype='object')

In [166]:
label = 'quality_avg'

In [372]:
path = 'data/generated/models/one_drift_0'
get_models_results(orig_df, rewards, features, label, stat_tests, path, get_by_feature_score)

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path
0,default,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_0/lr_default.j...,data/generated/models/one_drift_0/pol_lr_defau...,data/generated/models/one_drift_0/transformer_...
1,ks,0.033693,-0.248335,0.034097,-0.277876,data/generated/models/one_drift_0/lr_ks.joblib,data/generated/models/one_drift_0/pol_lr_ks.jo...,data/generated/models/one_drift_0/transformer_...
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_0/lr_wasserste...,data/generated/models/one_drift_0/pol_lr_wasse...,data/generated/models/one_drift_0/transformer_...
3,kl_div,0.634255,0.712781,0.660252,0.619108,data/generated/models/one_drift_0/lr_kl_div.jo...,data/generated/models/one_drift_0/pol_lr_kl_di...,data/generated/models/one_drift_0/transformer_...
4,psi,0.656159,0.736471,0.682675,0.686402,data/generated/models/one_drift_0/lr_psi.joblib,data/generated/models/one_drift_0/pol_lr_psi.j...,data/generated/models/one_drift_0/transformer_...
5,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_0/lr_jensensha...,data/generated/models/one_drift_0/pol_lr_jense...,data/generated/models/one_drift_0/transformer_...
6,cramer_von_mises,0.035772,-0.016107,0.074289,-0.002753,data/generated/models/one_drift_0/lr_cramer_vo...,data/generated/models/one_drift_0/pol_lr_crame...,data/generated/models/one_drift_0/transformer_...
7,hellinger,0.590323,0.65202,0.608425,0.657898,data/generated/models/one_drift_0/lr_hellinger...,data/generated/models/one_drift_0/pol_lr_helli...,data/generated/models/one_drift_0/transformer_...
8,ed,0.755561,0.698535,0.772892,0.71446,data/generated/models/one_drift_0/lr_ed.joblib,data/generated/models/one_drift_0/pol_lr_ed.jo...,data/generated/models/one_drift_0/transformer_...
9,t_test,0.145621,0.010137,0.159913,-0.187565,data/generated/models/one_drift_0/lr_t_test.jo...,data/generated/models/one_drift_0/pol_lr_t_tes...,data/generated/models/one_drift_0/transformer_...


## Getting one drift all run

In [199]:
fi

{'HR Usage Rate': 1.0,
 'TCH Blocking Rate, BH': 0.05306688095919128,
 'Number of Available\nTCH': 0.9145249190288962,
 'TCH Traffic (Erl), BH': 0.48950080102867455,
 'Param 1': 0.934843836886342,
 'Param 2': 0.8525678849265464}

In [200]:
stat_tests

['default',
 'ks',
 'wasserstein',
 'kl_div',
 'psi',
 'jensenshannon',
 'cramer_von_mises',
 'hellinger',
 'ed',
 't_test']

In [373]:
datasets_list = [('not-sampled_no-ref-window', 'data/generated/drift/by_cell_agent/run_7/by_train_regressive__sampled-drift-None_no-window_.csv'),
             ('sampled_no-ref-window', 'data/generated/drift/by_cell_agent/run_8/by_train_regressive__sampled-drift-1000_no-window_.csv'),
             ('not-sampled_ref-window-1k', 'data/generated/drift/by_cell_agent/run_9/by_train_regressive_sampled_ref_sampled_drift_None_window_1k_.csv'),
             ('sampled_ref-window-1k', 'data/generated/drift/by_cell_agent/run_10/by_train_regressive_sampled_ref_sampled_drift_1000_window_1k_.csv')]
path = 'data/generated/models/one_drift_1'
label = 'quality_avg'
features = ['HR Usage Rate',
 'TCH Blocking Rate, BH',
 'Number of Available\nTCH',
 'TCH Traffic (Erl), BH',
 'Param 1',
 'Param 2']
one_drift_res = pd.DataFrame()

for dataset_name, dataset_path in datasets_list:
    dataset = pd.read_csv(dataset_path, index_col=0)

    for form_data_func in [get_by_feature_score, get_weighted_poly, get_thresh_poly, get_weighted_thresh_poly]:
        data_type_name = form_data_func.__name__

        # regress_df = dataset[['cell_id']].copy()
        # regress_df[stat_tests] = dataset[stat_tests].apply(lambda x: list(map(form_data_func, x)))
        # creating path to save data
        saving_path = osp.join(path, dataset_name, data_type_name)
        Path(saving_path).mkdir(exist_ok=True, parents=True)

        res = get_models_results(dataset, rewards, features, label, stat_tests, saving_path, form_data_func)
        res['dataset_name'] = [dataset_name] * len(res)
        res['data_type_name'] = [data_type_name] * len(res)

        one_drift_res = pd.concat([one_drift_res, res])

one_drift_res.to_csv(osp.join(path, 'one_drift_res.csv'))
one_drift_res

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path,dataset_name,data_type_name
0,default,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
1,ks,0.033693,-0.248335,0.034097,-0.277876,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
3,kl_div,0.634255,0.712781,0.660252,0.619108,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
4,psi,0.656159,0.736471,0.682675,0.686402,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
...,...,...,...,...,...,...,...,...,...,...
5,jensenshannon,0.078874,0.044337,0.106421,0.057136,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,sampled_ref-window-1k,get_weighted_thresh_poly
6,cramer_von_mises,0.001684,-0.018597,0.001684,-0.018597,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,sampled_ref-window-1k,get_weighted_thresh_poly
7,hellinger,0.067041,0.057086,0.095648,0.040025,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,sampled_ref-window-1k,get_weighted_thresh_poly
8,ed,0.054962,0.052217,0.097394,0.069285,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,data/generated/models/one_drift_1/sampled_ref-...,sampled_ref-window-1k,get_weighted_thresh_poly


In [513]:
one_drift_res.groupby(by=['dataset_name', 'data_type_name'], ).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
not-sampled_no-ref-window,get_by_feature_score,wasserstein,0.755561,0.78868,0.801007,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_thresh_poly,wasserstein,0.753709,0.749779,0.811042,0.758648,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_weighted_poly,wasserstein,0.755561,0.78868,0.801007,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_weighted_thresh_poly,wasserstein,0.241575,0.076635,0.29854,0.091618,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_by_feature_score,wasserstein,0.73601,0.67295,0.75547,0.688277,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_thresh_poly,wasserstein,0.623027,0.591182,0.646358,0.599114,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_weighted_poly,wasserstein,0.73601,0.67295,0.75547,0.688277,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_weighted_thresh_poly,wasserstein,0.142977,0.154185,0.148519,0.1511,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
sampled_no-ref-window,get_by_feature_score,wasserstein,0.744106,0.697483,0.757915,0.704923,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...
sampled_no-ref-window,get_thresh_poly,wasserstein,0.72846,0.680858,0.740027,0.688093,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...


In [421]:
ind_cols = ['dataset_name', 'data_type_name']
not_ind_cols = [c for c in one_drift_res.columns if c not in ind_cols]
one_drift_res_agg = pd.DataFrame(columns=not_ind_cols, index=one_drift_res.groupby(by=ind_cols, ).max().index)

for ind in one_drift_res_agg.index:
    sub = one_drift_res[(one_drift_res[ind_cols[0]] == ind[0]) & (one_drift_res[ind_cols[1]] == ind[1])].drop_duplicates(
        subset=['lr_test_score'], keep='last'
    )
    if list(sub.stat_test.unique()) != [s for s in stat_tests if s != 'default']:
        print(ind, 'has not all tests: ', list(sub.stat_test.unique()))

    lr_max_ind, pol_max_ind = sub[['lr_test_score', 'pol_lr_test_score']].idxmax()
    if lr_max_ind != pol_max_ind:
        if sub.loc[lr_max_ind, 'lr_test_score'] - sub.loc[pol_max_ind, 'lr_test_score'
        ] >= sub.loc[pol_max_ind, 'pol_lr_test_score'] - sub.loc[lr_max_ind, 'pol_lr_test_score']:
            lr_max_ind = lr_max_ind
        else:
            lr_max_ind = pol_max_ind

    one_drift_res_agg.loc[ind] = sub.loc[lr_max_ind, not_ind_cols]

one_drift_res_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
not-sampled_no-ref-window,get_by_feature_score,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_thresh_poly,wasserstein,0.708998,0.749779,0.811042,0.758648,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_weighted_poly,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_no-ref-window,get_weighted_thresh_poly,ed,0.059928,0.066713,0.099287,0.091618,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_by_feature_score,ed,0.73601,0.67295,0.75547,0.688277,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_thresh_poly,ed,0.623027,0.591182,0.646358,0.599114,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_weighted_poly,ed,0.73601,0.67295,0.75547,0.688277,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
not-sampled_ref-window-1k,get_weighted_thresh_poly,ks,0.142977,0.154185,0.148519,0.1511,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...
sampled_no-ref-window,get_by_feature_score,ed,0.744106,0.697483,0.757915,0.704923,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...
sampled_no-ref-window,get_thresh_poly,ed,0.72846,0.680858,0.740027,0.688093,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...


In [520]:
one_drift_res_agg[[c for c in one_drift_res_agg.columns if 'path' not in c.lower()]].style.background_gradient(subset=['lr_train_score', 'lr_test_score', 'pol_lr_train_score', 'pol_lr_test_score'], cmap ='coolwarm', vmin=-1, vmax=1, axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
not-sampled_no-ref-window,get_by_feature_score,jensenshannon,0.688177,0.78868,0.724999,0.780046
not-sampled_no-ref-window,get_thresh_poly,wasserstein,0.708998,0.749779,0.811042,0.758648
not-sampled_no-ref-window,get_weighted_poly,jensenshannon,0.688177,0.78868,0.724999,0.780046
not-sampled_no-ref-window,get_weighted_thresh_poly,ed,0.059928,0.066713,0.099287,0.091618
not-sampled_ref-window-1k,get_by_feature_score,ed,0.73601,0.67295,0.75547,0.688277
not-sampled_ref-window-1k,get_thresh_poly,ed,0.623027,0.591182,0.646358,0.599114
not-sampled_ref-window-1k,get_weighted_poly,ed,0.73601,0.67295,0.75547,0.688277
not-sampled_ref-window-1k,get_weighted_thresh_poly,ks,0.142977,0.154185,0.148519,0.1511
sampled_no-ref-window,get_by_feature_score,ed,0.744106,0.697483,0.757915,0.704923
sampled_no-ref-window,get_thresh_poly,ed,0.72846,0.680858,0.740027,0.688093


In [414]:
one_drift_res[(one_drift_res.dataset_name == 'not-sampled_no-ref-window') & (one_drift_res.data_type_name == 'get_by_feature_score')]

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path,dataset_name,data_type_name
0,default,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
1,ks,0.033693,-0.248335,0.034097,-0.277876,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
3,kl_div,0.634255,0.712781,0.660252,0.619108,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
4,psi,0.656159,0.736471,0.682675,0.686402,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
5,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
6,cramer_von_mises,0.035772,-0.016107,0.074289,-0.002753,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
7,hellinger,0.590323,0.65202,0.608425,0.657898,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
8,ed,0.755561,0.698535,0.772892,0.71446,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
9,t_test,0.145621,0.010137,0.159913,-0.187565,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score


In [417]:
one_drift_res[(one_drift_res.dataset_name == 'not-sampled_no-ref-window')].drop_duplicates(subset=['lr_test_score'], keep='last')

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path,dataset_name,data_type_name
0,default,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
1,ks,0.033693,-0.248335,0.034097,-0.277876,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
3,kl_div,0.634255,0.712781,0.660252,0.619108,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
4,psi,0.656159,0.736471,0.682675,0.686402,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
5,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
6,cramer_von_mises,0.035772,-0.016107,0.074289,-0.002753,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
7,hellinger,0.590323,0.65202,0.608425,0.657898,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
8,ed,0.755561,0.698535,0.772892,0.71446,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
9,t_test,0.145621,0.010137,0.159913,-0.187565,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score


In [397]:
one_drift_res[(one_drift_res.dataset_name == 'not-sampled_no-ref-window') & (one_drift_res.data_type_name == 'get_by_feature_score')].idxmax()

stat_test             2
lr_train_score        8
lr_test_score         5
pol_lr_train_score    0
pol_lr_test_score     5
lr_path               2
pol_lr_path           2
trans_path            2
dataset_name          0
data_type_name        0
dtype: int64

In [394]:
one_drift_res[(one_drift_res.dataset_name == 'not-sampled_no-ref-window') & (one_drift_res.data_type_name == 'get_by_feature_score')]

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path,dataset_name,data_type_name
0,default,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
1,ks,0.033693,-0.248335,0.034097,-0.277876,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
3,kl_div,0.634255,0.712781,0.660252,0.619108,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
4,psi,0.656159,0.736471,0.682675,0.686402,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
5,jensenshannon,0.688177,0.78868,0.724999,0.780046,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
6,cramer_von_mises,0.035772,-0.016107,0.074289,-0.002753,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
7,hellinger,0.590323,0.65202,0.608425,0.657898,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
8,ed,0.755561,0.698535,0.772892,0.71446,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
9,t_test,0.145621,0.010137,0.159913,-0.187565,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score


## Getting multi test

In [317]:
orig_df[stat_tests].apply(lambda x: list(map(get_weighted_thresh_poly, x)))

Unnamed: 0,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,"{'HR Usage Rate': 11.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 11.0, 'Number of Available T...","{'HR Usage Rate': 37.0, 'Number of Available T...","{'HR Usage Rate': 48.0, 'Number of Available T...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 6.0, 'Number of Available TC...","{'HR Usage Rate': 60.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
1,"{'HR Usage Rate': 12.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 12.0, 'Number of Available T...","{'HR Usage Rate': 41.0, 'Number of Available T...","{'HR Usage Rate': 56.0, 'Number of Available T...","{'HR Usage Rate': 6.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 66.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
2,"{'HR Usage Rate': 9.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 9.0, 'Number of Available TC...","{'HR Usage Rate': 27.0, 'Number of Available T...","{'HR Usage Rate': 40.0, 'Number of Available T...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 6.0, 'Number of Available TC...","{'HR Usage Rate': 47.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
3,"{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 24.0, 'Number of Available T...","{'HR Usage Rate': 31.0, 'Number of Available T...","{'HR Usage Rate': 4.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 27.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
4,"{'HR Usage Rate': 10.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 10.0, 'Number of Available T...","{'HR Usage Rate': 25.0, 'Number of Available T...","{'HR Usage Rate': 37.0, 'Number of Available T...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 54.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
...,...,...,...,...,...,...,...,...,...,...
989,"{'HR Usage Rate': 9.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 9.0, 'Number of Available TC...","{'HR Usage Rate': 43.0, 'Number of Available T...","{'HR Usage Rate': 71.0, 'Number of Available T...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 50.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
990,"{'HR Usage Rate': 8.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 8.0, 'Number of Available TC...","{'HR Usage Rate': 50.0, 'Number of Available T...","{'HR Usage Rate': 80.0, 'Number of Available T...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 41.0, 'Number of Available T...","{'HR Usage Rate': 7.0, 'Number of Available TC..."
991,"{'HR Usage Rate': 12.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 12.0, 'Number of Available T...","{'HR Usage Rate': 40.0, 'Number of Available T...","{'HR Usage Rate': 54.0, 'Number of Available T...","{'HR Usage Rate': 6.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 6.0, 'Number of Available TC...","{'HR Usage Rate': 63.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."
992,"{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 7.0, 'Number of Available TC...","{'HR Usage Rate': 31.0, 'Number of Available T...","{'HR Usage Rate': 41.0, 'Number of Available T...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 0.0, 'Number of Available TC...","{'HR Usage Rate': 5.0, 'Number of Available TC...","{'HR Usage Rate': 37.0, 'Number of Available T...","{'HR Usage Rate': 0.0, 'Number of Available TC..."


In [548]:
def get_models_poly_drift_results(df: pd.DataFrame,
                       rewards_df: pd.DataFrame,
                       features_l: List[str],
                       target: str,
                       stat_ts: List[str],
                       save_path: str, get_func: Callable) -> pd.DataFrame:
    df_4_train = df[['cell_id']].copy()
    df_4_train[stat_ts] = df[stat_ts].apply(lambda x: list(map(get_func, x)))
    f_size = len(features_l)

    results = []
    train_df = pd.DataFrame(index=df_4_train['cell_id'])

    for st in stat_ts:
        temp_df = pd.DataFrame(df_4_train[st].tolist(), index=df_4_train['cell_id'])
        # print(f'[{st}] /t train columns: ', train_df.columns)
        train_df = pd.merge(train_df, temp_df, left_on='cell_id', right_on='cell_id', how='inner', suffixes=('', '_' + st))

    train_df = train_df.merge(rewards_df[['cell_id', target]], left_on='cell_id', right_on='cell_id', how='inner')
    features_l = [f for f in train_df.columns if f not in [target, 'cell_id']]
    print(features_l)
    # print(train_df)
    X_train, X_test, y_train, y_test = train_test_split(train_df[features_l], train_df[target], test_size=0.3, random_state=3407)
    print('X_tr: ', X_train.shape, 'y_te: ', y_test.shape)

    # Linear Regression
    model_lr = LinearRegression()
    model_lr.fit(X_train, y_train)
    lr_train_score = model_lr.score(X_train, y_train)
    lr_test_score = model_lr.score(X_test, y_test)
    # dumping model
    lr_path = osp.join(save_path, f'lr_all_stats.joblib')
    dump(model_lr, lr_path)

    # Polynomial
    transformer = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
    X_train_polynom = transformer.fit_transform(X_train)
    X_test_polynom = transformer.transform(X_test)
    model_pol_lr = LinearRegression().fit(X_train_polynom, y_train)
    pr_train_score = model_pol_lr.score(X_train_polynom, y_train)
    pr_test_score = model_pol_lr.score(X_test_polynom, y_test)
    # dumping model
    pol_lr_path = osp.join(save_path, f'pol_lr_all_stats.joblib')
    trans_path = osp.join(save_path, f'transformer_all_stats.joblib')
    dump(model_pol_lr, pol_lr_path)
    dump(transformer, trans_path)

    # Cross Decomposition
    reg = PLSRegression(n_components=f_size)
    reg.fit(X_train, y_train.to_numpy().ravel())
    plsr_train_score = reg.score(X_train, y_train.to_numpy().ravel())
    plsr_test_score = reg.score(X_test, y_test.to_numpy().ravel())
    # dumping model
    plsr_path = osp.join(save_path, f'plsr_all_stats.joblib')
    dump(reg, plsr_path)

    # save results
    results.append((lr_train_score, lr_test_score,
                    pr_train_score, pr_test_score,
                    plsr_train_score, plsr_test_score,
                    lr_path,
                    pol_lr_path,
                    trans_path,
                    plsr_path))

    res_df = pd.DataFrame(results, columns=['lr_train_score', 'lr_test_score',
                                            'pol_lr_train_score', 'pol_lr_test_score',
                                            'plsr_train_score', 'plsr_test_score',
                                            'lr_path',
                                            'pol_lr_path',
                                            'trans_path',
                                            'plsr_path'])

    return res_df

In [432]:
path = '/home/rid/Projects/Study/Magister/Dyploma/Bachelor/data/generated/models/poly_drift_0'
get_models_poly_drift_results(orig_df, rewards, features, label, stat_tests, path, get_by_feature_score)

X_tr:  (695, 60) y_te:  (299,)


Unnamed: 0,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,plsr_train_score,plsr_test_score,lr_path,pol_lr_path,trans_path,plsr_path
0,0.92764,0.764802,1.0,-13.398087,0.901254,0.841816,/home/rid/Projects/Study/Magister/Dyploma/Bach...,/home/rid/Projects/Study/Magister/Dyploma/Bach...,/home/rid/Projects/Study/Magister/Dyploma/Bach...,/home/rid/Projects/Study/Magister/Dyploma/Bach...


### poly drift all run

In [549]:
datasets_list = [('not-sampled_no-ref-window', 'data/generated/drift/by_cell_agent/run_7/by_train_regressive__sampled-drift-None_no-window_.csv'),
             ('sampled_no-ref-window', 'data/generated/drift/by_cell_agent/run_8/by_train_regressive__sampled-drift-1000_no-window_.csv'),
             ('not-sampled_ref-window-1k', 'data/generated/drift/by_cell_agent/run_9/by_train_regressive_sampled_ref_sampled_drift_None_window_1k_.csv'),
             ('sampled_ref-window-1k', 'data/generated/drift/by_cell_agent/run_10/by_train_regressive_sampled_ref_sampled_drift_1000_window_1k_.csv')]
path = 'data/generated/models/poly_drift_1'
label = 'quality_avg'
features = ['HR Usage Rate',
 'TCH Blocking Rate, BH',
 'Number of Available\nTCH',
 'TCH Traffic (Erl), BH',
 'Param 1',
 'Param 2']
poly_drift_res = pd.DataFrame()

for dataset_name, dataset_path in datasets_list:
    dataset = pd.read_csv(dataset_path, index_col=0)

    for form_data_func in [get_by_feature_score, get_weighted_poly, get_thresh_poly, get_weighted_thresh_poly]:
        data_type_name = form_data_func.__name__

        # regress_df = dataset[['cell_id']].copy()
        # regress_df[stat_tests] = dataset[stat_tests].apply(lambda x: list(map(form_data_func, x)))
        # creating path to save data
        saving_path = osp.join(path, dataset_name, data_type_name)
        Path(saving_path).mkdir(exist_ok=True, parents=True)

        res = get_models_poly_drift_results(dataset.copy(), rewards, features, label, stat_tests, saving_path, form_data_func)
        res['dataset_name'] = [dataset_name] * len(res)
        res['data_type_name'] = [data_type_name] * len(res)

        poly_drift_res = pd.concat([poly_drift_res, res])

poly_drift_res.to_csv(osp.join(path, 'poly_drift_res.csv'))
poly_drift_res

['HR Usage Rate', 'Number of Available\nTCH', 'Param 1', 'Param 2', 'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'HR Usage Rate_ks', 'Number of Available\nTCH_ks', 'Param 1_ks', 'Param 2_ks', 'TCH Blocking Rate, BH_ks', 'TCH Traffic (Erl), BH_ks', 'HR Usage Rate_wasserstein', 'Number of Available\nTCH_wasserstein', 'Param 1_wasserstein', 'Param 2_wasserstein', 'TCH Blocking Rate, BH_wasserstein', 'TCH Traffic (Erl), BH_wasserstein', 'HR Usage Rate_kl_div', 'Number of Available\nTCH_kl_div', 'Param 1_kl_div', 'Param 2_kl_div', 'TCH Blocking Rate, BH_kl_div', 'TCH Traffic (Erl), BH_kl_div', 'HR Usage Rate_psi', 'Number of Available\nTCH_psi', 'Param 1_psi', 'Param 2_psi', 'TCH Blocking Rate, BH_psi', 'TCH Traffic (Erl), BH_psi', 'HR Usage Rate_jensenshannon', 'Number of Available\nTCH_jensenshannon', 'Param 1_jensenshannon', 'Param 2_jensenshannon', 'TCH Blocking Rate, BH_jensenshannon', 'TCH Traffic (Erl), BH_jensenshannon', 'HR Usage Rate_cramer_von_mises', 'Number of Available\nT

Unnamed: 0,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,plsr_train_score,plsr_test_score,lr_path,pol_lr_path,trans_path,plsr_path,dataset_name,data_type_name
0,0.92764,0.764802,1.0,-13.398087,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_no-ref-window,get_by_feature_score
0,0.927645,0.7628048,1.0,-104.622892,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_no-ref-window,get_weighted_poly
0,0.885123,0.8443703,1.0,-5.734349,0.869359,0.853034,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_no-ref-window,get_thresh_poly
0,0.369276,0.1120377,0.986066,-27035.432059,0.30521,0.07817,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_no-ref-window,get_weighted_thresh_poly
0,0.838078,0.6790309,1.0,-20.303241,0.802084,0.590888,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,sampled_no-ref-window,get_by_feature_score
0,0.838078,0.6781198,1.0,-6.127637,0.802084,0.590888,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,sampled_no-ref-window,get_weighted_poly
0,0.819654,0.753664,1.0,-3.406349,0.792692,0.745368,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,sampled_no-ref-window,get_thresh_poly
0,0.478861,0.3384323,1.0,-743.255565,0.440223,0.333753,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,sampled_no-ref-window,get_weighted_thresh_poly
0,0.875334,0.8183723,1.0,-2.057261,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_ref-window-1k,get_by_feature_score
0,0.875357,0.8185632,1.0,-4.99394,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,not-sampled_ref-window-1k,get_weighted_poly


In [511]:
poly_drift_res.groupby(by=['dataset_name', 'data_type_name']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,plsr_train_score,plsr_test_score,lr_path,pol_lr_path,trans_path,plsr_path
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
not-sampled_no-ref-window,get_by_feature_score,0.92764,0.764802,1.0,-13.398087,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_thresh_poly,0.885123,0.8443703,1.0,-5.734349,0.869359,0.853034,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_weighted_poly,0.927645,0.7628048,1.0,-104.622892,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_weighted_thresh_poly,0.369276,0.1120377,0.986066,-27035.432059,0.30521,0.07817,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_by_feature_score,0.875334,0.8183723,1.0,-2.057261,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_thresh_poly,0.780881,0.6925125,1.0,-20.821386,0.774923,0.704585,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_weighted_poly,0.875357,0.8185632,1.0,-4.99394,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_weighted_thresh_poly,0.267007,0.1263856,0.957952,-100.187317,0.253336,0.150204,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
sampled_no-ref-window,get_by_feature_score,0.838078,0.6790309,1.0,-20.303241,0.802084,0.590888,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...
sampled_no-ref-window,get_thresh_poly,0.819654,0.753664,1.0,-3.406349,0.792692,0.745368,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...


In [512]:
ind_cols = ['dataset_name', 'data_type_name']
not_ind_cols = [c for c in poly_drift_res.columns if c not in ind_cols]
poly_drift_res_agg = pd.DataFrame(columns=not_ind_cols, index=poly_drift_res.groupby(by=ind_cols, ).max().index)

for ind in poly_drift_res_agg.index:
    sub = poly_drift_res[(poly_drift_res[ind_cols[0]] == ind[0]) & (poly_drift_res[ind_cols[1]] == ind[1])].drop_duplicates(
        subset=['lr_test_score'], keep='last'
    )

    lr_max_ind, pol_max_ind = sub[['lr_test_score', 'pol_lr_test_score']].idxmax()
    if lr_max_ind != pol_max_ind:
        if sub.loc[lr_max_ind, 'lr_test_score'] - sub.loc[pol_max_ind, 'lr_test_score'
        ] >= sub.loc[pol_max_ind, 'pol_lr_test_score'] - sub.loc[lr_max_ind, 'pol_lr_test_score']:
            lr_max_ind = lr_max_ind
        else:
            lr_max_ind = pol_max_ind

    poly_drift_res_agg.loc[ind] = sub.loc[lr_max_ind, not_ind_cols]

poly_drift_res_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,plsr_train_score,plsr_test_score,lr_path,pol_lr_path,trans_path,plsr_path
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
not-sampled_no-ref-window,get_by_feature_score,0.92764,0.764802,1.0,-13.398087,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_thresh_poly,0.885123,0.84437,1.0,-5.734349,0.869359,0.853034,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_weighted_poly,0.927645,0.762805,1.0,-104.622892,0.901254,0.841816,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_no-ref-window,get_weighted_thresh_poly,0.369276,0.112038,0.986066,-27035.432059,0.30521,0.07817,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_by_feature_score,0.875334,0.818372,1.0,-2.057261,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_thresh_poly,0.780881,0.692513,1.0,-20.821386,0.774923,0.704585,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_weighted_poly,0.875357,0.818563,1.0,-4.99394,0.83624,0.781687,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
not-sampled_ref-window-1k,get_weighted_thresh_poly,0.267007,0.126386,0.957952,-100.187317,0.253336,0.150204,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...,data/generated/models/poly_drift_1/not-sampled...
sampled_no-ref-window,get_by_feature_score,0.838078,0.679031,1.0,-20.303241,0.802084,0.590888,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...
sampled_no-ref-window,get_thresh_poly,0.819654,0.753664,1.0,-3.406349,0.792692,0.745368,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...,data/generated/models/poly_drift_1/sampled_no-...


In [531]:
poly_drift_res_agg[[c for c in poly_drift_res_agg.columns if 'path' not in c]].style.background_gradient(subset=['lr_train_score', 'lr_test_score', 'pol_lr_train_score', 'pol_lr_test_score', 'plsr_train_score', 'plsr_test_score'], cmap ='coolwarm', vmin=-1, vmax=1, axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,plsr_train_score,plsr_test_score
dataset_name,data_type_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
not-sampled_no-ref-window,get_by_feature_score,0.92764,0.764802,1.0,-13.398087,0.901254,0.841816
not-sampled_no-ref-window,get_thresh_poly,0.885123,0.84437,1.0,-5.734349,0.869359,0.853034
not-sampled_no-ref-window,get_weighted_poly,0.927645,0.762805,1.0,-104.622892,0.901254,0.841816
not-sampled_no-ref-window,get_weighted_thresh_poly,0.369276,0.112038,0.986066,-27035.432059,0.30521,0.07817
not-sampled_ref-window-1k,get_by_feature_score,0.875334,0.818372,1.0,-2.057261,0.83624,0.781687
not-sampled_ref-window-1k,get_thresh_poly,0.780881,0.692513,1.0,-20.821386,0.774923,0.704585
not-sampled_ref-window-1k,get_weighted_poly,0.875357,0.818563,1.0,-4.99394,0.83624,0.781687
not-sampled_ref-window-1k,get_weighted_thresh_poly,0.267007,0.126386,0.957952,-100.187317,0.253336,0.150204
sampled_no-ref-window,get_by_feature_score,0.838078,0.679031,1.0,-20.303241,0.802084,0.590888
sampled_no-ref-window,get_thresh_poly,0.819654,0.753664,1.0,-3.406349,0.792692,0.745368


In [426]:
one_drift_res[one_drift_res.stat_test == 'wasserstein']

Unnamed: 0,stat_test,lr_train_score,lr_test_score,pol_lr_train_score,pol_lr_test_score,lr_path,pol_lr_path,trans_path,dataset_name,data_type_name
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.688865,0.731751,0.801007,0.747313,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_weighted_poly
2,wasserstein,0.708998,0.749779,0.811042,0.758648,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_thresh_poly
2,wasserstein,0.241575,0.057985,0.29854,0.057783,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_no-ref-window,get_weighted_thresh_poly
2,wasserstein,0.399368,0.296425,0.457889,-0.101424,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,sampled_no-ref-window,get_by_feature_score
2,wasserstein,0.399368,0.296425,0.457889,-0.101424,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,sampled_no-ref-window,get_weighted_poly
2,wasserstein,0.399207,0.293871,0.45324,-0.060839,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,sampled_no-ref-window,get_thresh_poly
2,wasserstein,0.326533,0.215143,0.379458,-0.128695,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,data/generated/models/one_drift_1/sampled_no-r...,sampled_no-ref-window,get_weighted_thresh_poly
2,wasserstein,0.598487,0.573707,0.692916,0.59504,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_ref-window-1k,get_by_feature_score
2,wasserstein,0.598487,0.573707,0.692916,0.59504,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,data/generated/models/one_drift_1/not-sampled_...,not-sampled_ref-window-1k,get_weighted_poly


In [172]:
df_to_train = orig_df[['cell_id']].copy()

df_to_train[stat_tests] = orig_df[stat_tests].apply(lambda x: list(map(get_by_feature_score, x)))
df_to_train

Unnamed: 0,cell_id,default,ks,wasserstein,kl_div,psi,jensenshannon,cramer_von_mises,hellinger,ed,t_test
0,25771,"{'HR Usage Rate': 1.1976287701332842, 'Number ...","{'HR Usage Rate': 7.591622950770644e-104, 'Num...","{'HR Usage Rate': 1.1976287701332842, 'Number ...","{'HR Usage Rate': 3.7062426735260177, 'Number ...","{'HR Usage Rate': 4.86724178342445, 'Number of...","{'HR Usage Rate': 0.575628726830956, 'Number o...","{'HR Usage Rate': 4.905048662462441e-06, 'Numb...","{'HR Usage Rate': 0.6574633087573316, 'Number ...","{'HR Usage Rate': 6.065705893467136, 'Number o...","{'HR Usage Rate': 1.0419153009893417e-68, 'Num..."
1,22944,"{'HR Usage Rate': 1.2979277703511152, 'Number ...","{'HR Usage Rate': 1.1334687345106701e-135, 'Nu...","{'HR Usage Rate': 1.2979277703511152, 'Number ...","{'HR Usage Rate': 4.163990459381986, 'Number o...","{'HR Usage Rate': 5.622165134851706, 'Number o...","{'HR Usage Rate': 0.6279968022047079, 'Number ...","{'HR Usage Rate': 4.9088264401087045e-06, 'Num...","{'HR Usage Rate': 0.7136675083131901, 'Number ...","{'HR Usage Rate': 6.630028523223908, 'Number o...","{'HR Usage Rate': 2.2704136824497126e-80, 'Num..."
2,26335,"{'HR Usage Rate': 0.9071320123769095, 'Number ...","{'HR Usage Rate': 1.2653943129504837e-64, 'Num...","{'HR Usage Rate': 0.9071320123769095, 'Number ...","{'HR Usage Rate': 2.780294497033895, 'Number o...","{'HR Usage Rate': 4.074188776713164, 'Number o...","{'HR Usage Rate': 0.5595246271064998, 'Number ...","{'HR Usage Rate': 5.0004824249105795e-06, 'Num...","{'HR Usage Rate': 0.6009453204497797, 'Number ...","{'HR Usage Rate': 4.72124461485425, 'Number of...","{'HR Usage Rate': 1.8510901624030256e-36, 'Num..."
3,26332,"{'HR Usage Rate': 0.5379044436322102, 'Number ...","{'HR Usage Rate': 6.081317437220293e-21, 'Numb...","{'HR Usage Rate': 0.5379044436322102, 'Number ...","{'HR Usage Rate': 2.488766830623767, 'Number o...","{'HR Usage Rate': 3.1950646518384946, 'Number ...","{'HR Usage Rate': 0.44950735434077754, 'Number...","{'HR Usage Rate': 4.997305700693566e-06, 'Numb...","{'HR Usage Rate': 0.5019637836597516, 'Number ...","{'HR Usage Rate': 2.725551669592239, 'Number o...","{'HR Usage Rate': 0.0027064095018628203, 'Numb..."
4,26336,"{'HR Usage Rate': 1.061184487440827, 'Number o...","{'HR Usage Rate': 1.5027311605770971e-80, 'Num...","{'HR Usage Rate': 1.061184487440827, 'Number o...","{'HR Usage Rate': 2.5136201786415784, 'Number ...","{'HR Usage Rate': 3.7042953544828925, 'Number ...","{'HR Usage Rate': 0.5374976455558951, 'Number ...","{'HR Usage Rate': 5.001573155971606e-06, 'Numb...","{'HR Usage Rate': 0.5721575798050974, 'Number ...","{'HR Usage Rate': 5.416631967292511, 'Number o...","{'HR Usage Rate': 1.3959248434144886e-52, 'Num..."
...,...,...,...,...,...,...,...,...,...,...,...
989,5683,"{'HR Usage Rate': 0.9820104306067731, 'Number ...","{'HR Usage Rate': 2.5903946707991135e-76, 'Num...","{'HR Usage Rate': 0.9820104306067731, 'Number ...","{'HR Usage Rate': 4.388459114030215, 'Number o...","{'HR Usage Rate': 7.161338658841928, 'Number o...","{'HR Usage Rate': 0.7334613985642259, 'Number ...","{'HR Usage Rate': 5.669057461799021e-06, 'Numb...","{'HR Usage Rate': 0.7983310936310104, 'Number ...","{'HR Usage Rate': 5.008225431708757, 'Number o...","{'HR Usage Rate': 3.2955950015159177e-19, 'Num..."
990,5682,"{'HR Usage Rate': 0.8410234443432856, 'Number ...","{'HR Usage Rate': 1.874644384303983e-40, 'Numb...","{'HR Usage Rate': 0.8410234443432856, 'Number ...","{'HR Usage Rate': 5.097381368509053, 'Number o...","{'HR Usage Rate': 8.000810785035734, 'Number o...","{'HR Usage Rate': 0.7427547383910575, 'Number ...","{'HR Usage Rate': 5.667761503902469e-06, 'Numb...","{'HR Usage Rate': 0.7951676130364934, 'Number ...","{'HR Usage Rate': 4.129336781319746, 'Number o...","{'HR Usage Rate': 0.3640913158645145, 'Number ..."
991,12772,"{'HR Usage Rate': 1.2466948151441566, 'Number ...","{'HR Usage Rate': 8.341284159686772e-69, 'Numb...","{'HR Usage Rate': 1.2466948151441566, 'Number ...","{'HR Usage Rate': 4.079525407130097, 'Number o...","{'HR Usage Rate': 5.433990672069045, 'Number o...","{'HR Usage Rate': 0.6118123952959035, 'Number ...","{'HR Usage Rate': 6.790870074935285e-06, 'Numb...","{'HR Usage Rate': 0.6967151592616228, 'Number ...","{'HR Usage Rate': 6.38616077893821, 'Number of...","{'HR Usage Rate': 3.318201383246856e-42, 'Numb..."
992,12771,"{'HR Usage Rate': 0.7424200573217139, 'Number ...","{'HR Usage Rate': 2.028429063251877e-23, 'Numb...","{'HR Usage Rate': 0.7424200573217139, 'Number ...","{'HR Usage Rate': 3.194504550163467, 'Number o...","{'HR Usage Rate': 4.167343653707366, 'Number o...","{'HR Usage Rate': 0.5197651862025531, 'Number ...","{'HR Usage Rate': 6.788794320811142e-06, 'Numb...","{'HR Usage Rate': 0.5896442067849711, 'Number ...","{'HR Usage Rate': 3.7923316469803607, 'Number ...","{'HR Usage Rate': 5.156540359019711e-09, 'Numb..."


In [26]:
pd.DataFrame(df_to_train['default'].tolist(), index=df_to_train['cell_id'])

Unnamed: 0_level_0,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH"
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25771,1.197629,0.784173,1.050900,0.926529,0.080808,0.584263
22944,1.297928,2.191368,1.613654,1.655267,0.082576,1.672209
26335,0.907132,0.785231,0.869361,1.265191,0.106585,0.566432
26332,0.537904,0.786818,1.401794,1.425661,0.117485,0.561885
26336,1.061184,1.181181,0.936464,1.021152,0.071666,0.811645
...,...,...,...,...,...,...
5683,0.982010,0.773861,0.785553,0.863654,0.077324,0.661153
5682,0.841023,0.779998,0.747635,0.817129,0.079045,0.649416
12772,1.246695,0.786818,1.030905,0.940693,0.165583,1.135882
12771,0.742420,1.032760,0.889983,1.003829,0.077832,0.680220


In [181]:

tmp_res = pd.DataFrame(index=df_to_train['cell_id'])

for st in stat_tests:
    tmp = pd.DataFrame(df_to_train[st].tolist(), index=df_to_train['cell_id'])
    tmp_res = pd.merge(tmp_res, tmp, left_on='cell_id', right_on='cell_id', how='inner', suffixes=('', '_' + st))

tmp_res

Unnamed: 0_level_0,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",HR Usage Rate_ks,Number of Available\nTCH_ks,Param 1_ks,Param 2_ks,...,Param 1_ed,Param 2_ed,"TCH Blocking Rate, BH_ed","TCH Traffic (Erl), BH_ed",HR Usage Rate_t_test,Number of Available\nTCH_t_test,Param 1_t_test,Param 2_t_test,"TCH Blocking Rate, BH_t_test","TCH Traffic (Erl), BH_t_test"
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
25771,1.197629,0.784173,1.050900,0.926529,0.080808,0.584263,7.591623e-104,1.879251e-41,3.556443e-121,2.629422e-100,...,5.010297,4.445901,0.079335,1.360775,1.041915e-68,5.148241e-05,9.564843e-14,2.781442e-04,0.492073,2.322765e-06
22944,1.297928,2.191368,1.613654,1.655267,0.082576,1.672209,1.133469e-135,7.362552e-253,8.256022e-156,5.855831e-174,...,7.224778,7.620695,0.086742,3.940322,2.270414e-80,8.993711e-220,1.550624e-107,7.918434e-124,0.227054,7.029083e-128
26335,0.907132,0.785231,0.869361,1.265191,0.106585,0.566432,1.265394e-64,1.879251e-41,1.559945e-101,8.308986e-132,...,4.045081,6.129369,0.265323,1.309287,1.851090e-36,5.499490e-05,1.706601e-01,8.111307e-46,0.663555,1.918995e-08
26332,0.537904,0.786818,1.401794,1.425661,0.117485,0.561885,6.081317e-21,1.879251e-41,0.000000e+00,0.000000e+00,...,6.480524,6.782690,0.214598,1.293051,2.706410e-03,6.069208e-05,1.988346e-93,1.499059e-96,0.130952,3.308649e-06
26336,1.061184,1.181181,0.936464,1.021152,0.071666,0.811645,1.502731e-80,2.742067e-140,1.217028e-108,8.522726e-112,...,4.426446,4.973958,0.050677,2.145887,1.395925e-52,7.211670e-55,1.442323e-04,3.078707e-12,0.349701,1.347800e-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5683,0.982010,0.773861,0.785553,0.863654,0.077324,0.661153,2.590395e-76,2.850837e-32,1.920628e-68,7.116561e-70,...,3.574599,4.120631,0.068662,1.597084,3.295595e-19,2.166344e-04,6.097183e-20,2.267497e-22,0.319129,5.772220e-06
5682,0.841023,0.779998,0.747635,0.817129,0.079045,0.649416,1.874644e-40,2.850837e-32,8.084292e-59,4.233801e-60,...,3.334335,3.834367,0.075792,1.583515,3.640913e-01,2.949286e-04,9.769606e-17,1.995662e-18,0.308480,3.903197e-02
12772,1.246695,0.786818,1.030905,0.940693,0.165583,1.135882,8.341284e-69,1.792384e-23,1.197157e-65,1.004326e-56,...,4.913287,4.528865,0.405727,3.039756,3.318201e-42,2.674240e-03,3.033114e-07,1.697265e-03,0.270951,8.943454e-27
12771,0.742420,1.032760,0.889983,1.003829,0.077832,0.680220,2.028429e-23,3.737322e-38,1.377859e-83,3.128631e-84,...,4.166003,4.881559,0.076101,1.713322,5.156540e-09,2.310881e-21,3.729645e-21,2.559426e-26,0.493979,1.406861e-05


## Test on linear regression

In [157]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split
from joblib import dump, load
import os.path as osp
import numpy as np

In [100]:
save_path = '/home/rid/Projects/Study/Magister/Dyploma/Bachelor/data/generated/models'

In [322]:
df_default = pd.DataFrame(df_to_train['default'].tolist(), index=df_to_train['cell_id'])
df_default = df_default.merge(rewards[['cell_id', 'quality_avg']], left_on='cell_id', right_on='cell_id', how='inner')
df_default

Unnamed: 0,cell_id,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",quality_avg
0,25771,1.197629,0.784173,1.050900,0.926529,0.080808,0.584263,0.986406
1,22944,1.297928,2.191368,1.613654,1.655267,0.082576,1.672209,0.983747
2,26335,0.907132,0.785231,0.869361,1.265191,0.106585,0.566432,0.986885
3,26332,0.537904,0.786818,1.401794,1.425661,0.117485,0.561885,0.978928
4,26336,1.061184,1.181181,0.936464,1.021152,0.071666,0.811645,0.989507
...,...,...,...,...,...,...,...,...
989,5683,0.982010,0.773861,0.785553,0.863654,0.077324,0.661153,0.992266
990,5682,0.841023,0.779998,0.747635,0.817129,0.079045,0.649416,0.991784
991,12772,1.246695,0.786818,1.030905,0.940693,0.165583,1.135882,0.968540
992,12771,0.742420,1.032760,0.889983,1.003829,0.077832,0.680220,0.990560


In [323]:
features = [f for f in df_default.columns if f not in ['cell_id', 'quality_avg']]
label = ['quality_avg']

In [327]:
X_train, X_test, y_train, y_test = train_test_split(df_default[features], df_default[label], test_size=0.3, random_state=3407)

In [328]:
X_train

Unnamed: 0,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH"
206,0.665482,0.786289,1.236062,1.463641,0.203430,0.453889
338,0.732590,0.783644,0.786180,0.940693,0.068827,0.629708
818,1.064911,1.247671,0.956013,1.029450,0.067231,0.873499
938,1.282694,1.247671,1.132325,0.988348,0.088855,0.810791
635,0.382260,0.885769,0.991343,0.836248,0.081716,0.703709
...,...,...,...,...,...,...
775,0.578513,0.785760,0.889983,0.980719,0.089016,0.646282
701,1.059833,0.786818,0.955199,0.859881,0.073752,0.468377
467,1.040334,1.247671,1.360279,1.425661,0.099327,0.862635
580,0.469054,0.786818,1.132325,1.481656,0.082576,0.706588


In [329]:
y_test

Unnamed: 0,quality_avg
462,0.991632
105,0.987623
943,0.991124
19,0.991694
108,0.986945
...,...
877,0.984887
99,0.993430
855,0.991447
153,0.990276


## LinearRegression

In [330]:
model_lr = LinearRegression()

In [331]:
model_lr.fit(X_train, y_train)

In [332]:
model_lr.score(X_train, y_train)

0.6888653603106805

In [333]:
model_lr.score(X_test, y_test)

0.7317514603810608

In [334]:
model_lr.coef_

array([[-0.00355263,  0.00028745, -0.00753265,  0.00474244, -0.04550634,
        -0.00373596]])

In [335]:
model_lr.intercept_

array([0.99908513])

In [101]:
# dump(model_lr, osp.join(save_path, 'model_lr_69.joblib'))

['/home/rid/Projects/Study/Magister/Dyploma/Bachelor/data/generated/models/model_lr_69.joblib']

## PolynomialFeatures

In [336]:
transformer = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)

In [337]:
X_train_polynom = transformer.fit_transform(X_train)
X_test_polynom = transformer.transform(X_test)

In [338]:
model_pol_lr = LinearRegression().fit(X_train_polynom, y_train)

In [339]:
model_pol_lr.score(X_train_polynom, y_train)

0.8010072279874878

In [340]:
model_pol_lr.score(X_test_polynom, y_test)

0.7473130977180884

In [341]:
model_pol_lr.coef_

array([[-5.72020771e-03, -1.00976797e-02,  4.22664768e-03,
        -9.02314972e-03, -2.88252770e-02,  1.38153514e-02,
        -6.48722431e-03,  4.89782278e-05,  2.71649762e-04,
        -1.66052245e-02,  1.47399716e-02,  1.02729892e-02,
        -2.95982657e-03,  7.21571801e-02, -1.39754437e-03,
         4.01924739e-03, -4.41641427e-02, -2.48503882e-02,
         2.35633757e-02,  1.00685403e-02, -8.38560803e-02]])

In [342]:
model_lr.intercept_

array([0.99908513])

In [102]:
# dump(model_pol_lr, osp.join(save_path, 'model_pol_lr_76.joblib'))

['/home/rid/Projects/Study/Magister/Dyploma/Bachelor/data/generated/models/model_pol_lr_76.joblib']

## PLSRegression

In [343]:
from sklearn.cross_decomposition import PLSRegression

In [344]:
reg = PLSRegression(n_components=6)

In [348]:
reg.fit(X_train_polynom, y_train.to_numpy().ravel())

In [350]:
reg.score(X_train_polynom, y_train.to_numpy().ravel())

0.7641170403189254

In [352]:
reg.score(X_test_polynom, y_test.to_numpy().ravel())

0.7316923927067003

In [547]:
pd.DataFrame(reg.coef_.T).sort_values(by=0).style.background_gradient(cmap ='coolwarm', axis=0)

Unnamed: 0,0
20,-0.004755
16,-0.004141
9,-0.003143
18,-0.002062
17,-0.001474
0,-0.001396
11,-0.00075
14,-0.000712
2,-0.000675
1,-0.000629


In [543]:
np.argpartition(np.abs(list(reg.coef_)), -6)[-6:]

array([[14, 10,  2, 19,  4,  5,  6,  7,  8, 15,  1, 11, 12,  0, 17,  3,
        18,  9, 16, 13, 20]])

## Model's weight visualise

In [550]:
fs = ['HR Usage Rate', 'Number of Available\nTCH', 'Param 1', 'Param 2', 'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'HR Usage Rate_ks', 'Number of Available\nTCH_ks', 'Param 1_ks', 'Param 2_ks', 'TCH Blocking Rate, BH_ks', 'TCH Traffic (Erl), BH_ks', 'HR Usage Rate_wasserstein', 'Number of Available\nTCH_wasserstein', 'Param 1_wasserstein', 'Param 2_wasserstein', 'TCH Blocking Rate, BH_wasserstein', 'TCH Traffic (Erl), BH_wasserstein', 'HR Usage Rate_kl_div', 'Number of Available\nTCH_kl_div', 'Param 1_kl_div', 'Param 2_kl_div', 'TCH Blocking Rate, BH_kl_div', 'TCH Traffic (Erl), BH_kl_div', 'HR Usage Rate_psi', 'Number of Available\nTCH_psi', 'Param 1_psi', 'Param 2_psi', 'TCH Blocking Rate, BH_psi', 'TCH Traffic (Erl), BH_psi', 'HR Usage Rate_jensenshannon', 'Number of Available\nTCH_jensenshannon', 'Param 1_jensenshannon', 'Param 2_jensenshannon', 'TCH Blocking Rate, BH_jensenshannon', 'TCH Traffic (Erl), BH_jensenshannon', 'HR Usage Rate_cramer_von_mises', 'Number of Available\nTCH_cramer_von_mises', 'Param 1_cramer_von_mises', 'Param 2_cramer_von_mises', 'TCH Blocking Rate, BH_cramer_von_mises', 'TCH Traffic (Erl), BH_cramer_von_mises', 'HR Usage Rate_hellinger', 'Number of Available\nTCH_hellinger', 'Param 1_hellinger', 'Param 2_hellinger', 'TCH Blocking Rate, BH_hellinger', 'TCH Traffic (Erl), BH_hellinger', 'HR Usage Rate_ed', 'Number of Available\nTCH_ed', 'Param 1_ed', 'Param 2_ed', 'TCH Blocking Rate, BH_ed', 'TCH Traffic (Erl), BH_ed', 'HR Usage Rate_t_test', 'Number of Available\nTCH_t_test', 'Param 1_t_test', 'Param 2_t_test', 'TCH Blocking Rate, BH_t_test', 'TCH Traffic (Erl), BH_t_test']

In [634]:
def coefs_viz(paths: pd.Series, m_cols: List[str]) -> pd.DataFrame:
    c_df = pd.DataFrame()

    for p in paths:
        m = load(p)
        if len(m.coef_) != 1:
            m.coef_ = m.coef_.reshape(1, -1)

        c_df = pd.concat([c_df, pd.DataFrame(m.coef_, columns=m_cols, index=[p])],)

    return c_df

### PLSR

In [635]:
plsr_res = coefs_viz(poly_drift_res.plsr_path, fs)
plsr_res

Unnamed: 0,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",HR Usage Rate_ks,Number of Available\nTCH_ks,Param 1_ks,Param 2_ks,...,Param 1_ed,Param 2_ed,"TCH Blocking Rate, BH_ed","TCH Traffic (Erl), BH_ed",HR Usage Rate_t_test,Number of Available\nTCH_t_test,Param 1_t_test,Param 2_t_test,"TCH Blocking Rate, BH_t_test","TCH Traffic (Erl), BH_t_test"
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_by_feature_score/plsr_all_stats.joblib,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,6.9e-05,1.7e-05,-0.000135,-0.000135,...,-5.4e-05,0.000304,-0.005942,-0.000361,-7.2e-05,-9.4e-05,-0.000147,-7.5e-05,-0.000466,-2.6e-05
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_weighted_poly/plsr_all_stats.joblib,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,6.9e-05,1.7e-05,-0.000135,-0.000135,...,-5.4e-05,0.000304,-0.005942,-0.000361,-7.2e-05,-9.4e-05,-0.000147,-7.5e-05,-0.000466,-2.6e-05
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_thresh_poly/plsr_all_stats.joblib,5.6e-05,-0.000229,-0.000213,0.000207,-0.002123,-0.000295,0.0,0.0,0.0,0.0,...,-4.5e-05,0.000242,-0.005457,-0.00039,-8.4e-05,-6.3e-05,-1.2e-05,-0.000186,0.00046,1.2e-05
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_weighted_thresh_poly/plsr_all_stats.joblib,0.000535,0.000111,-0.001272,0.000625,-0.002206,-0.000725,0.0,0.0,0.0,0.0,...,-0.000295,0.001041,0.0,-0.00053,-0.000314,-0.00013,-0.000149,6.9e-05,3.2e-05,-0.000285
data/generated/models/poly_drift_1/sampled_no-ref-window/get_by_feature_score/plsr_all_stats.joblib,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,-0.000198,-3.4e-05,-1.5e-05,-7e-06,...,-0.000286,5.9e-05,-0.007713,-0.000657,-0.00021,-4.4e-05,7.4e-05,-0.000133,-0.001266,-0.000219
data/generated/models/poly_drift_1/sampled_no-ref-window/get_weighted_poly/plsr_all_stats.joblib,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,-0.000198,-3.4e-05,-1.5e-05,-7e-06,...,-0.000286,5.9e-05,-0.007713,-0.000657,-0.00021,-4.4e-05,7.4e-05,-0.000133,-0.001266,-0.000219
data/generated/models/poly_drift_1/sampled_no-ref-window/get_thresh_poly/plsr_all_stats.joblib,0.0,0.0,0.0,0.0,0.000265,-0.000132,0.0,0.0,0.0,0.0,...,-0.000209,0.000207,-0.007569,-0.000728,-0.000104,-0.000163,0.000141,-4.7e-05,-0.001201,-0.000199
data/generated/models/poly_drift_1/sampled_no-ref-window/get_weighted_thresh_poly/plsr_all_stats.joblib,0.0,0.0,0.0,0.0,0.000232,0.0,0.0,0.0,0.0,0.0,...,-0.000608,0.000663,0.0,-0.000489,-0.000774,1.4e-05,-0.000171,-0.000488,7.1e-05,-0.000533
data/generated/models/poly_drift_1/not-sampled_ref-window-1k/get_by_feature_score/plsr_all_stats.joblib,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.00013,-2.4e-05,0.000146,-6.2e-05,...,-0.000103,0.000458,-0.00575,-2.8e-05,-0.000142,0.000252,-7.9e-05,-0.000163,-0.000604,0.00027
data/generated/models/poly_drift_1/not-sampled_ref-window-1k/get_weighted_poly/plsr_all_stats.joblib,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.00013,-2.4e-05,0.000146,-6.2e-05,...,-0.000103,0.000458,-0.00575,-2.8e-05,-0.000142,0.000252,-7.9e-05,-0.000163,-0.000604,0.00027


In [569]:
plsr_res.reset_index(drop=True).style.background_gradient(cmap ='coolwarm', axis=1)

Unnamed: 0,HR Usage Rate,Number of Available TCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",HR Usage Rate_ks,Number of Available TCH_ks,Param 1_ks,Param 2_ks,"TCH Blocking Rate, BH_ks","TCH Traffic (Erl), BH_ks",HR Usage Rate_wasserstein,Number of Available TCH_wasserstein,Param 1_wasserstein,Param 2_wasserstein,"TCH Blocking Rate, BH_wasserstein","TCH Traffic (Erl), BH_wasserstein",HR Usage Rate_kl_div,Number of Available TCH_kl_div,Param 1_kl_div,Param 2_kl_div,"TCH Blocking Rate, BH_kl_div","TCH Traffic (Erl), BH_kl_div",HR Usage Rate_psi,Number of Available TCH_psi,Param 1_psi,Param 2_psi,"TCH Blocking Rate, BH_psi","TCH Traffic (Erl), BH_psi",HR Usage Rate_jensenshannon,Number of Available TCH_jensenshannon,Param 1_jensenshannon,Param 2_jensenshannon,"TCH Blocking Rate, BH_jensenshannon","TCH Traffic (Erl), BH_jensenshannon",HR Usage Rate_cramer_von_mises,Number of Available TCH_cramer_von_mises,Param 1_cramer_von_mises,Param 2_cramer_von_mises,"TCH Blocking Rate, BH_cramer_von_mises","TCH Traffic (Erl), BH_cramer_von_mises",HR Usage Rate_hellinger,Number of Available TCH_hellinger,Param 1_hellinger,Param 2_hellinger,"TCH Blocking Rate, BH_hellinger","TCH Traffic (Erl), BH_hellinger",HR Usage Rate_ed,Number of Available TCH_ed,Param 1_ed,Param 2_ed,"TCH Blocking Rate, BH_ed","TCH Traffic (Erl), BH_ed",HR Usage Rate_t_test,Number of Available TCH_t_test,Param 1_t_test,Param 2_t_test,"TCH Blocking Rate, BH_t_test","TCH Traffic (Erl), BH_t_test"
0,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,6.9e-05,1.7e-05,-0.000135,-0.000135,-0.000617,-0.000141,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,0.000181,4.7e-05,-3.1e-05,-6.3e-05,-0.000885,0.000381,-3.6e-05,0.00012,-7.3e-05,-2.3e-05,-0.000644,7.3e-05,-0.000566,0.000264,-3.7e-05,2.6e-05,-0.001815,-0.000135,-9.8e-05,2e-05,-2.5e-05,3.2e-05,8e-06,0.000226,-0.000411,-0.000149,0.000289,-0.0005,-0.000277,-0.000208,3e-06,-0.000469,-5.4e-05,0.000304,-0.005942,-0.000361,-7.2e-05,-9.4e-05,-0.000147,-7.5e-05,-0.000466,-2.6e-05
1,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,6.9e-05,1.7e-05,-0.000135,-0.000135,-0.000617,-0.000141,0.000116,-0.000222,-0.000328,0.000192,-0.000928,-0.000379,0.000181,4.7e-05,-3.1e-05,-6.3e-05,-0.000885,0.000381,-3.6e-05,0.00012,-7.3e-05,-2.3e-05,-0.000644,7.3e-05,-0.000566,0.000264,-3.7e-05,2.6e-05,-0.001815,-0.000135,-9.8e-05,2e-05,-2.5e-05,3.2e-05,8e-06,0.000226,-0.000411,-0.000149,0.000289,-0.0005,-0.000277,-0.000208,3e-06,-0.000469,-5.4e-05,0.000304,-0.005942,-0.000361,-7.2e-05,-9.4e-05,-0.000147,-7.5e-05,-0.000466,-2.6e-05
2,5.6e-05,-0.000229,-0.000213,0.000207,-0.002123,-0.000295,0.0,0.0,0.0,0.0,-0.000668,0.0,5.6e-05,-0.000229,-0.000213,0.000207,-0.002123,-0.000295,0.000266,0.00025,1.1e-05,-9e-06,-0.001118,0.000465,-2.5e-05,0.000219,-7.3e-05,-2e-06,-7.4e-05,0.000157,-0.000786,-0.000125,-0.000216,-1.8e-05,-0.000212,-4.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,-0.00038,-0.000205,0.000327,-0.000355,8e-06,-0.000136,3.1e-05,-0.000473,-4.5e-05,0.000242,-0.005457,-0.00039,-8.4e-05,-6.3e-05,-1.2e-05,-0.000186,0.00046,1.2e-05
3,0.000535,0.000111,-0.001272,0.000625,-0.002206,-0.000725,0.0,0.0,0.0,0.0,0.000772,0.0,0.000535,0.000111,-0.001272,0.000625,-0.002206,-0.000725,4.6e-05,2.7e-05,0.000127,2e-05,0.0,0.000862,-0.000709,0.000202,-0.000519,-0.000472,0.0,-0.000333,-0.001841,0.000576,-0.000516,-0.000421,0.0,0.000198,0.0,0.0,0.0,0.0,0.0,0.0,-0.001273,0.000534,0.001062,-0.000281,0.0,0.00055,0.000417,-0.000531,-0.000295,0.001041,0.0,-0.00053,-0.000314,-0.00013,-0.000149,6.9e-05,3.2e-05,-0.000285
4,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,5.4e-05,-0.000432,-0.000441,4.3e-05,-0.003035,-0.00041,-9.2e-05,0.000269,6.9e-05,4e-05,0.001114,0.000592,-0.000126,0.000149,0.00012,9.3e-05,0.000236,0.00013,-0.000195,0.000336,8.3e-05,-1.9e-05,0.000274,-0.0002,0.000366,-6.2e-05,-0.000142,0.000555,-0.001206,-1.1e-05,-0.000255,0.000247,1e-05,-0.000236,-0.001231,0.000176,-4.2e-05,-0.000901,-0.000286,5.9e-05,-0.007713,-0.000657,-0.00021,-4.4e-05,7.4e-05,-0.000133,-0.001266,-0.000219
5,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,-0.000198,-3.4e-05,-1.5e-05,-7e-06,0.000216,-3.1e-05,5.4e-05,-0.000432,-0.000441,4.3e-05,-0.003035,-0.00041,-9.2e-05,0.000269,6.9e-05,4e-05,0.001114,0.000592,-0.000126,0.000149,0.00012,9.3e-05,0.000236,0.00013,-0.000195,0.000336,8.3e-05,-1.9e-05,0.000274,-0.0002,0.000366,-6.2e-05,-0.000142,0.000555,-0.001206,-1.1e-05,-0.000255,0.000247,1e-05,-0.000236,-0.001231,0.000176,-4.2e-05,-0.000901,-0.000286,5.9e-05,-0.007713,-0.000657,-0.00021,-4.4e-05,7.4e-05,-0.000133,-0.001266,-0.000219
6,0.0,0.0,0.0,0.0,0.000265,-0.000132,0.0,0.0,0.0,0.0,0.000265,-0.000132,-1.7e-05,-0.000358,-0.000444,7.3e-05,-0.003001,-0.000306,-8.9e-05,0.0003,0.000172,-1.5e-05,0.00112,0.000621,-0.000166,0.000155,0.000286,3.8e-05,0.000271,0.000154,9.3e-05,0.000192,-0.000263,2e-06,4.4e-05,-0.000563,-9.2e-05,0.0,0.0,0.0,-0.001221,0.0,-0.000168,0.000149,-0.000204,-5.9e-05,-0.001363,0.000221,-9.4e-05,-0.000962,-0.000209,0.000207,-0.007569,-0.000728,-0.000104,-0.000163,0.000141,-4.7e-05,-0.001201,-0.000199
7,0.0,0.0,0.0,0.0,0.000232,0.0,0.0,0.0,0.0,0.0,0.000232,0.0,0.000446,-0.000369,-0.001222,8.7e-05,-0.004555,-0.000466,-0.000655,0.00016,8e-05,0.000114,0.001077,0.000966,-0.000824,0.000183,-4.6e-05,0.000118,-0.003299,0.000139,4.5e-05,0.00074,0.000267,0.000184,0.0,-0.000432,-1.8e-05,0.0,0.0,0.0,0.0,0.0,-0.000776,0.000656,-0.000341,-0.000597,0.0,0.000192,0.000223,-0.001356,-0.000608,0.000663,0.0,-0.000489,-0.000774,1.4e-05,-0.000171,-0.000488,7.1e-05,-0.000533
8,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.000151,-0.000393,-0.000484,0.000268,-0.003073,0.000154,-0.000165,0.000258,-4.6e-05,-4.8e-05,0.000233,-0.000483,-0.000189,0.000167,-0.000333,5.3e-05,-0.002745,-0.000482,1.7e-05,0.000307,-1.8e-05,-7.9e-05,0.000345,-0.000316,0.000281,-0.000128,2.6e-05,-0.000216,-0.001349,-7.4e-05,0.000102,0.000308,-0.000339,-1.1e-05,0.000541,-0.000366,-0.00026,-0.000559,-0.000103,0.000458,-0.00575,-2.8e-05,-0.000142,0.000252,-7.9e-05,-0.000163,-0.000604,0.00027
9,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.00013,-2.4e-05,0.000146,-6.2e-05,0.000495,-0.000219,-0.000151,-0.000393,-0.000484,0.000268,-0.003073,0.000154,-0.000165,0.000258,-4.6e-05,-4.8e-05,0.000233,-0.000483,-0.000189,0.000167,-0.000333,5.3e-05,-0.002745,-0.000482,1.7e-05,0.000307,-1.8e-05,-7.9e-05,0.000345,-0.000316,0.000281,-0.000128,2.6e-05,-0.000216,-0.001349,-7.4e-05,0.000102,0.000308,-0.000339,-1.1e-05,0.000541,-0.000366,-0.00026,-0.000559,-0.000103,0.000458,-0.00575,-2.8e-05,-0.000142,0.000252,-7.9e-05,-0.000163,-0.000604,0.00027


In [608]:
def get_top(df: pd.DataFrame, n = 6):
    top = pd.DataFrame(np.zeros((len(df.columns))), index=df.columns, columns=[f'number_in_top_{n}'])
    # print(top)
    for i in df.index:
        for j in df.loc[i].abs().sort_values(ascending=False)[:n].index:
            top.loc[j] = top.loc[j] +1

    return top.sort_values(by=top.columns[0], ascending=False)

In [607]:
get_top(plsr_res, 7)

Unnamed: 0,number_in_top_6
"TCH Blocking Rate, BH_wasserstein",14.0
"TCH Blocking Rate, BH_ed",12.0
"TCH Blocking Rate, BH",10.0
"TCH Blocking Rate, BH_ks",9.0
"TCH Blocking Rate, BH_cramer_von_mises",9.0
"TCH Blocking Rate, BH_t_test",8.0
"TCH Blocking Rate, BH_kl_div",7.0
"TCH Blocking Rate, BH_psi",7.0
Number of Available\nTCH_ed,7.0
"TCH Blocking Rate, BH_hellinger",5.0


### LinearR

In [636]:
plr_res = coefs_viz(poly_drift_res.lr_path, fs)
plr_res

Unnamed: 0,HR Usage Rate,Number of Available\nTCH,Param 1,Param 2,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",HR Usage Rate_ks,Number of Available\nTCH_ks,Param 1_ks,Param 2_ks,...,Param 1_ed,Param 2_ed,"TCH Blocking Rate, BH_ed","TCH Traffic (Erl), BH_ed",HR Usage Rate_t_test,Number of Available\nTCH_t_test,Param 1_t_test,Param 2_t_test,"TCH Blocking Rate, BH_t_test","TCH Traffic (Erl), BH_t_test"
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_by_feature_score/lr_all_stats.joblib,0.004081048,-75129930.0,34939640.0,-308055200.0,-240166100.0,235975500.0,0.5312881,-19621530000.0,57994650.0,162910300.0,...,0.002753,-0.003372,-0.029959,-0.002563,-0.000791,0.001638,-0.000508,-0.000654,-0.001269,0.00117
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_weighted_poly/lr_all_stats.joblib,0.004066861,113584000.0,-170909300.0,53281230.0,-324003500.0,-210723600.0,0.536111,-17246280000.0,161414500.0,-62855700.0,...,0.002943,-0.003953,-0.564533,-0.00523,-0.00079,0.001661,-0.000544,-0.000771,-0.024043,0.002404
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_thresh_poly/lr_all_stats.joblib,3.761002e-05,0.0002232398,-0.0005879206,0.0005656681,-0.0009377695,0.0002216623,2.2039800000000002e-17,6.888411000000001e-17,-5.036119e-17,1.065093e-16,...,0.000288,-0.00023,-0.002917,-0.00026,-3.7e-05,2.9e-05,2e-05,-0.000212,9.6e-05,9e-06
data/generated/models/poly_drift_1/not-sampled_no-ref-window/get_weighted_thresh_poly/lr_all_stats.joblib,0.0002188755,0.0009428642,-0.0008721953,-0.0002719177,-0.03740851,5.833259e-05,-1.784326e-16,1.111714e-16,-6.188084e-17,1.546479e-16,...,0.000121,0.000354,0.0,4.4e-05,-0.00015,-7.6e-05,-0.000133,0.000175,0.000661,0.000308
data/generated/models/poly_drift_1/sampled_no-ref-window/get_by_feature_score/lr_all_stats.joblib,-12770370000.0,120373.9,-0.08980025,0.02238894,0.05174255,-0.6576239,12770370000.0,120373.7,-0.02024469,-0.05093017,...,0.003324,-0.00451,-0.045451,-0.003592,-0.000251,0.001469,-0.000484,-0.001929,-0.00329,3.5e-05
data/generated/models/poly_drift_1/sampled_no-ref-window/get_weighted_poly/lr_all_stats.joblib,-18959670000.0,134368.8,0.1483417,0.1835642,-0.004969478,-1.331792,18959670000.0,134368.9,-0.01910138,-0.1669448,...,0.003555,-0.005286,-0.85648,-0.007331,-0.000249,0.001606,-0.000516,-0.002263,-0.062019,7.5e-05
data/generated/models/poly_drift_1/sampled_no-ref-window/get_thresh_poly/lr_all_stats.joblib,-3.417495e-16,-1.317306e-17,1.350374e-16,-4.510281e-17,-4.791375e-05,-0.003868966,-2.0816680000000002e-17,-3.697129e-17,0.0,-8.337515000000001e-17,...,0.000169,-0.000105,-0.004179,-0.000342,-1.2e-05,1.4e-05,5.4e-05,-8.1e-05,-0.000168,-1.3e-05
data/generated/models/poly_drift_1/sampled_no-ref-window/get_weighted_thresh_poly/lr_all_stats.joblib,4.811229e-15,-8.269752000000001e-17,1.031076e-16,6.772198000000001e-17,0.0005757511,-1.398485e-16,-1.228299e-16,2.775219e-17,-4.9155020000000003e-17,-4.109126e-17,...,-1e-05,0.000289,0.0,-0.000175,-0.000143,0.000237,-2.5e-05,-0.000168,0.001233,-0.000146
data/generated/models/poly_drift_1/not-sampled_ref-window-1k/get_by_feature_score/lr_all_stats.joblib,-0.003386384,786569900.0,2322819000.0,-962069400.0,74499410000.0,129830600.0,-0.003379956,-786569900.0,-2322819000.0,962069400.0,...,0.006744,-0.00436,-0.181844,-0.000843,-0.002434,-4.4e-05,0.001163,-0.00025,-0.001522,0.001461
data/generated/models/poly_drift_1/not-sampled_ref-window-1k/get_weighted_poly/lr_all_stats.joblib,-0.003386384,0.002462098,2.341587,-2.670405,0.01665985,-0.009481051,-0.003386384,0.002462098,2.341587,-2.670405,...,0.007183,-0.005123,-3.426456,-0.001738,-0.00244,-4e-05,0.001232,-0.000292,-0.028307,0.003008


In [638]:
get_top(plr_res, 6)

Unnamed: 0,number_in_top_6
"TCH Blocking Rate, BH",7.0
"TCH Blocking Rate, BH_kl_div",5.0
Param 1,5.0
Param 2,5.0
"TCH Traffic (Erl), BH",5.0
"TCH Blocking Rate, BH_psi",5.0
Number of Available\nTCH_ks,5.0
"TCH Blocking Rate, BH_ed",4.0
Number of Available\nTCH,4.0
"TCH Blocking Rate, BH_wasserstein",4.0


In [641]:
def get_top_weighted(df: pd.DataFrame, n = 6):
    top = pd.DataFrame(np.zeros((len(df.columns))), index=df.columns, columns=[f'number_in_top_{n}'])
    # print(top)
    for i in df.index:
        print(df.loc[i].abs().sort_values(ascending=False)[:n].index)
        for w, j in enumerate(df.loc[i].abs().sort_values(ascending=False)[:n].index):
            top.loc[j] = top.loc[j] + (n - w)

    return top.sort_values(by=top.columns[0], ascending=False)

In [642]:
get_top_weighted(plr_res, 6)

Index(['Number of Available\nTCH_ks', 'Param 2_wasserstein', 'Param 2',
       'TCH Blocking Rate, BH', 'TCH Blocking Rate, BH_wasserstein',
       'TCH Traffic (Erl), BH'],
      dtype='object')
Index(['Number of Available\nTCH_ks', 'TCH Blocking Rate, BH',
       'TCH Blocking Rate, BH_wasserstein',
       'TCH Traffic (Erl), BH_wasserstein', 'TCH Traffic (Erl), BH',
       'Param 1'],
      dtype='object')
Index(['TCH Blocking Rate, BH_kl_div', 'TCH Blocking Rate, BH_jensenshannon',
       'TCH Blocking Rate, BH_ed', 'TCH Blocking Rate, BH_psi',
       'HR Usage Rate_jensenshannon', 'Param 1_jensenshannon'],
      dtype='object')
Index(['TCH Blocking Rate, BH', 'TCH Blocking Rate, BH_wasserstein',
       'Number of Available\nTCH_hellinger', 'Param 1_jensenshannon',
       'TCH Traffic (Erl), BH_jensenshannon', 'TCH Traffic (Erl), BH_kl_div'],
      dtype='object')
Index(['HR Usage Rate', 'HR Usage Rate_ks', 'Number of Available\nTCH',
       'Number of Available\nTCH_ks', 'Param 2_

Unnamed: 0,number_in_top_6
"TCH Blocking Rate, BH_kl_div",30.0
"TCH Blocking Rate, BH",30.0
HR Usage Rate,23.0
HR Usage Rate_ks,21.0
Number of Available\nTCH_ks,21.0
"TCH Blocking Rate, BH_psi",21.0
"TCH Blocking Rate, BH_ed",18.0
"TCH Traffic (Erl), BH",16.0
"TCH Blocking Rate, BH_ks",16.0
"TCH Blocking Rate, BH_wasserstein",15.0


In [643]:
get_top_weighted(plsr_res, 6)

Index(['TCH Blocking Rate, BH_ed', 'TCH Blocking Rate, BH_jensenshannon',
       'TCH Blocking Rate, BH', 'TCH Blocking Rate, BH_wasserstein',
       'TCH Blocking Rate, BH_kl_div', 'TCH Blocking Rate, BH_psi'],
      dtype='object')
Index(['TCH Blocking Rate, BH_ed', 'TCH Blocking Rate, BH_jensenshannon',
       'TCH Blocking Rate, BH', 'TCH Blocking Rate, BH_wasserstein',
       'TCH Blocking Rate, BH_kl_div', 'TCH Blocking Rate, BH_psi'],
      dtype='object')
Index(['TCH Blocking Rate, BH_ed', 'TCH Blocking Rate, BH_wasserstein',
       'TCH Blocking Rate, BH', 'TCH Blocking Rate, BH_kl_div',
       'HR Usage Rate_jensenshannon', 'TCH Blocking Rate, BH_ks'],
      dtype='object')
Index(['TCH Blocking Rate, BH_wasserstein', 'TCH Blocking Rate, BH',
       'HR Usage Rate_jensenshannon', 'HR Usage Rate_hellinger', 'Param 1',
       'Param 1_wasserstein'],
      dtype='object')
Index(['TCH Blocking Rate, BH_ed', 'TCH Blocking Rate, BH_wasserstein',
       'TCH Blocking Rate, BH_t_test'

Unnamed: 0,number_in_top_6
"TCH Blocking Rate, BH_ed",72.0
"TCH Blocking Rate, BH_wasserstein",68.0
"TCH Blocking Rate, BH",34.0
"TCH Blocking Rate, BH_psi",23.0
"TCH Blocking Rate, BH_t_test",22.0
"TCH Blocking Rate, BH_cramer_von_mises",20.0
"TCH Blocking Rate, BH_ks",18.0
"TCH Blocking Rate, BH_kl_div",12.0
"TCH Blocking Rate, BH_jensenshannon",10.0
"TCH Blocking Rate, BH_hellinger",10.0
