# Imports


In [1]:
import numpy as np
import pandas as pd
from darts.models.forecasting.nhits import NHiTSModel
from darts import TimeSeries
import torch
from typing import Callable

from helpers import predict, load_agent, quality, clip
from preprocess import preprocess_stats
from rl.sim_enviroment import SimulatedCustomEnv

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.options import DataDriftOptions

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


# Scipts

In [2]:

def optimize_params(data: pd.DataFrame, preprocess: Callable = preprocess_stats) -> pd.DataFrame:
    """
    Run and evaluate agent.

    :param data:        raw observations in pandas DataFrame
    :return:            result saves to the same path as input

    Args:
        preprocess: function to preprocess data
    """
    columns = ['Cell ID', 'LAC', 'HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH',
               'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit']

    df = preprocess(data, columns)
    obs_array = df.drop(columns=['Cell ID', 'LAC'], errors='ignore')
    obs_array.rename_axis(None, axis=1, inplace=True)
    obs_array.reset_index(drop=True, inplace=True)

    agent = load_agent('sac_last_60_50d_exp-r.pt', 'pt')
    state_predictor = NHiTSModel.load_from_checkpoint("nhits_35lw_2l_1b_3s_35d_no_TB", "state_predictor", best=True)

    # # 'HR Usage Rate', 'TCH Blocking Rate, BH'
    # self.current_state = series[randint(0, len(series))].head(n_past)
    # # 'Number of Available\nTCH', 'TCH Traffic (Erl), BH', 'Param 1',  'Param 2'
    # self.cov = covariates[0].head(n_past)


    lower_limits = []
    upper_limits = []
    qualities = []
    new_states = []

    # 'HR Usage Rate', 'TCH Blocking Rate, BH'
    current_state = obs_array.iloc[:7, :2]
    cov = obs_array.iloc[:7, -4:]

    # print(TimeSeries.from_dataframe(obs_array.iloc[:, :2]))
    # print(len(TimeSeries.from_dataframe(obs_array.iloc[:, :2])))

    # setting env for reward calculation
    # environment = SimulatedCustomEnv(
    #     state_predictor,
    #     np.array([1,1]),
    #     TimeSeries.from_dataframe(obs_array.iloc[:, :2]),
    #     TimeSeries.from_dataframe(obs_array.iloc[:, -4:]),
    #     7
    # )
    # obs = environment.reset()
    # mom_reward = []

    for i, row in enumerate(obs_array.iloc[7:].values):
        # print('Curr_state=', current_state.shape)

        a1, a2 = predict(row, agent)
        lower = clip(int(row[-2] + a1 * 30))
        upper = clip(int(row[-1] + a2 * 30))

        # compure reward
        # new_state, reward, done, info = environment.step(np.array([a1, a2]))
        # mom_reward.append(reward)

        # Compute quality
        qualities.append(
            quality(blocking=row[1], ch=row[2], traffic=row[3], param1=row[-2], param2=row[-1], prparam1=lower,
                    prparam2=upper)
        )

        cov.iloc[-1, -2:] = (lower, upper)
        # print(cov)
        # n for number of states to predict
        # current_state.rename_axis(None, axis=1, inplace=True)
        # current_state.reset_index(drop=True, inplace=True)
        pred_state = state_predictor.predict(n=1, series=TimeSeries.from_dataframe(current_state),
                                             past_covariates=TimeSeries.from_dataframe(cov), verbose=False)
        new_states.append(pred_state)

        lower_limits.append(lower)
        upper_limits.append(upper)

        current_state = pd.concat([current_state.iloc[1:], obs_array.iloc[i +7: i+8, :2]], axis=0, join='inner')
        # print(current_state)

        cov = obs_array.iloc[i+1: i +8, -4:]
    # df['Lower_limit_Gen'], df['Upper_limit_Gen'], df['Limit_quality_Gen'] = lower_limits, upper_limits, qualities
    # df["Quality Rate"] = 1 - (2*df['HR Usage Rate']/100 + np.log(df['TCH Blocking Rate, BH'] + 1))/(1 + np.log(101))

    states_df = pd.concat(list(map(lambda x: x.pd_dataframe(), new_states)))
    states_df["Quality Rate"] = 1 - (2*states_df['HR Usage Rate']/100 + np.log(states_df['TCH Blocking Rate, BH'] + 1))/(1 + np.log(101))
    # states_df['cum_reward'] = np.cumsum(mom_reward)
    # states_df['mom_reward'] = mom_reward

    return states_df


In [3]:
from typing import List


def preprocess_full(data: pd.DataFrame, cols: List[str]=None):
    df = data.copy()
    cols = ['HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH',
               'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit']
    df.drop(columns='DATA', inplace=True)
    df.rename(columns={'Param 1': cols[-2], 'Param 2': cols[-1]}, inplace=True)
    return df[cols]

# Tests

In [12]:
preprocess_stats(pd.read_excel('data/GBTS_TOTAL_20220522.xlsm'), ['Cell ID', 'LAC', 'HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH',
               'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit'])

  warn(msg)


1,Cell ID,LAC,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Lower_limit,Upper_limit
6,32871,57951,78,0.51,13,7.32,13.0,21.0
7,32872,57951,76,0.00,13,5.42,17.0,28.0
8,32873,57951,92,0.00,20,10.44,17.0,28.0
9,10461,57951,75,0.00,17,16.30,20.0,33.0
10,10462,57951,76,0.00,5,1.52,26.0,43.0
...,...,...,...,...,...,...,...,...
1122,7982,57951,78,0.00,13,1.76,26.0,43.0
1123,7987,57951,50,0.00,20,8.76,12.0,21.0
1124,7983,57951,88,0.26,12,4.23,18.0,29.0
1125,7988,57951,78,0.19,12,8.04,19.0,31.0


In [28]:
pd.read_csv('data/dataset_full.csv', index_col=0)

Unnamed: 0,Cell ID,DATA,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
0,25771,2020-10-02,4.0,94.95,0.00,7.78,56,68
1,25772,2020-10-02,3.0,65.10,0.00,5.74,24,43
2,25773,2020-10-02,3.0,97.38,0.12,13.42,78,92
3,3361,2020-10-02,3.0,97.66,1.22,18.27,90,97
5,3363,2020-10-02,3.0,31.84,0.00,5.10,36,50
...,...,...,...,...,...,...,...,...
1028,882,2022-05-22,13.0,78.00,0.00,1.76,26,43
1029,887,2022-05-22,20.0,50.00,0.00,8.76,12,21
1030,883,2022-05-22,12.0,88.00,0.26,4.23,18,29
1031,888,2022-05-22,12.0,78.00,0.19,8.04,19,31


In [30]:
states = optimize_params(pd.read_excel('data/GBTS_TOTAL_20220522.xlsm'))

# df.to_excel('quality_results.xlsm')

  warn(msg)
  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU c

In [31]:
states['Quality Rate'].std()

0.07517744051985999

# Run

In [18]:
df = pd.read_csv('data/dataset_full.csv', index_col=0)

In [51]:
cell_list = list(map(lambda x: x[0], df[['Cell ID']].value_counts().index[:10].tolist()))
curr = df[df['Cell ID'].isin(cell_list)]
reff = df[~df['Cell ID'].isin(cell_list)]

In [57]:
curr

Unnamed: 0,Cell ID,DATA,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
48,13311,2020-10-02,6.0,98.46,0.13,34.06,85,97
49,13312,2020-10-02,6.0,85.87,0.00,26.85,5,35
50,13313,2020-10-02,3.0,99.54,0.00,14.03,85,97
707,1941,2020-10-02,3.0,47.34,0.00,6.44,32,50
708,1945,2020-10-02,2.0,67.87,0.00,3.04,38,39
...,...,...,...,...,...,...,...,...
719,1946,2022-05-22,12.0,78.00,0.00,8.50,27,46
720,1942,2022-05-22,13.0,88.00,0.17,5.54,17,28
721,1947,2022-05-22,27.0,75.00,0.26,23.02,12,21
722,1943,2022-05-22,27.0,100.00,0.06,23.02,26,43


In [58]:
data_drift_report = Report(metrics=[
   DataDriftPreset(),
])

In [59]:
data_drift_report.run(reference_data=reff, current_data=curr,)

In [72]:
data_drift_report.show()

In [76]:
data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

0.75

In [98]:
scores = []

for cell in df[['Cell ID']].value_counts().keys()[:10]:
    cell_data = df[df['Cell ID'] == cell]

    data_drift_report = Report(metrics=[
        DataDriftPreset(),
    ])
    data_drift_report.run(reference_data=reff, current_data=cell_data,)
    drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

    states = optimize_params(cell_data, preprocess=preprocess_full)

    scores.append({
        'cell_id': cell,
        'drift_score': drift,
        'quality_avg': states['Quality Rate'].mean(),
        'quality_min': states['Quality Rate'].min(),
        'quality_max': states['Quality Rate'].max(),
        'quality_std': states['Quality Rate'].std(),
    })

HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU ava

In [99]:
scores_df = pd.DataFrame(scores)
scores_df

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std
0,"(1946,)",0.75,0.874172,0.739002,1.688068,0.056878
1,"(1945,)",0.75,0.881243,0.710617,1.069511,0.036151
2,"(1947,)",1.0,0.843145,0.679234,0.999486,0.047978
3,"(1941,)",0.875,0.852543,0.681631,1.311538,0.069052
4,"(1943,)",0.875,0.806069,0.667248,1.095661,0.088015
5,"(1942,)",0.875,0.854989,0.722536,1.801337,0.06683
6,"(13313,)",0.875,0.77718,0.716126,1.150875,0.034354
7,"(13312,)",0.875,0.827764,0.683533,1.014011,0.051588
8,"(13311,)",0.875,0.766738,0.637207,1.072541,0.05034
9,"(22953,)",0.75,0.906194,0.752501,1.128156,0.035729


In [100]:
scores_df.to_csv('drift_scores.csv')

In [103]:
scores_df[scores_df.columns[1:]].corr()

Unnamed: 0,drift_score,quality_avg,quality_min,quality_max,quality_std
drift_score,1.0,-0.548021,-0.622229,-0.259476,0.272396
quality_avg,-0.548021,1.0,0.711855,0.302779,-0.159708
quality_min,-0.622229,0.711855,1.0,0.482624,-0.378733
quality_max,-0.259476,0.302779,0.482624,1.0,0.32067
quality_std,0.272396,-0.159708,-0.378733,0.32067,1.0


In [1]:
np.cumsum([1, 2, 3, 1, 5])

NameError: name 'np' is not defined

# Dump

In [53]:
df = preprocess_stats(pd.read_excel('data/GBTS_TOTAL_20220522.xlsm'), ['Cell ID', 'LAC', 'HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH', 'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit'])
obs_array = df.drop(columns=['Cell ID', 'LAC'])

  warn(msg)


In [46]:
obs_array

1,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Lower_limit,Upper_limit
6,78,0.51,13,7.32,13.0,21.0
7,76,0.00,13,5.42,17.0,28.0
8,92,0.00,20,10.44,17.0,28.0
9,75,0.00,17,16.30,20.0,33.0
10,76,0.00,5,1.52,26.0,43.0
...,...,...,...,...,...,...
1122,78,0.00,13,1.76,26.0,43.0
1123,50,0.00,20,8.76,12.0,21.0
1124,88,0.26,12,4.23,18.0,29.0
1125,78,0.19,12,8.04,19.0,31.0


In [81]:
obs_array.reset_index(drop=True, inplace=True)
obs_array.index.name = None
obs_array.index.rename('time', inplace=True)
obs_array

Unnamed: 0_level_0,HR Usage Rate,"TCH Blocking Rate, BH",Number of Available\nTCH,"TCH Traffic (Erl), BH",Lower_limit,Upper_limit
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,78,0.51,13,7.32,13.0,21.0
1,76,0.00,13,5.42,17.0,28.0
2,92,0.00,20,10.44,17.0,28.0
3,75,0.00,17,16.30,20.0,33.0
4,76,0.00,5,1.52,26.0,43.0
...,...,...,...,...,...,...
1051,78,0.00,13,1.76,26.0,43.0
1052,50,0.00,20,8.76,12.0,21.0
1053,88,0.26,12,4.23,18.0,29.0
1054,78,0.19,12,8.04,19.0,31.0


In [82]:
current_state = obs_array.iloc[:7, :2]

In [103]:
current_state.iloc[-1:]

Unnamed: 0_level_0,HR Usage Rate,"TCH Blocking Rate, BH"
time,Unnamed: 1_level_1,Unnamed: 2_level_1
6,26,0.0


In [84]:
current_state.columns

Index(['HR Usage Rate', 'TCH Blocking Rate, BH'], dtype='object', name='')

In [62]:
current_state[['HR Usage Rate', 'TCH Blocking Rate, BH']] = current_state[['HR Usage Rate', 'TCH Blocking Rate, BH']].astype('float64')
current_state

Unnamed: 0_level_0,HR Usage Rate,"TCH Blocking Rate, BH"
time,Unnamed: 1_level_1,Unnamed: 2_level_1
0,78.0,0.51
1,76.0,0.0
2,92.0,0.0
3,75.0,0.0
4,76.0,0.0
5,63.0,0.0
6,26.0,0.0


In [89]:
current_state.rename_axis(None, axis=1, inplace=True)
current_state.columns

Index(['HR Usage Rate', 'TCH Blocking Rate, BH'], dtype='object')

In [90]:
TimeSeries.from_dataframe(current_state)

In [18]:
cell_id = pd.read_pickle('cell_id_data_nona.pkl')

In [21]:
cell_df = pd.DataFrame(cell_id[3371])

In [65]:
cell_df.columns

Index(['DATA', 'Number of Available\nTCH', 'HR Usage Rate',
       'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'Param 1', 'Param 2'],
      dtype='object')

In [28]:
TimeSeries.from_dataframe(cell_df[['HR Usage Rate','TCH Blocking Rate, BH']])

In [39]:
cell_df[['HR Usage Rate','TCH Blocking Rate, BH']].dtypes

HR Usage Rate            float64
TCH Blocking Rate, BH    float64
dtype: object

In [40]:
cell_df[cell_df.isna().any(axis=1)]

Unnamed: 0_level_0,DATA,Number of Available\nTCH,HR Usage Rate,"TCH Blocking Rate, BH","TCH Traffic (Erl), BH",Param 1,Param 2
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [41]:
current_state[current_state.isna().any(axis=1)]

1,HR Usage Rate,"TCH Blocking Rate, BH",date
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [85]:
current_state.columns

Index(['HR Usage Rate', 'TCH Blocking Rate, BH'], dtype='object', name='')

In [74]:
cell_df.index

RangeIndex(start=0, stop=593, step=1, name='time')

In [86]:
cell_df.columns

Index(['DATA', 'Number of Available\nTCH', 'HR Usage Rate',
       'TCH Blocking Rate, BH', 'TCH Traffic (Erl), BH', 'Param 1', 'Param 2'],
      dtype='object')

In [88]:
current_state.rename_axis(None, axis=1).columns

Index(['HR Usage Rate', 'TCH Blocking Rate, BH'], dtype='object')