In [None]:
import warnings

import numpy as np
import pandas as pd
from darts.models.forecasting.nhits import NHiTSModel
from darts import TimeSeries
import torch
from typing import Callable
from tqdm import tqdm

from helpers import predict, load_agent, quality, clip
from preprocess import preprocess_stats
from rl.sim_enviroment import SimulatedCustomEnv

from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.options import DataDriftOptions

In [2]:

def optimize_params(data: pd.DataFrame, preprocess: Callable = preprocess_stats) -> pd.DataFrame:
    """
    Run and evaluate agent.

    :param data:        raw observations in pandas DataFrame
    :return:            result saves to the same path as input

    Args:
        preprocess: function to preprocess data
    """
    columns = ['Cell ID', 'LAC', 'HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH',
               'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit']

    df = preprocess(data, columns)
    obs_array = df.drop(columns=['Cell ID', 'LAC'], errors='ignore')
    obs_array.rename_axis(None, axis=1, inplace=True)
    obs_array.reset_index(drop=True, inplace=True)

    agent = load_agent('sac_last_60_50d_exp-r.pt', 'pt')
    state_predictor = NHiTSModel.load_from_checkpoint("nhits_35lw_2l_1b_3s_35d_no_TB", "state_predictor", best=True, map_location='cuda')

    # # 'HR Usage Rate', 'TCH Blocking Rate, BH'
    # self.current_state = series[randint(0, len(series))].head(n_past)
    # # 'Number of Available\nTCH', 'TCH Traffic (Erl), BH', 'Param 1',  'Param 2'
    # self.cov = covariates[0].head(n_past)


    lower_limits = []
    upper_limits = []
    qualities = []
    new_states = []

    # 'HR Usage Rate', 'TCH Blocking Rate, BH'
    current_state = obs_array.iloc[:7, :2]
    cov = obs_array.iloc[:7, -4:]

    # print(TimeSeries.from_dataframe(obs_array.iloc[:, :2]))
    # print(len(TimeSeries.from_dataframe(obs_array.iloc[:, :2])))

    # setting env for reward calculation
    environment = SimulatedCustomEnv(
        state_predictor,
        np.array([1,1]),
        TimeSeries.from_dataframe(obs_array.iloc[:, :2]),
        TimeSeries.from_dataframe(obs_array.iloc[:, -4:]),
        7
    )
    obs = environment.reset()
    mom_reward = []

    for i, row in enumerate(obs_array.iloc[7:].values):
        # print('Curr_state=', current_state.shape)

        a1, a2 = predict(row, agent)
        lower = clip(int(row[-2] + a1 * 30))
        upper = clip(int(row[-1] + a2 * 30))

        # compure reward
        new_state, reward, done, info = environment.step(np.array([a1, a2]))
        mom_reward.append(reward)

        # Compute quality
        qualities.append(
            quality(blocking=row[1], ch=row[2], traffic=row[3], param1=row[-2], param2=row[-1], prparam1=lower,
                    prparam2=upper)
        )

        cov.iloc[-1, -2:] = (lower, upper)
        # print(cov)
        # n for number of states to predict
        # current_state.rename_axis(None, axis=1, inplace=True)
        # current_state.reset_index(drop=True, inplace=True)
        pred_state = state_predictor.predict(n=1, series=TimeSeries.from_dataframe(current_state),
                                             past_covariates=TimeSeries.from_dataframe(cov), verbose=False)
        new_states.append(pred_state)

        lower_limits.append(lower)
        upper_limits.append(upper)

        current_state = pd.concat([current_state.iloc[1:], obs_array.iloc[i +7: i+8, :2]], axis=0, join='inner')
        # print(current_state)

        cov = obs_array.iloc[i+1: i +8, -4:]
    # df['Lower_limit_Gen'], df['Upper_limit_Gen'], df['Limit_quality_Gen'] = lower_limits, upper_limits, qualities
    # df["Quality Rate"] = 1 - (2*df['HR Usage Rate']/100 + np.log(df['TCH Blocking Rate, BH'] + 1))/(1 + np.log(101))

    states_df = pd.concat(list(map(lambda x: x.pd_dataframe(), new_states)))
    states_df["Quality Rate"] = 1 - (2*states_df['HR Usage Rate']/100 + np.log(states_df['TCH Blocking Rate, BH'] + 1))/(1 + np.log(101))
    states_df['cum_reward'] = np.cumsum(mom_reward)
    states_df['mom_reward'] = mom_reward

    return states_df


In [3]:
from typing import List


def preprocess_full(data: pd.DataFrame, cols: List[str]=None):
    df = data.copy()
    cols = ['HR Usage Rate', 'TCH Blocking Rate, BH', 'Number of Available\nTCH',
               'TCH Traffic (Erl), BH', 'Lower_limit', 'Upper_limit']
    df.drop(columns='DATA', inplace=True)
    df.rename(columns={'Param 1': cols[-2], 'Param 2': cols[-1]}, inplace=True)
    return df[cols]

In [4]:
df = pd.read_csv('data/dataset_full.csv', index_col=0)

In [5]:
cell_list = list(map(lambda x: x[0], df[['Cell ID']].value_counts().index[:10].tolist()))
curr = df[df['Cell ID'].isin(cell_list)]
reff = df[~df['Cell ID'].isin(cell_list)]

In [6]:
%%time

scores = []

import logging
# logging.getLogger("pytorch_lightning.utilities.rank_zero").setLevel(logging.WARNING)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)

    for cell in tqdm(df[['Cell ID']].value_counts().keys()[:2]):
        cell_data = df[df['Cell ID'] == cell]

        data_drift_report = Report(metrics=[
            DataDriftPreset(),
        ])
        data_drift_report.run(reference_data=reff, current_data=cell_data,)
        drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

        states = optimize_params(cell_data, preprocess=preprocess_full)

        scores.append({
            'cell_id': cell,
            'drift_score': drift,
            'quality_avg': states['Quality Rate'].mean(),
            'quality_min': states['Quality Rate'].min(),
            'quality_max': states['Quality Rate'].max(),
            'quality_std': states['Quality Rate'].std(),
            'cum_reward_avg': states['cum_reward'].mean(),
            'cum_reward_max': states['cum_reward'].max(),
            'cum_reward_std': states['cum_reward'].std(),
            'mom_reward_avg': states['mom_reward'].mean(),
            'mom_reward_min': states['mom_reward'].min(),
            'mom_reward_max': states['mom_reward'].max(),
            'mom_reward_std': states['mom_reward'].std(),
        })

scores_df = pd.DataFrame(scores)
scores_df.to_csv('drift_scores_rewards_300.csv')


  0%|          | 0/2 [00:06<?, ?it/s]


RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [7]:
# scores_df = pd.read_csv('drift_scores_rewards.csv', index_col=0)
scores_df

Unnamed: 0,cell_id,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
0,"(1946,)",0.75,0.874185,0.73475,1.688068,0.056923,-3575299.0,-45,3179021.0,-17657.96173,-34750,-45,10037.161238
1,"(1945,)",0.75,0.881187,0.70782,1.069511,0.036198,-3039105.0,-50,2671801.0,-14785.532446,-28105,-50,8152.344845
2,"(1947,)",1.0,0.842564,0.679234,0.999759,0.047799,-1147139.0,-10,1042341.0,-5578.85,-9795,-10,3138.280158
3,"(1941,)",0.875,0.852576,0.678216,1.311538,0.069046,-3435891.0,-45,3090324.0,-17293.177258,-34560,-45,10108.637397
4,"(1943,)",0.875,0.806072,0.668687,1.095661,0.087994,-3560386.0,-50,3162241.0,-17649.991639,-34895,-50,10007.992846
5,"(1942,)",0.875,0.855051,0.722536,1.801337,0.066887,-3499136.0,-60,3084310.0,-17133.244147,-32395,-60,9472.583864
6,"(13313,)",0.875,0.777178,0.716126,1.150875,0.034356,-2864914.0,-45,2320279.0,-12764.756711,-21165,-45,5655.18708
7,"(13312,)",0.875,0.827807,0.683533,1.014011,0.051529,-1365232.0,20,1551018.0,-8932.39094,-23890,10,7760.800072
8,"(13311,)",0.875,0.766741,0.637207,1.072541,0.050358,-525096.2,15,451084.2,-2500.184564,-4505,10,1293.218758
9,"(22953,)",0.75,0.906204,0.752501,1.126982,0.035694,-3266940.0,-45,2888713.0,-16287.254237,-30980,-45,9091.123056


In [8]:
scores_df[scores_df.columns[1:]].corr()

Unnamed: 0,drift_score,quality_avg,quality_min,quality_max,quality_std,cum_reward_avg,cum_reward_max,cum_reward_std,mom_reward_avg,mom_reward_min,mom_reward_max,mom_reward_std
drift_score,1.0,-0.550605,-0.605572,-0.258672,0.269613,0.528747,0.407681,-0.535117,0.541696,0.537498,0.431527,-0.518617
quality_avg,-0.550605,1.0,0.698381,0.303663,-0.158696,-0.499749,-0.446164,0.543064,-0.546772,-0.565781,-0.449636,0.595814
quality_min,-0.605572,0.698381,1.0,0.467612,-0.377524,-0.636475,-0.612046,0.626828,-0.625488,-0.561378,-0.623015,0.5458
quality_max,-0.258672,0.303663,0.467612,1.0,0.323319,-0.563363,-0.528408,0.56103,-0.5553,-0.516847,-0.540661,0.497546
quality_std,0.269613,-0.158696,-0.377524,0.323319,1.0,-0.303255,-0.185431,0.344766,-0.347661,-0.412466,-0.196013,0.419151
cum_reward_avg,0.528747,-0.499749,-0.636475,-0.563363,-0.303255,1.0,0.931871,-0.991554,0.988429,0.924603,0.947229,-0.882294
cum_reward_max,0.407681,-0.446164,-0.612046,-0.528408,-0.185431,0.931871,1.0,-0.886365,0.874997,0.740686,0.997704,-0.678855
cum_reward_std,-0.535117,0.543064,0.626828,0.56103,0.344766,-0.991554,-0.886365,1.0,-0.9996,-0.965525,-0.907093,0.935646
mom_reward_avg,0.541696,-0.546772,-0.625488,-0.5553,-0.347661,0.988429,0.874997,-0.9996,1.0,0.971219,0.896998,-0.943279
mom_reward_min,0.537498,-0.565781,-0.561378,-0.516847,-0.412466,0.924603,0.740686,-0.965525,0.971219,1.0,0.772915,-0.994334


In [23]:
len(df['Cell ID'].unique())

1043

In [11]:
300 / len(df['Cell ID'].unique())

In [10]:
scores_df

In [7]:
import logging

loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]

In [13]:
with open('loggers.txt', 'w') as f:
    for item in loggers:
        # write each item on a new line
        f.write("%s\n" % item)

In [20]:
class LessThanFilter(logging.Filter):
    def __init__(self, exclusive_maximum, name=""):
        super(LessThanFilter, self).__init__(name)
        self.max_level = exclusive_maximum

    def filter(self, record):
        #non-zero return means we log this message
        return 1 if record.levelno < self.max_level else 0

logging.getLogger("pytorch_lightning.utilities.rank_zero").addFilter(LessThanFilter(logging.ERROR))

In [23]:
logging.getLogger("pytorch_lightning.utilities.rank_zero").error('asd')
logging.getLogger("pytorch_lightning.utilities.rank_zero").error('asd')
logging.getLogger("pytorch_lightning.utilities.rank_zero").error('asd')
logging.getLogger("pytorch_lightning.utilities.rank_zero").error('asd')
logging.getLogger("pytorch_lightning.utilities.rank_zero").error('asd')

In [27]:
logging.getLogger("pytorch_lightning.utilities.rank_zero").findCaller()

('/home/rid/Soft/anaconda3/envs/sm_bachelor/lib/python3.9/site-packages/IPython/core/interactiveshell.py',
 3448,
 'run_ast_nodes',
 None)

In [9]:
%%time

data_drift_report = Report(metrics=[
    DataDriftPreset(),
])
data_drift_report.run(reference_data=reff, current_data=cell_data,)
drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

CPU times: user 6.43 s, sys: 212 ms, total: 6.65 s
Wall time: 6.8 s


In [10]:
%%time

data_drift_report.run(reference_data=reff, current_data=cell_data,)
drift = data_drift_report.as_dict()['metrics'][0]['result']['share_of_drifted_columns']

CPU times: user 6.22 s, sys: 168 ms, total: 6.39 s
Wall time: 6.49 s


In [11]:
%%time

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    states = optimize_params(cell_data, preprocess=preprocess_full)

CPU times: user 42.4 s, sys: 1.72 s, total: 44.1 s
Wall time: 44.1 s


In [3]:
torch.cuda.is_available()

False