In [17]:
import os
import sys
import glob
import re
import ast
import warnings
import time

import csv
import json
import pickle

import math
import random
import numpy as np
import scipy as sp
import datetime as dt
import pandas as pd
import swifter
from scipy.stats import gaussian_kde
from scipy.integrate import quad
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error

import portion as P
import itertools as it
import copy
from tqdm.notebook import tqdm
from collections import namedtuple
from pprint import pprint
from pytictoc import TicToc

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from my_utils import *

# Configure display options
# pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
%config InlineBackend.figure_format = 'retina'

# Set plot style
# plt.style.use('ggplot')

In [None]:
# # DataLoader: Single Radio Example
# dates = data_loader(query_dates=True)

# selected_dates = [s for s in dates if s >= '2023-09-12']
# # excluded_dates = []
# # selected_exps = []
# # excluded_exps = ['Modem_Action_Test', 'Control_Group', 'Control_Group2', 'Control_Group3']
# # selected_routes = ['BR']
# # excluded_routes = []
# filepaths = data_loader(mode='sr', selected_dates=selected_dates)

# filepath = filepaths[0]
# pprint(filepath)

# ho_df = pd.read_csv(filepath[0], parse_dates=['start', 'end'])
# df = pd.read_csv(filepath[1], parse_dates=['Timestamp'])[['seq', 'Timestamp', 'lost', 'excl', 'latency']]
# df, ho_df, empty_data = data_aligner(df, ho_df)

# display(df)


# # DataLoader: Dual Radio Example
# dates = data_loader(query_dates=True)

# selected_dates = [s for s in dates if s >= '2023-09-12']
# # excluded_dates = []
# # selected_exps = []
# # excluded_exps = ['Modem_Action_Test', 'Control_Group', 'Control_Group2', 'Control_Group3']
# # selected_routes = ['BR']
# # excluded_routes = []
# filepaths = data_loader(mode='dr', selected_dates=selected_dates)

# filepath = filepaths[0]
# pprint(filepath)

# ho_df1 = pd.read_csv(filepath[0][0], parse_dates=['start', 'end'])
# ho_df2 = pd.read_csv(filepath[1][0], parse_dates=['start', 'end'])
# df1 = pd.read_csv(filepath[0][1], parse_dates=['Timestamp'])[['seq', 'Timestamp', 'lost', 'excl', 'latency']]
# df2 = pd.read_csv(filepath[1][1], parse_dates=['Timestamp'])[['seq', 'Timestamp', 'lost', 'excl', 'latency']]
# df, df1, df2, ho_df1, ho_df2, empty_data = data_consolidator(df1, df2, ho_df1, ho_df2)

# display(df)

# Define Useful Functions

In [18]:
def interpolate(x, y, ratio=0.5):
    """
    Linearly interpolate between two points.

    Args:
        x, y: start and end of the span (e.g. datetimes or numbers); x < y is expected.
        ratio (float): position inside the span, in [0, 1]; 0 yields x, 1 yields y.
    Returns:
        The breakpoint located `ratio` of the way from x to y (same kind of value as x).
    """
    span = y - x
    return x + span * ratio

def mean_downsample(data, sample_size=100000):
    """
    Downsample a sequence by averaging consecutive, equally sized chunks.

    The chunk length is ``len(data) // sample_size``; each chunk is replaced by its
    mean. Because the chunk length is rounded down, the result can contain more
    than ``sample_size`` values.

    Args:
        data: list of numbers (sliceable, supports ``len``).
        sample_size (int): target number of samples; must be positive.

    Returns:
        list: the chunk means, or ``data`` itself (unchanged, same object) when it
        has fewer than ``sample_size`` elements.

    Raises:
        ValueError: if ``sample_size`` is not positive.
    """
    if sample_size <= 0:
        raise ValueError(f'sample_size must be positive, got {sample_size}')

    chunk_size = len(data) // sample_size
    if chunk_size == 0:
        return data

    downsampled_data = []
    for start in range(0, len(data), chunk_size):
        chunk = data[start:start + chunk_size]
        # Divide by the actual chunk length: the last chunk may be shorter than
        # chunk_size, and dividing by chunk_size would under-estimate its mean.
        downsampled_data.append(sum(chunk) / len(chunk))
    return downsampled_data

def median_downsample(data, sample_size=100000):
    """
    Downsample a sequence by taking the median of consecutive chunks.

    Args:
        data: list holding the original values.
        sample_size: target number of samples after downsampling.

    Returns:
        list of chunk medians, or `data` itself when it is shorter than `sample_size`.
    """
    step = len(data) // sample_size
    if step == 0:
        return data
    # One median per chunk; the trailing chunk may be shorter than `step`.
    return [np.median(data[begin:begin + step]) for begin in range(0, len(data), step)]

def total_area_kde(kde, lower_bound=-np.inf, upper_bound=np.inf):
    """
    Integrate a one-dimensional density estimate over an interval.

    Args:
        kde: callable evaluated at a single x; may return a plain number or a
            one-element array (as scipy's gaussian_kde does).
        lower_bound, upper_bound (float): integration limits; infinite by default.

    Returns:
        float: the value of the integral computed by scipy.integrate.quad
        (the error estimate returned by quad is discarded).
    """
    def integrand(x):
        # Hand quad an explicit scalar. Relying on the implicit conversion of a
        # 1-element array to float is deprecated since NumPy 1.25.
        return np.asarray(kde(x), dtype=float).item()

    total_area, _ = quad(integrand, lower_bound, upper_bound)
    return total_area

def total_area_histogram_with_centers(x_centers, heights, bin_width):
    """
    Total area of a histogram whose bars all share the same width.

    Args:
        x_centers: bar centers (accepted for symmetry with the caller; not used in the computation).
        heights: iterable of bar heights.
        bin_width: common width of every bar.

    Returns:
        bin_width multiplied by the sum of all heights.
    """
    height_sum = sum(heights)
    return bin_width * height_sum

# Class: Profile

In [19]:
class Profile():
    def __init__(self, filepaths, model_name='Trial',
                 scope_agg=None, scope=None, sp_columns=['type'], ts_column='Timestamp',
                 metrics=['dl_lost', 'dl_excl', 'ul_lost', 'ul_excl']):
        
        # Initialize instance variables
        self.filepaths = copy.deepcopy(filepaths)
        self.model_name = model_name
        if scope_agg is None:
            if scope is None:
                scope = {
                    **{key: (-5.0, 5.0) for key in ['LTEH', 'ENBH', 'MCGH', 'MNBH', 'SCGM', 'SCGA', 'SCGR-I', 'SCGR-II', 'SCGC-I', 'SCGC-II']},
                    **{key: (-10.0, 10.0) for key in ['SCGF', 'MCGF', 'NASR']}, 
                    'Stable': (-1.0, 1.0)
                }
            self.scope_agg = { mode_mets: scope.copy() for mode_mets in metrics }   
        else:
            self.scope_agg = copy.deepcopy(scope_agg)
        self.sp_columns = sp_columns[:]
        self.ts_column = ts_column
        self.metrics = metrics[:]
        self.Container = { mode_mets: { tag: { 'dist_table': [],
                                               'relative_loex_timestamp': [],
                                               'relative_timestamp': [],
                                               'interruption_time': [],
                                               'trigger_loex': [],
                                               'event_count': [] } 
                                        for tag in self.scope_agg[mode_mets].keys() }
                                        for mode_mets in metrics }
        self.Profile = { mode_mets: { tag: { 'dist_table': None,
                                             'relative_loex_timestamp': [],
                                             'relative_timestamp': [],
                                             'interruption_time': [],
                                             'trigger_loex': [],
                                             'event_count': 0 } 
                                        for tag in self.scope_agg[mode_mets].keys() }
                                        for mode_mets in metrics }
        self.scope_models = copy.deepcopy(self.scope_agg)
        self.hist_models = { mode_mets: { tag: None for tag in self.scope_agg[mode_mets].keys() } for mode_mets in metrics }
        self.kde_models = { mode_mets: { tag: (None, None) for tag in self.scope_agg[mode_mets].keys() } for mode_mets in metrics }
        self.prob_models = { mode_mets: { tag: None for tag in self.scope_agg[mode_mets].keys() } for mode_mets in metrics }
        
        # Construct profiles
        # self.construct_profile()
        # self.modeling()
    
    
    def create_instance(self, df, center, interval, mets='lost', w_size=0.01):
        df = df[(df[self.ts_column] >= interval.lower) & (df[self.ts_column] < interval.upper)].copy().reset_index(drop=True)
        
        # Relative window converted from timestamp
        df['relative_time'] = (df['Timestamp'] - center).dt.total_seconds()
        df['window_id'] = ((df['relative_time'] + w_size / 2) // w_size) * w_size  # 四捨五入
        
        if mets == 'lost':
            RATE_TYPE = 'PLR'
            loex_df = df[df['lost']].copy()
            ts_group = df.groupby(['window_id'])
            table = ts_group.agg({'lost': ['count','sum'], 'Timestamp': ['first']}).reset_index()
        elif mets == 'excl':
            RATE_TYPE = 'ELR'
            df['excl_exact'] = df['excl'] & ~df['lost']
            loex_df = df[df['excl_exact']].copy()
            ts_group = df.groupby(['window_id'])
            table = ts_group.agg({'excl_exact': ['count','sum'], 'Timestamp': ['first']}).reset_index()
        
        table.columns = ['window_id','tx_count',mets,'Timestamp']
        
        return table, loex_df['relative_time'].to_list(), df['relative_time'].to_list()
        # return table, loex_df['relative_time'].to_numpy()
    
    
    def setup_profile(self, df, ho_df, mode, mets, scope):
        # Initialize "Register"
        Register = { tag: { 'dist_table': [],
                            'relative_loex_timestamp': [],
                            'relative_timestamp': [],
                            'interruption_time': [],
                            'trigger_loex': [] } for tag in scope.keys() }
        this_df = df.copy()
        
        for i, row in ho_df.iterrows():
            prior_row = ho_df.iloc[i-1] if i != 0 else None
            post_row = ho_df.iloc[i+1] if i != len(ho_df) - 1 else None

            # Peek the next event to avoid HO overlapping with handoverFailure (skip!!)
            if i != len(ho_df) - 1 and pd.notna(row.end) and row.end > post_row.start:
                # print('Overlapping event occurs!!')
                # print(i, row['start'], row['end'], row['type'], row['cause'])
                # print(i+1, post_row['start'], post_row['end'], post_row['type'], post_row['cause'])
                continue
            
            # Set prior event if the prior loop is skipped
            if i != 0 and pd.notna(prior_row.end) and prior_row.end > row.start:
                prior_row = ho_df.iloc[i-2] if i > 1 else None
            
            # Basic information of the current row
            tag = '_'.join([s for s in row[self.sp_columns] if pd.notna(s)])  # specific column name
            start_ts, end_ts = row['start'], row['end']  # handover start/end time
            interruption_time = (end_ts - start_ts).total_seconds() if pd.notna(end_ts) else 0  # handover interruption time
            
            # Set simple left/right bounds
            current_left_bound = start_ts + pd.Timedelta(seconds=(scope[tag][0]))
            current_right_bound = start_ts + pd.Timedelta(seconds=(scope[tag][1]))
            
            # Set left/right bounds to avoid event overlapping with each other
            if prior_row is not None:
                prior_tag = '_'.join([s for s in prior_row[self.sp_columns] if pd.notna(s)])
                prior_right_bound = prior_row['start'] + pd.Timedelta(seconds=(scope[prior_tag][1]))
                if pd.notna(prior_row['end']):
                    # left = prior_row['end'] + (start_ts - prior_row['end']) / 2
                    left_bound = min(max(current_left_bound, Profile.interpolate(prior_right_bound, current_left_bound), prior_row['end']), start_ts)
                else:
                    # left = prior_row['start'] + (start_ts - prior_row['start']) / 2
                    left_bound = min(max(current_left_bound, Profile.interpolate(prior_right_bound, current_left_bound), prior_row['start']), start_ts)
            else:
                # left_bound = pd.Timestamp.min
                left_bound = current_left_bound
            
            if post_row is not None:
                post_tag = '_'.join([s for s in post_row[self.sp_columns] if pd.notna(s)])
                post_left_bound = post_row['start'] + pd.Timedelta(seconds=(scope[post_tag][0]))
                if pd.notna(end_ts):
                    # right = end_ts + (post_row['start'] - end_ts) / 2
                    right_bound = max(min(current_right_bound, Profile.interpolate(current_right_bound, post_left_bound), post_row['start']), end_ts)
                else:
                    # right = start_ts + (post_row['start'] - start_ts) / 2
                    right_bound = max(min(current_right_bound, Profile.interpolate(current_right_bound, post_left_bound), post_row['start']), start_ts)
            else:
                # right_bound = pd.Timestamp.max
                right_bound = current_right_bound
            
            # interval = P.closed(max(start_ts+pd.Timedelta(seconds=scope[tag][0]), left_bound), min(start_ts+pd.Timedelta(seconds=scope[tag][1]), right_bound))
            interval = P.closed(left_bound, right_bound)

            # Consider the stable duration before an event starts
            stable_df = this_df[this_df[self.ts_column] < interval.lower].copy()
            stable_df['Timestamp_to_second'] = stable_df['Timestamp'].dt.floor('S')
            
            if not stable_df.empty:
                unique_timestamps = stable_df['Timestamp_to_second'].unique()
                
                tmp_df = stable_df.copy()
                for ts in unique_timestamps:
                    stable_center = ts + pd.Timedelta(seconds=0.5)
                    stable_interval = P.closed(ts, min(ts + pd.Timedelta(seconds=1), interval.lower))
                    
                    # Create an instance of stable profile
                    # dist_table, relative_loex_timestamp = self.create_instance(tmp_df.copy(), stable_center, stable_interval, mets=mets)
                    dist_table, relative_loex_timestamp, relative_timestamp = self.create_instance(tmp_df.copy(), stable_center, stable_interval, mets=mets)
                    
                    # if len(relative_loex_timestamp):
                    #     display(dist_table)
                    #     print('Stable', len(relative_loex_timestamp), relative_loex_timestamp)
                    
                    # Feed into "Register"
                    if len(relative_loex_timestamp):
                        Register['Stable']['trigger_loex'].append(1)
                        Register['Stable']['dist_table'].append(dist_table)
                        Register['Stable']['relative_loex_timestamp'] += relative_loex_timestamp
                        # Register['Stable']['relative_loex_timestamp'] = np.concatenate((Register['Stable']['relative_loex_timestamp'], relative_loex_timestamp))
                    else:
                        Register['Stable']['trigger_loex'].append(0)
                    Register['Stable']['interruption_time'].append((stable_interval.upper - stable_interval.lower).total_seconds())
                    Register['Stable']['relative_timestamp'] += relative_timestamp
                    
                    # Update dataframe to accelerate
                    tmp_df = tmp_df[tmp_df[self.ts_column] >= ts + pd.Timedelta(seconds=1)]
            
            # Create an instance of handover profile
            # dist_table, relative_loex_timestamp = self.create_instance(this_df.copy(), start_ts, interval, mets=mets)
            dist_table, relative_loex_timestamp, relative_timestamp = self.create_instance(this_df.copy(), start_ts, interval, mets=mets)
            
            # if len(relative_loex_timestamp):
            #     display(dist_table)
            #     print(tag, len(relative_loex_timestamp), relative_loex_timestamp)
            
            # Feed into "Register"
            if len(relative_loex_timestamp):
                Register[tag]['trigger_loex'].append(1)
                Register[tag]['dist_table'].append(dist_table)
                Register[tag]['relative_loex_timestamp'] += relative_loex_timestamp
                # Register[tag]['relative_loex_timestamp'] = np.concatenate((Register[tag]['relative_loex_timestamp'], relative_loex_timestamp))
            else:
                Register[tag]['trigger_loex'].append(0)
            Register[tag]['interruption_time'].append(interruption_time)
            Register[tag]['relative_timestamp'] += relative_timestamp
            
            # Update dataframe to accelerate the speed
            this_df = this_df[this_df[self.ts_column] >= interval.upper].copy()

        # Consider the stable duration after the last event ends
        stable_df = this_df.copy()
        stable_df['Timestamp_to_second'] = stable_df['Timestamp'].dt.floor('S')

        if not stable_df.empty:
            unique_timestamps = stable_df['Timestamp_to_second'].unique()
            
            tmp_df = stable_df.copy()
            for ts in unique_timestamps:
                stable_center = ts + pd.Timedelta(seconds=0.5)
                stable_interval = P.closed(ts, ts + pd.Timedelta(seconds=1))
                
                # Create an instance of stable profile
                # dist_table, relative_loex_timestamp = self.create_instance(tmp_df.copy(), stable_center, stable_interval, mets=mets)
                dist_table, relative_loex_timestamp, relative_timestamp = self.create_instance(tmp_df.copy(), stable_center, stable_interval, mets=mets)
                
                # if len(relative_loex_timestamp):
                #     display(dist_table)
                #     print('Stable', len(relative_loex_timestamp), relative_loex_timestamp)
                
                # Feed into "Register"
                if len(relative_loex_timestamp):
                    Register['Stable']['trigger_loex'].append(1)
                    Register['Stable']['dist_table'].append(dist_table)
                    Register['Stable']['relative_loex_timestamp'] += relative_loex_timestamp
                    # Register['Stable']['relative_loex_timestamp'] = np.concatenate((Register['Stable']['relative_loex_timestamp'], relative_loex_timestamp))
                else:
                    Register['Stable']['trigger_loex'].append(0)
                Register['Stable']['interruption_time'].append((stable_interval.upper - stable_interval.lower).total_seconds())
                Register['Stable']['relative_timestamp'] += relative_timestamp
                
                # Update dataframe to accelerate
                tmp_df = tmp_df[tmp_df[self.ts_column] >= ts + pd.Timedelta(seconds=1)]
        
        return Register
    
    
    @staticmethod
    def dist_aggregate(tables, mode, mets):
        if mets == 'lost':
            RATE_TYPE = 'PLR'
        elif mets == 'excl':
            RATE_TYPE = 'ELR'
        
        table = pd.DataFrame(columns=['window_id','tx_count',mets])
        table[mets] = table[mets].astype('Int32')
        table['window_id'] = table['window_id'].astype('float32')
        table['tx_count'] = table['tx_count'].astype('Int32')
        
        tables = [t for t in tables if t is not None]
        for this_table in tables:
            table = table.merge(this_table, on=['window_id'], how='outer').fillna(0)
            table['tx_count'] = table['tx_count_x'] + table['tx_count_y']
            table[mets] = table[f'{mets}_x'] + table[f'{mets}_y']
            table = table[['window_id','tx_count',mets]]
        
        table[RATE_TYPE] = table[mets] / (table['tx_count'] + 1e-9) * 100
        table[RATE_TYPE] = table[RATE_TYPE].astype('float32')
        
        table = table[['window_id','tx_count',mets,RATE_TYPE]].copy().sort_values(by=['window_id']).reset_index(drop=True)
        return table
    
    
    def construct_profile(self, max_files=None):
        """
        Read every trace in self.filepaths and aggregate the per-tag statistics.

        For each trace, filepath[0] is read as the handover table, filepath[1] as the
        downlink packet table and filepath[2] as the uplink packet table. Each metric
        in self.metrics is processed with setup_profile(); the per-trace results are
        appended to self.Container and finally merged into self.Profile.

        Intended to be called once per instance: the 'dist_table', 'trigger_loex',
        'interruption_time' and 'event_count' lists in self.Container are not
        cleared, so a second call would add the earlier traces to self.Profile again.

        Args:
            max_files (int | None): process at most this many traces. None (default)
                processes all of them. The previous implementation carried a leftover
                debugging guard that silently stopped after two traces; pass
                max_files=2 to reproduce that behaviour.

        Raises:
            ValueError: if a metric name does not start with 'dl' or 'ul'.
        """
        n = len(self.filepaths)
        packet_columns = ['seq', 'Timestamp', 'lost', 'excl', 'latency']
        packet_file_index = {'dl': 1, 'ul': 2}  # position of the packet CSV inside `filepath`

        for i, filepath in enumerate(self.filepaths):

            if max_files is not None and i >= max_files:
                break

            for s in filepath[:3]:
                print(f'{i+1}/{n}', s)

            # Read the handover table once per trace; every metric works on its own copy
            ho_df_raw = pd.read_csv(filepath[0], parse_dates=['start', 'end'])
            if ho_df_raw.empty:
                print('******** Empty RRC Data ********')
                continue

            packet_cache = {}  # mode -> packet table, read lazily and at most once per trace

            for mode_mets in self.metrics:
                mode, mets = mode_mets[:2], mode_mets[-4:]
                scope = self.scope_agg[mode_mets]

                if mode not in packet_file_index:
                    raise ValueError(f"metric {mode_mets!r} must start with 'dl' or 'ul'")
                if mode not in packet_cache:
                    packet_cache[mode] = pd.read_csv(filepath[packet_file_index[mode]],
                                                     parse_dates=['Timestamp'])[packet_columns]

                df, ho_df, empty_data = data_aligner(packet_cache[mode].copy(), ho_df_raw.copy())

                if empty_data:
                    print('******** Empty Data:', mode_mets, '********')
                    continue

                Register = self.setup_profile(df, ho_df, mode, mets, scope)

                # Append "Register" for each trace
                for tag in scope.keys():
                    register = Register[tag]
                    container = self.Container[mode_mets][tag]
                    container['dist_table'].append(Profile.dist_aggregate(register['dist_table'], mode, mets))
                    container['relative_loex_timestamp'].append(register['relative_loex_timestamp'])
                    container['relative_timestamp'].append(register['relative_timestamp'])
                    container['trigger_loex'].append(register['trigger_loex'])
                    container['interruption_time'].append(register['interruption_time'])
                    container['event_count'].append(len(register['interruption_time']))

        for mode_mets in self.metrics:
            mode, mets = mode_mets[:2], mode_mets[-4:]

            for tag in self.scope_agg[mode_mets].keys():
                container = self.Container[mode_mets][tag]
                profile = self.Profile[mode_mets][tag]

                profile['dist_table'] = Profile.dist_aggregate(container['dist_table'], mode, mets)

                # Flatten, sort and downsample the relative timestamps, then release the
                # per-trace lists. mean_downsample is the module-level helper; it is not
                # an attribute of Profile.
                for key in ('relative_loex_timestamp', 'relative_timestamp'):
                    merged = sorted(value for lst in container[key] for value in lst)
                    profile[key] = mean_downsample(merged)
                    container[key] = []

                for lst in container['trigger_loex']:
                    profile['trigger_loex'] += lst

                for lst in container['interruption_time']:
                    profile['interruption_time'] += lst

                profile['event_count'] += sum(container['event_count'])
    
    
    def modeling(self, sd_factor=3, w_size=0.01):
        for i, mode_mets in enumerate(self.metrics):
            mode, mets = mode_mets[:2], mode_mets[-4:]
            if mets == 'lost':
                RATE_TYPE = 'PLR'
            elif mets == 'excl':
                RATE_TYPE = 'ELR'
            
            # if i > 0:
            #     break
            
            scope = self.scope_agg[mode_mets]
            for tag in scope.keys():
                # print(tag)
                left_bound, right_bound = scope[tag]
                table = self.Profile[mode_mets][tag]['dist_table']
                loex_data = self.Profile[mode_mets][tag]['relative_loex_timestamp']
                xmit_data = self.Profile[mode_mets][tag]['relative_timestamp']
                trigger_lst = self.Profile[mode_mets][tag]['trigger_loex']
                
                self.hist_models[mode_mets][tag] = table.copy()
                
                if len(trigger_lst) == 0:
                    continue
                
                estimated_p = sum(trigger_lst) / len(trigger_lst)
                self.prob_models[mode_mets][tag] = estimated_p
                
                if len(loex_data) == 0:
                    continue
                
                if tag == 'Stable':
                    mean = 0
                    left_bound, right_bound = -0.5, 0.5
                else:
                    loex_table = table[table[mets] > 0].reset_index(drop=True)
                    mean, std_dev = np.mean(loex_data), np.std(loex_data)
                    left_bound = math.floor(max(left_bound, mean - sd_factor * std_dev, loex_table.iloc[0]['window_id']) * 10) / 10
                    right_bound = math.ceil(min(right_bound, mean + sd_factor * std_dev, loex_table.iloc[-1]['window_id']) * 10) / 10
                
                self.scope_models[mode_mets][tag] = (left_bound, right_bound)
                
                x = np.asarray(table['window_id'], dtype=np.float64)
                y = np.asarray(table[RATE_TYPE], dtype=np.float64)
                
                # 計算直方圖的面積
                hist_area = Profile.total_area_histogram_with_centers(x, y, w_size)
                # print("Total area of histogram:", hist_area)
                
                # kde1 = gaussian_kde(loex_data)
                # kde2 = gaussian_kde(xmit_data)
                # def kde(x):
                #     kde2_values = kde2(x)
                #     # 檢查 kde2 是否為零，如果是則返回一個小的非零值
                #     kde2_values[kde2_values == 0] = 1
                #     return kde1(x) / kde2_values
                kde = gaussian_kde(loex_data)
                
                # 計算 KDE 下的總面積（無窮積分）
                # kde_area = Profile.total_area_kde(kde)
                # 計算 KDE 下的總面積（只計算正負2.5個標準差內的點，理論上 scalar 會稍微高估，但不會太多）
                kde_area = Profile.total_area_kde(kde, left_bound, right_bound)
                # print("Total area under KDE:", kde_area)
                
                scalar = hist_area / kde_area
                # print("Scalar:", scalar)
                
                self.kde_models[mode_mets][tag] = (scalar, kde)
    
    
    def plot(self, sd_factor=3):
        for i, mode_mets in enumerate(self.metrics):
            mode, mets = mode_mets[:2], mode_mets[-4:]
            if mets == 'lost':
                RATE_TYPE = 'PLR'
            elif mets == 'excl':
                RATE_TYPE = 'ELR'
            
            # if i > 0:
            #     break
            
            scope = self.scope_agg[mode_mets]
            for tag in scope.keys():
                print('===================================================================================')
                print(tag)
                
                loex_data = self.Profile[mode_mets][tag]['relative_loex_timestamp']
                xmit_data = self.Profile[mode_mets][tag]['relative_timestamp']
                trigger_lst = self.Profile[mode_mets][tag]['trigger_loex']
                
                left_bound, right_bound = self.scope_models[mode_mets][tag]
                table = self.hist_models[mode_mets][tag]
                scalar, kde = self.kde_models[mode_mets][tag]
                
                if len(loex_data) == 0:
                    continue
                
                fig, ax = plt.subplots(figsize=(6, 4))
                
                x = np.asarray(table['window_id'], dtype=np.float64)
                y = np.asarray(table[RATE_TYPE], dtype=np.float64)
                ax.bar(x, y, label='hist', width=0.01, alpha=0.97)
                
                # ax = ax.twinx()
                
                x = np.linspace(min(xmit_data), max(xmit_data), 1000)
                density = scalar * kde(x)
                
                ax.fill_between(x, density, label='KDE', color='tab:orange', alpha=0.45, linewidth=0)
        
                # find the scope and boundaries
                # ax.axvline(x=mean, color='red', linestyle='--', label='Mean')
                ax.axvline(x=left_bound, color='blue', linestyle='--', label=f'-{sd_factor} Std')
                ax.axvline(x=right_bound, color='blue', linestyle='--', label=f'+{sd_factor} Std')
                
                bottom, top = ax.get_ylim()
                ax.text(left_bound, bottom-0.05*(top-bottom), '{:.1f}'.format(left_bound), ha='center', fontweight='bold', fontsize=10, color='blue')
                ax.text(right_bound, bottom-0.05*(top-bottom), '{:.1f}'.format(right_bound), ha='center', fontweight='bold', fontsize=10, color='blue')
                
                if mode == 'dl':
                    if mets == 'lost':
                        ax.set_title(f'Downlink PLR: {tag}')
                    elif mets == 'excl':
                        ax.set_title(f'Downlink ELR: {tag}')
                elif mode == 'ul':
                    if mets == 'lost':
                        ax.set_title(f'Uplink PLR: {tag}')
                    elif mets == 'excl':
                        ax.set_title(f'Uplink ELR: {tag}')
                
                if mets == 'lost':
                    ax.set_ylabel('Packet Loss Rate (%)')
                elif mets == 'excl':
                    ax.set_ylabel('Excessive Latency Rate (%)')
                ax.set_xlabel('Relative Timestamp (sec)')
                
                ax.legend()
                
                plt.gcf().autofmt_xdate()
                plt.show()
    
    
    def save_models(self):
        with open(f'{self.model_name}_kde_models.pkl', 'wb') as f:
            pickle.dump(self.kde_models, f)
        with open(f'{self.model_name}_hist_models.pkl', 'wb') as f:
            pickle.dump(self.hist_models, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(f'{self.model_name}_scope_models.pkl', 'wb') as f:
            pickle.dump(self.scope_models, f)
        with open(f'{self.model_name}_sr_prob_models.pkl', 'wb') as f:
            pickle.dump(self.prob_models, f)

# Profile Unit Test

In [20]:
# DataLoader: Single Radio Example
# data_loader / data_aligner are not defined in this notebook; presumably they come
# from `from my_utils import *` — TODO confirm.
dates = data_loader(query_dates=True)

# Dates are compared as strings, so this relies on the 'YYYY-MM-DD...' naming of the date folders
selected_dates = [s for s in dates if s >= '2023-09-12']
# excluded_dates = []
# selected_exps = []
# excluded_exps = ['Modem_Action_Test', 'Control_Group', 'Control_Group2', 'Control_Group3']
# selected_routes = ['BR']
# excluded_routes = []
filepaths = data_loader(mode='sr', selected_dates=selected_dates)

# Inspect the first trace only; per the printed output, entry 0 is the handover log
# and entry 1 the downlink loss/latency CSV
filepath = filepaths[0]
pprint(filepath)

ho_df = pd.read_csv(filepath[0], parse_dates=['start', 'end'])
df = pd.read_csv(filepath[1], parse_dates=['Timestamp'])[['seq', 'Timestamp', 'lost', 'excl', 'latency']]
# NOTE(review): `df` and `ho_df` are overwritten with the aligned versions, so re-running
# only this line feeds already-aligned frames back into data_aligner.
df, ho_df, empty_data = data_aligner(df, ho_df)

display(df)

['/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/handover_info_log.csv',
 '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/udp_dnlk_loss_latency.csv',
 '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/udp_uplk_loss_latency.csv',
 '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_rrc.csv',
 '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_ml1.csv',
 '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_nr_ml1.csv']


Unnamed: 0,seq,Timestamp,lost,excl,latency
0,2501,2023-09-12 13:34:16.245625,False,False,0.015184
1,2502,2023-09-12 13:34:16.247625,False,False,0.013184
2,2503,2023-09-12 13:34:16.249625,False,False,0.016496
3,2504,2023-09-12 13:34:16.251626,False,False,0.014495
4,2505,2023-09-12 13:34:16.253626,False,False,0.012495
...,...,...,...,...,...
1417486,1419987,2023-09-12 14:21:31.350104,False,False,-0.013385
1417487,1419988,2023-09-12 14:21:31.352104,False,False,-0.009389
1417488,1419989,2023-09-12 14:21:31.354104,False,False,-0.011389
1417489,1419990,2023-09-12 14:21:31.356104,False,False,-0.013389


In [13]:
# Keep only the first trace for the unit test below.
# NOTE(review): this cell carries execution count 13 while its neighbours are 20-22, i.e. it
# was run out of order; it also rebinds `filepaths` to a slice of itself, so the full list
# is lost until the loader cell is re-run.
filepaths = filepaths[0:1]
filepaths

[['/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/handover_info_log.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/udp_dnlk_loss_latency.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/udp_uplk_loss_latency.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_rrc.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_ml1.csv',
  '/Users/jackbedford/Desktop/MOXA/Code/data/2023-09-12-2/UDP_Bandlock_9S_Phone_Brown/sm00/#01/data/diag_log_sm00_2023-09-12_13-34-15_nr_ml1.csv']]

In [21]:
# Only initialises the containers: Profile.__init__ leaves construct_profile() and
# modeling() commented out, so no file is read here.
obj = Profile(filepaths)

In [22]:
# Show the per-metric / per-tag accumulators; every list is still empty at this point
obj.Container

{'dl_lost': {'LTEH': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count': []},
  'ENBH': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count': []},
  'MCGH': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count': []},
  'MNBH': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count': []},
  'SCGM': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count': []},
  'SCGA': {'dist_table': [],
   'relative_loex_timestamp': [],
   'relative_timestamp': [],
   'interruption_time': [],
   'trigger_loex': [],
   'event_count'