# Import & Define Functions

In [28]:
import pandas as pd
import numpy as np
import datetime as dt
import swifter
from collections import namedtuple
import matplotlib.pyplot as plt
from pprint import pprint
import portion as P

pd.set_option('display.max_columns', 200)
# pd.set_option('display.max_rows', 200)

class myQueue:
    def __init__(self, maxsize=0):
        self.data = []
        self.maxsize = maxsize if maxsize > 0 else float('inf')
    def tolist(self):
        return self.data
    def size(self):
        return self.maxsize
    def len(self):
        return len(self.data)
    def empty(self):
        return self.len() == 0
    def full(self):
        return self.len() == self.maxsize
    def pop(self, index=0):
        """
        if index > 0, recursively pop() until pop out the specific element.
        return the final popped-out element.
        """
        for _ in range(index, 0, -1):
            self.pop()
        return self.data.pop(0) if not self.empty() else None
    def push(self, element):
        """
        return 0 if success; 1 if the front is popped.
        """
        flag = 0
        if self.full():
            self.pop()
            flag = 1
        self.data.append(element)
        return flag
    def front(self):
        return self.data[0] if not self.empty() else None
    def rear(self):
        return self.data[-1] if not self.empty() else None
    def get(self, index):
        if isinstance(index, list):
            tmp = []
            for i in index:
                tmp = [*tmp, self.get(i)]
            return tmp
        return self.data[index] if index < self.len() and abs(index) <= self.len() else None
    def find(self, element):
        if isinstance(element, list):
            for ele in element:
                index = self.find(ele)
                if index != None:
                    return index
            return None
        return self.data.index(element) if element in self.data else None

## handover parsing

In [29]:
def mi_parse_ho(df, tz=0, debug=False):
    df['Timestamp'] = pd.to_datetime(df['Timestamp']) + pd.Timedelta(hours=tz)
    
    ### Define Basic Element
    HO = namedtuple('HO', 'start, end, cause, others', defaults=tuple([None]*4))
    stNR = namedtuple('stNR', 'snrPCI, tnrPCI', defaults=tuple([None]*2))
    stLTE = namedtuple('stLTE', 'sPCI, sFreq, tPCI, tFreq', defaults=tuple([None]*4))
    NR_CEL = namedtuple('NR_CEL', 'nrPCI, nrFreq', defaults=tuple([None]*2))
    LTE_CEL = namedtuple('LTE_CEL', 'ePCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW', defaults=tuple([None]*8))
    C = namedtuple('C', HO._fields + stLTE._fields + stNR._fields + \
        LTE_CEL._fields + tuple([f'{s}1' for s in LTE_CEL._fields]) + NR_CEL._fields + tuple([f'{s}1' for s in NR_CEL._fields]), 
        defaults=tuple([None]*30))
    
    def dprint(*args, **kwargs):
        if debug:
            print(*args, **kwargs)
    
    def NR_OTA(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if row.type_id == '5G_NR_RRC_OTA_Packet':
            return True
        else:
            return False
    
    def CEL_INFO(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if row.type_id == 'LTE_RRC_Serv_Cell_Info':
            return True
        else:
            return False
    
    def nr_track(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        if int(row.PCI) in [0, 65535]:  # 65535 is for samgsung; 0 is for xiaomi.
            return NR_CEL()
        else:
            return NR_CEL(int(row.PCI), int(row.Freq))
    
    def eci_track(pos=None):
        row = df.iloc[pos] if pos else df.iloc[i]
        PCI = int(row['PCI'])
        ECI = int(row['Cell Identity'])
        eNB = ECI // 256
        BID = int(row['Band ID'])
        DL_Freq = int(row['DL frequency'])
        DL_BW = row['DL bandwidth']
        UL_Freq = int(row['UL frequency'])
        UL_BW = row['UL bandwidth']
        return LTE_CEL(PCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW)
    
    def peek_nr(pos=None, look_after=0.5, look_before=0.0):
        ## look_after == 0.5 is a magic number
        ### TODO 先偷看 ho start - end 之間的 cell information
        if pos:  # position of end of an event
            for j in range(i, pos):
                if NR_OTA(j):
                    qpscell.push(nr_track(j))
        ### END TODO
        # dprint(f'pscell={pscell}')
        # dprint(qpscell.tolist())
        index = None
        for j in range(qpscell.len()):
            if pscell != qpscell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index != None:
            return qpscell.pop(index)
        ### haven't find pci change yet!
        t = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != '5G_NR_RRC_OTA_Packet':
                continue
            row = df.iloc[j]
            if int(row.PCI) in [0, 65535]:  # 65535 is for samgsung; 0 is for xiaomi.
                return NR_CEL()
            else:
                return NR_CEL(int(row.PCI), int(row.Freq))
        return pscell
    
    def peek_eci(pos=None, look_after=0.5, look_before=0.0):
        ## look_after == 0.5 is a magic number
        ### TODO 先偷看 ho start - end 之間的 cell information
        if pos:  # position of end of an event
            for j in range(i, pos):
                if CEL_INFO(j):
                    qpcell.push(eci_track(j))
        ### END TODO
        # dprint(f'pcell={pcell}')
        # dprint(qpcell.tolist())
        index = None
        for j in range(qpcell.len()):
            if pcell != qpcell.get(j):
                index = j
                break
        # dprint(f'index={index}')
        if index != None:
            return qpcell.pop(index)
        ### haven't find pci change yet!
        t = df['Timestamp'].iloc[i]
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df['Timestamp'].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                break
            if df['type_id'].iloc[j] != 'LTE_RRC_Serv_Cell_Info':
                continue
            row = df.iloc[j]
            PCI = int(row['PCI'])
            ECI = int(row['Cell Identity'])
            eNB = ECI // 256
            BID = int(row['Band ID'])
            DL_Freq = int(row['DL frequency'])
            DL_BW = row['DL bandwidth']
            UL_Freq = int(row['UL frequency'])
            UL_BW = row['UL bandwidth']
            return LTE_CEL(PCI, ECI, eNB, BID, DL_Freq, DL_BW, UL_Freq, UL_BW)
        return pcell

    def find_1st_after(target, look_after=1.0):
        for j in range(i, len(df)):  # 往前走，最多走到底
            t1 = df["Timestamp"].iloc[j]
            if (t1 - t).total_seconds() > look_after:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    def find_1st_before(target, look_before=1.0):
        for j in range(i, -1, -1):  # 倒退嚕，最多走回頭
            t1 = df["Timestamp"].iloc[j]
            if (t - t1).total_seconds() > look_before:
                return None, None
            if df[target].iloc[j] in [1,'1']:
                return t1, j  # timestamp & position
        return None, None

    D = {
        ### Conn Setup/Rel & HO
        'Conn_Rel':[],    # Conn Release: rrcConnectionRelease
        'Conn_Setup':[],  # Conn Setup: rrcConnectionRequest + rrcConnectionSetup
        'LTE_HO': [],     # E_PCel -> E_PCel’: lte-rrc.t304 & LTE_PCel does change
        'SN_Rel': [],     # EUTRA + NR -> EUTRA:(CHT) lte-rrc.t304 & LTE_PCel does not change
                          #                     (TWM) nr-Config-r15: release (0) 
        'SN_Setup': [],   # EUTRA -> EUTRA + NR:(CHT) lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does not change
                          #                     (TWM) nr-rrc.t304 + dualConnectivityPHR: setup (1)
        'MN_HO': [],      # E_PCel + N_PSCel -> E_PCel’ + N_PSCel: lte-rrc.t304 + nr-rrc.t304 + dualConnectivityPHR: setup (1) & LTE_PCel does change
        'SN_HO': [],      # E_PCel + N_PSCel -> E_PCel + N_PSCel’: nr-rrc.t304
        'MNSN_HO': [],         # (TWM)
        'SN_Rel_MN_HO': [],    # (TWM)
        'SN_Setup_MN_HO': [],  # (TWM)
        ### Link Failure
        'SCG_Failure': [],   # scgFailureInformationNR-r15
        'MCG_Failure': [],   # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentComplete
        'NAS_Recovery': [],  # rrcConnectionReestablishmentRequest + rrcConnectionReestablishmentReject + rrcConnectionRequest + rrcConnectionSetup
        # MCG_Failure, NAS_Recovery may be caused by 'reconfigurationFailure (0)', 'handoverFailure (1)', 'otherFailure (2)'
        }
    
    A = { 'Conn_Rel':[], 'Conn_Setup':[],
        'LTE_HO': [], 'SN_Rel': [], 'SN_Setup': [], 'MN_HO': [], 'SN_HO': [],
        'MNSN_HO': [], 'SN_Rel_MN_HO': [], 'SN_Setup_MN_HO': [],
        'SCG_Failure': [], 'MCG_Failure': [], 'NAS_Recovery': [] }
    
    qpscell = myQueue(3)
    qpcell = myQueue(3)
    
    init = 1
    pcell, pscell = LTE_CEL(), NR_CEL()
    prev_pci, prev_freq = None, None
    
    for i, row in df.iterrows():
        if NR_OTA():
            qpscell.push(nr_track())
            continue
        elif CEL_INFO():
            qpcell.push(eci_track())
            continue
        if init:
            t_init, pci_init, freq_init = row.Timestamp, int(row.PCI), int(row.Freq)
            pcell = LTE_CEL(ePCI=pci_init, DL_Freq=freq_init)
            dprint(f"{t_init} | Initial PCI={pci_init} EARFCN={freq_init}")
            dprint()
            init = 0
        
        t, pci, freq = row.Timestamp, int(row.PCI), int(row.Freq)
        
        if (prev_pci, prev_freq) != (pci, freq):
            for j in range(i, len(df)):  # 往前走，最多走到底
                if CEL_INFO(j):
                    next_pcell = eci_track(j)
                    if next_pcell[0] == pci:
                        qpcell.push(next_pcell)
                        break
                elif not NR_OTA(j):
                    if df['PCI'].iloc[j] != pci:
                        break
        
        if not qpscell.empty():
            pscell = qpscell.pop()
        if not qpcell.empty():
            pcell = qpcell.pop()
        
        ### Conn_Rel
        if df["rrcConnectionRelease"].iloc[i] == 1:
            D['Conn_Rel'].append(HO(start=t))
            A['Conn_Rel'].append(C(*HO(start=t), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *LTE_CEL(), *pscell, *NR_CEL()))
            dprint(f"{t}, {pd.NaT} | Conn_Rel at PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(LTE_CEL())}')
            dprint(f'{tuple(pscell)} ->{tuple(NR_CEL())}')
            pcell, pscell = LTE_CEL(), NR_CEL()
            dprint()

        ### Conn_Setup
        if df["rrcConnectionRequest"].iloc[i] == 1:
            a, j1 = find_1st_after('rrcConnectionReconfigurationComplete',look_after=2)
            b, j2 = find_1st_after('securityModeComplete',look_after=2)
            end = a if a > b else b
            j = j1 if a > b else j2
            _pcell = peek_eci(pos=j)
            D['Conn_Setup'].append(HO(start=t, end=end))
            A['Conn_Setup'].append(C(*HO(start=t, end=end), *stLTE(tPCI=pci, tFreq=freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
            dprint(f"{t}, {end} | Conn_Setup to PCI={pci} EARFCN={freq}.")
            dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
            dprint()
        
        ### SN_Setup, SN_Rel, MO_HO, LTE_HO
        if df["lte-rrc.t304"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            serv_cell, target_cell = pci, int(df['lte_targetPhysCellId'].iloc[i])
            serv_freq, target_freq = freq, int(df['dl-CarrierFreq'].iloc[i])
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            
            if df["SCellToAddMod-r10"].iloc[i] == 1:
                n =len(str(df["SCellIndex-r10.1"].iloc[i]).split('@'))
                others=f'Set up {n} SCell.'
            else:
                others=None
            
            if serv_freq != target_freq:
                others = f'{others} Inter-Freq HO.' if others else 'Inter-Freq HO.'
            
            ### SN_Setup, MN_HO
            if df["nr-rrc.t304"].iloc[i] == 1 and df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
                ### SN_Setup
                if serv_cell == target_cell and serv_freq == target_freq:
                    _pscell = peek_nr(pos=j)
                    D['SN_Setup'].append(HO(start=t, end=end, others=others))
                    A['SN_Setup'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
                    dprint(f"{t}, {end} | SN_Setup to nrPCI={nr_target_cell} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
                    dprint()
                else:
                ### MN_HO
                    _pcell = peek_eci(pos=j)
                    D['MN_HO'].append(HO(start=t, end=end, others=others))
                    A['MN_HO'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | MN_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()
            else:
            ### SN_Rel, LTE_HO
                ### SN_Rel
                if serv_cell == target_cell and serv_freq == target_freq:
                    a, b = find_1st_before("scgFailureInformationNR-r15")
                    if a is not None:
                        others = f'{others} Caused by scg-failure.' if others else 'Caused by scg-failure.'
                    D['SN_Rel'].append(HO(start=t, end=end, others=others))
                    A['SN_Rel'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *NR_CEL()))
                    dprint(f"{t}, {end} | SN_Rel at nrPCI={pscell[0]} | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                    pscell = NR_CEL()
                    dprint()
                else:
                ### LTE_HO
                    _pcell = peek_eci(pos=j)
                    D['LTE_HO'].append(HO(start=t, end=end, others=others))
                    A['LTE_HO'].append(C(*HO(start=t, end=end, others=others), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(), *pcell, *_pcell, *pscell, *pscell))
                    dprint(f"{t}, {end} | LTE_HO ({serv_cell}, {serv_freq}) -> ({target_cell}, {target_freq}) | {others}")
                    dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                    dprint(f'{tuple(pscell)} -> {tuple(pscell)}')
                    dprint()

        ### SN_HO
        if df["nr-rrc.t304"].iloc[i] == 1 and not df["dualConnectivityPHR: setup (1)"].iloc[i] == 1:
            end, j = find_1st_after('rrcConnectionReconfigurationComplete')
            nr_target_cell = int(df["nr_physCellId"].iloc[i])
            _pscell = peek_nr(pos=j)
            D['SN_HO'].append(HO(start=t, end=end))
            A['SN_HO'].append(C(*HO(start=t, end=end), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0], tnrPCI=nr_target_cell), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {end} | SN_HO to nrPCI={nr_target_cell}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            dprint()

        ### SCG_Failure
        if df["scgFailureInformationNR-r15"].iloc[i] == 1:
            # others = df["failureType-r15"].iloc[i]
            cause = df["failureType-r15"].iloc[i]
            _pscell = peek_nr()
            D['SCG_Failure'].append(HO(start=t, cause=cause))  # end time??
            A['SCG_Failure'].append(C(*HO(start=t, cause=cause), *stLTE(sPCI=pci, sFreq=freq), *stNR(snrPCI=pscell[0]), *pcell, *pcell, *pscell, *_pscell))
            dprint(f"{t}, {pd.NaT} | SCG_Failure at nrPCI={pscell[0]} | {cause}")
            dprint(f'{tuple(pcell)} -> {tuple(pcell)}')
            dprint(f'{tuple(pscell)} -> {tuple(_pscell)}')
            ### SCG Fail 之後必定會 SN Rel
            dprint()
        
        ### MCG_Failure (type II), NAS_Recovery (type III)
        if df["rrcConnectionReestablishmentRequest"].iloc[i] == 1:
            end1, j1 = find_1st_after('rrcConnectionReestablishmentComplete', look_after=1)
            end2, j2 = find_1st_after('rrcConnectionReestablishmentReject', look_after=1)
            end3, j3 = find_1st_after('rrcConnectionRequest', look_after=1)
            # others = df["reestablishmentCause"].iloc[i]
            cause = df["reestablishmentCause"].iloc[i]
            # target_cell = int(df['physCellId.3'].iloc[i])
            serv_cell, target_cell = pci, int(df['physCellId.3'].iloc[i])
            serv_freq, target_freq = freq, None
            
            ### MCG_Failure (type II)
            if (end1 and not end2) or (end1 and end2 and end1 < end2):
                # dprint(end1, end2)
                end, j = end1, j1
                _pcell = peek_eci()
                D['MCG_Failure'].append(HO(start=t, end=end, cause=cause))
                A['MCG_Failure'].append(C(*HO(start=t, end=end, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | MCG_Failure PCI={serv_cell} -> PCI={target_cell}, recconected to {pci} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
                ### MCG Fail 之後有機會不經過 RRC Connection Setup 就 Reconnect
            else: 
            ### NAS_Recovery (type III)
                # dprint(end1, end2)
                end, j = end3, j3
                _pcell = peek_eci()
                D['NAS_Recovery'].append(HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause))  # end time??
                A['NAS_Recovery'].append(C(*HO(start=t, end=end-pd.Timedelta(microseconds=1) if end else None, cause=cause), *stLTE(sPCI=serv_cell, sFreq=serv_freq, tPCI=target_cell, tFreq=target_freq), *stNR(snrPCI=pscell[0]), *pcell, *_pcell, *pscell, *NR_CEL()))
                dprint(f"{t}, {end} | NAS_Recovery PCI={serv_cell} -> PCI={target_cell} | {cause}")
                dprint(f'{tuple(pcell)} -> {tuple(_pcell)}')
                dprint(f'{tuple(pscell)} -> {tuple(NR_CEL())}')
                pscell = NR_CEL()
                dprint()
        
        ### Update previous pci, freq
        prev_pci, prev_freq = pci, freq
    
    ### Build DataFrame
    df_HO = pd.DataFrame()
    for key in A.keys():
        df_HO = pd.concat([df_HO, \
            pd.DataFrame(A[key], index=[key]*len(A[key]))])
    df_HO = df_HO.sort_values(by=['start']).reset_index()
    df_HO = df_HO.rename(columns={'index': 'ho_type'})
    df_HO = df_HO.reindex(
        ['start','end','ho_type','interrupt','sPCI','sFreq','tPCI','tFreq','snrPCI','tnrPCI','cause','others'] + \
            df_HO.columns.tolist()[df_HO.columns.get_loc('ePCI'):df_HO.columns.get_loc('nrFreq1')+1], axis=1)
    df_HO['start'] = pd.to_datetime(df_HO['start'])
    df_HO['end'] = pd.to_datetime(df_HO['end'])
    df_HO['Timestamp'] = df_HO['start']
    df_HO['Type_ID'] = 'RRC_OTA_Handover_Parsing'
    df_HO['interrupt'] = (df_HO['end'] - df_HO['start']).dt.total_seconds()
    ### Set dtypes
    df_HO['ho_type'] = df_HO['ho_type'].astype('category')
    df_HO['cause'] = df_HO['cause'].astype('category')
    df_HO['others'] = df_HO['others'].astype('string')
    df_HO['DL_BW'] = df_HO['DL_BW'].astype('category')
    df_HO['DL_BW1'] = df_HO['DL_BW1'].astype('category')
    df_HO['UL_BW'] = df_HO['UL_BW'].astype('category')
    df_HO['UL_BW1'] = df_HO['UL_BW1'].astype('category')
    for tag in df_HO.columns[df_HO.columns.get_loc('sPCI'):df_HO.columns.get_loc('nrFreq1')+1]:
        if tag not in ['cause','others','DL_BW','DL_BW1','UL_BW','UL_BW1']:
            df_HO[tag] = df_HO[tag].astype('Int32')
    df_HO['interrupt'] = df_HO['interrupt'].astype('float32')
    df_HO['Timestamp'] = pd.to_datetime(df_HO['Timestamp'])
    df_HO['Type_ID'] = df_HO['Type_ID'].astype('category')
    return A, D, df_HO

## set data (cell)

In [30]:
def set_data(df):
    df = df.rename(columns={
        'sequence.number':'sequence_num',
        'transmit.time':'transmit_time',
        'transmit.time_epoch':'transmit_time_epoch',
        'arrival.time':'arrival_time',
        'arrival.time_epoch':'arrival_time_epoch',
    })
    df['sequence_num'] = df['sequence_num'].astype('Int32')
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df['transmit_time'] = pd.to_datetime(df['transmit_time'])
    df['arrival_time'] = pd.to_datetime(df['arrival_time'])
    df['lost'] = df['lost'].astype('boolean')
    df['excl'] = df['excl'].astype('boolean')
    # df['Timestamp_epoch'] = df['Timestamp_epoch'].astype('float32')
    # df['transmit_time_epoch'] = df['transmit_time_epoch'].astype('float32')
    # df['arrival_time_epoch'] = df['arrival_time_epoch'].astype('float32')
    df['latency'] = df['latency'].astype('float32')
    df = df.drop(['Timestamp_epoch','transmit_time_epoch','arrival_time_epoch'], axis=1)
    return df

## handover classification labeling

In [31]:
def cut_head_tail(df_HO, df, mode='ul'):
    if mode == 'ul':
        start = df.iloc[0].transmit_time
        stop = df.iloc[-1].transmit_time
        df_HO = df_HO.query('Timestamp >= @start & Timestamp <= @stop').copy().reset_index(drop=True)
    if mode == 'dl':
        start = df.iloc[0].arrival_time
        stop = df.iloc[-1].arrival_time
        df_HO = df_HO.query('Timestamp >= @start & Timestamp <= @stop').copy().reset_index(drop=True)
    return df_HO

def is_disjoint(set1, set2):
    """
    Check if two sets are disjoint.
    """
    return (set1 & set2).empty

def is_disjoint_dict(E):
    test_intv = P.empty()
    for key, val in E.items():
        # print(key)
        for intv in val:
            if is_disjoint(test_intv, intv.interval):
                test_intv = test_intv | intv.interval
            else:
                print(key, intv.index)
                return False
    return True

def interp(x, y, ratio):
    """
    Interpolation

    Args:
        x, y (datetime.datetime): x < y
        ratio (float): a decimal numeral in a range [0, 1]; 0 means break at x, 1 means break at y.
    Returns:
        (datetime.datetime): breakpoint of interpolation
    """
    return x + (y - x) * ratio

def get_ho_interval(df, sec=(1, 3), ratio=0.5,
                 ignored=['Conn_Setup','Conn_Rel'],
                 handover=['LTE_HO','SN_Rel','SN_Setup','MN_HO','SN_HO','MNSN_HO','SN_Rel_MN_HO','SN_Setup_MN_HO'],
                 linkfailure=['SCG_Failure','MCG_Failure','NAS_Recovery']):
    
    HO_INTV = namedtuple('HO_INTV', 'index, interval, state1, state2, cause, interrupt, \
                                     ePCI, earfcn, nrPCI, ePCI1, earfcn1, nrPCI1', defaults=tuple([None]*12))
    
    def ignore_col(row):
        if row.ho_type in ignored:
            return False
        else:
            return True
    df = df[df.apply(ignore_col, axis=1)].reset_index(drop=True)
    
    column_names = []
    for type_name in handover + linkfailure:
        column_names += ["before_{}".format(type_name), "during_{}".format(type_name), "after_{}".format(type_name)]
    E = { col:[] for col in column_names }
    
    for i, row in df.iterrows():
        prior_row = df.iloc[i - 1] if i != 0 else None
        post_row = df.iloc[i + 1] if i != len(df)-1 else None
        ### peri_interval
        if pd.isna(row.end):
            peri_interval = P.singleton(row.start)
        else:
            peri_interval = P.closed(row.start, row.end)
        ### prior_interval
        C = row.start - pd.Timedelta(seconds=sec[0]) if row.ho_type in handover else row.start - pd.Timedelta(seconds=sec[1])
        D = row.start
        prior_interval = P.closedopen(C, D)
        if ratio != None and i != 0:
            A = max(prior_row.start, prior_row.end)
            B = max(prior_row.start, prior_row.end) + pd.Timedelta(seconds=sec[0]) if prior_row.ho_type in handover else max(prior_row.start, prior_row.end) + pd.Timedelta(seconds=sec[1])
            if P.openclosed(A, B).overlaps(prior_interval):
                # print("Overlaps with the previous!")
                bkp = interp(C, B, ratio)
                bkp = max(bkp, A)  # to avoid the breakpoint overlaps the previous event's duration
                # bkp = min(max(bkp, A), D)  # 我不侵犯到其他任何人，代表其他人也不會侵犯到我！
                prior_interval = P.closedopen(bkp, D)
                if A in prior_interval:
                    prior_interval = P.open(bkp, D)
                # blindly set as open inverval is fine, but may miss one point.
        ### post_interval
        C = row.end
        D = row.end + pd.Timedelta(seconds=sec[0]) if row.ho_type in handover else row.end + pd.Timedelta(seconds=sec[1])
        post_interval = P.openclosed(C, D)
        if ratio != None and i != len(df)-1:
            A = min(post_row.start, post_row.end) - pd.Timedelta(seconds=sec[0]) if post_row.ho_type in handover else min(post_row.start, post_row.end) - pd.Timedelta(seconds=sec[1])
            B = min(post_row.start, post_row.end)
            if P.closedopen(A, B).overlaps(post_interval):
                # print("Overlaps with the following!")
                bkp = interp(A, D, ratio)
                bkp = min(bkp, B)  # to avoid the breakpoint overlaps the following event's duration
                # bkp = max(min(bkp, B), C)  # 我不侵犯到其他任何人，代表其他人也不會侵犯到我！
                post_interval = P.open(C, bkp)
        ### append dictionary
        type_name = row.ho_type
        state1, state2= pd.NA, pd.NA
        if type_name in ['LTE_HO','MN_HO','MNSN_HO','SN_Rel_MN_HO','SN_Setup_MN_HO']:
            state1 = 'inter_freq' if row.sFreq != row.tFreq else 'intra_freq'
            if row.eNB != row.eNB1:
                state2 = 'inter_enb'
            elif row.sPCI != row.tPCI:
                state2 = 'inter_sector'
            else:
                state2 = 'intra_sector'
        E[f'before_{type_name}'].append(HO_INTV(i, prior_interval, state1, state2, row.cause, row.interrupt, row.sPCI, row.sFreq, row.snrPCI, row.tPCI, row.tFreq, row.tnrPCI))
        E[f'during_{type_name}'].append(HO_INTV(i, peri_interval, state1, state2, row.cause, row.interrupt, row.sPCI, row.sFreq, row.snrPCI, row.tPCI, row.tFreq, row.tnrPCI))
        E[f'after_{type_name}'].append(HO_INTV(i, post_interval, state1, state2, row.cause, row.interrupt, row.sPCI, row.sFreq, row.snrPCI, row.tPCI, row.tFreq, row.tnrPCI))
        ### check whether the intervals are pairwise disjoint
        if not is_disjoint_dict(E):
            print('Warning: Intervals are not totally disjoint!')
    return E

def label_ho_info(df, E, mode='ul'):
    def removeprefix(string, prefix=['before','during','after']):
        for pref in prefix:
            if string.startswith(pref):
                return pref, string[len(pref)+1:]
        return None, string
    
    df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
               'ho_ePCI','ho_earfcn','ho_nrPCI','ho_ePCI1','ho_earfcn1','ho_nrPCI1')] = \
               [-1, 'stable', 'stable', 'stable', 'stable', 'no failure', None,
               None, None, None, None, None, None]
            
    for key, val in E.items():
        pref, key = removeprefix(key)
        for intv in val:
            if intv.interval.empty:
                continue
            # print(pref, key)
            # print(intv.interval)
            if mode == 'ul':
                df.loc[(df['transmit_time'] >= intv.interval.lower) & (df['transmit_time'] <= intv.interval.upper),
                       ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
                        'ho_ePCI','ho_earfcn','ho_nrPCI','ho_ePCI1','ho_earfcn1','ho_nrPCI1')] = \
                        [intv.index, pref, key, intv.state1, intv.state2, intv.cause, intv.interrupt,
                        intv.ePCI, intv.earfcn, intv.nrPCI, intv.ePCI1, intv.earfcn1, intv.nrPCI1]
            if mode == 'dl':
                df.loc[(df['arrival_time'] >= intv.interval.lower) & (df['arrival_time'] <= intv.interval.upper),
                       ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
                        'ho_ePCI','ho_earfcn','ho_nrPCI','ho_ePCI1','ho_earfcn1','ho_nrPCI1')] = \
                        [intv.index, pref, key, intv.state1, intv.state2, intv.cause, intv.interrupt,
                        intv.ePCI, intv.earfcn, intv.nrPCI, intv.ePCI1, intv.earfcn1, intv.nrPCI1]
    df['ho_index'] = df['ho_index'].astype('Int16')
    df['ho_state'] = df['ho_state'].astype('category')
    df['ho_type'] = df['ho_type'].astype('category')
    df['ho_type1'] = df['ho_type1'].astype('category')
    df['ho_type2'] = df['ho_type2'].astype('category')
    df['ho_fcause'] = df['ho_fcause'].astype('category')
    df['ho_interrupt'] = df['ho_interrupt'].astype('float32')
    for tag in df.columns[df.columns.get_loc('ho_ePCI'):df.columns.get_loc('ho_nrPCI1')+1]:
        df[tag] = df[tag].astype('Int32')
    return df

# Testing

In [32]:
df_ho = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_rrc.csv")
df_ul = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_uplk_loss_latency.csv")
df_dl = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_dnlk_loss_latency.csv")

_, _, df_ho = mi_parse_ho(df_ho.copy(), +8)
df_ul = set_data(df_ul.copy())
df_dl = set_data(df_dl.copy())

In [33]:
row = df_ho.iloc[2]
print(row.start)
print(row.end)
print(row.start > pd.to_datetime('2023-02-04 08:14:55.825220'))
print(row.start < pd.to_datetime('2023-02-04 08:14:55.825220'))
print(row.end > pd.to_datetime('2023-02-04 08:14:55.825220'))
print(row.end < pd.to_datetime('2023-02-04 08:14:55.825220'))

2023-02-04 16:15:09.036863
NaT
True
False
False
False


In [34]:
print(pd.to_datetime('2023'))
print(pd.to_datetime('2023-02'))
print(pd.to_datetime('2023-02-04'))
print(pd.to_datetime('2023-02-04 08'))
print(pd.to_datetime('2023-02-04 08:14'))
print(pd.to_datetime('2023-02-04 08:14:55'))
print(pd.to_datetime('2023-02-04 08:14:55.825220'))
print(pd.to_datetime('08:14'))
print(pd.to_datetime('08:14:55'))
print(pd.to_datetime('08:14:55.825220'))

2023-01-01 00:00:00
2023-02-01 00:00:00
2023-02-04 00:00:00
2023-02-04 08:00:00
2023-02-04 08:14:00
2023-02-04 08:14:55
2023-02-04 08:14:55.825220
2023-02-23 08:14:00
2023-02-23 08:14:55
2023-02-23 08:14:55.825220


In [35]:
print(df_ul.iloc[0].transmit_time)
print(df_ul.iloc[-1].transmit_time)
print(df_dl.iloc[0].arrival_time)
print(df_dl.iloc[-1].arrival_time)

2023-02-04 16:16:36.807362
2023-02-04 16:25:25.909159
2023-02-04 16:16:36.852106
2023-02-04 16:25:25.964639


## Classify

In [36]:
df_ho = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/diag_log_qc02_2023-02-04_16-13-28_rrc.csv")
df_ul = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_uplk_loss_latency.csv")
df_dl = pd.read_csv("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01/data/udp_dnlk_loss_latency.csv")

_, _, df_ho = mi_parse_ho(df_ho.copy(), +8)
df_ul = set_data(df_ul.copy())
df_dl = set_data(df_dl.copy())
df_ho_ul = cut_head_tail(df_ho.copy(), df_ul.copy(), mode='ul')
df_ho_dl = cut_head_tail(df_ho.copy(), df_dl.copy(), mode='dl')

### Uplink
E = get_ho_interval(df_ho_ul.copy())
df_ul = label_ho_info(df_ul.copy(), E, mode='ul')
df_ul.to_pickle('ho_classify_ul_example.pkl')

### Downlink
E = get_ho_interval(df_ho_dl.copy())
df_dl = label_ho_info(df_dl.copy(), E, mode='dl')
df_dl.to_pickle('ho_classify_dl_example.pkl')

  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


In [37]:
df = pd.read_pickle('ho_classify_ul_example.pkl')

In [38]:
df

Unnamed: 0,sequence_num,Timestamp,lost,excl,latency,transmit_time,arrival_time,ho_index,ho_state,ho_type,ho_type1,ho_type2,ho_fcause,ho_interrupt,ho_ePCI,ho_earfcn,ho_nrPCI,ho_ePCI1,ho_earfcn1,ho_nrPCI1
0,10001,2023-02-04 16:16:36.807340,False,False,0.050546,2023-02-04 16:16:36.807362,2023-02-04 16:16:36.857886,-1,stable,stable,stable,stable,no failure,,,,,,,
1,10002,2023-02-04 16:16:36.809340,False,False,0.053014,2023-02-04 16:16:36.809358,2023-02-04 16:16:36.862354,-1,stable,stable,stable,stable,no failure,,,,,,,
2,10003,2023-02-04 16:16:36.811340,False,False,0.051354,2023-02-04 16:16:36.811359,2023-02-04 16:16:36.862694,-1,stable,stable,stable,stable,no failure,,,,,,,
3,10004,2023-02-04 16:16:36.813340,False,False,0.051926,2023-02-04 16:16:36.813356,2023-02-04 16:16:36.865266,-1,stable,stable,stable,stable,no failure,,,,,,,
4,10005,2023-02-04 16:16:36.815340,False,False,0.052000,2023-02-04 16:16:36.815360,2023-02-04 16:16:36.867340,-1,stable,stable,stable,stable,no failure,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264515,274516,2023-02-04 16:25:25.901142,False,False,0.006791,2023-02-04 16:25:25.901157,2023-02-04 16:25:25.907933,-1,stable,stable,stable,stable,no failure,,,,,,,
264516,274517,2023-02-04 16:25:25.903142,False,False,0.009353,2023-02-04 16:25:25.903157,2023-02-04 16:25:25.912495,-1,stable,stable,stable,stable,no failure,,,,,,,
264517,274518,2023-02-04 16:25:25.905142,False,False,0.007959,2023-02-04 16:25:25.905158,2023-02-04 16:25:25.913101,-1,stable,stable,stable,stable,no failure,,,,,,,
264518,274519,2023-02-04 16:25:25.907143,False,False,0.010403,2023-02-04 16:25:25.907158,2023-02-04 16:25:25.917546,-1,stable,stable,stable,stable,no failure,,,,,,,


In [39]:
df.dtypes

sequence_num              Int32
Timestamp        datetime64[ns]
lost                    boolean
excl                    boolean
latency                 float32
transmit_time    datetime64[ns]
arrival_time     datetime64[ns]
ho_index                  Int16
ho_state               category
ho_type                category
ho_type1               category
ho_type2               category
ho_fcause              category
ho_interrupt            float32
ho_ePCI                   Int32
ho_earfcn                 Int32
ho_nrPCI                  Int32
ho_ePCI1                  Int32
ho_earfcn1                Int32
ho_nrPCI1                 Int32
dtype: object

In [40]:
df['ho_index'].count()

264520

In [41]:
df['ho_type'].unique()

['stable', 'SN_Rel', 'SN_Setup', 'SN_HO', 'MN_HO', 'NAS_Recovery', 'LTE_HO', 'MCG_Failure']
Categories (8, object): ['LTE_HO', 'MCG_Failure', 'MN_HO', 'NAS_Recovery', 'SN_HO', 'SN_Rel', 'SN_Setup', 'stable']

In [42]:
df['ho_state'].unique()

['stable', 'before', 'during', 'after']
Categories (4, object): ['after', 'before', 'during', 'stable']

In [43]:
df.loc[df['ho_state'] == 'before', 'ho_type'].value_counts()

MN_HO           8536
SN_HO           7518
NAS_Recovery    1500
SN_Rel          1233
MCG_Failure      568
LTE_HO           499
SN_Setup         244
stable             0
Name: ho_type, dtype: int64

# Start

In [44]:
from tqdm import tqdm
from pytictoc import TicToc
import os

database = "/home/wmnlab/D/database/"
dates = [
         "2023-02-04",
         "2023-02-04#1",
         "2023-02-04#2",
         ]
devices = sorted([
    "qc01",
    "qc02",
    "qc03",
])
exps = {  # experiment_name: (number_of_experiment_rounds, list_of_experiment_round)
            # If the list is None, it will not list as directories.
            # If the list is empty, it will list all directories in the current directory by default.
            # If the number of experiment times != the length of existing directories of list, it would trigger warning and skip the directory.
    "_Bandlock_Udp_B3_B7_B8_RM500Q": (2, []),
    "_Bandlock_Udp_all_RM500Q": (2, []),
}

def makedir(dirpath, mode=0):  # mode=1: show message, mode=0: hide message
    if os.path.isdir(dirpath):
        if mode:
            print("mkdir: cannot create directory '{}': directory has already existed.".format(dirpath))
        return
    ### recursively make directory
    _temp = []
    while not os.path.isdir(dirpath):
        _temp.append(dirpath)
        dirpath = os.path.dirname(dirpath)
    while _temp:
        dirpath = _temp.pop()
        print("mkdir", dirpath)
        os.mkdir(dirpath)

In [45]:
t = TicToc()  # create instance of class
t.tic()  # Start timer
# --------------------- (3) decode a batch of files (User Settings) ---------------------
for date in dates:
    for expr, (times, traces) in exps.items():
        print(os.path.join(database, date, expr))
        for dev in devices:
            if not os.path.isdir(os.path.join(database, date, expr, dev)):
                print("|___ {} does not exist.".format(os.path.join(database, date, expr, dev)))
                continue
            
            print("|___", os.path.join(database, date, expr, dev))
            if traces == None:
                # print(os.path.join(database, date, expr, dev))
                continue
            elif len(traces) == 0:
                traces = sorted(os.listdir(os.path.join(database, date, expr, dev)))
            
            print("|    ", times)
            traces = [trace for trace in traces if os.path.isdir(os.path.join(database, date, expr, dev, trace))]
            if len(traces) != times:
                print("***************************************************************************************")
                print("Warning: the number of traces does not match the specified number of experiment times.")
                print("***************************************************************************************")
            for trace in traces:
                print("|    |___", os.path.join(database, date, expr, dev, trace))
        print()

/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q
|___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01
|     2
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#02
|___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02
|     2
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#01
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02/#02
|___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03
|     2
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03/#01
|    |___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03/#02

/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_all_RM500Q
|___ /home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_all_RM500Q/qc01 does not exist.
|___ /home/wmnlab/D/database/2023-0

## Execute

In [46]:
### Read files
for date in dates:
    for expr, (times, traces) in exps.items():
        for dev in devices:
            if not os.path.isdir(os.path.join(database, date, expr, dev)):
                print("{} does not exist.\n".format(os.path.join(database, date, expr, dev)))
                continue

            if traces == None:
                print("------------------------------------------")
                print(date, expr, dev)
                print("------------------------------------------")
                source_dir = os.path.join(database, date, expr, dev)
                target_dir = os.path.join(database, date, expr, dev)
                makedir(target_dir)
                traces = sorted(os.listdir(os.path.join(database, date, expr, dev)))
                # filenames = os.listdir(source_dir)
            elif len(traces) == 0:
                traces = sorted(os.listdir(os.path.join(database, date, expr, dev)))
            
            traces = [trace for trace in traces if os.path.isdir(os.path.join(database, date, expr, dev, trace))]
            for trace in traces:
                print("------------------------------------------")
                print(date, expr, dev, trace)
                print("------------------------------------------")
                source_dir = os.path.join(database, date, expr, dev, trace, "data")
                target_dir1 = os.path.join(database, date, expr, dev, trace, "data")
                # target_dir2 = os.path.join(database, date, expr, dev, trace, "statistics")
                if expr == "tsync":
                    source_dir = os.path.join(database, date, expr, dev, trace)
                    target_dir = os.path.join(database, date, expr, dev, trace)
                makedir(target_dir1)
                # makedir(target_dir2)
                
                t1 = TicToc()  # create instance of class
                t1.tic()  # Start timer
                
                filenames = os.listdir(source_dir)
                filenames = [s for s in filenames if s.startswith('diag_log') and s.endswith('_rrc.csv')]
                print(filenames[0])
                
                df_ho = pd.read_csv(os.path.join(source_dir, filenames[0]))
                df_ul = pd.read_csv(os.path.join(source_dir, "udp_uplk_loss_latency.csv"))
                df_dl = pd.read_csv(os.path.join(source_dir, "udp_dnlk_loss_latency.csv"))

                _, _, df_ho = mi_parse_ho(df_ho.copy(), +8)
                df_ul = set_data(df_ul.copy())
                df_dl = set_data(df_dl.copy())
                df_ho_ul = cut_head_tail(df_ho.copy(), df_ul.copy(), mode='ul')
                df_ho_dl = cut_head_tail(df_ho.copy(), df_dl.copy(), mode='dl')

                ### Uplink
                E = get_ho_interval(df_ho_ul.copy())
                df_ul = label_ho_info(df_ul.copy(), E, mode='ul')
                df_ul.to_pickle(os.path.join(target_dir1, 'udp_uplk_loss_latency_ho.pkl'))

                ### Downlink
                E = get_ho_interval(df_ho_dl.copy())
                df_dl = label_ho_info(df_dl.copy(), E, mode='dl')
                df_dl.to_pickle(os.path.join(target_dir1, 'udp_dnlk_loss_latency_ho.pkl'))
                
                t1.toc()
t.toc()  # Time elapsed since t.tic()


------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc01 #01
------------------------------------------
diag_log_qc01_2023-02-04_14-57-22_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.437892 seconds.
------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc01 #02
------------------------------------------
diag_log_qc01_2023-02-04_15-56-34_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 2.539074 seconds.
------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc02 #01
------------------------------------------
diag_log_qc02_2023-02-04_14-57-22_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.501736 seconds.
------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc02 #02
------------------------------------------
diag_log_qc02_2023-02-04_15-56-34_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 2.631746 seconds.
------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc03 #01
------------------------------------------
diag_log_qc03_2023-02-04_14-57-22_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.584048 seconds.
------------------------------------------
2023-02-04 _Bandlock_Udp_B3_B7_B8_RM500Q qc03 #02
------------------------------------------
diag_log_qc03_2023-02-04_15-56-34_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 3.023879 seconds.
/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_all_RM500Q/qc01 does not exist.

/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_all_RM500Q/qc02 does not exist.

/home/wmnlab/D/database/2023-02-04/_Bandlock_Udp_all_RM500Q/qc03 does not exist.

------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc01 #01
------------------------------------------
diag_log_qc01_2023-02-04_16-13-28_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.802079 seconds.
------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc01 #02
------------------------------------------
diag_log_qc01_2023-02-04_16-30-29_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.534271 seconds.
------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc02 #01
------------------------------------------
diag_log_qc02_2023-02-04_16-13-28_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.315866 seconds.
------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc02 #02
------------------------------------------
diag_log_qc02_2023-02-04_16-30-29_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.755608 seconds.
------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc03 #01
------------------------------------------
diag_log_qc03_2023-02-04_16-13-28_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 5.112027 seconds.
------------------------------------------
2023-02-04#1 _Bandlock_Udp_B3_B7_B8_RM500Q qc03 #02
------------------------------------------
diag_log_qc03_2023-02-04_16-30-29_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 5.222859 seconds.
/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_all_RM500Q/qc01 does not exist.

/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_all_RM500Q/qc02 does not exist.

/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_all_RM500Q/qc03 does not exist.

/home/wmnlab/D/database/2023-02-04#2/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01 does not exist.

/home/wmnlab/D/database/2023-02-04#2/_Bandlock_Udp_B3_B7_B8_RM500Q/qc02 does not exist.

/home/wmnlab/D/database/2023-02-04#2/_Bandlock_Udp_B3_B7_B8_RM500Q/qc03 does not exist.

------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc01 #01
------------------------------------------
diag_log_qc01_2023-02-04_16-50-15_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 5.668210 seconds.
------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc01 #02
------------------------------------------
diag_log_qc01_2023-02-04_17-03-23_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.501450 seconds.
------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc02 #01
------------------------------------------
diag_log_qc02_2023-02-04_16-50-15_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 5.652726 seconds.
------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc02 #02
------------------------------------------
diag_log_qc02_2023-02-04_17-03-23_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.635631 seconds.
------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc03 #01
------------------------------------------
diag_log_qc03_2023-02-04_16-50-15_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 5.585834 seconds.
------------------------------------------
2023-02-04#2 _Bandlock_Udp_all_RM500Q qc03 #02
------------------------------------------
diag_log_qc03_2023-02-04_17-03-23_rrc.csv


  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',
  df.loc[:, ('ho_index','ho_state','ho_type','ho_type1','ho_type2','ho_fcause','ho_interrupt',


Elapsed time is 4.601235 seconds.
Elapsed time is 81.230229 seconds.


## Testing

In [47]:
df = pd.read_pickle("/home/wmnlab/D/database/2023-02-04#1/_Bandlock_Udp_B3_B7_B8_RM500Q/qc01/#01/data/udp_dnlk_loss_latency_ho.pkl")

In [48]:
df

Unnamed: 0,sequence_num,Timestamp,lost,excl,latency,transmit_time,arrival_time,ho_index,ho_state,ho_type,ho_type1,ho_type2,ho_fcause,ho_interrupt,ho_ePCI,ho_earfcn,ho_nrPCI,ho_ePCI1,ho_earfcn1,ho_nrPCI1
0,10001,2023-02-04 16:16:36.846746,False,False,0.008288,2023-02-04 16:16:36.846760,2023-02-04 16:16:36.855034,-1,stable,stable,stable,stable,no failure,,,,,,,
1,10002,2023-02-04 16:16:36.848746,False,False,0.011733,2023-02-04 16:16:36.848758,2023-02-04 16:16:36.860479,-1,stable,stable,stable,stable,no failure,,,,,,,
2,10003,2023-02-04 16:16:36.850746,False,False,0.009779,2023-02-04 16:16:36.850755,2023-02-04 16:16:36.860525,-1,stable,stable,stable,stable,no failure,,,,,,,
3,10004,2023-02-04 16:16:36.852746,False,False,0.007780,2023-02-04 16:16:36.852756,2023-02-04 16:16:36.860526,-1,stable,stable,stable,stable,no failure,,,,,,,
4,10005,2023-02-04 16:16:36.854747,False,False,0.005780,2023-02-04 16:16:36.854756,2023-02-04 16:16:36.860527,-1,stable,stable,stable,stable,no failure,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264515,274516,2023-02-04 16:25:25.901468,False,False,0.048541,2023-02-04 16:25:25.901475,2023-02-04 16:25:25.950009,-1,stable,stable,stable,stable,no failure,,,,,,,
264516,274517,2023-02-04 16:25:25.903468,False,False,0.052752,2023-02-04 16:25:25.903475,2023-02-04 16:25:25.956220,-1,stable,stable,stable,stable,no failure,,,,,,,
264517,274518,2023-02-04 16:25:25.905468,False,False,0.056539,2023-02-04 16:25:25.905475,2023-02-04 16:25:25.962007,-1,stable,stable,stable,stable,no failure,,,,,,,
264518,274519,2023-02-04 16:25:25.907468,False,False,0.054542,2023-02-04 16:25:25.907475,2023-02-04 16:25:25.962010,-1,stable,stable,stable,stable,no failure,,,,,,,


In [49]:
df.columns

Index(['sequence_num', 'Timestamp', 'lost', 'excl', 'latency', 'transmit_time',
       'arrival_time', 'ho_index', 'ho_state', 'ho_type', 'ho_type1',
       'ho_type2', 'ho_fcause', 'ho_interrupt', 'ho_ePCI', 'ho_earfcn',
       'ho_nrPCI', 'ho_ePCI1', 'ho_earfcn1', 'ho_nrPCI1'],
      dtype='object')

In [50]:
df.dtypes

sequence_num              Int32
Timestamp        datetime64[ns]
lost                    boolean
excl                    boolean
latency                 float32
transmit_time    datetime64[ns]
arrival_time     datetime64[ns]
ho_index                  Int16
ho_state               category
ho_type                category
ho_type1               category
ho_type2               category
ho_fcause              category
ho_interrupt            float32
ho_ePCI                   Int32
ho_earfcn                 Int32
ho_nrPCI                  Int32
ho_ePCI1                  Int32
ho_earfcn1                Int32
ho_nrPCI1                 Int32
dtype: object

In [51]:
df[df['excl'].isna()]

Unnamed: 0,sequence_num,Timestamp,lost,excl,latency,transmit_time,arrival_time,ho_index,ho_state,ho_type,ho_type1,ho_type2,ho_fcause,ho_interrupt,ho_ePCI,ho_earfcn,ho_nrPCI,ho_ePCI1,ho_earfcn1,ho_nrPCI1


In [52]:
df['latency'].value_counts()

inf         995
0.041227     22
0.025513     22
0.040411     22
0.041226     22
           ... 
0.090426      1
0.088427      1
0.086428      1
0.084452      1
0.058537      1
Name: latency, Length: 49939, dtype: int64

In [53]:
df['lost'].sum()

995

In [54]:
df['ho_type'].value_counts()

stable         230712
MN_HO           15295
SN_HO           12299
SN_Setup         3216
SN_Rel           1211
MCG_Failure      1081
LTE_HO            706
Name: ho_type, dtype: int64

In [55]:
df[df['lost'] == True]

Unnamed: 0,sequence_num,Timestamp,lost,excl,latency,transmit_time,arrival_time,ho_index,ho_state,ho_type,ho_type1,ho_type2,ho_fcause,ho_interrupt,ho_ePCI,ho_earfcn,ho_nrPCI,ho_ePCI1,ho_earfcn1,ho_nrPCI1
87138,97139,2023-02-04 16:19:31.130890,True,True,inf,2023-02-04 16:19:31.130897,2023-02-04 16:19:31.149368,7,before,MN_HO,intra_freq,inter_enb,,0.018926,266,1750,186,186,1750,
87139,97140,2023-02-04 16:19:31.132890,True,True,inf,2023-02-04 16:19:31.132898,2023-02-04 16:19:31.151374,7,before,MN_HO,intra_freq,inter_enb,,0.018926,266,1750,186,186,1750,
87140,97141,2023-02-04 16:19:31.134890,True,True,inf,2023-02-04 16:19:31.134898,2023-02-04 16:19:31.153380,7,before,MN_HO,intra_freq,inter_enb,,0.018926,266,1750,186,186,1750,
87141,97142,2023-02-04 16:19:31.136890,True,True,inf,2023-02-04 16:19:31.136899,2023-02-04 16:19:31.155388,7,before,MN_HO,intra_freq,inter_enb,,0.018926,266,1750,186,186,1750,
87142,97143,2023-02-04 16:19:31.138890,True,True,inf,2023-02-04 16:19:31.138898,2023-02-04 16:19:31.157394,7,before,MN_HO,intra_freq,inter_enb,,0.018926,266,1750,186,186,1750,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150669,160670,2023-02-04 16:21:38.198828,True,True,inf,2023-02-04 16:21:38.198833,2023-02-04 16:21:38.267931,25,after,SN_Setup,,,,0.017357,11,1750,,,,11
206419,216420,2023-02-04 16:23:29.704038,True,True,inf,2023-02-04 16:23:29.704046,2023-02-04 16:23:29.880011,-1,stable,stable,stable,stable,no failure,,,,,,,
206983,216984,2023-02-04 16:23:30.832091,True,True,inf,2023-02-04 16:23:30.832099,2023-02-04 16:23:31.160039,-1,stable,stable,stable,stable,no failure,,,,,,,
207002,217003,2023-02-04 16:23:30.870093,True,True,inf,2023-02-04 16:23:30.870100,2023-02-04 16:23:31.166081,-1,stable,stable,stable,stable,no failure,,,,,,,
