In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format = 'retina'

In [None]:
import csv
import functools
import glob
import json
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
import pandas as pd

In [None]:
# Parse 'Time' into datetimes and coerce the listed radio columns to numeric.
# With errors='coerce', values that cannot be parsed become NaN; no rows are
# dropped by this cell.
# NOTE(review): df_lte is not defined in the visible cells - load it before running.
df_lte['Time'] = pd.to_datetime(df_lte['Time'])
df_lte[["Band","EARFCN","RSRP","RSRQ"]] = df_lte[["Band","EARFCN","RSRP","RSRQ"]].apply(pd.to_numeric, errors='coerce')

In [None]:
# filter date
def range_filter(df, cat, start, end, date=None):
    """Return the rows of ``df`` whose ``cat`` value lies in a time-of-day window.

    The window is ``(date + start, date + end]``: the start bound is exclusive
    and the end bound is inclusive.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.
    cat : str
        Name of the column holding the timestamps to compare.
    start, end : str
        Time-of-day strings such as ``'00:19:30'``.
    date : str, datetime-like or None, optional
        Calendar day the window belongs to. ``None`` (the default) keeps the
        original behaviour of using today's date, which only matches data
        recorded today; pass the capture date to filter older logs.

    Returns
    -------
    pandas.DataFrame
        The matching rows, selected with ``df.loc``.
    """
    day = datetime.now() if date is None else pd.Timestamp(date)
    # Trailing space separates the date from the time-of-day string.
    date_str = day.strftime("%Y-%m-%d ")
    lower = date_str + start
    upper = date_str + end
    return df.loc[(df[cat] > lower) & (df[cat] <= upper)]

# Time-of-day window (HH:MM:SS) handed to range_filter as its start/end bounds.
start_time = '00:19:30'
end_time = '00:20:00'

# Keep only the rows of each frame whose 'Time' falls inside the window.
# NOTE(review): df_nr and df_lte are not defined in the visible cells; both are
# overwritten here, so re-running this cell filters the already-filtered frames.
df_nr = range_filter(df_nr, 'Time', start_time, end_time)
df_lte = range_filter(df_lte, 'Time', start_time, end_time)

In [None]:
# Load the combined coverage CSV, declaring the NSA RRC state column as pandas'
# 'string' dtype at read time.
df = pd.read_csv(
    './5GBeams-v0.9/City-Wide-Coverage/v0.9-SLOOP-OpX-Combined.csv',
    dtype={'5G-NR RRC NSA RRC State Info NSA RRC State': 'string'},
)

In [None]:
# twin plot
def twin_graph(df, x1, x2, ylabel1='Beam RSRQ', ylabel2='Cell'):
    """Plot two columns of ``df`` against ``df['Time']`` on twin y-axes.

    ``x1`` is drawn as a blue scatter on the left axis and ``x2`` as a green
    step line on the right axis; both share the 'Time' x-axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that must contain a 'Time' column plus the ``x1`` and ``x2`` columns.
    x1 : str
        Column plotted on the left axis.
    x2 : str
        Column plotted on the right (twin) axis.
    ylabel1, ylabel2 : str, optional
        Labels for the left and right y-axes. The defaults are the labels that
        used to be hard-coded, so existing calls render the same figure.

    Returns
    -------
    tuple
        ``(fig, ax1, ax2)`` so callers can keep formatting the figure (tick
        locators, date formatting, saving). End the call with ``;`` in a
        notebook cell to suppress the tuple repr.
    """
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    ax1.scatter(df['Time'], df[x1], s=2, color='b')
    ax1.set_xlabel('Time')
    ax1.set_ylabel(ylabel1, color='b')

    ax2.step(df['Time'], df[x2], color='g')
    ax2.set_ylabel(ylabel2, color='g')

    return fig, ax1, ax2

In [None]:
# drop the rows if tag is NaN
# dropna() returns a new frame, so the result has to be assigned back;
# calling it without assignment leaves df unchanged.
df = df.dropna(subset=['5G KPI PCell RF Best Beam SS-RSRQ [dB]'])
# replace NaN
# NOTE(review): df_tmp is not defined in the visible cells - confirm it is the intended frame.
df_tmp[['5G-NR RRC NSA RRC State Info NSA RRC State']] = df_tmp[['5G-NR RRC NSA RRC State Info NSA RRC State']].fillna('')
# drop all nan columns
df = df.dropna(axis=1, how='all')

In [None]:
# find slots
def find_range(d, start_v, end_v):
    """Locate the first ``start_v`` in ``d`` and the first ``end_v`` at or after it.

    Returns a ``(start, end)`` pair of positions. ``start`` is -1 when
    ``start_v`` never occurs, and ``end`` is -1 when no ``end_v`` is found in
    the slice ``d[start:]``.
    """
    first = next((pos for pos, item in enumerate(d) if item == start_v), -1)
    # The search for end_v runs over d[first:], so the offset is relative to `first`.
    offset = next(
        (step for step, item in enumerate(d[first:]) if item == end_v),
        None,
    )
    if offset is None:
        return first, -1
    return first, first + offset

In [None]:
# file walking
# Collect the paths of all .csv files under my_path, searching nested
# directories too (the '**' pattern together with recursive=True).
# NOTE(review): my_path is not defined in the visible cells - set it before running.
import glob
files = glob.glob(my_path + '/**/*.csv', recursive=True)

In [None]:
# 5G NR dataset processing
def stat(df, tag):
    """Print the distinct values of ``df[tag]`` and scatter-plot the column.

    The x-axis of the scatter is the row position ``0 .. len(df) - 1`` and the
    y-axis is the value of ``df[tag]`` in that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to inspect.
    tag : str
        Column name.
    """
    print(pd.unique(df[tag]))
    plt.scatter(range(len(df)), df[tag])
    
def cal_interval(df_time):
    """Print summary statistics of the gaps between consecutive timestamps.

    The gap between each element and its predecessor is computed in
    milliseconds; the five largest gaps (largest first), the five smallest gaps
    (smallest first) and the average gap are printed.

    Parameters
    ----------
    df_time : iterable
        Timestamps in the order they should be compared. The difference of two
        elements must provide ``total_seconds()``.

    Returns
    -------
    None
        Results are printed only. With fewer than two timestamps there is no
        gap to measure, so a short notice is printed instead.
    """
    stamps = list(df_time)
    if len(stamps) < 2:
        # Without this guard an empty input raised IndexError and a single
        # timestamp ended up averaging an empty array.
        print('Need at least two timestamps to compute intervals')
        return

    # Gap between every timestamp and the one before it, in milliseconds.
    gaps_ms = np.array([
        (current - previous).total_seconds() * 1000
        for previous, current in zip(stamps, stamps[1:])
    ])
    order = np.argsort(gaps_ms)
    print('Max ', gaps_ms[order[-5:][::-1]])
    print('Min ', gaps_ms[order[:5]])
    print('Avg ', np.average(gaps_ms))

# Catch time gaps
# Report every run of rows in which DF[serv_b] is null, when more than 3 seconds
# pass between the first null row and the next non-null row. For each such gap
# the index label and 'Time' of both rows are printed.
# NOTE(review): DF and serv_b are not defined in the visible cells.
flag = False  # True while we are inside a run of null rows
for index, row in DF.iterrows():
    if not flag:
        if pd.isnull(row[serv_b]):
            # First null row of a potential gap: remember its label and time.
            flag = True
            idx = index
            gap_start_time = row['Time']
    elif pd.notnull(row[serv_b]):
        # First non-null row after the run closes the gap.
        flag = False
        # Timestamps come from the iterated rows themselves: iterrows() yields
        # index *labels*, which are not valid .iloc positions once DF has been
        # filtered or re-indexed.
        if (row['Time'] - gap_start_time).total_seconds() > 3:
            print(idx, gap_start_time)
            print(index, row['Time'])

In [1]:
# CDF
def ecdf(a):
    """Empirical CDF of ``a``.

    Returns ``(values, probs)``: the sorted distinct values of ``a`` and, for
    each of them, the fraction of samples less than or equal to it.
    """
    values, freq = np.unique(a, return_counts=True)
    running = freq.cumsum()
    # The last running count equals the total number of samples.
    total = running[-1]
    return values, running / total

def plot_ecdf(a, title=None, xlabel=None):
    """Draw the empirical CDF of ``a`` as a step curve and show the figure.

    ``title`` and ``xlabel`` are passed straight to ``plt.title`` and
    ``plt.xlabel``.
    """
    values, cum_prob = ecdf(a)
    # Prepend the point (smallest value, 0) so the curve starts on the x-axis.
    step_x = np.insert(values, 0, values[0])
    step_y = np.insert(cum_prob, 0, 0.)
    plt.plot(step_x, step_y, drawstyle='steps-post')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.grid(True)
    plt.show()

In [None]:
# creates a (roughly square) grid of axes that can hold at least n subplots
# returns a flattened (1D) list of these n axes

def getsubplots(n):
    """Create a roughly square grid of axes and return the first ``n`` of them.

    The grid has ``floor(sqrt(n))`` rows and enough columns to hold ``n``
    subplots. Axes beyond the first ``n`` are switched off so they render blank.

    Parameters
    ----------
    n : int
        Number of subplots needed; must be at least 1.

    Returns
    -------
    numpy.ndarray
        1-D array holding the ``n`` usable axes.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (the row count would be zero).
    """
    if n < 1:
        raise ValueError('getsubplots needs n >= 1, got %r' % (n,))

    plotrows = int(np.floor(np.sqrt(n)))
    plotcols = int(np.ceil(n / plotrows))

    # squeeze=False makes plt.subplots always return a 2-D array of axes; with
    # the default, a 1x1 grid comes back as a bare Axes object that has no
    # .flatten(), so getsubplots(1) used to fail.
    fig, axs = plt.subplots(plotrows, plotcols, squeeze=False)
    axs = axs.flatten()
    for ax in axs[n:]:
        ax.set_axis_off()
    return axs[:n]

In [None]:
# Max/Min N value
arr = np.array([1, 3, 2, 4, 5])
# Max 3: argsort is ascending, so take the last three indices and reverse them
# to get the positions of the largest values, largest first.
max3_idx = arr.argsort()[-3:][::-1]
# Min 2: the smallest values sit at the *front* of the ascending argsort
# (slicing with [-2:] would return the two largest instead).
min2_idx = arr.argsort()[:2]
max3_idx, min2_idx

In [None]:
# Catch time gaps
# Report every run of rows in which DF[serv_b] is null, when more than 5 seconds
# pass between the first null row and the next non-null row. For each such gap
# the index label and 'Time' of both rows are printed.
# NOTE(review): DF and serv_b are not defined in the visible cells.
flag = False  # True while we are inside a run of null rows

for index, row in DF.iterrows():
    if not flag:
        if pd.isnull(row[serv_b]):
            # First null row of a potential gap: remember its label and time.
            flag = True
            idx = index
            gap_start_time = row['Time']
    elif pd.notnull(row[serv_b]):
        # First non-null row after the run closes the gap.
        flag = False
        # Timestamps come from the iterated rows themselves: iterrows() yields
        # index *labels*, which are not valid .iloc positions once DF has been
        # filtered or re-indexed.
        if (row['Time'] - gap_start_time).total_seconds() > 5:
            print(idx, gap_start_time)
            print(index, row['Time'])

In [None]:
# Transform timezone
# Parse the timestamp string (minus its last four characters) as GMT, convert it
# to America/Los_Angeles, then drop the timezone to get a naive local timestamp.
# NOTE(review): the [:-4] presumably strips a trailing ' GMT' suffix - confirm against the log format.
# NOTE(review): `data` is not defined at top level in the visible cells; get_tp() builds it with json.load.
pd.Timestamp(data['start']['timestamp']['time'][:-4],tz='GMT').tz_convert(tz='America/Los_Angeles').tz_localize(None)

In [None]:
# Plot time
# Label the major x ticks with the seconds field only ('%S') and place one major
# tick every 10 seconds.
# NOTE(review): ax1 and fig are not defined at top level in the visible cells;
# they exist only as locals inside twin_graph.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%S'))
ax1.xaxis.set_major_locator(mdates.SecondLocator(interval=10))

# Let matplotlib auto-format the date tick labels, then set the tick label
# rotation and font through pyplot.
# NOTE(review): plt.xticks acts on the current axes, which may not be ax1 - verify.
fig.autofmt_xdate()
plt.xticks(rotation=90, fontweight='light',  fontsize='x-small')

In [None]:
# Iperf related
def get_tp(f):
    """Read an iperf JSON log and return per-interval timestamps and throughput.

    Requires the ``json`` module to be imported at the top of the notebook.

    Parameters
    ----------
    f : file-like
        Open text handle positioned at the start of the JSON document. The
        document must provide ``start.timestamp.time`` and a list ``intervals``
        whose entries carry ``sum.seconds`` and ``sum.bits_per_second``.

    Returns
    -------
    tuple of (list, list)
        ``time``: one naive ``pandas.Timestamp`` per interval, obtained by
        adding each interval's ``sum.seconds`` cumulatively to the start time.
        ``tp``: ``sum.bits_per_second`` divided by 1,000,000 for each interval.
    """
    data = json.load(f)

    # Start of the run: parsed as GMT, converted to America/Los_Angeles, then
    # made timezone-naive.
    # NOTE(review): [:-4] presumably strips a trailing ' GMT' suffix - confirm.
    clock = (
        pd.Timestamp(data['start']['timestamp']['time'][:-4], tz='GMT')
        .tz_convert(tz='America/Los_Angeles')
        .tz_localize(None)
    )

    time = []
    tp = []
    for interval in data['intervals']:
        summary = interval['sum']
        # Advance the running clock by this interval's 'seconds' value.
        clock = pd.Timedelta(seconds=summary['seconds']) + clock
        time.append(clock)
        tp.append(summary['bits_per_second'] / 1000000)
    return time, tp
# Load the application throughput series from an iperf JSON log.
# NOTE: the path below is an empty placeholder - fill it in before running.
with open('','r') as f:
    # get_tp needs the open file handle; calling it with no argument raises TypeError.
    app_time,app_tp=get_tp(f)

# Fix iperf
def fix_iperf_log(fname, drop_last=11):
    """Write a copy of ``fname`` without its trailing lines to ``fname + '.fixed'``.

    Parameters
    ----------
    fname : str
        Path of the log file to read. The original file is left untouched.
    drop_last : int, optional
        Number of lines to remove from the end of the file. Defaults to 11,
        the value that used to be hard-coded. If the file has no more than
        ``drop_last`` lines, the output file is written empty.

    Raises
    ------
    ValueError
        If ``drop_last`` is negative.
    """
    if drop_last < 0:
        raise ValueError('drop_last must be >= 0, got %r' % (drop_last,))

    with open(fname, 'r') as f:
        text = f.readlines()

    # Compute the cut position explicitly: a plain text[:-drop_last] would
    # return an empty list when drop_last is 0.
    keep = max(len(text) - drop_last, 0)
    with open(fname + '.fixed', 'w') as out:
        out.writelines(text[:keep])