In [None]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

## Stdlib
from collections import defaultdict, namedtuple
from datetime import datetime
import math
import os
import subprocess
import sys
import tempfile

## Non-std libs
import matplotlib.pyplot as plt
import pandas as pd

## Local modules
from scn_rrd import rrd_meta_utils, rrd_utils, rrd_utils_stdout, plot_utils

### Data rate consumption

In [None]:
# Port groups whose rows are kept from the RRD metadata.
portGroups = ['epc-backhaul-interface', 'backhaul-interface']

# Read the metadata and keep only the rows whose location is one of
# plot_utils.PHYS_LOCS AND whose port group is one of portGroups.
meta = rrd_meta_utils.read_meta().loc[
    lambda m: m['location'].isin(plot_utils.PHYS_LOCS) & m['port_group_name'].isin(portGroups)
]

In [None]:
# For every metadata row, fetch that port's RRD with rrd_utils.read_rrd_via_scp
# (requesting the '-3month' window) and store the result under the port id.
portId_to_consumeDf = dict()
for (_, row) in meta.iterrows():
    portId_to_consumeDf[row['port_id']] = rrd_utils.read_rrd_via_scp(
        row['hostname'],
        row['port_rrd_filename'],
        '-3month',
    )

In [None]:
## Merge all ports for each device. (Eg for FCS-EPC.)
# First gather each location's per-port frames, then concatenate once per location.
# Concatenating inside the loop re-copies the accumulated rows on every iteration, and
# seeding it with an empty pd.DataFrame() relies on pandas ignoring empty frames when
# it picks result dtypes, which newer pandas versions warn about.
portDfs_by_physLoc = defaultdict(list)
for (_, row) in meta.iterrows():
    # Same iterrows() access as the cell that built portId_to_consumeDf, so the
    # port id used for the lookup has the same type as the key it was stored under.
    portDfs_by_physLoc[row['location']].append(portId_to_consumeDf[row['port_id']])

# Rows of all ports at a location are stacked vertically under a fresh 0..n-1 index.
physLoc_to_consumeDf = {
    physLoc: pd.concat(portDfs, axis=0, ignore_index=True)
    for (physLoc, portDfs) in portDfs_by_physLoc.items()
}

In [None]:
# One figure: per physical location, the total in/out traffic per calendar day.
(fig, ax) = plt.subplots(figsize=(12, 6))
for (physLoc, consumeDf) in physLoc_to_consumeDf.items():
    # Sampling step, taken from the first two rows (by position).
    # total_seconds() is used instead of .seconds because .seconds is only the sub-day
    # component of a Timedelta: a step of one day or more would read as 0 and zero the plot.
    dt_sec = (consumeDf['time'].iloc[1] - consumeDf['time'].iloc[0]).total_seconds()

    # Raw avg bytes per step --> tot bytes per step --> tot bytes per day --> tot GB per day.
    factor = dt_sec / pow(2, 30)

    # Sum every sample that falls on the same calendar day, then scale to GB.
    aggr_df = (
        consumeDf[['time', 'INOCTETS', 'OUTOCTETS']]
        .groupby(by=pd.Grouper(key='time', freq='D'))
        .sum()
        * factor
    )

    # Same color for both directions of one location; the line style tells them apart.
    color = plot_utils.LOC_TO_COLOR[physLoc]
    aggr_df['INOCTETS'].plot(ax=ax, label=f"{physLoc} downloads", color=color, linestyle='solid')
    aggr_df['OUTOCTETS'].plot(ax=ax, label=f"{physLoc} uploads", color=color, linestyle='dashed')
ax.set_title('Daily consumption')
ax.set_ylabel('Gigabyte')
ax.set_xlabel('Day')
ax.legend()
None # Hide stdout output of the above line

In [None]:
# One figure: per physical location, the in/out traffic per hour-of-day, averaged over the days covered.
(fig, ax) = plt.subplots(figsize=(12, 6))
for (physLoc, consumeDf) in physLoc_to_consumeDf.items():
    # Sampling step, taken from the first two rows (by position).
    # total_seconds() is used instead of .seconds because .seconds is only the sub-day
    # component of a Timedelta: a step of one day or more would read as 0.
    dt_sec = (consumeDf['time'].iloc[1] - consumeDf['time'].iloc[0]).total_seconds()

    # Whole days covered by the data. Series.max()/.min() skip NaT, unlike the builtins.
    # Clamp to 1 so a span shorter than one day does not divide by zero below.
    days = max((consumeDf['time'].max() - consumeDf['time'].min()).days, 1)

    # Raw avg bytes per step --> tot bytes per step --> tot bytes per hour-of-day
    #   --> tot GB per hour-of-day --> tot GB per hour-of-day per avg day
    factor = dt_sec / pow(2, 30) / days

    # Bucket samples by hour-of-day (0..23) across all days, sum, then scale.
    aggr_df = (
        consumeDf[['time', 'INOCTETS', 'OUTOCTETS']]
        .groupby(consumeDf['time'].dt.hour)
        .sum(numeric_only=True)
        * factor
    )

    # Same color for both directions of one location; the line style tells them apart.
    color = plot_utils.LOC_TO_COLOR[physLoc]
    aggr_df['INOCTETS'].plot(ax=ax, label=f"{physLoc} downloads", color=color, linestyle='solid')
    aggr_df['OUTOCTETS'].plot(ax=ax, label=f"{physLoc} uploads", color=color, linestyle='dashed')
ax.set_title('Average hourly consumption')
ax.set_ylabel('Gigabyte')
ax.set_xlabel('Hour')
ax.legend()
None # Hide stdout output of the above line

### Data rate capacity (aka speedtest)

In [None]:
# A monitoring device: the physical location it sits at, and the name of the
# environment/dotenv key under which its login string is stored.
MonitorDevice = namedtuple('MonitorDevice', 'phys_loc login_env_key')

# The devices to chart, in display order.
monitor_devices = [
    MonitorDevice(phys_loc=loc, login_env_key=env_key)
    for (loc, env_key) in (
        ('nickelsville-cd', 'CD_MON_LOGIN'),
        ('sps-franklin', 'FRANKLIN_MON_LOGIN'),
        ('lihi-southend', 'SOUTHEND_MON_LOGIN'),
        ('FCS', 'FCS_MON_LOGIN'),
    )
]

In [None]:
# Build, per monitoring device, one frame holding its down- and up-rate series joined on time.
physLoc_to_capDf = dict()
for mondev in monitor_devices:
    # The login entry is a single string: address, user and password joined by '%%%%'.
    login = rrd_meta_utils.DOTENV_ENTRIES[mondev.login_env_key]
    (addr, user, pw) = login.split('%%%%')

    if mondev.phys_loc == 'lihi-southend':
        # We cannot get a reliable ssh session with the southend device. Neither the scp way nor the stdout way works.
        # See Google Drive for this file.
        def one_df(local_stdout_filepath):
            # Parse a locally saved copy of the rrdtool stdout instead of contacting the device.
            with open(local_stdout_filepath, 'r') as f:
                stdout_str = f.read()
                return rrd_utils_stdout.rrd_stdout_to_dataframe(stdout_str)
        down_df = one_df('./data/southend_down.stdout.txt')
        up_df = one_df('./data/southend_up.stdout.txt')

    elif mondev.phys_loc == 'FCS':
        # Using the stdout way, FCS takes a really long time. And the rrdtool installed there is the latest,
        #   so we can use the scp way. Ugh so messy. Clean up later. We should colocate all RRDs @ the NMS server anyway.
        def one_df(rrd_filename):
            remote_filepath = f"~/speedtest/rrd/{rrd_filename}.rrd"
            with tempfile.NamedTemporaryFile() as f:
                # Argument list + no shell: nothing in the login string is ever parsed by a shell.
                # The password travels in the SSHPASS environment variable (sshpass -e) rather than
                # on the command line, so special characters cannot break the quoting and the
                # password does not show up in the process list.
                rsync_cmd = [
                    '/usr/bin/rsync', '-az',
                    f"--rsh=/usr/bin/sshpass -e ssh -o StrictHostKeyChecking=no -l {user}",
                    f"{user}@{addr}:{remote_filepath}",
                    f.name,
                ]
                ret = subprocess.run(rsync_cmd, env={**os.environ, 'SSHPASS': pw}).returncode
                if ret != 0:
                    # Fail here with the real cause; returning None would only surface later
                    # as an AttributeError on the merge below.
                    raise RuntimeError(f"Could not download {remote_filepath} from {mondev.phys_loc}")
                # Must be read while the temporary file still exists, i.e. inside the with-block.
                return rrd_utils.rrd_to_dataframe(f.name, '-3month')
        down_df = one_df('down_rate')
        up_df = one_df('up_rate')

    else:
        def one_df(rrd_filename):
            remote_rrd_filepath = f"~/speedtest/rrd/{rrd_filename}.rrd"
            return rrd_utils_stdout.read_rrd_via_stdout(addr, user, pw, remote_rrd_filepath, '-3month')
        down_df = one_df('down_rate')
        up_df = one_df('up_rate')

    # Outer join keeps timestamps present in only one of the two series (the other column is NaN there).
    df = down_df.merge(up_df, on='time', how='outer')

    physLoc_to_capDf[mondev.phys_loc] = df

In [None]:
# One figure with two curves per monitoring device: measured download and upload rate over time.
(fig, ax) = plt.subplots(figsize=(12, 6))
for mondev in monitor_devices:
    # One color per location; solid = download, dashed = upload.
    color = plot_utils.LOC_TO_COLOR[mondev.phys_loc]
    capDf = physLoc_to_capDf[mondev.phys_loc]
    times = capDf['time']

    for (series, label, linestyle) in (
        (capDf['down_rate'], f"{mondev.phys_loc} download", 'solid'),
        (capDf['up_rate'], f"{mondev.phys_loc} upload", 'dashed'),
    ):
        ax.plot(times, series, label=label, color=color, linestyle=linestyle)

ax.set(title='Capacity', ylabel='Mbps')
ax.legend()
None # Hide stdout output of the above line

### Utilization == Consumption / Capacity

In [None]:
def utilization(consume_df: pd.DataFrame, capacity_df: pd.DataFrame, factor: float) -> pd.DataFrame:
    '''
    Divide each consumption sample by the most recent usable capacity measurement.

    Both frames are walked once, front to back, with a cursor into capacity_df that
    never rewinds, so both must already be sorted by ascending time.

    @arg consume_df and capacity_df:
        Each DF should have two columns,
        1st column containing time, and
        2nd column containing quantity (float).
        Columns are read by position, so their names do not matter.
    @arg factor:
        Multiplier applied to every (consumed / capacity) ratio,
        e.g. for unit conversion and/or scaling to percent.
    @return:
        DF with columns 'time' and 'utilization'.
        1st column contains time, unmodified from @arg consume_df.
        2nd column contains (consumed quantity / capacity quantity) * factor,
            where capacity quantity comes from the last measurement at or before the time.
        Consumption rows whose quantity is NaN, or that precede every usable capacity
        measurement, are left out. Capacity rows whose quantity is NaN or 0 are not
        usable measurements: they are skipped and the previous usable one stays in effect.
    '''
    # Pull the columns out by position once. Indexing a row Series with [0]/[1] is a
    # label lookup when the columns are named, which pandas only tolerates with a
    # deprecation warning; it also builds a new Series per access.
    cap_times = capacity_df.iloc[:, 0].tolist()
    cap_quants = capacity_df.iloc[:, 1].tolist()
    cap_n = len(cap_times)
    cap_i = 0
    cap_quant_prev = None

    utilizn_rows = []

    consu_times = consume_df.iloc[:, 0].tolist()
    consu_quants = consume_df.iloc[:, 1].tolist()
    for (consu_time, consu_quant) in zip(consu_times, consu_quants):
        if pd.isna(consu_quant):
            continue

        # Advance the cursor over every capacity row at or before this consumption time.
        while cap_i < cap_n and cap_times[cap_i] <= consu_time:
            cap_quant = cap_quants[cap_i]
            # A NaN reading must not erase the last real measurement, and a zero
            # reading would make the division below fail.
            if not pd.isna(cap_quant) and cap_quant != 0:
                cap_quant_prev = cap_quant
            cap_i += 1

        if cap_quant_prev is None:
            # No capacity was recorded before the current consumption timestamp.
            continue

        utilizn_rows.append((consu_time, consu_quant / cap_quant_prev * factor))

    return pd.DataFrame(utilizn_rows, columns=['time', 'utilization'])

In [None]:
# One subplot per monitoring device: download and upload utilization over time.
mon_ct = len(monitor_devices)
# squeeze=False always returns a 2-D array of axes; taking its only column gives a
# 1-D array even when there is a single monitoring device.
(fig, axez) = plt.subplots(
    nrows=mon_ct, figsize=(12, 3 * mon_ct), constrained_layout=True, squeeze=False
)
axez = axez[:, 0]

# Scale both numerator and denominator to bitsPerSec. Then scale to percentage.
# NOTE(review): converting octets/s to bits/s would multiply by 8, yet this divides by 8.
#   Confirm the units of INOCTETS/OUTOCTETS and of down_rate/up_rate before trusting
#   the absolute percentages.
factor = (1/8) / pow(2,20) * 100

for (ax, mondev) in zip(axez, monitor_devices):
    phys_loc = mondev.phys_loc

    ## Get consumption at the monitoring device or at our LTE location.
    # The fallback is looked up only when it is needed: dict.get(k, d[other]) evaluates
    # d[other] up front and raises KeyError even when k is present.
    if phys_loc in physLoc_to_consumeDf:
        consumeDf = physLoc_to_consumeDf[phys_loc]
    else:
        logi_loc = plot_utils.PHYS_LOC_TO_LOGICAL_LOC[phys_loc]
        consumeDf = physLoc_to_consumeDf[logi_loc]
    ## Get capacity at the monitoring device.
    capDf = physLoc_to_capDf[phys_loc]

    color = plot_utils.LOC_TO_COLOR[phys_loc]

    down_utilizn_df = utilization(consumeDf[['time', 'INOCTETS']], capDf[['time', 'down_rate']], factor)
    ax.plot(down_utilizn_df['time'], down_utilizn_df['utilization'], label=f"{phys_loc} download", color=color, linestyle='solid')

    # Distinct names for the two results: the upload curve must plot the upload frame,
    # not re-plot the download one.
    up_utilizn_df = utilization(consumeDf[['time', 'OUTOCTETS']], capDf[['time', 'up_rate']], factor)
    ax.plot(up_utilizn_df['time'], up_utilizn_df['utilization'], label=f"{phys_loc} upload", color=color, linestyle='dashed')

    ax.set_ylabel('Percent')
    ax.legend()
fig.suptitle('Utilization')
None # Hide stdout output of the above line