In [None]:
%matplotlib inline

%load_ext autoreload
%autoreload 2

## Stdlib

## Non-std libs
import matplotlib.pyplot as plt
import pandas as pd

## Local modules
from scn_rrd import config, librenms_meta_utils, librenms_rrd_utils, rrd_utils, plot_utils

### Data rate consumption

Investigating the unit of data in the RRD:

- https://oidref.com/1.3.6.1.2.1.2.2.1.10 shows that the snmp metric is for total number of bytes ever received/sent by a given interface. (The value decreases across time iff overflow.)
- LibreNMS does its magical php things and stores inOctets and outOctets metrics at `/opt/librenms/rrd/<device_ip_or_hostname>/port-<port_id>.rrd`
- `rrdtool info ..../port-<port_id>.rrd` shows that this RRD database's `INOCTETS` and `OUTOCTETS` DSes are of type `DERIVE`.
- Queries:
    - In response to `rrdtool fetch ..../port-<port_id>.rrd AVERAGE ...`:
        - Each value has the unit of Bytes/second and encodes the average(measured data rates within one time resolution).
    - In response to `rrdtool fetch ..../port-<port_id>.rrd MAX ...`:
        - Each value has the unit of Bytes/second and encodes the max(measured data rates within one time resolution).
    - The time resolution can optionally be specified as a command line arg; otherwise, it's dynamically chosen according to the queried time span.
- Validation of the above interpretation:
    - We can compare various graphs generated by LibreNMS using `rrdgraph`.
        - Eg:
            1. `DEF:inoctets=Garfield-EPC/port-id190.rrd:INOCTETS:AVERAGE`
            1. `VDEF:totin=inoctets,TOTAL`
            1. According to https://oss.oetiker.ch/rrdtool/doc/rrdgraph_rpn.en.html#TOTAL, `TOTAL` multiplies logged values by the time resolution.
            1. `GPRINT:totin:'\(In %6.2lf%sB'    ...`
            1. The `TOTAL` aggregator yields Bytes. Hence, we can infer that each value yielded by `AVERAGE` is Bytes/sec, and to get the total value, we calculate, in pseudocode, `sum(val * time_resolution for val in raw_AVERAGE_values)`.

In [None]:
# Port group names whose ports we keep.
portGroups = ['epc-backhaul-interface', 'backhaul-interface']

# Read the port metadata, then keep only the rows that are both
# at one of the configured physical locations and in one of the port groups above.
meta = librenms_meta_utils.read_meta().loc[
    lambda m: m['location'].isin(config.PHYS_LOCS) & m['port_group_name'].isin(portGroups)
]

In [None]:
# Sanity check: on LibreNMS, each device should have exactly one interface/port tagged as "backhaul interface",
# i.e. no hostname may appear in more than one row of `meta`.
meta['hostname'].is_unique

In [None]:
NMS_HOST_IP = config.DOTENV_ENTRIES['NMS_HOST_IP']

# Map each physical location to the last three months of its port RRD.
physLoc_to_consumeDf = {}
for (_, port_row) in meta.iterrows():
    port_rrd_path = librenms_rrd_utils.format_rrd_filepath(
        port_row['hostname'],
        librenms_rrd_utils.format_port_rrd_filename(port_row['port_id']),
    )
    physLoc_to_consumeDf[port_row['location']] = rrd_utils.read_rrd(NMS_HOST_IP, port_rrd_path, '-3month')

In [None]:
# Clean data.
# We see unrealistic spikes in the retrieved data rates.
# The cause could be anything in the whole data pipeline, from snmp to our python code.
# Here, we choose an arbitrary threshold; any rate above it is blanked out (in place).
thresh = 1e8
for spiky_df in physLoc_to_consumeDf.values():
    for octets_col in ('INOCTETS', 'OUTOCTETS'):
        spiky_df.loc[spiky_df[octets_col] > thresh, octets_col] = pd.NA

In [None]:
(fig, (ax_amount, ax_ratio)) = plt.subplots(nrows=2, figsize=(12, 6 * 2))
for (physLoc, consumeDf) in physLoc_to_consumeDf.items():
    # Time resolution == seconds between the first two samples.
    # `.iloc` picks them by position, so this does not depend on the DF's index labels.
    # `.total_seconds()` (unlike `.seconds`) keeps the day component of the Timedelta,
    # so a resolution of one day or more is not truncated.
    resoln = (consumeDf['time'].iloc[1] - consumeDf['time'].iloc[0]).total_seconds()

    # The conceptual value transformation goes like:
    #     Raw value is average Bytes/sec within one time resolution
    #     --(mult by time resolution)--> Bytes within one time resolution
    #     --(sum by day)--> Bytes within one day
    #     --(div by pow(2,30))--> GigaBytes within one day
    factor = resoln / pow(2, 30)
    aggr_df = (
        consumeDf[['time', 'INOCTETS', 'OUTOCTETS']]
        .groupby(by=pd.Grouper(key='time', freq='D'))
        .sum(numeric_only=True)
        * factor
    )

    # One color per location; solid line == downloads, dashed line == uploads.
    color = plot_utils.LOC_TO_COLOR[physLoc]
    aggr_df['INOCTETS'].plot(ax=ax_amount, label=f"{physLoc} downloads", color=color, linestyle='solid')
    aggr_df['OUTOCTETS'].plot(ax=ax_amount, label=f"{physLoc} uploads", color=color, linestyle='dashed')

    aggr_df['out_in_ratio'] = aggr_df['OUTOCTETS'] / aggr_df['INOCTETS']
    aggr_df['out_in_ratio'].plot(ax=ax_ratio, label=physLoc, color=color)

ax_amount.set_title('Daily consumption')
ax_amount.set_ylabel('Gigabyte')
ax_amount.set_xlabel('Day')
ax_amount.legend()

ax_ratio.set_title('Daily consumption, ratio of uploads vs downloads')
ax_ratio.set_ylabel('Ratio')
ax_ratio.set_xlabel('Day')
ax_ratio.legend()

None # Hide stdout output of the above line

In [None]:
(fig, (ax_amount, ax_ratio)) = plt.subplots(nrows=2, figsize=(12, 6 * 2))
for (physLoc, consumeDf) in physLoc_to_consumeDf.items():
    # Time resolution == seconds between the first two samples.
    # `.iloc` picks them by position, so this does not depend on the DF's index labels.
    # `.total_seconds()` (unlike `.seconds`) keeps the day component of the Timedelta.
    resoln = (consumeDf['time'].iloc[1] - consumeDf['time'].iloc[0]).total_seconds()
    # Count of calendar days spanned by the data (a partial day counts as one).
    days = ( consumeDf['time'].max() - consumeDf['time'].min() ).days + 1

    # The conceptual value transformation goes like:
    #     Raw value is average Bytes/sec within one time resolution
    #     --(mult by time resolution)--> Bytes within one time resolution
    #     --(sum by hour_of_day)--> Bytes within one hour_of_day across all days
    #     --(div by ct of days)--> Bytes within one hour_of_day in one average day
    #     --(div by pow(2,30))--> GigaBytes ditto
    factor = resoln / pow(2, 30) / days
    aggr_df = (
        consumeDf[['time', 'INOCTETS', 'OUTOCTETS']]
        .groupby(consumeDf['time'].dt.hour)
        .sum(numeric_only=True)
        * factor
    )

    # One color per location; solid line == downloads, dashed line == uploads.
    color = plot_utils.LOC_TO_COLOR[physLoc]
    aggr_df['INOCTETS'].plot(ax=ax_amount, label=f"{physLoc} downloads", color=color, linestyle='solid')
    aggr_df['OUTOCTETS'].plot(ax=ax_amount, label=f"{physLoc} uploads", color=color, linestyle='dashed')

    aggr_df['out_in_ratio'] = aggr_df['OUTOCTETS'] / aggr_df['INOCTETS']
    aggr_df['out_in_ratio'].plot(ax=ax_ratio, label=physLoc, color=color)

ax_amount.set_title('Average hourly consumption')
ax_amount.set_ylabel('Gigabyte')
ax_amount.set_xlabel('Hour')
ax_amount.legend()

ax_ratio.set_title('Average hourly consumption, ratio of uploads vs downloads')
ax_ratio.set_ylabel('Ratio')
# The x axis of this subplot is the hour of day (the groupby key), not the day.
ax_ratio.set_xlabel('Hour')
ax_ratio.legend()

None # Hide stdout output of the above line

### Data rate capacity (aka speedtest)

In [None]:
# Map each monitored physical location to one DF holding both its download and upload rate series.
physLoc_to_capDf = {}
for (monitor_ip, monitor_loc) in config.MONITOR_DEVICES.items():
    down_rates = rrd_utils.read_rrd(monitor_ip, 'down_rate.rrd', '-3month')
    up_rates = rrd_utils.read_rrd(monitor_ip, 'up_rate.rrd', '-3month')

    # Outer join on time keeps timestamps that appear in only one of the two series.
    physLoc_to_capDf[monitor_loc] = pd.merge(down_rates, up_rates, on='time', how='outer')

In [None]:
(fig, ax) = plt.subplots(figsize=(12, 6))
for monitor_loc in config.MONITOR_DEVICES.values():
    rates = physLoc_to_capDf[monitor_loc]
    # One color per location; solid line == download, dashed line == upload.
    line_color = plot_utils.LOC_TO_COLOR[monitor_loc]

    ax.plot(rates['time'], rates['down_rate'], label=f"{monitor_loc} download", color=line_color, linestyle='solid')
    ax.plot(rates['time'], rates['up_rate'], label=f"{monitor_loc} upload", color=line_color, linestyle='dashed')

ax.set(title='Capacity', ylabel='Mbps')
ax.legend()
None # Hide stdout output of the above line

### Utilization == Consumption / Capacity

In [None]:
def utilization(consume_df: pd.DataFrame, capacity_df: pd.DataFrame, factor: float) -> pd.DataFrame:
    '''
    Divide each consumption sample by the latest capacity measurement taken at or before it.

    @arg consume_df and capacity_df:
        Each DF should have two columns,
        1st column containing time, and
        2nd column containing quantity (float).
        Columns are read by position, so their labels do not matter.
        Both DFs are walked once, front to back, so each is expected to be
        sorted by time in ascending order.
    @arg factor:
        Multiplier applied to every (consumed quantity / capacity quantity), eg for unit conversion.
    @return:
        DF with columns 'time' and 'utilization'.
        1st column contains time, unmodified from @arg consume_df.
        2nd column contains (consumed quantity / capacity quantity * factor),
            where capacity quantity comes from the last non-NA measurement at or before the time.
        Consumption rows are omitted if their quantity is NA,
            or if no non-NA capacity was measured at or before their time.
    '''
    cap_i = 0
    cap_n = len(capacity_df)
    cap_quant_prev = None

    utilizn_rows = []

    # Iterate the two columns by position; this avoids building one Series per row.
    for (consu_time, consu_quant) in zip(consume_df.iloc[:, 0], consume_df.iloc[:, 1]):
        if pd.isna(consu_quant):
            continue

        # Advance through every capacity measurement taken at or before this consumption sample,
        # remembering the latest non-NA one.
        # `.iat[row, col]` is purely positional; indexing a row Series with `[0]` would be
        # interpreted as a label lookup once pandas drops its positional fallback.
        while cap_i < cap_n and capacity_df.iat[cap_i, 0] <= consu_time:
            _cap_quant = capacity_df.iat[cap_i, 1]
            if not pd.isna(_cap_quant):
                cap_quant_prev = _cap_quant
            cap_i += 1

        if pd.isna(cap_quant_prev):
            # No capacity was recorded before the current consumption timestamp.
            continue

        utilizn_rows.append((consu_time, consu_quant / cap_quant_prev * factor))

    return pd.DataFrame(utilizn_rows, columns=['time', 'utilization'])

In [None]:
mon_ct = len(config.MONITOR_DEVICES)
# squeeze=False makes `axez` a 2-D array even when there is only one monitoring device
# (otherwise a bare Axes is returned, which cannot be zipped over).
(fig, axez) = plt.subplots(nrows=mon_ct, figsize=(12, 3 * mon_ct), constrained_layout=True, squeeze=False)
for (ax, phys_loc) in zip(axez.flat, config.MONITOR_DEVICES.values()):
    ## Get consumption at the monitoring device or at our LTE location.
    # The fallback is only looked up when needed, so a missing logical location
    # cannot raise KeyError for a device that has its own consumption data.
    if phys_loc in physLoc_to_consumeDf:
        consumeDf = physLoc_to_consumeDf[phys_loc]
    else:
        logi_loc = config.PHYS_LOC_TO_LOGICAL_LOC[phys_loc]
        consumeDf = physLoc_to_consumeDf[logi_loc]
    ## Get capacity at the monitoring device.
    capDf = physLoc_to_capDf[phys_loc]

    # Scale both numerator and denominator to bitsPerSec. Then scale to percentage.
    #     Numerator (consumption) is Bytes/sec --(mult by 8)--> bits/sec
    #     Denominator (capacity) is Mbps --(mult by pow(2,20))--> bits/sec
    #     NOTE(review): assumes down_rate/up_rate are stored in Mbps, as labelled in the capacity plot -- confirm.
    factor = 8 / pow(2, 20) * 100

    color = plot_utils.LOC_TO_COLOR[phys_loc]

    down_utilizn_df = utilization(consumeDf[['time', 'INOCTETS']], capDf[['time', 'down_rate']], factor)
    ax.plot(down_utilizn_df['time'], down_utilizn_df['utilization'], label=f"{phys_loc} download", color=color, linestyle='solid')

    # Distinct names for the two results, so the upload line cannot accidentally re-plot the download data.
    up_utilizn_df = utilization(consumeDf[['time', 'OUTOCTETS']], capDf[['time', 'up_rate']], factor)
    ax.plot(up_utilizn_df['time'], up_utilizn_df['utilization'], label=f"{phys_loc} upload", color=color, linestyle='dashed')

    ax.set_ylabel('Percent')
    ax.legend()
fig.suptitle('Utilization')
None # Hide stdout output of the above line