# Coverage Analysis (High Density Scenario)

_Script adapted from full-factorial.ipynb_

In [1]:
import os
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import product, chain, combinations
from scipy import stats
from IPython.display import display, HTML
%matplotlib inline

def parse_if_number(s):
    """Convert a raw CSV cell into a float, a bool, a string or ``None``.

    Text that ``float()`` accepts becomes a ``float``. The literals ``"true"``
    and ``"false"`` become the corresponding booleans. Any other truthy value
    is returned unchanged, and falsy values (empty string, ``None``) become
    ``None``.
    """
    try:
        return float(s)
    except (ValueError, TypeError):
        # Only conversion failures are expected here. Anything else (for
        # example KeyboardInterrupt) must propagate instead of being swallowed.
        pass
    if s == "true":
        return True
    if s == "false":
        return False
    return s if s else None

def parse_ndarray(s):
    """Decode a space-separated run of numbers into a NumPy array.

    Falsy input (empty string, ``None``) yields ``None`` instead of an array.
    """
    if not s:
        return None
    return np.fromstring(s, sep=' ')

def get_file_name(name):
    """Return ``name`` with every ':' swapped for '-'."""
    return '-'.join(name.split(':'))

## Config

In [2]:
# Exported results file to analyse
inputFile = 'coverage.csv'
repetitionsCount = -1 # -1 = auto-detect
# Names of the factors (iteration variables) of the experiment
factors = ['R', 'T', 'm', 'D']

# Value handed to stats.t.interval when the error bars are computed
tIntervalAlpha = 0.9

plotSize = (10, 10)
plotStyle = 'seaborn-whitegrid'
saveFigures = False

# Filter scalars
scalarsFilter = ['Floorplan.userCount', 'Floorplan.coveredUsers:sum']
# Filter vectors
vectorsFilter = []
# Percentiles
percentiles = [0.25, 0.5, 0.75, 0.9, 0.95]

# Performance indexes: (column name, human-readable description)
perfIndexes = [
    ('coveredUsersPercent', 'percentage of covered users')
]

# Transformations: (column name, function applied to the values of that column)
transformations = [
]

# round() rather than int(): int() truncates, so a float product such as
# 0.29 * 100 == 28.999999999999996 would silently become 28.
intPercentiles = [round(p * 100) for p in percentiles]
vecPerfIndexes = []
#for intPercentile in intPercentiles:
#    vecPerfIndexes.append(('broadcastTime' + str(intPercentile), 'Broadcast time needed to reach the ' + str(intPercentile) + 'th percentile of the coverage'))
# Every vector-derived index is also reported as a regular performance index.
for v in vecPerfIndexes:
    perfIndexes.append(v)
    #transformations.append((v[0], lambda x: math.log(x)))

## Load scalars

In [3]:
# Column-specific decoders applied while the exported results are parsed:
# 'attrvalue' goes through parse_if_number, the four array-valued columns
# go through parse_ndarray.
columnConverters = {'attrvalue': parse_if_number}
for arrayColumn in ('binedges', 'binvalues', 'vectime', 'vecvalue'):
    columnConverters[arrayColumn] = parse_ndarray

df = pd.read_csv('exported_data/' + inputFile, converters=columnConverters)

Unnamed: 0,run,type,module,name,attrname,attrvalue,value,count,sumweights,mean,stddev,min,max,binedges,binvalues,vectime,vecvalue
0,HighDensityCoverage-248-20210602-14:04:17-7244,runattr,,,configname,HighDensityCoverage,,,,,,,,,,,
1,HighDensityCoverage-248-20210602-14:04:17-7244,runattr,,,datetime,20210602-14:04:17,,,,,,,,,,,
2,HighDensityCoverage-248-20210602-14:04:17-7244,runattr,,,experiment,"HighDensityCoverage-T=1,D=1,m=3,R=5,TO=1+1+1-",,,,,,,,,,,
3,HighDensityCoverage-248-20210602-14:04:17-7244,runattr,,,inifile,simulations.ini,,,,,,,,,,,
4,HighDensityCoverage-248-20210602-14:04:17-7244,runattr,,,iterationvars,"$T=1, $D=1, $m=3, $R=5, $TO=1+1+1",,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605359,HighDensityCoverage-7489-20210602-15:16:32-17617,attr,Floorplan,coveredUsers:vector,unit,users,,,,,,,,,,,
605360,HighDensityCoverage-7489-20210602-15:16:32-17617,vector,Floorplan,totalCollisions:vector,,,,,,,,,,,,"[8.0, 8.0, 10.0, 10.0, 10.0, 10.0, 10.0, 12.0,...","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ..."
605361,HighDensityCoverage-7489-20210602-15:16:32-17617,attr,Floorplan,totalCollisions:vector,source,totalCollisionsSig,,,,,,,,,,,
605362,HighDensityCoverage-7489-20210602-15:16:32-17617,attr,Floorplan,totalCollisions:vector,title,Total number of colliding messages registered ...,,,,,,,,,,,


In [4]:
# A non-positive repetitionsCount requests auto-detection: the count is taken
# as the largest 'repetition' attribute value plus one.
if repetitionsCount <= 0:
    repetitionValues = df.loc[df.attrname == 'repetition', 'attrvalue']
    repetitionsCount = int(repetitionValues.max()) + 1
print('Repetitions:', repetitionsCount)

# Computed
factorsCount = len(factors)

# Create the output directory only when figures are going to be written.
if saveFigures:
    os.makedirs('figures', exist_ok=True)

Repetitions: 10


In [5]:
# Keep only the rows needed to build one record per run: recorded scalars,
# iteration variables (except 'TO'), the user-count parameter and the
# repetition number of each run.
isScalar = df.type == 'scalar'
isItervar = (df.type == 'itervar') & (df.attrname != 'TO')
isUserCount = (df.type == 'param') & (df.attrname == 'Floorplan.userCount')
isRepetition = (df.type == 'runattr') & (df.attrname == 'repetition')
scalars = df[isScalar | isItervar | isUserCount | isRepetition]
# Qualified name: the attribute name when present, otherwise 'module.name'.
scalars = scalars.assign(qname = scalars.attrname.combine_first(scalars.module + '.' + scalars.name))


def _is_expression(val):
    """True for string values that are not made only of digits."""
    return isinstance(val, str) and not all(c.isdigit() for c in val)


def _eval_itervar(expr):
    """Evaluate an iteration-variable expression string.

    NOTE(security): eval() executes arbitrary Python. This is acceptable only
    while the CSV comes from a trusted export; replace it with a restricted
    arithmetic parser before loading files from untrusted sources.
    """
    return eval(expr)


# Iteration variables that are still strings after CSV parsing are evaluated
# in one vectorised pass instead of row by row.
exprMask = (scalars.type == 'itervar') & scalars.attrvalue.map(_is_expression)
if exprMask.any():
    scalars.loc[exprMask, 'attrvalue'] = scalars.loc[exprMask, 'attrvalue'].map(_eval_itervar)

# Rows without a recorded 'value' fall back to their numeric attribute value.
scalars['value'] = scalars.value.combine_first(scalars.attrvalue.astype('float64'))
scalars_wide = scalars.pivot_table(index=['run'], columns='qname', values='value')
scalars_wide = scalars_wide.sort_values([*factors, 'repetition'])

# Number the configurations from the factor values themselves. The numbering
# follows the sorted factor order (the same order as the rows above), and,
# unlike counting rows in blocks of repetitionsCount, it stays correct when a
# configuration has fewer runs than expected.
scalars_wide['config'] = scalars_wide.groupby(factors).ngroup()
scalars_wide = scalars_wide[['config', 'repetition', *factors, *scalarsFilter]].copy()

configsCount = int(scalars_wide['config'].max()) + 1
totalSims = configsCount*repetitionsCount
display(HTML("<style>div.output_scroll { height: auto; max-height: 48em; }</style>"))
pd.set_option('display.max_rows', totalSims)
pd.set_option('display.max_columns', 100)

# coverage
# Fraction of covered users; the denominator is userCount - 1.
# NOTE(review): presumably the one excluded user is the message originator — confirm.
scalars_wide['coveredUsersPercent'] = scalars_wide['Floorplan.coveredUsers:sum'] / (scalars_wide['Floorplan.userCount'] - 1)

## Load vectors

In [6]:
vectors = df[df.type == 'vector']
vectors = vectors.assign(qname = vectors.module + '.' + vectors.name)
# Filter first, so only the vectors that are actually used get annotated.
vectors = vectors[vectors.qname.isin(vectorsFilter)]
# scalars_wide is indexed by run, so the configuration and repetition of each
# vector are a plain index lookup; runs absent from scalars_wide yield NaN.
vectors = vectors.assign(
    config = vectors.run.map(scalars_wide['config']),
    repetition = vectors.run.map(scalars_wide['repetition']),
)
vectors = vectors.sort_values(['config', 'repetition', 'qname'])
vectors = vectors[['config', 'repetition', 'qname', 'vectime', 'vecvalue']]

## Compute scalars from vectors

In [7]:
def get_percentile(percentile, vectime, vecvalue, totalvalue):
    """Return the time at which the running total first reaches a given share.

    The running total of ``vecvalue`` is compared against
    ``percentile * totalvalue``; the entry of ``vectime`` at the first position
    where the total reaches or exceeds that threshold is returned. If the
    threshold is never reached, ``math.inf`` is returned.
    """
    threshold = percentile * totalvalue
    reached = np.flatnonzero(vecvalue.cumsum() >= threshold)
    if reached.size == 0:
        return math.inf
    return vectime[reached[0]]

# For every selected vector, derive one scalar per vector performance index
# (paired positionally with `percentiles`) and store it on the scalars_wide
# row(s) of the same configuration and repetition.
for rowLabel, vecRow in vectors.iterrows():
    cfg = vecRow['config']
    rep = vecRow['repetition']
    for vecPerf, percentile in zip(vecPerfIndexes, percentiles):
        vecPerfIndex = vecPerf[0]
        # Only the 'broadcastTime*' indexes have a known total.
        if not vecPerfIndex.startswith('broadcastTime'):
            raise Exception('Need to specify total for ' + vecPerfIndex + '. (coding required)')
        sameRun = (scalars_wide['config'] == cfg) & (scalars_wide['repetition'] == rep)
        # The total is the user count of that run minus one.
        total = scalars_wide[sameRun]['Floorplan.userCount'].values[0] - 1
        value = get_percentile(percentile, vecRow['vectime'], vecRow['vecvalue'], total)
        scalars_wide.loc[sameRun, vecPerfIndex] = value

## Apply transformations

In [8]:
# Apply each configured function to the values of its column; missing values
# are passed through without calling the function.
for columnName, transformFn in transformations:
    transformed = scalars_wide[columnName].map(transformFn, na_action='ignore')
    scalars_wide[columnName] = transformed

## Full factorial

In [9]:
# Upper bound of the two-sided Student-t interval for the standard
# distribution. It depends only on tIntervalAlpha and the degrees of freedom,
# so it is computed once instead of once per configuration.
tUpper = stats.t.interval(tIntervalAlpha, repetitionsCount - 1)[1]

for perfIndex, _ in perfIndexes:
    # Per-configuration statistics, broadcast back onto every row of the
    # configuration (replaces one boolean mask per configuration and column).
    perConfig = scalars_wide.groupby('config')[perfIndex]
    mean = perConfig.transform('mean')
    variance = perConfig.transform('var')  # sample variance, like Series.var()
    halfWidth = tUpper * np.sqrt(variance / repetitionsCount)
    negerr = halfWidth
    poserr = halfWidth
    if perfIndex == 'coveredUsersPercent':
        # The coverage fraction cannot exceed 1, so the upper error bar is
        # capped at the distance between the mean and 1.
        poserr = halfWidth.where(halfWidth < 1 - mean, 1 - mean)
    scalars_wide[perfIndex + 'Mean'] = mean
    scalars_wide[perfIndex + 'Variance'] = variance
    scalars_wide[perfIndex + 'Negerr'] = negerr
    scalars_wide[perfIndex + 'Poserr'] = poserr

# The statistics are identical on every repetition of a configuration, so the
# repetition-0 row is kept as the single summary row of each configuration.
scalars_wide = scalars_wide[scalars_wide['repetition'] == 0]

# Drop the per-run columns, leaving the factors and the summary statistics.
scalars_wide = scalars_wide.drop(columns=[
    *[perfIndex for perfIndex, _ in perfIndexes],
    'repetition',
    'Floorplan.userCount',
    'Floorplan.coveredUsers:sum',
    'config',
])

scalars_wide

qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-0-20210602-14:02:30-6985,1.0,1.0,1.0,1.0,0.000178,1.407164e-07,0.000217,0.000217
HighDensityCoverage-300-20210602-14:05:13-7377,1.0,1.0,1.0,2.0,8.9e-05,7.9153e-08,0.000163,0.000163
HighDensityCoverage-600-20210602-14:08:11-7798,1.0,1.0,1.0,3.0,8.9e-05,7.9153e-08,0.000163,0.000163
HighDensityCoverage-900-20210602-14:11:34-8271,1.0,1.0,1.0,4.0,0.000178,1.407164e-07,0.000217,0.000217
HighDensityCoverage-1200-20210602-14:14:43-8742,1.0,1.0,1.0,5.0,0.0,0.0,0.0,0.0
HighDensityCoverage-100-20210602-14:03:04-7075,1.0,1.0,2.0,1.0,0.000178,1.407164e-07,0.000217,0.000217
HighDensityCoverage-400-20210602-14:06:01-7493,1.0,1.0,2.0,2.0,8.9e-05,7.9153e-08,0.000163,0.000163
HighDensityCoverage-700-20210602-14:09:09-7934,1.0,1.0,2.0,3.0,0.000267,1.846903e-07,0.000249,0.000249
HighDensityCoverage-1000-20210602-14:12:30-8405,1.0,1.0,2.0,4.0,8.9e-05,7.9153e-08,0.000163,0.000163
HighDensityCoverage-1300-20210602-14:15:46-8889,1.0,1.0,2.0,5.0,0.000178,1.407164e-07,0.000217,0.000217


## Coverage results

Here we will print only the rows with a coverage mean ≥ 0.99, i.e. the configurations that reach a mean coverage of at least 99%.

In [12]:
# Keep the configurations whose mean coverage is at least 0.99, then order
# them by the factor values.
meetsTarget = scalars_wide['coveredUsersPercentMean'] >= 0.99
result = scalars_wide[meetsTarget]
sortedresult = result.sort_values(factors)
sortedresult

qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-1360-20210602-14:16:02-8961,7.0,1.0,2.0,5.0,0.990036,2.036871e-05,0.002616,0.002616
HighDensityCoverage-5360-20210602-14:55:50-14661,7.0,4.0,3.0,3.0,0.991459,2.870615e-05,0.003106,0.003106
HighDensityCoverage-770-20210602-14:09:33-8000,8.0,1.0,2.0,3.0,0.991459,1.868011e-05,0.002505,0.002505
HighDensityCoverage-1370-20210602-14:16:18-9011,8.0,1.0,2.0,5.0,0.994395,2.287522e-05,0.002773,0.002773
HighDensityCoverage-1170-20210602-14:13:59-8646,8.0,1.0,3.0,4.0,0.995996,3.034198e-06,0.00101,0.00101
HighDensityCoverage-1470-20210602-14:17:20-9181,8.0,1.0,3.0,5.0,0.995285,9.155364e-06,0.001754,0.001754
HighDensityCoverage-1970-20210602-14:22:19-9903,8.0,2.0,2.0,2.0,0.991904,1.221595e-05,0.002026,0.002026
HighDensityCoverage-2270-20210602-14:25:29-10346,8.0,2.0,2.0,3.0,0.995107,6.024423e-06,0.001423,0.001423
HighDensityCoverage-2570-20210602-14:28:19-10750,8.0,2.0,2.0,4.0,0.99653,9.049826e-06,0.001744,0.001744
HighDensityCoverage-2870-20210602-14:31:29-11196,8.0,2.0,2.0,5.0,0.99484,1.333288e-05,0.002117,0.002117


In [14]:
# For each factor in turn, show the qualifying configurations that use the
# smallest value of that factor, ordered by the remaining factors.
for factor in factors:
    print('Minimize', factor)
    lowestValue = sortedresult[factor].min()
    otherFactors = [fac for fac in factors if fac != factor]
    current = sortedresult[sortedresult[factor] == lowestValue]
    current = current.sort_values([factor, *otherFactors])
    display(current)

Minimize R


qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-1360-20210602-14:16:02-8961,7.0,1.0,2.0,5.0,0.990036,2e-05,0.002616,0.002616
HighDensityCoverage-5360-20210602-14:55:50-14661,7.0,4.0,3.0,3.0,0.991459,2.9e-05,0.003106,0.003106


Minimize T


qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-1360-20210602-14:16:02-8961,7.0,1.0,2.0,5.0,0.990036,2e-05,0.002616,0.002616
HighDensityCoverage-770-20210602-14:09:33-8000,8.0,1.0,2.0,3.0,0.991459,1.9e-05,0.002505,0.002505
HighDensityCoverage-1370-20210602-14:16:18-9011,8.0,1.0,2.0,5.0,0.994395,2.3e-05,0.002773,0.002773
HighDensityCoverage-1170-20210602-14:13:59-8646,8.0,1.0,3.0,4.0,0.995996,3e-06,0.00101,0.00101
HighDensityCoverage-1470-20210602-14:17:20-9181,8.0,1.0,3.0,5.0,0.995285,9e-06,0.001754,0.001754
HighDensityCoverage-680-20210602-14:08:50-7894,9.0,1.0,1.0,3.0,0.990569,3.9e-05,0.003608,0.003608
HighDensityCoverage-980-20210602-14:12:06-8352,9.0,1.0,1.0,4.0,0.991548,5.9e-05,0.004445,0.004445
HighDensityCoverage-1280-20210602-14:15:35-8863,9.0,1.0,1.0,5.0,0.993594,3.7e-05,0.003513,0.003513
HighDensityCoverage-780-20210602-14:09:40-8016,9.0,1.0,2.0,3.0,0.99306,4e-06,0.001211,0.001211
HighDensityCoverage-1080-20210602-14:12:59-8506,9.0,1.0,2.0,4.0,0.996441,9e-06,0.001702,0.001702


Minimize m


qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-680-20210602-14:08:50-7894,9.0,1.0,1.0,3.0,0.990569,3.9e-05,0.003608,0.003608
HighDensityCoverage-980-20210602-14:12:06-8352,9.0,1.0,1.0,4.0,0.991548,5.9e-05,0.004445,0.004445
HighDensityCoverage-1280-20210602-14:15:35-8863,9.0,1.0,1.0,5.0,0.993594,3.7e-05,0.003513,0.003513
HighDensityCoverage-690-20210602-14:08:59-7914,10.0,1.0,1.0,3.0,0.993327,2e-06,0.000815,0.000815
HighDensityCoverage-990-20210602-14:12:15-8372,10.0,1.0,1.0,4.0,0.995641,4e-06,0.001177,0.001177
HighDensityCoverage-1290-20210602-14:15:37-8871,10.0,1.0,1.0,5.0,0.996708,1.1e-05,0.001915,0.001915
HighDensityCoverage-2790-20210602-14:31:01-11125,10.0,2.0,1.0,5.0,0.992883,4.2e-05,0.003743,0.003743
HighDensityCoverage-4290-20210602-14:45:55-13239,10.0,3.0,1.0,5.0,0.992438,8.1e-05,0.005233,0.005233
HighDensityCoverage-6990-20210602-15:11:29-16981,10.0,5.0,1.0,4.0,0.992171,3.4e-05,0.003402,0.003402


Minimize D


qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-1780-20210602-14:20:44-9668,9.0,2.0,3.0,1.0,0.992527,1.1e-05,0.001963,0.001963
HighDensityCoverage-3280-20210602-14:35:44-11799,9.0,3.0,3.0,1.0,0.991192,2.4e-05,0.002819,0.002819
HighDensityCoverage-4680-20210602-14:49:29-13749,9.0,4.0,2.0,1.0,0.990302,8e-06,0.001621,0.001621
HighDensityCoverage-4780-20210602-14:50:30-13893,9.0,4.0,3.0,1.0,0.990125,1e-05,0.001875,0.001875
HighDensityCoverage-6280-20210602-15:04:38-15936,9.0,5.0,3.0,1.0,0.991459,3.1e-05,0.003245,0.003245
HighDensityCoverage-1790-20210602-14:20:55-9692,10.0,2.0,3.0,1.0,0.990836,1.8e-05,0.002492,0.002492
HighDensityCoverage-3190-20210602-14:34:52-11675,10.0,3.0,2.0,1.0,0.99048,1.5e-05,0.002281,0.002281
HighDensityCoverage-3290-20210602-14:36:07-11847,10.0,3.0,3.0,1.0,0.99048,1.7e-05,0.002358,0.002358
HighDensityCoverage-4690-20210602-14:49:37-13769,10.0,4.0,2.0,1.0,0.993505,2.6e-05,0.002958,0.002958
HighDensityCoverage-4790-20210602-14:50:52-13943,10.0,4.0,3.0,1.0,0.994039,1.2e-05,0.002006,0.002006


## Observations

As we can see, we need at least R = 7m to get a mean coverage of 99%.

With R = 7m, the lowest config is: R=7m, T=1s, m=2, D=5s. D can be lowered to 3s if we increase T to 4s and m to 3.

With R = 8m we get a lot more possible configurations, but m needs to be at least 2; D must be at least 3s with T = 1s, and can be lowered to 2s with T = 2s (the config chosen below). If we use R = 9m, we can have m = 1 and T = 1s, but D must be at least 3s.

A "good" and "balanced" config, one that does not use the value `1` for any of the parameters, is displayed below. From now on, for the high density scenario, we will consider the following as the minimum parameters required to reach 99% coverage.

R = 8m

T = 2s

m = 2

D = 2s

In [15]:
# Show the row of the chosen configuration: R = 8, T = 2, m = 2, D = 2.
isChosen = (
    (sortedresult['R'] == 8)
    & (sortedresult['T'] == 2)
    & (sortedresult['m'] == 2)
    & (sortedresult['D'] == 2)
)
display(sortedresult[isChosen])

qname,R,T,m,D,coveredUsersPercentMean,coveredUsersPercentVariance,coveredUsersPercentNegerr,coveredUsersPercentPoserr
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
HighDensityCoverage-1970-20210602-14:22:19-9903,8.0,2.0,2.0,2.0,0.991904,1.2e-05,0.002026,0.002026
