# 2kr Analysis

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import product, chain, combinations
from scipy import stats
from IPython.display import display, HTML
%matplotlib inline

def parse_if_number(s):
    """Convert a raw cell value to the most specific type it represents.

    Parameters
    ----------
    s : value handed in by the CSV reader (normally a string).

    Returns
    -------
    float
        When ``float(s)`` succeeds.
    bool
        ``True`` / ``False`` for the exact strings ``"true"`` / ``"false"``.
    None
        For an empty (falsy) value.
    object
        Otherwise the value itself, unchanged.
    """
    try:
        return float(s)
    except (TypeError, ValueError, OverflowError):
        # Only "this is not a number" failures are handled here; anything
        # else (KeyboardInterrupt, SystemExit, ...) must propagate instead of
        # being silently swallowed by a bare ``except``.
        pass
    if s == "true":
        return True
    if s == "false":
        return False
    return s if s else None

def parse_ndarray(s):
    """Parse a space-separated string of numbers into a NumPy array.

    Returns ``None`` when ``s`` is empty (falsy).
    """
    if not s:
        return None
    return np.fromstring(s, sep=' ')

In [2]:
# Name of the exported CSV file to analyse
inputFile = '2kr-scalars-5rep.csv'
# Number of repetitions per configuration
repetitionsCount = -1 # -1 = auto-detect
# Names of the factors of the experiment
factors = ['R', 'T', 'm', 'D']

# Scalars kept in the wide table
scalarsFilter = [
    'Floorplan.userCount',
    'Floorplan.coveredUsers:sum',
    'Floorplan.collisions:sum',
    'Floorplan.activityTime:last',
]

# Performance indexes, as (scalar name, human-readable description) pairs
perfIndexes = [('Floorplan.collisions:sum', 'total number of collisions')]

## Load scalars

In [3]:
# Per-column parsers applied while the CSV is read: attribute values go
# through parse_if_number, the array-valued columns through parse_ndarray.
csvConverters = {'attrvalue': parse_if_number}
for arrayColumn in ('binedges', 'binvalues', 'vectime', 'vecvalue'):
    csvConverters[arrayColumn] = parse_ndarray

df = pd.read_csv('HighDensity/exported_data/' + inputFile, converters=csvConverters)

In [4]:
# Auto-detect the repetition count when it was not set explicitly:
# highest 'repetition' attribute value found in the data, plus one.
if repetitionsCount <= 0:
    repetitionRows = df[df.attrname == 'repetition']
    repetitionsCount = int(repetitionRows['attrvalue'].max()) + 1
print('Repetitions:', repetitionsCount)

# Sizes derived from the configuration
factorsCount = len(factors)
configsCount = 2 ** factorsCount
totalSims = configsCount * repetitionsCount

# Display settings: cap the height of scrolling outputs and allow pandas to
# show as many rows/columns as the analysis tables need.
display(HTML("<style>div.output_scroll { height: auto; max-height: 48em; }</style>"))
pd.set_option('display.max_rows', totalSims)
pd.set_option('display.max_columns', 2 * totalSims + 1)

Repetitions: 5


In [5]:
# Row selection: every scalar, every iteration variable except 'TO', the
# userCount parameter and the per-run repetition attribute.
isScalar = df.type == 'scalar'
isItervar = (df.type == 'itervar') & (df.attrname != 'TO')
isUserCountParam = (df.type == 'param') & (df.attrname == 'Floorplan.userCount')
isRepetition = (df.type == 'runattr') & (df.attrname == 'repetition')
scalars = df[isScalar | isItervar | isUserCountParam | isRepetition]

# Qualified name: the attribute name when present, otherwise "module.name"
qualifiedName = scalars.attrname.combine_first(scalars.module + '.' + scalars.name)
scalars = scalars.assign(qname = qualifiedName)
# Fill missing values from the (numeric) attribute value
scalars['value'] = scalars.value.combine_first(scalars.attrvalue.astype('float64'))

# One row per run, one column per qualified name, ordered by factors then repetition
scalars_wide = scalars.pivot_table(index=['run'], columns='qname', values='value')
scalars_wide = scalars_wide.sort_values([*factors, 'repetition'])

# After sorting, each consecutive group of repetitionsCount rows is one configuration
for position, run in enumerate(scalars_wide.index):
    scalars_wide.loc[run, 'config'] = position // repetitionsCount

keptColumns = ['config', 'repetition', *factors, *scalarsFilter]
scalars_wide = scalars_wide[keptColumns]

In [6]:
# Test data: hard-coded observations fed to the 2^k*r analysis.
# Row i holds the y values of experiment i, one value per repetition
# (the analysis reads it as test_data[i] for i in range(configsCount)).
test_data = [
    [110, 127, 124, 116, 121],
    [264, 275, 211, 211, 269],
    [187, 207, 188, 189, 183],
    [589, 541, 516, 445, 502],
    [52, 70, 82, 44, 50],
    [311, 156, 255, 264, 240],
    [125, 110, 89, 68, 70],
    [453, 546, 545, 508, 398],
    [120, 106, 116, 99, 106],
    [236, 276, 249, 226, 278],
    [200, 200, 193, 184, 199],
    [408, 559, 475, 539, 382],
    [46, 57, 51, 60, 66],
    [295, 260, 331, 245, 310],
    [122, 145, 139, 129, 139],
    [534, 472, 500, 491, 512],
]

In [7]:
# Column labels for every main effect and interaction: all non-empty subsets
# of the factor names, smallest subsets first, each label being the
# concatenation of its factor names. The subsets are taken over the factor
# NAMES (not over the characters of the joined string), so factor names
# longer than one character still yield exactly one label per sign column.
combinedFactors = [
    ''.join(subset)
    for size in range(1, factorsCount + 1)
    for subset in combinations(factors, size)
]
cols = ['exp', 'I'] + combinedFactors

# Sign table: one row per experiment (every -1/+1 assignment of the factors).
# Each row is [experiment number, 1 for the 'I' column] followed by the
# product of the signs of every factor subset, in the same order as the labels.
data = []
for exp, signs in enumerate(product([-1, 1], repeat=factorsCount)):
    effects = [
        np.prod(subset)
        for size in range(1, factorsCount + 1)
        for subset in combinations(signs, size)
    ]
    data.append([exp, 1] + effects)
factorsMatrix = pd.DataFrame.from_records(data, columns=cols, index='exp')
# Run the 2^k*r analysis once per performance index.
# NOTE: the observations are taken from test_data; perfIndex itself is not
# used inside the loop, only its description is printed.
for perfIndex, perfIndexDesc in perfIndexes:
    print("2^k*r analysis for the", perfIndexDesc)

    # Observation / residual column names, one pair per repetition
    yCols = ['y' + str(rep) for rep in range(repetitionsCount)]
    errCols = ['e' + str(rep) for rep in range(repetitionsCount)]

    # Observations: one row per experiment, plus the row mean and the
    # residual of every repetition with respect to that mean
    observations = [[exp] + test_data[exp] for exp in range(configsCount)]
    ysMatrix = pd.DataFrame.from_records(observations, columns=['exp'] + yCols, index='exp')
    ysMatrix['mean'] = ysMatrix.mean(axis=1)
    for yCol, errCol in zip(yCols, errCols):
        ysMatrix[errCol] = ysMatrix[yCol] - ysMatrix['mean']

    matrix = pd.concat([factorsMatrix, ysMatrix], axis=1)

    # For each column: sum of (column * mean) and that sum divided by the
    # number of configurations. Residual columns additionally get their sum
    # of squares, which accumulates into SSE.
    computeCols = ['idx', 'I'] + combinedFactors + errCols
    sumRow, meanRow, ssqRow = ['Sum'], ['Mean (Qi)'], ['SSQ']
    SSE = 0
    meanValues = matrix['mean'].values
    for col in computeCols[1:]:
        colValues = matrix[col].values
        weightedSum = np.sum(colValues * meanValues)
        sumRow.append(weightedSum)
        meanRow.append(weightedSum / configsCount)
        if col not in errCols:
            ssqRow.append('')
            continue
        squares = np.sum(colValues * colValues)
        ssqRow.append(squares)
        SSE += squares
    computeMatrix = pd.DataFrame.from_records([sumRow, meanRow, ssqRow], columns=computeCols, index='idx')

    # Variation attributed to each effect: (2^k)*r*qi^2; SST is SSE plus all of them
    contributions = []
    SST = SSE
    for col in combinedFactors:
        qi = computeMatrix.loc['Mean (Qi)', col]
        contribution = configsCount*repetitionsCount*(qi**2)
        contributions.append(contribution)
        SST += contribution
    fractions = [contribution / SST for contribution in contributions]

    # The last column ('SSE/SST') carries SSE, SST and their ratio respectively
    resultRows = [
        ['(2^k)*r*(qi)^2'] + contributions + [SSE],
        ['Variation'] + fractions + [SST],
        ['Variation%'] + ['{:.2%}'.format(fraction) for fraction in fractions] + ['{:.2%}'.format(SSE/SST)],
    ]
    resultMatrix = pd.DataFrame.from_records(resultRows, columns=['idx'] + combinedFactors + ['SSE/SST'], index='idx')

    display(matrix)
    display(computeMatrix)
    display(resultMatrix)

    print('SSE =', SSE)
    print('SST =', SST)
    print('Unexplained Variation =', '{:.2%}'.format(SSE/SST))
    print('####################################')
    print()

2^k*r analysis for the total number of collisions


Unnamed: 0_level_0,I,R,T,m,D,RT,Rm,RD,Tm,TD,mD,RTm,RTD,RmD,TmD,RTmD,y0,y1,y2,y3,y4,mean,e0,e1,e2,e3,e4
exp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
0,1,-1,-1,-1,-1,1,1,1,1,1,1,-1,-1,-1,-1,1,110,127,124,116,121,119.6,-9.6,7.4,4.4,-3.6,1.4
1,1,-1,-1,-1,1,1,1,-1,1,-1,-1,-1,1,1,1,-1,264,275,211,211,269,246.0,18.0,29.0,-35.0,-35.0,23.0
2,1,-1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,1,-1,187,207,188,189,183,190.8,-3.8,16.2,-2.8,-1.8,-7.8
3,1,-1,-1,1,1,1,-1,-1,-1,-1,1,1,1,-1,-1,1,589,541,516,445,502,518.6,70.4,22.4,-2.6,-73.6,-16.6
4,1,-1,1,-1,-1,-1,1,1,-1,-1,1,1,1,-1,1,-1,52,70,82,44,50,59.6,-7.6,10.4,22.4,-15.6,-9.6
5,1,-1,1,-1,1,-1,1,-1,-1,1,-1,1,-1,1,-1,1,311,156,255,264,240,245.2,65.8,-89.2,9.8,18.8,-5.2
6,1,-1,1,1,-1,-1,-1,1,1,-1,-1,-1,1,1,-1,1,125,110,89,68,70,92.4,32.6,17.6,-3.4,-24.4,-22.4
7,1,-1,1,1,1,-1,-1,-1,1,1,1,-1,-1,-1,1,-1,453,546,545,508,398,490.0,-37.0,56.0,55.0,18.0,-92.0
8,1,1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,-1,-1,120,106,116,99,106,109.4,10.6,-3.4,6.6,-10.4,-3.4
9,1,1,-1,-1,1,-1,-1,1,1,-1,-1,1,-1,-1,1,1,236,276,249,226,278,253.0,-17.0,23.0,-4.0,-27.0,25.0


Unnamed: 0_level_0,I,R,T,m,D,RT,Rm,RD,Tm,TD,mD,RTm,RTD,RmD,TmD,RTmD,e0,e1,e2,e3,e4
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Sum,3973.2,48.8,-237.2,1219.2,2057.6,138.4,-23.6,-17.2,-79.2,307.2,682.0,53.2,49.2,-144.8,11.6,-9.6,22725.6,56107.6,32540.2,-34509.4,-76864.0
Mean (Qi),248.325,3.05,-14.825,76.2,128.6,8.65,-1.475,-1.075,-4.95,19.2,42.625,3.325,3.075,-9.05,0.725,-0.6,1420.35,3506.725,2033.7625,-2156.8375,-4804.0
SSQ,,,,,,,,,,,,,,,,,18150.4,22987.4,6841.2,15578.4,19508.6


Unnamed: 0_level_0,R,T,m,D,RT,Rm,RD,Tm,TD,mD,RTm,RTD,RmD,TmD,RTmD,SSE/SST
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
(2^k)*r*(qi)^2,744.2,17582.45,464515.2,1323036.8,5985.8,174.05,92.45,1960.2,29491.2,145351.25,884.45,756.45,6552.2,42.05,28.8,83066.0
Variation,0.000358,0.008452,0.223296,0.635995,0.002877,0.000084,0.000044,0.000942,0.014177,0.069872,0.000425,0.000364,0.00315,0.00002,0.000014,2080263.55
Variation%,0.04%,0.85%,22.33%,63.60%,0.29%,0.01%,0.00%,0.09%,1.42%,6.99%,0.04%,0.04%,0.31%,0.00%,0.00%,3.99%


SSE = 83066.0
SST = 2080263.5499999998
Unexplained Variation = 3.99%
####################################

