In [1]:
import warnings
warnings.filterwarnings('ignore')
# For ignoring warnings printed by R magic commands

In [2]:
import os
import glob

In [3]:
# Import pandas
import pandas as pd

In [4]:
# Load the rpy2 IPython extension, which provides the %R / %%R magics used in this notebook
%load_ext rpy2.ipython
# Attach ggplot2 in the embedded R session (the trailing ';' suppresses the cell output).
# NOTE(review): R's require() only warns and returns FALSE when the package is missing,
# so a missing ggplot2 would not stop the notebook here -- confirm that is intended.
%R require(ggplot2);

In [5]:
# If experiment parameters change, modify this function
def AddParametersAsColumns(df, folder):
    """Annotate ``df`` in place with the experiment parameters encoded in a folder name.

    The final path component of ``folder`` is split on ``.``; section 1 holds the
    ``_``-separated cloud parameters and section 2 the ``_``-separated optimizer
    parameters. Only the final component is parsed, so a ``.`` elsewhere in the
    path (e.g. ``./logs`` or ``run.v2/``) cannot shift the fields.

    Args:
        df: pandas DataFrame to annotate; each value is broadcast to every row.
        folder: path of the experiment folder (a trailing separator is tolerated).

    Returns:
        None. ``df`` is modified in place.

    Raises:
        IndexError: if the folder name has fewer sections or fields than expected.
        ValueError: if a field that must be an integer cannot be parsed.
    """
    # normpath strips a trailing separator so basename never comes back empty.
    experimentName = os.path.basename(os.path.normpath(folder))
    expParams = experimentName.split('.')
    cloudParams = expParams[1].split('_')
    optimizerParams = expParams[2].split('_')

    # Cloud section: fields 9, 10, 11 and 14 are the ones kept as columns.
    df['groupSizeDist'] = cloudParams[9]
    df['placementDist'] = cloudParams[10]
    # A value of '-1' is reported as 'uniform'; any other value stays a string.
    df['colocateNumHostsPerLeaf'] = 'uniform' if cloudParams[11] == '-1' else cloudParams[11]
    df['seed'] = int(cloudParams[14])

    # Optimizer section: fields 1 through 5.
    df['algorithm'] = optimizerParams[1]
    df['numBitmaps'] = int(optimizerParams[2])
    df['numLeafsPerBitmap'] = int(optimizerParams[3])
    df['redundancyPerBitmap'] = int(optimizerParams[4])
    df['numRulesPerLeaf'] = int(optimizerParams[5])
    
# Collects the copies of one CSV file found across a set of folders into a single dataframe
def DataSetAsDataFrame(filename, folders, headers, header=None):
    """Load ``<folder>/<filename>`` for every folder and concatenate the results.

    Args:
        filename: name of the CSV file expected inside each folder.
        folders: iterable of experiment folder paths.
        headers: column names, passed to ``pandas.read_csv`` as ``names``.
        header: passed to ``pandas.read_csv`` as ``header`` (default ``None``).

    Returns:
        One DataFrame holding the rows of all files, each tagged with the
        parameter columns added by ``AddParametersAsColumns``.
    """
    frames = []
    for experimentFolder in folders:
        csvPath = '/'.join((experimentFolder, filename))
        frame = pd.read_csv(csvPath, sep=',', header=header, names=headers)
        # Tag every row with the parameters encoded in the folder name
        AddParametersAsColumns(frame, experimentFolder)
        frames.append(frame)
    # Stack the per-folder frames; each one keeps its own row index
    return pd.concat(frames)

In [6]:
%%R
# Shared ggplot2 theme: untitled legend placed on top, with small legend text
plotTheme <- theme(
    legend.title = element_blank(),
    legend.position = "top",
    legend.text = element_text(size = 6)
)

In [7]:
# Root directory that holds one sub-folder of logs per experiment.
# The trailing '/' matters: later cells build paths as `directory + <file name>`.
directory = '/mnt/sdb1/baseerat/numerical-evals/11-29-2017/logs-1M/'
# glob returns matches in arbitrary order; sort them so the row order of every
# concatenated dataframe (and of the exported CSVs) is the same on each run.
folders = sorted(glob.glob(directory + "logs*_0.*"))

In [8]:
# Rule counts per leaf, gathered from every experiment folder
ruleCountPerLeaf = DataSetAsDataFrame(
    filename='rule_count_per_leaf.csv',
    folders=folders,
    headers=['leafId', 'numRules'])

In [9]:
# Preview the first two rows to check the parsed columns
ruleCountPerLeaf.head(n=2)

Unnamed: 0,leafId,numRules,groupSizeDist,placementDist,colocateNumHostsPerLeaf,seed,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf
0,0,10000,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000
1,1,10000,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000


In [10]:
# Keep every row whose numRulesPerLeaf is not 32000
plotRuleCountPerLeaf = ruleCountPerLeaf.loc[
    ruleCountPerLeaf['numRulesPerLeaf'].ne(32000)]

In [12]:
# Write the filtered rule counts into the logs root directory (row index omitted)
plotRuleCountPerLeaf.to_csv(
    path_or_buf=directory + 'rule_count_per_leaf.csv',
    index=False)

In [11]:
# %%R -i plotRuleCountPerLeaf
# plt <- ggplot(plotRuleCountPerLeaf) +
#    geom_boxplot(aes(y=numRules, x=factor(numBitmaps), 
#                     fill=interaction(algorithm, numLeafsPerBitmap, redundancyPerBitmap)),
#                 outlier.size=0.5) +
#    facet_grid(groupSizeDist * numRulesPerLeaf ~ colocateNumHostsPerLeaf) +
#    xlab("Number of bitmaps") +
#    ylab("Number of rules") +
#    plotTheme
# ggsave(plt, file="rule_count_per_leaf.pdf", height=5, width=7)

In [19]:
# Group counts per tenant, gathered from every experiment folder
groupCountPerTenant = DataSetAsDataFrame(
    filename='group_count_per_tenant.csv',
    folders=folders,
    headers=['tenantId', 'groupCount'])

In [22]:
# Preview the first two rows to check the parsed columns
groupCountPerTenant.head(n=2)

Unnamed: 0,tenantId,groupCount,groupSizeDist,placementDist,colocateNumHostsPerLeaf,seed,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf
0,0,341,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000
1,1,85,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000


In [27]:
# Summary statistics of groupCount for a single configuration:
# algorithm 'exact-match', groupSizeDist 'wve', colocateNumHostsPerLeaf '48',
# numBitmaps 10, numLeafsPerBitmap 3, numRulesPerLeaf 10000.
# For tail percentiles, replace .describe() with
# .quantile([0.0, 0.5, 0.99, 0.999, 0.9999, 1.0]).
groupCountPerTenant.loc[
    groupCountPerTenant['algorithm'].eq('exact-match')
    & groupCountPerTenant['groupSizeDist'].eq('wve')
    & groupCountPerTenant['colocateNumHostsPerLeaf'].eq('48')
    & groupCountPerTenant['numBitmaps'].eq(10)
    & groupCountPerTenant['numLeafsPerBitmap'].eq(3)
    & groupCountPerTenant['numRulesPerLeaf'].eq(10000),
    'groupCount'
].describe()

count    3000.000000
mean      332.833333
std       802.921490
min        18.000000
25%        85.000000
50%       179.000000
75%       337.000000
max      9272.000000
Name: groupCount, dtype: float64

In [28]:
# VM counts per tenant, gathered from every experiment folder
vmCountPerTenant = DataSetAsDataFrame(
    filename='vm_count_per_tenant.csv',
    folders=folders,
    headers=['tenantId', 'vmCount'])

In [19]:
# Preview the first two rows to check the parsed columns
vmCountPerTenant.head(n=2)

Unnamed: 0,tenantId,vmCount,groupSizeDist,placementDist,colocateNumHostsPerLeaf,seed,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf
0,0,184,uniform,colocate-random-random,12,0,exact-match,10,1,0,10000
1,1,46,uniform,colocate-random-random,12,0,exact-match,10,1,0,10000


In [32]:
# Summary statistics of vmCount for a single configuration:
# algorithm 'exact-match', groupSizeDist 'wve', colocateNumHostsPerLeaf '24',
# numBitmaps 10, numLeafsPerBitmap 3, numRulesPerLeaf 10000.
# For tail percentiles, replace .describe() with
# .quantile([0.0, 0.5, 0.99, 0.999, 0.9999, 1.0]).
vmCountPerTenant.loc[
    vmCountPerTenant['algorithm'].eq('exact-match')
    & vmCountPerTenant['groupSizeDist'].eq('wve')
    & vmCountPerTenant['colocateNumHostsPerLeaf'].eq('24')
    & vmCountPerTenant['numBitmaps'].eq(10)
    & vmCountPerTenant['numLeafsPerBitmap'].eq(3)
    & vmCountPerTenant['numRulesPerLeaf'].eq(10000),
    'vmCount'
].describe()

count    3000.000000
mean      179.701000
std       432.859193
min        10.000000
25%        46.000000
50%        97.000000
75%       182.000000
max      4999.000000
Name: vmCount, dtype: float64

In [8]:
# Size of every group, gathered from every experiment folder
groupSizePerGroupPerTenant = DataSetAsDataFrame(
    filename='group_size_per_group_per_tenant.csv',
    folders=folders,
    headers=['groupId', 'groupSize'])

In [12]:
# Preview the first two rows to check the parsed columns
groupSizePerGroupPerTenant.head(n=2)

Unnamed: 0,groupId,groupSize,groupSizeDist,placementDist,colocateNumHostsPerLeaf,seed,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf
0,0,103,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000
1,1,107,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000


In [10]:
# Group-size quantiles per groupSizeDist for a single configuration:
# algorithm 'exact-match', colocateNumHostsPerLeaf '24', numBitmaps 10,
# numLeafsPerBitmap 3, numRulesPerLeaf 10000.
groupSizePerGroupPerTenant.loc[
    groupSizePerGroupPerTenant['algorithm'].eq('exact-match')
    & groupSizePerGroupPerTenant['colocateNumHostsPerLeaf'].eq('24')
    & groupSizePerGroupPerTenant['numBitmaps'].eq(10)
    & groupSizePerGroupPerTenant['numLeafsPerBitmap'].eq(3)
    & groupSizePerGroupPerTenant['numRulesPerLeaf'].eq(10000)
].groupby('groupSizeDist')['groupSize'].quantile(
    [0.0, 0.5, 0.8, 0.99, 0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.999, 0.9999, 1.0])

groupSizeDist        
uniform        0.0000       5.000
               0.5000     141.000
               0.8000     977.000
               0.9900    4291.000
               0.9910    4346.000
               0.9920    4400.000
               0.9930    4458.000
               0.9940    4512.000
               0.9950    4566.000
               0.9960    4621.000
               0.9990    4834.000
               0.9999    4972.000
               1.0000    4999.000
wve            0.0000       5.000
               0.5000      12.000
               0.8000      61.000
               0.9900     390.000
               0.9910     417.000
               0.9920     457.000
               0.9930     520.000
               0.9940     694.000
               0.9950    2395.505
               0.9960    3146.000
               0.9990    4742.000
               0.9999    4962.000
               1.0000    4998.000
Name: groupSize, dtype: float64

In [11]:
# Rows of a single configuration, kept for export:
# algorithm 'exact-match', colocateNumHostsPerLeaf '24', numBitmaps 10,
# numLeafsPerBitmap 3, numRulesPerLeaf 10000.
plotGroupSizePerGroupPerTenant = groupSizePerGroupPerTenant.loc[
    groupSizePerGroupPerTenant['algorithm'].eq('exact-match')
    & groupSizePerGroupPerTenant['colocateNumHostsPerLeaf'].eq('24')
    & groupSizePerGroupPerTenant['numBitmaps'].eq(10)
    & groupSizePerGroupPerTenant['numLeafsPerBitmap'].eq(3)
    & groupSizePerGroupPerTenant['numRulesPerLeaf'].eq(10000)
]

In [13]:
# Write the selected group sizes into the logs root directory (row index omitted)
plotGroupSizePerGroupPerTenant.to_csv(
    path_or_buf=directory + 'group_size_per_group_per_tenant.csv',
    index=False)

In [9]:
# Groups covered by bitmaps alone, gathered from every experiment folder.
# header=0 is forwarded to pandas.read_csv together with the explicit column
# names, so each file's own first row is replaced by these names.
groupsCoveredWithBitmapsOnly = DataSetAsDataFrame(
    filename='groups_covered_with_bitmaps_only.csv',
    folders=folders,
    headers=['index', 'groupsCovered', 'groupsCoveredWithoutDefaultBitmap'],
    header=0)

In [10]:
# Keep every row whose numRulesPerLeaf is not 32000
plotGroupsCoveredWithBitmapsOnly = groupsCoveredWithBitmapsOnly.loc[
    groupsCoveredWithBitmapsOnly['numRulesPerLeaf'].ne(32000)]

In [11]:
# Write the filtered coverage numbers into the logs root directory (row index omitted)
plotGroupsCoveredWithBitmapsOnly.to_csv(
    path_or_buf=directory + 'groups_covered_with_bitmaps_only.csv',
    index=False)

In [None]:
# %%R -i plotGroupsCoveredWithBitmapsOnly
# plt <- ggplot(plotGroupsCoveredWithBitmapsOnly) +
#    geom_bar(aes(y=groupsCoveredWithoutDefaultBitmap, x=factor(numBitmaps), 
#                 fill=interaction(algorithm, numLeafsPerBitmap, redundancyPerBitmap)),
#             stat="identity",position=position_dodge()) +
#    facet_grid(groupSizeDist * numRulesPerLeaf ~ colocateNumHostsPerLeaf) +
#    xlab("Number of bitmaps") +
#    ylab("Groups covered (without default bitmap)") +
#    plotTheme
# ggsave(plt, file="groups_covered_with_bitmaps_only.pdf")

In [30]:
# trafficOverhead = DataSetAsDataFrame('traffic_overhead.csv', 
#                                      folders, 
#                                      ['index', 'trafficOverhead'])

In [9]:
# %%R -i trafficOverhead
# ggplot(trafficOverhead) +
#    geom_bar(aes(y=trafficOverhead, x=factor(numBitmaps), 
#                 fill=interaction(algorithm, numLeafsPerBitmap, redundancyPerBitmap)),
#             stat="identity",position=position_dodge()) +
#    facet_grid(groupSizeDist * numRulesPerLeaf ~ colocateNumHostsPerLeaf) +
#    xlab("Number of bitmaps") +
#    ylab("Traffic Overhead") +
#    plotTheme

In [8]:
# Leaf-spine traffic of every group, gathered from every experiment folder.
# header=0 is forwarded to pandas.read_csv together with the explicit column
# names, so each file's own first row is replaced by these names.
leafSpineTraffic = DataSetAsDataFrame(
    filename='leaf_spine_traffic_per_group_per_tenant.csv',
    folders=folders,
    headers=['groupId', 'multicastTraffic', 'unicastTraffic',
             'overlayTraffic', 'baseeratTraffic'],
    header=0)

In [9]:
# Preview the first two rows to check the parsed columns
leafSpineTraffic.head(n=2)

Unnamed: 0,groupId,multicastTraffic,unicastTraffic,overlayTraffic,baseeratTraffic,groupSizeDist,placementDist,colocateNumHostsPerLeaf,seed,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf
0,0,121,412,238,121,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000
1,1,125,428,246,125,uniform,colocate-random-random,12,0,exact-match,10,3,0,10000


In [10]:
# leafSpineTraffic.to_csv(directory + 'leaf_spine_traffic_per_group_per_tenant.csv', index=False)

In [11]:
# Total traffic of each kind per experiment configuration (one row per
# parameter combination and seed); the group keys are turned back into columns.
leafSpineOverhead = (
    leafSpineTraffic
    .groupby(['groupSizeDist', 'placementDist', 'colocateNumHostsPerLeaf',
              'algorithm', 'numBitmaps', 'numLeafsPerBitmap',
              'redundancyPerBitmap', 'numRulesPerLeaf', 'seed'])
    [['unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic']]
    .sum()
    .reset_index()
)

In [12]:
# Preview the first two aggregated rows
leafSpineOverhead.head(n=2)

Unnamed: 0,groupSizeDist,placementDist,colocateNumHostsPerLeaf,algorithm,numBitmaps,numLeafsPerBitmap,redundancyPerBitmap,numRulesPerLeaf,seed,unicastTraffic,multicastTraffic,baseeratTraffic,overlayTraffic
0,uniform,colocate-random-random,12,exact-match,10,3,0,10000,0,2345430212,664912636,3022179814,1325831272
1,uniform,colocate-random-random,12,exact-match,10,3,0,32000,0,2345430212,664912636,2524882904,1325831272


In [15]:
# Keep every row whose numRulesPerLeaf is not 32000
plotLeafSpineOverhead = leafSpineOverhead.loc[
    leafSpineOverhead['numRulesPerLeaf'].ne(32000)]

In [16]:
# Write the filtered traffic totals into the logs root directory (row index omitted)
plotLeafSpineOverhead.to_csv(
    path_or_buf=directory + 'leaf_spine_overhead_per_group_per_tenant.csv',
    index=False)