In [1]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# For ignoring warnings printed by R magic commands

In [2]:
import os
import glob

In [3]:
# Import pandas
import pandas as pd

In [4]:
# Load R magic
# (the rpy2 IPython extension; the %R / %%R cells in this notebook rely on it)
%load_ext rpy2.ipython
# Import ggplot2
# (runs `require(ggplot2);` inside the embedded R session)
%R require(ggplot2);

In [5]:
import progressbar

def bar_range(x, desc):
    """Wrap a sequence (or a count) in a labelled console progress bar.

    Args:
        x: A ``range`` or ``list`` is iterated as given; any other value is
            handed to ``range()`` and therefore treated as an iteration count.
        desc: Text shown in front of the percentage (a ': ' suffix is appended).

    Returns:
        Whatever calling the ``progressbar.ProgressBar`` instance on the items
        returns.
    """
    label = '%s: ' % desc
    progress = progressbar.ProgressBar(widgets=[
        label, progressbar.Percentage(),
        ' ', progressbar.Bar(),
        ' ', progressbar.ETA(),
    ])
    # Counts are expanded to a range; ranges and lists pass through untouched.
    items = x if isinstance(x, (range, list)) else range(x)
    return progress(items)

In [6]:
# If experiment parameters change, modify this function
def AddParametersAsColumns(df, folder):
    """Annotate ``df`` in place with the parameters encoded in a log folder name.

    The last path component of ``folder`` is split on '.'. Its second field
    holds the '_'-separated cloud parameters; its third field, and the fourth
    one when present, each hold one '_'-separated optimizer configuration.

    Args:
        df: DataFrame receiving the parameter columns (mutated in place).
        folder: Path of the experiment log folder. Trailing '/' characters are
            ignored.

    Returns:
        None.

    Raises:
        IndexError: if the folder name has fewer fields than indexed here.
        ValueError: if a numeric field cannot be converted with ``int``.
    """
    # Strip trailing slashes first: without this, 'path/logs.a.b/' yields an
    # empty last component and the field lookups below fail with IndexError.
    expParams = folder.rstrip('/').split('/')[-1].split('.')

    cloudParams = expParams[1].split('_')
    df['groupSizeDist'] = cloudParams[10]
    df['placementDist'] = cloudParams[11]
    # A value of '-1' is reported as 'uniform'; any other value is kept as text.
    df['placementNumHostsPerLeaf'] = 'uniform' if cloudParams[12] == '-1' else cloudParams[12]
    df['seed'] = int(cloudParams[15])

    # The first optimizer field is mandatory, the second one is optional.
    _AddOptimizerParametersAsColumns(df, expParams[2])
    if len(expParams) > 3:
        _AddOptimizerParametersAsColumns(df, expParams[3])


def _AddOptimizerParametersAsColumns(df, optimizerField):
    """Add the columns of one '_'-separated optimizer field, prefixed by its node type."""
    optimizerParams = optimizerField.split('_')
    # Field 7 supplies the column-name prefix (e.g. 'pods' -> 'podsAlgorithm').
    node_type = optimizerParams[7]
    df['%sAlgorithm' % node_type] = optimizerParams[0]
    df['%sNumBitmaps' % node_type] = int(optimizerParams[1])
    df['%sNumNodesPerBitmap' % node_type] = int(optimizerParams[2])
    df['%sRedundancyPerBitmap' % node_type] = int(optimizerParams[3])
    df['%sNumRules' % node_type] = int(optimizerParams[4])
    
# Turns all files of a given filename across a set of folders into a single dataframe
def DataSetAsDataFrame(filename, folders, headers, header=None, reset_index=True):
    """Load ``filename`` from every folder and stack the results into one DataFrame.

    Each per-folder frame is annotated by ``AddParametersAsColumns`` with the
    parameters encoded in its folder name before the frames are concatenated.

    Args:
        filename: Name of the CSV file expected inside every folder.
        folders: Iterable of folder paths.
        headers: Column names passed to ``pd.read_csv`` as ``names``.
        header: Passed to ``pd.read_csv`` as ``header`` (``None`` means the file
            has no header row; ``0`` replaces the existing header with ``headers``).
        reset_index: When true, the combined frame gets a fresh 0..n-1 index.

    Returns:
        The concatenated DataFrame.

    Raises:
        ValueError: raised by ``pd.concat`` when ``folders`` is empty.
    """
    # Read dataset as dataframe
    def ReadDataSet(folder):
        df = pd.read_csv(folder + '/' + filename, sep=',', header=header, names=headers)
        AddParametersAsColumns(df, folder)
        return df
    # Get the list of dataframes (materialised so pd.concat receives a real sequence)
    dfs = [ReadDataSet(folder) for folder in folders]
    # Combine into a single dataframe
    df = pd.concat(dfs)
    if reset_index:
        # drop=True discards the old per-file index directly. The former
        # reset_index() + drop('index') pair removed a genuine data column
        # whenever `headers` happened to contain a column named 'index'.
        df.reset_index(drop=True, inplace=True)
    return df

In [7]:
%%R 
# Shared ggplot2 theme: no legend title, legend placed on top, legend text size 6.
plotTheme <- theme(legend.title=element_blank(), legend.position="top", legend.text=element_text(size=6))

In [10]:
# Root directory of the experiment logs; every glob below is relative to it.
directory = '/mnt/sdb1/baseerat/numerical-evals/12-11-2017/logs-1M/'
# Log folders matching "logs.*_0.*_pods" (presumably the seed-0 runs - confirm).
folders = glob.glob(directory + "logs.*_0.*_pods")
# Log folders matching "logs.*_0.*_leafs*".
leafs_folders = glob.glob(directory + "logs.*_0.*_leafs*")

In [9]:
# Load vm_count_per_tenant.csv from every folder in `folders` into one frame
# whose data column is named 'vmCount'.
vmCountPerTenant = DataSetAsDataFrame('vm_count_per_tenant.csv', 
                                      folders, 
                                      ['vmCount'])

In [10]:
# Preview the first two rows.
vmCountPerTenant.head(2)

Unnamed: 0,vmCount,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules
0,184,uniform,colocate-uniform,uniform,0,exact-match,1,3,0,10000
1,46,uniform,colocate-uniform,uniform,0,exact-match,1,3,0,10000


In [10]:
# `criterion` is True for every row that differs from the reference
# configuration on at least one parameter; those rows are removed.
criterion = ~(vmCountPerTenant['groupSizeDist'].eq('wve')
              & vmCountPerTenant['placementDist'].eq('colocate-uniform')
              & vmCountPerTenant['placementNumHostsPerLeaf'].eq('uniform')
              & vmCountPerTenant['podsAlgorithm'].eq('exact-match')
              & vmCountPerTenant['podsNumBitmaps'].eq(1)
              & vmCountPerTenant['podsNumNodesPerBitmap'].eq(3)
              & vmCountPerTenant['podsRedundancyPerBitmap'].eq(0)
              & vmCountPerTenant['podsNumRules'].eq(10000))
vmCountPerTenant.drop(vmCountPerTenant.index[criterion], inplace=True)
# The parameter columns used by the filter hold a single value now; remove them.
vmCountPerTenant.drop(
    ['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf',
     'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
     'podsRedundancyPerBitmap', 'podsNumRules'],
    axis=1, inplace=True)

In [11]:
# Summary statistics of the VM count per tenant (the quantile variant is kept
# below as an alternative view).
# vmCountPerTenant['vmCount'].quantile([0.0, 0.5, 0.99, 0.999, 0.9999, 1.0])
vmCountPerTenant['vmCount'].describe()

count    9000.000000
mean      178.771111
std       409.708421
min        10.000000
25%        46.000000
50%        97.000000
75%       183.000000
max      4999.000000
Name: vmCount, dtype: float64

In [12]:
# Write the filtered frame to <directory>/vm_count_per_tenant.csv without the index.
vmCountPerTenant.to_csv(directory + 'vm_count_per_tenant.csv', index=False)

In [13]:
# Load group_count_per_tenant.csv from every folder in `folders`; the data
# column is named 'groupCount'.
groupCountPerTenant = DataSetAsDataFrame('group_count_per_tenant.csv', 
                                         folders, ['groupCount'])

In [14]:
# `criterion` is True for every row that differs from the reference
# configuration on at least one parameter; those rows are removed.
criterion = ~(groupCountPerTenant['groupSizeDist'].eq('wve')
              & groupCountPerTenant['placementDist'].eq('colocate-uniform')
              & groupCountPerTenant['placementNumHostsPerLeaf'].eq('uniform')
              & groupCountPerTenant['podsAlgorithm'].eq('exact-match')
              & groupCountPerTenant['podsNumBitmaps'].eq(1)
              & groupCountPerTenant['podsNumNodesPerBitmap'].eq(3)
              & groupCountPerTenant['podsRedundancyPerBitmap'].eq(0)
              & groupCountPerTenant['podsNumRules'].eq(10000))
groupCountPerTenant.drop(groupCountPerTenant.index[criterion], inplace=True)
# The parameter columns used by the filter hold a single value now; remove them.
groupCountPerTenant.drop(
    ['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf',
     'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
     'podsRedundancyPerBitmap', 'podsNumRules'],
    axis=1, inplace=True)

In [15]:
# Summary statistics of the group count per tenant (quantile variant kept below).
# groupCountPerTenant['groupCount'].quantile([0.0, 0.5, 0.99, 0.999, 0.9999, 1.0])
groupCountPerTenant['groupCount'].describe()

count    9000.000000
mean      332.835111
std       761.993524
min        18.000000
25%        85.000000
50%       181.000000
75%       343.000000
max      9272.000000
Name: groupCount, dtype: float64

In [16]:
# Write the filtered frame to <directory>/group_count_per_tenant.csv without the index.
groupCountPerTenant.to_csv(directory + 'group_count_per_tenant.csv', index=False)

In [9]:
# Restrict to folders matching "logs.*_0.random-fuzzy-match_*_pods".
_folders = glob.glob(directory + "logs.*_0.random-fuzzy-match_*_pods")

In [10]:
# Load group_size_per_group_per_tenant.csv from `_folders`; the data column is
# named 'groupSize'.
groupSizePerGroupPerTenant = DataSetAsDataFrame('group_size_per_group_per_tenant.csv', 
                                                _folders, ['groupSize'])

In [11]:
# Row count of the combined frame.
len(groupSizePerGroupPerTenant)

143784000

In [18]:
# `criterion` is True for every row that differs from the reference placement /
# pods configuration; 'groupSizeDist' is deliberately not part of the filter.
criterion = ~(groupSizePerGroupPerTenant['placementDist'].eq('colocate-uniform')
              & groupSizePerGroupPerTenant['placementNumHostsPerLeaf'].eq('uniform')
              & groupSizePerGroupPerTenant['podsAlgorithm'].eq('exact-match')
              & groupSizePerGroupPerTenant['podsNumBitmaps'].eq(1)
              & groupSizePerGroupPerTenant['podsNumNodesPerBitmap'].eq(3)
              & groupSizePerGroupPerTenant['podsRedundancyPerBitmap'].eq(0)
              & groupSizePerGroupPerTenant['podsNumRules'].eq(10000))
groupSizePerGroupPerTenant.drop(groupSizePerGroupPerTenant.index[criterion], inplace=True)
# The parameter columns used by the filter hold a single value now; remove them.
groupSizePerGroupPerTenant.drop(
    ['placementDist', 'placementNumHostsPerLeaf',
     'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
     'podsRedundancyPerBitmap', 'podsNumRules'],
    axis=1, inplace=True)

In [19]:
# Group-size quantiles per group-size distribution, with extra resolution
# between the 99th and 99.6th percentiles.
groupSizePerGroupPerTenant.groupby('groupSizeDist')['groupSize'].quantile(
    [0.0, 0.5, 0.8, 0.99, 0.991, 0.992, 0.993, 0.994, 0.995, 0.996, 0.999, 0.9999, 1.0])

groupSizeDist        
uniform        0.0000       5.000
               0.5000     138.000
               0.8000     823.000
               0.9900    4029.000
               0.9910    4087.000
               0.9920    4149.000
               0.9930    4214.000
               0.9940    4283.000
               0.9950    4355.000
               0.9960    4437.000
               0.9990    4732.000
               0.9999    4940.000
               1.0000    4999.000
wve            0.0000       5.000
               0.5000      12.000
               0.8000      54.000
               0.9900     371.000
               0.9910     401.000
               0.9920     439.000
               0.9930     502.000
               0.9940     652.000
               0.9950    1684.425
               0.9960    2648.000
               0.9990    4503.000
               0.9999    4931.000
               1.0000    4998.000
Name: groupSize, dtype: float64

In [20]:
# Write the filtered frame to <directory>/group_size_per_group_per_tenant.csv without the index.
groupSizePerGroupPerTenant.to_csv(directory + 'group_size_per_group_per_tenant.csv', index=False)

In [10]:
# Folders matching "logs.*_0.*_pods" followed by those matching "logs.*_1.*_pods".
_folders = glob.glob(directory + "logs.*_0.*_pods") + glob.glob(directory + "logs.*_1.*_pods")

In [11]:
# Load leaf_count_per_group_per_tenant.csv from `_folders`; the data column is
# named 'leafCount'.
leafCountPerGroupPerTenant = DataSetAsDataFrame('leaf_count_per_group_per_tenant.csv', 
                                                _folders, ['leafCount'])

In [12]:
# `criterion` is True for every row that differs from the reference pods
# configuration; those rows are removed.
criterion = ~(leafCountPerGroupPerTenant['podsAlgorithm'].eq('exact-match')
              & leafCountPerGroupPerTenant['podsNumBitmaps'].eq(1)
              & leafCountPerGroupPerTenant['podsNumNodesPerBitmap'].eq(3)
              & leafCountPerGroupPerTenant['podsRedundancyPerBitmap'].eq(0)
              & leafCountPerGroupPerTenant['podsNumRules'].eq(10000))
leafCountPerGroupPerTenant.drop(leafCountPerGroupPerTenant.index[criterion], inplace=True)
# The pods columns used by the filter hold a single value now; remove them.
leafCountPerGroupPerTenant.drop(
    ['podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
     'podsRedundancyPerBitmap', 'podsNumRules'],
    axis=1, inplace=True)

In [13]:
# Summary statistics of the leaf count for each combination of group-size
# distribution, placement distribution and hosts-per-leaf setting.
# leafCountPerGroupPerTenant.groupby(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf'])[
#     'leafCount'].quantile([0.0, 0.5, 0.8, 0.99, 0.999, 0.9999, 1.0])
leafCountPerGroupPerTenant.groupby(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf'])[
    'leafCount'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std,min,25%,50%,75%,max
groupSizeDist,placementDist,placementNumHostsPerLeaf,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
uniform,colocate-colocate-uniform,12,1997050.0,13.408252,14.204431,1.0,3.0,6.0,23.0,46.0
uniform,colocate-colocate-uniform,24,1997050.0,13.334334,14.09936,1.0,3.0,6.0,23.0,59.0
uniform,colocate-colocate-uniform,48,1997050.0,13.234965,13.985526,1.0,3.0,6.0,22.0,46.0
uniform,colocate-uniform,uniform,1997050.0,51.158862,32.715848,3.0,32.0,46.0,48.0,192.0
wve,colocate-colocate-uniform,12,1997050.0,10.489194,11.376734,1.0,3.0,5.0,14.0,47.0
wve,colocate-colocate-uniform,24,1997050.0,10.523424,11.373267,1.0,3.0,5.0,14.0,45.0
wve,colocate-colocate-uniform,48,1997050.0,10.553487,11.499393,1.0,3.0,5.0,14.0,62.0
wve,colocate-uniform,uniform,1997050.0,21.316959,23.384927,2.0,7.0,10.0,26.0,240.0


In [14]:
# Write the filtered frame to <directory>/leaf_count_per_group_per_tenant.csv without the index.
leafCountPerGroupPerTenant.to_csv(directory + 'leaf_count_per_group_per_tenant.csv', index=False)

In [8]:
# Folders matching "logs.*_0.*_pods" followed by those matching "logs.*_1.*_pods".
_folders = glob.glob(directory + "logs.*_0.*_pods") + glob.glob(directory + "logs.*_1.*_pods")

In [None]:
# Load pod_count_per_group_per_tenant.csv from `_folders`; the data column is
# named 'podCount'.
podCountPerGroupPerTenant = DataSetAsDataFrame('pod_count_per_group_per_tenant.csv', 
                                                _folders, ['podCount'])

In [10]:
# `criterion` is True for every row that differs from the reference pods
# configuration; those rows are removed.
criterion = ~(podCountPerGroupPerTenant['podsAlgorithm'].eq('exact-match')
              & podCountPerGroupPerTenant['podsNumBitmaps'].eq(1)
              & podCountPerGroupPerTenant['podsNumNodesPerBitmap'].eq(3)
              & podCountPerGroupPerTenant['podsRedundancyPerBitmap'].eq(0)
              & podCountPerGroupPerTenant['podsNumRules'].eq(10000))
podCountPerGroupPerTenant.drop(podCountPerGroupPerTenant.index[criterion], inplace=True)
# The pods columns used by the filter hold a single value now; remove them.
podCountPerGroupPerTenant.drop(
    ['podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
     'podsRedundancyPerBitmap', 'podsNumRules'],
    axis=1, inplace=True)

In [11]:
# Summary statistics of the pod count for each combination of group-size
# distribution, placement distribution and hosts-per-leaf setting.
# podCountPerGroupPerTenant.groupby(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf'])[
#     'podCount'].quantile([0.0, 0.5, 0.8, 0.99, 0.999, 0.9999, 1.0])
podCountPerGroupPerTenant.groupby(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf'])[
    'podCount'].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std,min,25%,50%,75%,max
groupSizeDist,placementDist,placementNumHostsPerLeaf,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
uniform,colocate-colocate-uniform,12,1997050.0,1.016901,0.151501,1.0,1.0,1.0,1.0,3.0
uniform,colocate-colocate-uniform,24,1997050.0,1.016685,0.177343,1.0,1.0,1.0,1.0,3.0
uniform,colocate-colocate-uniform,48,1997050.0,1.009034,0.094619,1.0,1.0,1.0,1.0,2.0
uniform,colocate-uniform,uniform,1997050.0,1.297471,0.60673,1.0,1.0,1.0,1.0,4.0
wve,colocate-colocate-uniform,12,1997050.0,1.005387,0.073198,1.0,1.0,1.0,1.0,2.0
wve,colocate-colocate-uniform,24,1997050.0,1.01529,0.122702,1.0,1.0,1.0,1.0,2.0
wve,colocate-colocate-uniform,48,1997050.0,1.019931,0.16832,1.0,1.0,1.0,1.0,3.0
wve,colocate-uniform,uniform,1997050.0,1.28737,0.605536,1.0,1.0,1.0,1.0,5.0


In [12]:
# Write the filtered frame to <directory>/pod_count_per_group_per_tenant.csv without the index.
podCountPerGroupPerTenant.to_csv(directory + 'pod_count_per_group_per_tenant.csv', index=False)

In [12]:
# Only folders matching "logs.*_0.random-fuzzy-match_*_pods" are used; the
# "logs.*_1.*_pods" addition is disabled in the trailing comment.
_folders = glob.glob(directory + "logs.*_0.random-fuzzy-match_*_pods") # + glob.glob(directory + "logs.*_1.*_pods")

In [13]:
# Load pods_algorithm_elapse_time.csv from `_folders`; the data column is
# named 'elapseTime'.
podsAlgorithmElapseTime = DataSetAsDataFrame('pods_algorithm_elapse_time.csv', 
                                              _folders, ['elapseTime'])

In [14]:
# Row count of the combined frame.
len(podsAlgorithmElapseTime)

143784000

In [14]:
# Preview the first two rows.
podsAlgorithmElapseTime.head(2)

Unnamed: 0,elapseTime,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules
0,1.4e-05,uniform,colocate-uniform,uniform,0,exact-match,1,3,0,10000
1,4e-06,uniform,colocate-uniform,uniform,0,exact-match,1,3,0,10000


In [11]:
# Remove the group-size / placement columns from the frame in place.
# NOTE(review): a later cell filters podsAlgorithmElapseTime on 'placementDist'
# and 'placementNumHostsPerLeaf'. Running the notebook top to bottom after this
# in-place drop would make that filter raise KeyError - confirm whether this
# cell is still meant to run.
podsAlgorithmElapseTime.drop(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf'], axis=1, inplace=True)

In [14]:
# Elapsed-time summary per pods algorithm (the finer per-configuration grouping
# is kept below, disabled).
# podsAlgorithmElapseTime.groupby(['podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap', 
#                                 'podsRedundancyPerBitmap', 'podsNumRules'])['elapseTime'].describe()
podsAlgorithmElapseTime.groupby('podsAlgorithm')['elapseTime'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
podsAlgorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
exact-match,95858400.0,3.671867e-07,1.5e-05,4.377216e-08,1.131557e-07,1.178123e-07,1.378357e-07,0.024256
random-fuzzy-match,287575200.0,6.035337e-07,3.2e-05,4.284084e-08,1.136214e-07,1.178123e-07,1.448207e-07,0.20699


In [None]:
# Write the frame to <directory>/pods_algorithm_elapse_time.csv without the index.
podsAlgorithmElapseTime.to_csv(directory + 'pods_algorithm_elapse_time.csv', index=False)

In [15]:
# Attach the group size of each row as a new 'groupSize' column.
# NOTE(review): Series assignment aligns on index labels, and an earlier cell
# drops rows from groupSizePerGroupPerTenant in place. Labels missing there
# would become NaN here - confirm both frames still share the same index.
podsAlgorithmElapseTime['groupSize'] = groupSizePerGroupPerTenant['groupSize']

In [16]:
# Keep only the two placement set-ups of interest - ('colocate-uniform', 'uniform')
# and ('colocate-colocate-uniform', '12') - restricted to runs with 64000 pods rules.
podsAlgorithmElapseTimeTrim = podsAlgorithmElapseTime[
    ((podsAlgorithmElapseTime['placementDist'].eq('colocate-uniform')
      & podsAlgorithmElapseTime['placementNumHostsPerLeaf'].eq('uniform'))
     | (podsAlgorithmElapseTime['placementDist'].eq('colocate-colocate-uniform')
        & podsAlgorithmElapseTime['placementNumHostsPerLeaf'].eq('12')))
    & podsAlgorithmElapseTime['podsNumRules'].eq(64000)
]

In [17]:
# Sanity check: list the hosts-per-leaf values that survived the trim.
podsAlgorithmElapseTimeTrim['placementNumHostsPerLeaf'].unique()

array(['12', 'uniform'], dtype=object)

In [18]:
# Write the trimmed frame to <directory>/pods_algorithm_elapse_time_trim.csv without the index.
podsAlgorithmElapseTimeTrim.to_csv(directory + 'pods_algorithm_elapse_time_trim.csv', index=False)

In [None]:
# @Lalith

In [13]:
# podsAlgorithmElapseTime = pd.read_csv(directory + 'pods_algorithm_elapse_time.csv')

In [17]:
# podsAlgorithmElapseTimeTrim = podsAlgorithmElapseTime[
#     (((podsAlgorithmElapseTime['placementDist'] == 'colocate-uniform') &
#       (podsAlgorithmElapseTime['placementNumHostsPerLeaf'] == 'uniform')) |
#      ((podsAlgorithmElapseTime['placementDist'] == 'colocate-colocate-uniform') &
#       (podsAlgorithmElapseTime['placementNumHostsPerLeaf'] == '12'))) &
#     (podsAlgorithmElapseTime['seed'] == 0) &
#     (podsAlgorithmElapseTime['podsAlgorithm'] == 'random-fuzzy-match') &
#     (podsAlgorithmElapseTime['podsNumRules'] == 64000)
# ]

In [None]:
# podsAlgorithmElapseTimeTrim['podsNumRules'].unique()

In [None]:
# podsAlgorithmElapseTimeTrim.to_csv(directory + 'pods_algorithm_elapse_time_trim.csv', index=False)

In [26]:
# The three glob patterns differ in a single field (0, 6 or 12); matches are
# concatenated in that order.
_leafs_folders = [
    path
    for field in ('0', '6', '12')
    for path in glob.glob(
        directory + "logs.*_0.*.random-fuzzy-match_*_3_" + field + "_*_2_3_leafs_*")
]

In [27]:
# Load leafs_algorithm_elapse_time.csv from `_leafs_folders`; the data column
# is named 'elapseTime'.
leafsAlgorithmElapseTime = DataSetAsDataFrame('leafs_algorithm_elapse_time.csv', 
                                              _leafs_folders, ['elapseTime'])

In [28]:
# Row count of the combined frame.
len(leafsAlgorithmElapseTime)

143784000

In [11]:
# Preview the first two rows.
leafsAlgorithmElapseTime.head(2)

Unnamed: 0,elapseTime,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules,leafsAlgorithm,leafsNumBitmaps,leafsNumNodesPerBitmap,leafsRedundancyPerBitmap,leafsNumRules
0,1.6e-05,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,10,3,0,10000
1,4e-06,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,10,3,0,10000


In [None]:
# Remove the group-size, placement and pods parameter columns in place.
# NOTE(review): a later cell filters leafsAlgorithmElapseTime on 'placementDist',
# 'placementNumHostsPerLeaf' and 'podsNumRules'. Running the notebook top to
# bottom after this in-place drop would make that filter raise KeyError -
# confirm whether this cell is still meant to run.
leafsAlgorithmElapseTime.drop(['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf',
                               'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap', 
                               'podsRedundancyPerBitmap', 'podsNumRules'], axis=1, inplace=True)

In [12]:
# Elapsed-time summary per leafs algorithm.
leafsAlgorithmElapseTime.groupby('leafsAlgorithm')['elapseTime'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
leafsAlgorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
exact-match,47928000.0,1.2e-05,8.6e-05,4.377216e-08,5.122274e-08,1.178123e-07,1.9e-05,0.018701
random-fuzzy-match,239640000.0,0.000109,0.000296,4.377216e-08,5.215406e-08,1.187436e-07,0.000117,0.030002


In [12]:
# Write the frame to <directory>/leafs_algorithm_elapse_time.csv without the index.
leafsAlgorithmElapseTime.to_csv(directory + 'leafs_algorithm_elapse_time.csv', index=False)

In [29]:
# Attach the group size of each row as a new 'groupSize' column.
# NOTE(review): Series assignment aligns on index labels, and an earlier cell
# drops rows from groupSizePerGroupPerTenant in place. Labels missing there
# would become NaN here - confirm both frames still share the same index.
leafsAlgorithmElapseTime['groupSize'] = groupSizePerGroupPerTenant['groupSize']

In [30]:
# Keep only the two placement set-ups of interest - ('colocate-uniform', 'uniform')
# and ('colocate-colocate-uniform', '12') - restricted to runs with 64000 pods rules.
leafsAlgorithmElapseTimeTrim = leafsAlgorithmElapseTime[
    ((leafsAlgorithmElapseTime['placementDist'].eq('colocate-uniform')
      & leafsAlgorithmElapseTime['placementNumHostsPerLeaf'].eq('uniform'))
     | (leafsAlgorithmElapseTime['placementDist'].eq('colocate-colocate-uniform')
        & leafsAlgorithmElapseTime['placementNumHostsPerLeaf'].eq('12')))
    & leafsAlgorithmElapseTime['podsNumRules'].eq(64000)
]

In [31]:
# Sanity check: list the hosts-per-leaf values that survived the trim.
leafsAlgorithmElapseTimeTrim['placementNumHostsPerLeaf'].unique()

array(['12', 'uniform'], dtype=object)

In [32]:
# Write the trimmed frame to <directory>/leafs_algorithm_elapse_time_trim.csv without the index.
leafsAlgorithmElapseTimeTrim.to_csv(directory + 'leafs_algorithm_elapse_time_trim.csv', index=False)

In [35]:
# Show group size and elapsed time for the first ten rows with more than 100 members.
leafsAlgorithmElapseTimeTrim[leafsAlgorithmElapseTimeTrim['groupSize'] > 100][['groupSize', 'elapseTime']].head(10)

Unnamed: 0,groupSize,elapseTime
0,103,1.8e-05
1,107,4e-06
3,124,3e-06
5,176,4e-06
7,148,3e-06
10,122,4e-06
12,153,4e-06
18,183,3e-06
20,118,4e-06
22,174,3e-06


In [None]:
# @Lalith

In [10]:
# leafsAlgorithmElapseTime = pd.read_csv(directory + 'leafs_algorithm_elapse_time.csv')

In [12]:
# leafsAlgorithmElapseTimeTrim = leafsAlgorithmElapseTime[
#     (((leafsAlgorithmElapseTime['placementDist'] == 'colocate-uniform') &
#       (leafsAlgorithmElapseTime['placementNumHostsPerLeaf'] == 'uniform')) |
#      ((leafsAlgorithmElapseTime['placementDist'] == 'colocate-colocate-uniform') &
#       (leafsAlgorithmElapseTime['placementNumHostsPerLeaf'] == '12'))) &
#     (leafsAlgorithmElapseTime['seed'] == 0) &
#     (leafsAlgorithmElapseTime['leafsAlgorithm'] == 'random-fuzzy-match') &
#     (leafsAlgorithmElapseTime['leafsNumRules'] == 64000)
# ]

In [13]:
# leafsAlgorithmElapseTimeTrim.to_csv(directory + 'leafs_algorithm_elapse_time_trim.csv', index=False)

In [8]:
# Load groups_covered_with_bitmaps_only_for_pods.csv from `folders`. header=0
# makes the first line of each file a header row that the two names replace.
groupsCoveredWithBitmapsOnlyForPods = DataSetAsDataFrame('groups_covered_with_bitmaps_only_for_pods.csv', 
                                                         folders, 
                                                         ['groupsCovered', 'groupsCoveredWithoutDefaultBitmap'],
                                                         header=0)

In [9]:
# %%R -i groupsCoveredWithBitmapsOnlyForPods
# ggplot(groupsCoveredWithBitmapsOnlyForPods) +
#    geom_bar(aes(y=groupsCoveredWithoutDefaultBitmap, x=factor(podsNumBitmaps), 
#                 fill=interaction(podsAlgorithm, podsNumNodesPerBitmap, podsRedundancyPerBitmap)),
#             stat="identity",position=position_dodge()) +
#    facet_grid(groupSizeDist * podsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#    xlab("Number of bitmaps") +
#    ylab("Groups covered (without default bitmap)") +
#    plotTheme

In [10]:
# Write the combined frame to <directory>/groups_covered_with_bitmaps_only_for_pods.csv without the index.
groupsCoveredWithBitmapsOnlyForPods.to_csv(directory + 'groups_covered_with_bitmaps_only_for_pods.csv', index=False)

In [11]:
# Load groups_covered_with_bitmaps_only_for_leafs.csv from `leafs_folders`.
# header=0 makes the first line of each file a header row that the two names replace.
groupsCoveredWithBitmapsOnlyForLeafs = DataSetAsDataFrame('groups_covered_with_bitmaps_only_for_leafs.csv', 
                                                          leafs_folders, 
                                                          ['groupsCovered', 'groupsCoveredWithoutDefaultBitmap'],
                                                          header=0)

In [12]:
# %%R -i groupsCoveredWithBitmapsOnlyForLeafs
# ggplot(groupsCoveredWithBitmapsOnlyForLeafs) +
#    geom_bar(aes(y=groupsCoveredWithoutDefaultBitmap, x=factor(leafsNumBitmaps), 
#                 fill=interaction(leafsAlgorithm, leafsNumNodesPerBitmap, leafsRedundancyPerBitmap)),
#             stat="identity",position=position_dodge()) +
#    facet_grid(groupSizeDist * leafsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#    xlab("Number of bitmaps") +
#    ylab("Groups covered (without default bitmap)") +
#    plotTheme

In [13]:
# Write the combined frame to <directory>/groups_covered_with_bitmaps_only_for_leafs.csv without the index.
groupsCoveredWithBitmapsOnlyForLeafs.to_csv(directory + 'groups_covered_with_bitmaps_only_for_leafs.csv', index=False)

In [14]:
# Load rule_count_for_pods.csv from `folders`; the data column is named 'numRules'.
ruleCountForPods = DataSetAsDataFrame('rule_count_for_pods.csv', 
                                      folders, ['numRules'])

In [15]:
# %%R -i ruleCountForPods
# ggplot(ruleCountForPods) +
#     geom_boxplot(aes(y=numRules, x=factor(podsNumBitmaps), 
#                      fill=interaction(podsAlgorithm, podsNumNodesPerBitmap, podsRedundancyPerBitmap)),
#                  outlier.size=0.5) +
#     facet_grid(groupSizeDist * podsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#     xlab("Number of bitmaps") +
#     ylab("Number of rules") +
#     plotTheme

In [16]:
# Write the combined frame to <directory>/rule_count_for_pods.csv without the index.
ruleCountForPods.to_csv(directory + 'rule_count_for_pods.csv', index=False)

In [17]:
# Load rule_count_for_leafs.csv from `leafs_folders`; the data column is named 'numRules'.
ruleCountForLeafs = DataSetAsDataFrame('rule_count_for_leafs.csv', 
                                       leafs_folders, ['numRules'])

In [18]:
# %%R -i ruleCountForLeafs
# ggplot(ruleCountForLeafs) +
#     geom_boxplot(aes(y=numRules, x=factor(leafsNumBitmaps), 
#                      fill=interaction(leafsAlgorithm, leafsNumNodesPerBitmap, leafsRedundancyPerBitmap)),
#                  outlier.size=0.5) +
#     facet_grid(groupSizeDist * leafsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#     xlab("Number of bitmaps") +
#     ylab("Number of rules") +
#     plotTheme

In [19]:
# Write the combined frame to <directory>/rule_count_for_leafs.csv without the index.
ruleCountForLeafs.to_csv(directory + 'rule_count_for_leafs.csv', index=False)

In [8]:
# trafficOverheadPerGroupPerTenantForPods = DataSetAsDataFrame(
#     'traffic_overhead_per_group_per_tenant_for_pods.csv', folders, ['trafficOverhead'])

In [9]:
# trafficOverheadPerGroupPerTenantForPods = trafficOverheadPerGroupPerTenantForPods.groupby(
#     ['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
#      'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap', 'podsRedundancyPerBitmap',
#     'podsNumRules']).mean()

In [10]:
# trafficOverheadPerGroupPerTenantForPods = trafficOverheadPerGroupPerTenantForPods.reset_index()

In [11]:
# %%R -i trafficOverheadPerGroupPerTenantForPods
# ggplot(trafficOverheadPerGroupPerTenantForPods) +
#     geom_boxplot(aes(y=trafficOverhead, x=factor(podsNumBitmaps), 
#                      fill=interaction(podsAlgorithm, podsNumNodesPerBitmap, podsRedundancyPerBitmap))) +
#     facet_grid(groupSizeDist * podsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#     xlab("Number of bitmaps") +
#     ylab("Traffic overhead per group") +
#     plotTheme

In [12]:
# trafficOverheadPerGroupPerTenantForLeafs = DataSetAsDataFrame(
#     'traffic_overhead_per_group_per_tenant_for_leafs.csv', leafs_folders, ['trafficOverhead'])

In [13]:
# trafficOverheadPerGroupPerTenantForLeafs = trafficOverheadPerGroupPerTenantForLeafs.groupby(
#     ['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
#      'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap', 'podsRedundancyPerBitmap',
#      'podsNumRules', 
#      'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap', 'leafsRedundancyPerBitmap',
#      'leafsNumRules']).mean()

In [14]:
# trafficOverheadPerGroupPerTenantForLeafs = trafficOverheadPerGroupPerTenantForLeafs.reset_index()

In [15]:
# %%R -i trafficOverheadPerGroupPerTenantForLeafs
# ggplot(trafficOverheadPerGroupPerTenantForLeafs) +
#     geom_boxplot(aes(y=trafficOverhead, x=factor(leafsNumBitmaps), 
#                      fill=interaction(leafsAlgorithm, leafsNumNodesPerBitmap, leafsRedundancyPerBitmap))) +
#     facet_grid(groupSizeDist * leafsNumRules ~ factor(placementNumHostsPerLeaf, levels=c('uniform','12','24','48'))) +
#     xlab("Number of bitmaps") +
#     ylab("Traffic overhead per group") +
#     plotTheme

In [None]:
# trafficPerGroupPerTenant = DataSetAsDataFrame('traffic_per_group_per_tenant.csv', 
#                                               leafs_folders, 
#                                               ['multicastTraffic', 'unicastTraffic', 
#                                                'overlayTraffic', 'baseeratTraffic'],
#                                               header=0)

In [None]:
# trafficPerGroupPerTenant = trafficPerGroupPerTenant.groupby(
#     ['groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
#      'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap', 'podsRedundancyPerBitmap', 'podsNumRules', 
#      'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap', 'leafsRedundancyPerBitmap', 'leafsNumRules'])[
#     ['unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic']].agg('sum')
# trafficPerGroupPerTenant.reset_index(inplace=True)

In [None]:
# trafficPerGroupPerTenant.to_csv(directory + 'traffic_per_group_per_tenant.csv', index=False)

In [18]:
# Leafs log folders matching "logs.*_0.*_leafs*".
_leafs_folders_0 = glob.glob(directory + "logs.*_0.*_leafs*")

In [None]:
# Load traffic_per_group_per_tenant.csv from `_leafs_folders_0`. header=0 makes
# the first line of each file a header row that the four names replace;
# reset_index=False keeps each file's own row index.
trafficPerGroupPerTenant_0 = DataSetAsDataFrame('traffic_per_group_per_tenant.csv', 
                                                _leafs_folders_0, 
                                                ['multicastTraffic', 'unicastTraffic', 
                                                 'overlayTraffic', 'baseeratTraffic'],
                                                header=0, reset_index=False)

In [10]:
# Collapse to one row per experiment configuration by summing the four traffic
# columns, then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_0 = (
    trafficPerGroupPerTenant_0
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[['unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic']]
    .agg('sum')
    .reset_index()
)

In [11]:
# Write the aggregated frame to <directory>/traffic_per_group_per_tenant_0.csv without the index.
trafficPerGroupPerTenant_0.to_csv(directory + 'traffic_per_group_per_tenant_0.csv', index=False)

In [8]:
# Leafs log folders matching "logs.*_1.*_leafs*".
_leafs_folders_1 = glob.glob(directory + "logs.*_1.*_leafs*")

In [9]:
# Load traffic_per_group_per_tenant.csv from `_leafs_folders_1`. header=0 makes
# the first line of each file a header row that the four names replace;
# reset_index=False keeps each file's own row index.
trafficPerGroupPerTenant_1 = DataSetAsDataFrame('traffic_per_group_per_tenant.csv', 
                                                _leafs_folders_1, 
                                                ['multicastTraffic', 'unicastTraffic', 
                                                 'overlayTraffic', 'baseeratTraffic'],
                                                header=0, reset_index=False)

In [10]:
# Collapse to one row per experiment configuration by summing the four traffic
# columns, then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_1 = (
    trafficPerGroupPerTenant_1
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[['unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic']]
    .agg('sum')
    .reset_index()
)

In [11]:
# Write the aggregated frame to <directory>/traffic_per_group_per_tenant_1.csv without the index.
trafficPerGroupPerTenant_1.to_csv(directory + 'traffic_per_group_per_tenant_1.csv', index=False)

In [8]:
# Leafs log folders matching "logs.*_2.*_leafs*".
_leafs_folders_2 = glob.glob(directory + "logs.*_2.*_leafs*")

In [9]:
# Load traffic_per_group_per_tenant.csv from `_leafs_folders_2`. header=0 makes
# the first line of each file a header row that the four names replace;
# reset_index=False keeps each file's own row index.
trafficPerGroupPerTenant_2 = DataSetAsDataFrame('traffic_per_group_per_tenant.csv', 
                                                _leafs_folders_2, 
                                                ['multicastTraffic', 'unicastTraffic', 
                                                 'overlayTraffic', 'baseeratTraffic'],
                                                header=0, reset_index=False)

In [10]:
# Collapse to one row per experiment configuration by summing the four traffic
# columns, then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_2 = (
    trafficPerGroupPerTenant_2
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[['unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic']]
    .agg('sum')
    .reset_index()
)

In [11]:
# Write the aggregated frame to <directory>/traffic_per_group_per_tenant_2.csv without the index.
trafficPerGroupPerTenant_2.to_csv(directory + 'traffic_per_group_per_tenant_2.csv', index=False)

In [8]:
# Reload the aggregated frame from <directory>/traffic_per_group_per_tenant_0.csv.
trafficPerGroupPerTenant_0 = pd.read_csv(directory + 'traffic_per_group_per_tenant_0.csv', sep=',')

In [10]:
# Reload the aggregated frame from <directory>/traffic_per_group_per_tenant_1.csv.
trafficPerGroupPerTenant_1 = pd.read_csv(directory + 'traffic_per_group_per_tenant_1.csv', sep=',')

In [12]:
# Reload the aggregated frame from <directory>/traffic_per_group_per_tenant_2.csv.
trafficPerGroupPerTenant_2 = pd.read_csv(directory + 'traffic_per_group_per_tenant_2.csv', sep=',')

In [14]:
# Stack the three per-pattern frames; each keeps its own row labels, so index
# values repeat across the combined frame.
trafficPerGroupPerTenant = pd.concat([trafficPerGroupPerTenant_0, trafficPerGroupPerTenant_1, trafficPerGroupPerTenant_2])

In [17]:
# Write the combined frame to <directory>/traffic_per_group_per_tenant.csv without the index.
trafficPerGroupPerTenant.to_csv(directory + 'traffic_per_group_per_tenant.csv', index=False)

In [13]:
def CorrectedDataFrameToCSV(filename, folders, params_directory):
    """Write a '<stem>_corrected<ext>' copy of `filename` into every folder.

    For each folder, `filename` is read from the folder itself and from the
    matching parameters folder, i.e. `params_directory` joined with the
    folder's base name minus its last dot-separated component. A new column
    is then added to the folder's frame:

        overlay_corrected = 6 * pods + pods_traffic + (overlay - 6 * leafs)

    where `overlay` comes from the folder's file and `pods`, `leafs` and
    `pods_traffic` from the parameters file. The result is written next to
    the input file.

    Args:
        filename: Name of the CSV file to read in each folder, e.g.
            'traffic_per_group_per_tenant.csv'. It must have an 'overlay'
            column.
        folders: Iterable of folder paths to process (passed to `bar_range`).
        params_directory: Directory containing the parameter folders.

    Returns:
        None. One CSV file is written per folder.
    """
    # splitext keeps an extensionless filename intact instead of slicing off
    # its last character, which is what a failed rfind('.') (-1) would do.
    stem, extension = os.path.splitext(filename)
    corrected_filename = stem + '_corrected' + extension
    # header=0 together with names= replaces the file's own header row.
    # NOTE(review): assumes the parameters file has exactly these three columns — TODO confirm.
    params_columns = ['pods', 'leafs', 'pods_traffic']

    # Read one folder's dataset, add the corrected column, write it back.
    def WriteCorrected(folder):
        df = pd.read_csv(os.path.join(folder, filename), sep=',', header=0)

        # normpath drops a trailing slash so basename is never empty.
        folder_name = os.path.basename(os.path.normpath(folder))
        # Strip the last '.'-separated component; keep the name whole if it has no '.'.
        head, dot, _ = folder_name.rpartition('.')
        params_foldername = head if dot else folder_name
        params_df = pd.read_csv(os.path.join(params_directory, params_foldername, filename),
                                sep=',', header=0, names=params_columns)

        # The two frames are combined row by row through their index; rows present
        # in only one of them come out as NaN.
        # NOTE(review): the factor 6 is not explained anywhere in view — TODO document.
        df['overlay_corrected'] = ((6 * params_df['pods']) + params_df['pods_traffic'] +
                                   (df['overlay'] - (6 * params_df['leafs'])))

        df.to_csv(os.path.join(folder, corrected_filename), index=False)

    for folder in bar_range(folders, 'progress:'):
        WriteCorrected(folder)

In [14]:
# Directory holding the parameter folders read by CorrectedDataFrameToCSV.
# Absolute, machine-specific path: adjust when running elsewhere.
params_directory = '/mnt/sdb1/baseerat/numerical-evals/1-10-2018/logs-1M/'

In [15]:
# Write 'traffic_per_group_per_tenant_corrected.csv' into every folder of `leafs_folders`.
# Long-running: the recorded run below took about 51 minutes.
CorrectedDataFrameToCSV('traffic_per_group_per_tenant.csv', leafs_folders, params_directory)

progress:: 100% |###############################################| Time: 0:51:03


In [16]:
# Collect every path under `directory` matching logs.*_0.*_leafs*.
# NOTE(review): the `_0.` part presumably selects one slice of the runs (e.g. by seed) — confirm against the folder naming scheme.
_leafs_folders_0 = glob.glob(directory + "logs.*_0.*_leafs*")

In [17]:
# Build one frame from the 'traffic_per_group_per_tenant_corrected.csv' file of every folder in _leafs_folders_0.
# NOTE(review): the list argument is presumably the column names given to the loaded data — confirm against DataSetAsDataFrame.
trafficPerGroupPerTenant_0 = DataSetAsDataFrame(
    'traffic_per_group_per_tenant_corrected.csv',
    _leafs_folders_0,
    [
        'multicastTraffic',
        'unicastTraffic',
        'overlayTraffic',
        'baseeratTraffic',
        'overlayTrafficCorrected',
    ],
    header=0,
    reset_index=False,
)

In [18]:
# Sum the five traffic columns over all rows that share the same experiment parameters,
# then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_0 = (
    trafficPerGroupPerTenant_0
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[[
        'unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic',
        'overlayTrafficCorrected',
    ]]
    .agg('sum')
    .reset_index()
)

In [19]:
# Persist the aggregated frame as CSV under `directory`; index=False leaves the row index out of the file.
trafficPerGroupPerTenant_0.to_csv(directory + 'traffic_per_group_per_tenant_corrected_0.csv', index=False)

In [20]:
# Collect every path under `directory` matching logs.*_1.*_leafs*.
# NOTE(review): the `_1.` part presumably selects one slice of the runs (e.g. by seed) — confirm against the folder naming scheme.
_leafs_folders_1 = glob.glob(directory + "logs.*_1.*_leafs*")

In [21]:
# Build one frame from the 'traffic_per_group_per_tenant_corrected.csv' file of every folder in _leafs_folders_1.
# NOTE(review): the list argument is presumably the column names given to the loaded data — confirm against DataSetAsDataFrame.
trafficPerGroupPerTenant_1 = DataSetAsDataFrame(
    'traffic_per_group_per_tenant_corrected.csv',
    _leafs_folders_1,
    [
        'multicastTraffic',
        'unicastTraffic',
        'overlayTraffic',
        'baseeratTraffic',
        'overlayTrafficCorrected',
    ],
    header=0,
    reset_index=False,
)

In [22]:
# Sum the five traffic columns over all rows that share the same experiment parameters,
# then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_1 = (
    trafficPerGroupPerTenant_1
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[[
        'unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic',
        'overlayTrafficCorrected',
    ]]
    .agg('sum')
    .reset_index()
)

In [23]:
# Persist the aggregated frame as CSV under `directory`; index=False leaves the row index out of the file.
trafficPerGroupPerTenant_1.to_csv(directory + 'traffic_per_group_per_tenant_corrected_1.csv', index=False)

In [24]:
# Collect every path under `directory` matching logs.*_2.*_leafs*.
# NOTE(review): the `_2.` part presumably selects one slice of the runs (e.g. by seed) — confirm against the folder naming scheme.
_leafs_folders_2 = glob.glob(directory + "logs.*_2.*_leafs*")

In [25]:
# Build one frame from the 'traffic_per_group_per_tenant_corrected.csv' file of every folder in _leafs_folders_2.
# NOTE(review): the list argument is presumably the column names given to the loaded data — confirm against DataSetAsDataFrame.
trafficPerGroupPerTenant_2 = DataSetAsDataFrame(
    'traffic_per_group_per_tenant_corrected.csv',
    _leafs_folders_2,
    [
        'multicastTraffic',
        'unicastTraffic',
        'overlayTraffic',
        'baseeratTraffic',
        'overlayTrafficCorrected',
    ],
    header=0,
    reset_index=False,
)

In [26]:
# Sum the five traffic columns over all rows that share the same experiment parameters,
# then turn the grouping keys back into ordinary columns.
trafficPerGroupPerTenant_2 = (
    trafficPerGroupPerTenant_2
    .groupby([
        'groupSizeDist', 'placementDist', 'placementNumHostsPerLeaf', 'seed',
        'podsAlgorithm', 'podsNumBitmaps', 'podsNumNodesPerBitmap',
        'podsRedundancyPerBitmap', 'podsNumRules',
        'leafsAlgorithm', 'leafsNumBitmaps', 'leafsNumNodesPerBitmap',
        'leafsRedundancyPerBitmap', 'leafsNumRules',
    ])[[
        'unicastTraffic', 'multicastTraffic', 'baseeratTraffic', 'overlayTraffic',
        'overlayTrafficCorrected',
    ]]
    .agg('sum')
    .reset_index()
)

In [27]:
# Persist the aggregated frame as CSV under `directory`; index=False leaves the row index out of the file.
trafficPerGroupPerTenant_2.to_csv(directory + 'traffic_per_group_per_tenant_corrected_2.csv', index=False)

In [28]:
# Load the partial aggregate 'traffic_per_group_per_tenant_corrected_0.csv' from `directory`.
trafficPerGroupPerTenant_0 = pd.read_csv(directory + 'traffic_per_group_per_tenant_corrected_0.csv', sep=',')

In [29]:
# Load the partial aggregate 'traffic_per_group_per_tenant_corrected_1.csv' from `directory`.
trafficPerGroupPerTenant_1 = pd.read_csv(directory + 'traffic_per_group_per_tenant_corrected_1.csv', sep=',')

In [30]:
# Load the partial aggregate 'traffic_per_group_per_tenant_corrected_2.csv' from `directory`.
trafficPerGroupPerTenant_2 = pd.read_csv(directory + 'traffic_per_group_per_tenant_corrected_2.csv', sep=',')

In [31]:
# Stack the three partial frames into one. ignore_index=True gives the result a fresh
# 0..n-1 index; without it every input keeps its own row labels and the combined frame
# ends up with duplicate index values (which makes label-based access ambiguous).
trafficPerGroupPerTenant = pd.concat(
    [trafficPerGroupPerTenant_0, trafficPerGroupPerTenant_1, trafficPerGroupPerTenant_2],
    ignore_index=True)

In [32]:
# Persist the combined frame as CSV under `directory`; index=False leaves the row index out of the file.
trafficPerGroupPerTenant.to_csv(directory + 'traffic_per_group_per_tenant_corrected.csv', index=False)

In [9]:
# Load the combined 'traffic_per_group_per_tenant_corrected.csv' from `directory`.
trafficPerGroupPerTenant = pd.read_csv(directory + 'traffic_per_group_per_tenant_corrected.csv')

In [11]:
# Show the first rows as a quick check of the loaded columns and values.
trafficPerGroupPerTenant.head()

Unnamed: 0,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules,leafsAlgorithm,leafsNumBitmaps,leafsNumNodesPerBitmap,leafsRedundancyPerBitmap,leafsNumRules,unicastTraffic,multicastTraffic,baseeratTraffic,overlayTraffic,overlayTrafficCorrected
0,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,10,3,0,10000,2385267270,415953027,467320090,852635990,825915054
1,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,10,3,0,64000,2385267270,415953027,415953036,852635990,825915054
2,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,20,3,0,10000,2385267270,415953027,423927544,852635990,825915054
3,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,20,3,0,64000,2385267270,415953027,415953036,852635990,825915054
4,uniform,colocate-colocate-uniform,12,0,random-fuzzy-match,2,3,6,64000,exact-match,30,3,0,10000,2385267270,415953027,415953036,852635990,825915054


In [29]:
# NOTE(review): presumably the per-packet VXLAN encapsulation overhead in bytes — confirm the unit and value.
vxlan_header_size = 50

In [30]:
# Size used to scale the traffic means below.
# NOTE(review): 64 is presumably the base message size in bytes before encapsulation — TODO confirm.
message_size = 64 + vxlan_header_size

In [31]:
# Mean of the multicastTraffic column over all rows, scaled by message_size.
multicastBytes = trafficPerGroupPerTenant['multicastTraffic'].mean() * message_size

In [32]:
# Mean of the unicastTraffic column over all rows, scaled by message_size.
unicastBytes = trafficPerGroupPerTenant['unicastTraffic'].mean() * message_size

In [33]:
# Mean of the overlayTrafficCorrected column over all rows, scaled by message_size.
overlayBytes = trafficPerGroupPerTenant['overlayTrafficCorrected'].mean() * message_size

In [34]:
# Ratio of unicast to multicast traffic. When both values were scaled by the same
# message_size, that factor cancels and this is simply the ratio of the column means.
unicastBytes / multicastBytes

5.4719708275085406

In [35]:
# Ratio of (corrected) overlay to multicast traffic. When both values were scaled by the
# same message_size, that factor cancels and this is simply the ratio of the column means.
overlayBytes / multicastBytes

1.9764351625587373