In [1]:
import warnings
# Suppress all Python warnings, in particular those printed by the R magic commands
warnings.filterwarnings('ignore')

In [2]:
import os
import glob
from math import ceil

In [3]:
# Import pandas
import pandas as pd

In [4]:
# Load R magic (the rpy2 IPython extension that provides the %R / %%R magics)
%load_ext rpy2.ipython
# Load ggplot2 in the R session
%R require(ggplot2);

In [5]:
def _AddOptimizerColumns(df, optimizerField):
    """Add the optimizer columns parsed from one '_'-separated optimizer field.

    Field 7 holds the node type and is used as the column-name prefix
    (e.g. 'pods' -> 'podsAlgorithm', 'podsNumBitmaps', ...).
    """
    optimizerParams = optimizerField.split('_')
    nodeType = optimizerParams[7]
    df['%sAlgorithm' % nodeType] = optimizerParams[0]
    # Fields 1-4 are integers; keep this order so the resulting column order is stable.
    integerColumns = (('NumBitmaps', 1), ('NumNodesPerBitmap', 2),
                      ('RedundancyPerBitmap', 3), ('NumRules', 4))
    for suffix, position in integerColumns:
        df['%s%s' % (nodeType, suffix)] = int(optimizerParams[position])


# If experiment parameters change, modify this function
def AddParametersAsColumns(df, folder):
    """Annotate `df` in place with the experiment parameters encoded in `folder`.

    The last path component of `folder` is split on '.' and is expected to look like
        <prefix>.<cloud params>.<optimizer params>[.<optimizer params>].<dynamic params>
    where every parameter group is itself '_'-separated. A trailing '/' on
    `folder` is ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to receive the constant-valued parameter columns (modified in place).
    folder : str
        Path of the experiment folder; only its last component is parsed.

    Raises
    ------
    ValueError
        If the folder name has fewer than four '.'-separated components.
    """
    expParams = folder.rstrip('/').split('/')[-1].split('.')
    if len(expParams) < 4:
        raise ValueError("Cannot parse experiment parameters from folder name: %r" % folder)

    cloudParams = expParams[1].split('_')
    df['groupSizeDist'] = cloudParams[10]
    df['placementDist'] = cloudParams[11]
    # '-1' is reported as 'uniform'; any other value is kept verbatim.
    df['placementNumHostsPerLeaf'] = 'uniform' if cloudParams[12] == '-1' else cloudParams[12]
    df['seed'] = int(cloudParams[15])

    _AddOptimizerColumns(df, expParams[2])

    # With five (or more) components a second optimizer group precedes the dynamic
    # group; with exactly four, the dynamic group directly follows the first optimizer.
    if len(expParams) > 4:
        _AddOptimizerColumns(df, expParams[3])
        dynamicField = expParams[4]
    else:
        dynamicField = expParams[3]

    dynamicParams = dynamicField.split('_')
    df['numEvents'] = int(dynamicParams[0])
    df['failedNodeType'] = dynamicParams[1]
    # Kept as a string, exactly as parsed from the folder name.
    df['numSpinesPerPod'] = dynamicParams[2]
    
# Turns all files of a given filename across a set of folders into a single dataframe
def DataSetAsDataFrame(filename, folders, headers, header=None, reset_index=True):
    """Read `filename` from every folder and concatenate the results.

    Each per-folder frame is read with `pd.read_csv` (comma separated, column
    names taken from `headers`) and annotated with the experiment parameters
    via `AddParametersAsColumns` before concatenation.

    Parameters
    ----------
    filename : str
        Name of the CSV file expected inside each folder.
    folders : iterable of str
        Experiment folders to read from.
    headers : list of str
        Column names passed to `pd.read_csv` as `names`.
    header : int or None, optional
        Passed to `pd.read_csv` as `header`.
    reset_index : bool, optional
        When True, the concatenated frame gets a fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If `folders` is empty (for example when a glob pattern matched nothing).
    """
    folders = list(folders)
    if not folders:
        raise ValueError("No folders to read '%s' from; check the directory and glob pattern" % filename)

    # Read dataset as dataframe
    def ReadDataSet(folder):
        df = pd.read_csv(folder + '/' + filename, sep=',', header=header, names=headers)
        AddParametersAsColumns(df, folder)
        return df

    # Combine the per-folder dataframes into a single dataframe
    df = pd.concat([ReadDataSet(folder) for folder in folders])
    if reset_index:
        # drop=True discards the old index directly, so it cannot clash with
        # a data column that happens to be named 'index'.
        df = df.reset_index(drop=True)
    return df

In [6]:
%%R 
# Shared ggplot2 theme: no legend title, legend placed at the top, legend text size 6
plotTheme <- theme(legend.title=element_blank(), legend.position="top", legend.text=element_text(size=6))

In [10]:
# Root directory holding one sub-folder per experiment run
directory = '/mnt/sdb1/baseerat/numerical-evals/5-28-2018/logs-1M/'
# glob.glob returns matches in arbitrary order; sort them so the row order of
# every concatenated dataframe is reproducible across runs and machines.
folders = sorted(glob.glob(directory + "dynamic-logs.*.*"))

In [11]:
# Combine every run's group_count.csv (read without a header row, column named 'count')
groupCount = DataSetAsDataFrame(filename='group_count.csv', folders=folders, headers=['count'])

In [12]:
# Display the combined per-run group counts together with the parsed experiment parameters
groupCount

Unnamed: 0,count,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules,leafsAlgorithm,leafsNumBitmaps,leafsNumNodesPerBitmap,leafsRedundancyPerBitmap,leafsNumRules,numEvents,failedNodeType,numSpinesPerPod
0,258110,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4
1,122536,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,spine,4
2,183937,wve,colocate-uniform,uniform,1,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,48,64000,10000,core,4
3,105598,wve,colocate-uniform,uniform,1,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,48,64000,10000,spine,4
4,226816,wve,colocate-uniform,uniform,2,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,48,64000,10000,core,4
5,104691,wve,colocate-uniform,uniform,2,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,48,64000,10000,spine,4


In [13]:
# Express each count as a percentage of 1,000,000
# (presumably the total number of groups in the logs-1M runs -- TODO confirm)
groupCount['%count'] = groupCount['count'].div(1000000).mul(100.0)

In [14]:
groupCount.loc[:, ['count', '%count']]

Unnamed: 0,count,%count
0,258110,25.811
1,122536,12.2536
2,183937,18.3937
3,105598,10.5598
4,226816,22.6816
5,104691,10.4691


In [16]:
# Summary statistics of the counts for the runs whose failedNodeType is 'core'
groupCount.loc[groupCount['failedNodeType'].eq('core'), ['count', '%count']].describe()

Unnamed: 0,count,%count
count,3.0,3.0
mean,222954.333333,22.295433
std,37236.982079,3.723698
min,183937.0,18.3937
25%,205376.5,20.53765
50%,226816.0,22.6816
75%,242463.0,24.2463
max,258110.0,25.811


In [17]:
# Summary statistics of the counts for the runs whose failedNodeType is 'spine'
groupCount.loc[groupCount['failedNodeType'].eq('spine'), ['count', '%count']].describe()

Unnamed: 0,count,%count
count,3.0,3.0
mean,110941.666667,11.094167
std,10051.223126,1.005122
min,104691.0,10.4691
25%,105144.5,10.51445
50%,105598.0,10.5598
75%,114067.0,11.4067
max,122536.0,12.2536


In [18]:
# Combine every run's per_switch_update_count.csv; header=0 treats the first row of
# each file as a header row, which is replaced by the names given here.
perSwitchUpdateCount = DataSetAsDataFrame(
    filename='per_switch_update_count.csv',
    folders=folders,
    headers=['updates', 'switch'],
    header=0,
)

In [19]:
# Preview the first rows of the combined per-switch update counts
perSwitchUpdateCount.head()

Unnamed: 0,updates,switch,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules,leafsAlgorithm,leafsNumBitmaps,leafsNumNodesPerBitmap,leafsRedundancyPerBitmap,leafsNumRules,numEvents,failedNodeType,numSpinesPerPod
0,805,virtual,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4
1,703,virtual,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4
2,1126,virtual,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4
3,526,virtual,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4
4,863,virtual,wve,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,10000,core,4


In [24]:
# Number of 'virtual' switch rows with at least one update in the 'core' failure runs,
# divided by 3 (presumably the number of runs/seeds -- TODO confirm).
s = int(perSwitchUpdateCount.loc[
        perSwitchUpdateCount['failedNodeType'].eq('core') &
        perSwitchUpdateCount['switch'].eq('virtual'),
        'updates'].gt(0).sum()) / 3

# 27648 is presumably the number of virtual switches per run (82944 rows / 3) -- TODO confirm
print(s, s / 27648 * 100.0)

27491.666666666668 99.43455825617285


In [25]:
# Distribution of update counts over 'virtual' switches in the 'core' failure runs
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['failedNodeType'].eq('core') &
    perSwitchUpdateCount['switch'].eq('virtual'),
    'updates'].describe()

count    82944.000000
mean       674.899233
std        324.732786
min          0.000000
25%        450.000000
50%        629.000000
75%        883.000000
max       1853.000000
Name: updates, dtype: float64

In [26]:
# Number of 'virtual' switch rows with at least one update in the 'spine' failure runs,
# divided by 3 (presumably the number of runs/seeds -- TODO confirm).
s = int(perSwitchUpdateCount.loc[
        perSwitchUpdateCount['failedNodeType'].eq('spine') &
        perSwitchUpdateCount['switch'].eq('virtual'),
        'updates'].gt(0).sum()) / 3

# 27648 is presumably the number of virtual switches per run (82944 rows / 3) -- TODO confirm
print(s, s / 27648 * 100.0)

13146.0 47.54774305555556


In [27]:
# Distribution of update counts over 'virtual' switches in the 'spine' failure runs
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['failedNodeType'].eq('spine') &
    perSwitchUpdateCount['switch'].eq('virtual'),
    'updates'].describe()

count    82944.000000
mean       176.868490
std        320.046861
min          0.000000
25%          0.000000
50%          0.000000
75%        195.000000
max       1712.000000
Name: updates, dtype: float64