In [1]:
import warnings
# Silence the warnings printed by the R magic commands used in this notebook.
# NOTE: called without a category or module filter, so this hides every
# Python warning raised in the kernel, not only the ones coming from rpy2.
warnings.filterwarnings('ignore')

In [2]:
import os
import glob

In [3]:
# Import pandas
import pandas as pd

In [4]:
# Load the rpy2 IPython extension, which provides the %R / %%R magics
%load_ext rpy2.ipython
# Attach ggplot2 inside the embedded R session.
# NOTE(review): require() returns FALSE instead of failing when the package is
# missing, so a missing ggplot2 only surfaces later when a plot is drawn.
%R require(ggplot2);

In [5]:
# If experiment parameters change, modify these two functions
def _AddOptimizerColumns(df, optimizerField):
    """Add the optimizer columns encoded in one '_'-separated folder-name field.

    Field 7 of the split string is the node type and is used as the column
    prefix; fields 0-4 are the algorithm name, number of bitmaps, nodes per
    bitmap, redundancy per bitmap and number of rules.
    """
    optimizerParams = optimizerField.split('_')
    nodeType = optimizerParams[7]
    df['%sAlgorithm' % nodeType] = optimizerParams[0]
    df['%sNumBitmaps' % nodeType] = int(optimizerParams[1])
    df['%sNumNodesPerBitmap' % nodeType] = int(optimizerParams[2])
    df['%sRedundancyPerBitmap' % nodeType] = int(optimizerParams[3])
    df['%sNumRules' % nodeType] = int(optimizerParams[4])


def AddParametersAsColumns(df, folder):
    """Decode the experiment parameters from a folder name into columns of ``df``.

    The last path component of ``folder`` is split on '.' into:
      [0] a prefix that is not used here,
      [1] cloud parameters ('_'-separated; fields 10, 11, 12 and 15 are read),
      [2] parameters of the first optimizer,
      [3] parameters of a second optimizer (only when there are 5+ components),
      [last used] dynamic parameters, whose field 0 is the number of events.

    Args:
        df: DataFrame to annotate. It is modified in place; every new column
            holds the same value in all rows.
        folder: Path of the experiment folder. A trailing '/' is tolerated.

    Returns:
        None.

    Raises:
        IndexError: if the folder name has fewer components or fields than
            the layout above requires.
        ValueError: if a numeric field cannot be converted to int.
    """
    # Strip a trailing slash so that split('/')[-1] is the folder name, not ''.
    expParams = folder.rstrip('/').split('/')[-1].split('.')

    cloudParams = expParams[1].split('_')
    df['groupSizeDist'] = cloudParams[10]
    df['placementDist'] = cloudParams[11]
    # '-1' is reported as 'uniform'; any other value is kept as the raw string.
    df['placementNumHostsPerLeaf'] = 'uniform' if cloudParams[12] == '-1' else cloudParams[12]
    df['seed'] = int(cloudParams[15])

    _AddOptimizerColumns(df, expParams[2])

    # A second optimizer block is present only when there are five or more
    # components. With exactly four, component 3 already holds the dynamic
    # parameters (the previous '> 3' test mis-parsed that layout and raised).
    if len(expParams) > 4:
        _AddOptimizerColumns(df, expParams[3])
        dynamicField = expParams[4]
    else:
        dynamicField = expParams[3]

    dynamicParams = dynamicField.split('_')
    df['numEvents'] = int(dynamicParams[0])
    
# Turns all files of a given filename across a set of folders into a single dataframe
def DataSetAsDataFrame(filename, folders, headers, header=None, reset_index=True):
    """Read ``filename`` from every folder and stack the results into one DataFrame.

    Each per-folder frame is annotated with the experiment parameters decoded
    from its folder name (via AddParametersAsColumns) before concatenation.

    Args:
        filename: Name of the CSV file expected inside each folder.
        folders: Iterable of folder paths.
        headers: Column names passed to ``pd.read_csv`` as ``names``.
        header: Passed to ``pd.read_csv`` as ``header``.
        reset_index: If True, the combined frame gets a fresh 0..n-1 index.

    Returns:
        The concatenated DataFrame.

    Raises:
        ValueError: if ``folders`` is empty.
    """
    # Read one folder's file and tag it with that folder's parameters
    def ReadDataSet(folder):
        df = pd.read_csv(folder + '/' + filename, sep=',', header=header, names=headers)
        AddParametersAsColumns(df, folder)
        return df

    dfs = [ReadDataSet(folder) for folder in folders]
    if not dfs:
        # pd.concat would raise a generic "No objects to concatenate" here;
        # name the file so an empty glob result is easy to diagnose.
        raise ValueError("No folders given to read '%s' from" % filename)

    # Combine into a single dataframe
    df = pd.concat(dfs)
    if reset_index:
        # drop=True discards the old index directly, so this also works when
        # the data itself has a column called 'index'.
        df = df.reset_index(drop=True)
    return df

In [6]:
%%R 
# Shared ggplot2 theme: no legend title, legend placed on top, legend text at size 6
plotTheme <- theme(legend.title=element_blank(), legend.position="top", legend.text=element_text(size=6))

In [7]:
# NOTE(review): absolute, machine-specific path; point it at your own copy of the logs.
directory = '/mnt/sdb1/baseerat/numerical-evals/5-22-2018/logs-1M/'
# An earlier variant matched "logs*_0.*" relative to the working directory.
# glob returns matches in arbitrary order; sorting keeps the row order of the
# combined dataframe (and anything displayed from it) reproducible across runs.
folders = sorted(glob.glob(directory + "dynamic-logs.*_0.*"))

In [8]:
# Load the per-switch update counts from every experiment folder.
# header=0 marks the first CSV row as a header, which the names below replace.
perSwitchUpdateCount = DataSetAsDataFrame(
    filename='per_switch_update_count.csv',
    folders=folders,
    headers=['updates', 'switch'],
    header=0,
)

In [9]:
# Peek at the first five rows, including the columns decoded from the folder name
perSwitchUpdateCount.head(n=5)

Unnamed: 0,updates,switch,groupSizeDist,placementDist,placementNumHostsPerLeaf,seed,podsAlgorithm,podsNumBitmaps,podsNumNodesPerBitmap,podsRedundancyPerBitmap,podsNumRules,leafsAlgorithm,leafsNumBitmaps,leafsNumNodesPerBitmap,leafsRedundancyPerBitmap,leafsNumRules,numEvents
0,25660,virtual,uniform,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,1000000
1,20488,virtual,uniform,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,1000000
2,25888,virtual,uniform,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,1000000
3,15690,virtual,uniform,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,1000000
4,21804,virtual,uniform,colocate-uniform,uniform,0,random-fuzzy-match,2,3,6,64000,random-fuzzy-match,30,3,12,64000,1000000


In [13]:
# Scaling constants used to turn per-switch update counts into per-second rates
num_events = 10 ** 6      # presumably the number of events per experiment (cf. the numEvents column); confirm
events_per_sec = 10 ** 3  # event rate assumed when reporting updates per second

In [10]:
# Summary statistics of update counts on virtual switches, 'wve' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('wve')
    & perSwitchUpdateCount['switch'].eq('virtual'),
    'updates'
].describe()

count    27648.000000
mean     20956.514034
std       7948.062247
min       3043.000000
25%      14998.750000
50%      20784.500000
75%      27248.250000
max      45099.000000
Name: updates, dtype: float64

In [11]:
# Summary statistics of update counts on virtual switches, 'uniform' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('uniform')
    & perSwitchUpdateCount['switch'].eq('virtual'),
    'updates'
].describe()

count    27648.000000
mean     30137.885634
std      13531.281889
min        941.000000
25%      19371.250000
50%      27691.500000
75%      40172.000000
max      72382.000000
Name: updates, dtype: float64

In [20]:
# Worst-case update rate seen by a single virtual switch under the 'wve'
# workload: the largest per-switch update count, rescaled from num_events
# events to events_per_sec events.
max_updates = perSwitchUpdateCount.loc[
    (perSwitchUpdateCount['groupSizeDist'] == 'wve') &
    (perSwitchUpdateCount['switch'] == 'virtual'), 'updates'].max()
# Multiply before dividing: dividing first rounds the intermediate quotient
# and prints float noise such as 45.099000000000004.
print("virtual switch updates with %s events per second: " % (events_per_sec)
      + str(max_updates * events_per_sec / num_events))

virtual switch updates with 1000 events per seconds: 45.099000000000004


In [15]:
# Summary statistics of update counts on leaf switches, 'wve' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('wve')
    & perSwitchUpdateCount['switch'].eq('leaf'),
    'updates'
].describe()

count      576.000000
mean      4297.439236
std       2281.949367
min        432.000000
25%       2816.750000
50%       3915.000000
75%       5807.500000
max      12670.000000
Name: updates, dtype: float64

In [16]:
# Summary statistics of update counts on leaf switches, 'uniform' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('uniform')
    & perSwitchUpdateCount['switch'].eq('leaf'),
    'updates'
].describe()

count      576.000000
mean      5337.942708
std       2327.189449
min        730.000000
25%       3280.500000
50%       5618.500000
75%       6879.000000
max      11756.000000
Name: updates, dtype: float64

In [21]:
# Worst-case update rate seen by a single leaf switch under the 'wve'
# workload: the largest per-switch update count, rescaled from num_events
# events to events_per_sec events.
max_updates = perSwitchUpdateCount.loc[
    (perSwitchUpdateCount['groupSizeDist'] == 'wve') &
    (perSwitchUpdateCount['switch'] == 'leaf'), 'updates'].max()
# Multiply before dividing so the intermediate quotient is not rounded first.
print("leaf switch updates with %s events per second: " % (events_per_sec)
      + str(max_updates * events_per_sec / num_events))

leaf switch updates with 1000 events per seconds: 12.67


In [22]:
# Summary statistics of update counts on pod switches, 'wve' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('wve')
    & perSwitchUpdateCount['switch'].eq('pod'),
    'updates'
].describe()

count      12.000000
mean     3584.166667
std      2170.420271
min       609.000000
25%      1864.000000
50%      3293.500000
75%      5601.250000
max      6601.000000
Name: updates, dtype: float64

In [24]:
# Summary statistics of update counts on pod switches, 'uniform' group sizes
perSwitchUpdateCount.loc[
    perSwitchUpdateCount['groupSizeDist'].eq('uniform')
    & perSwitchUpdateCount['switch'].eq('pod'),
    'updates'
].describe()

count    12.000000
mean     16.916667
std      10.672465
min       0.000000
25%       8.750000
50%      16.500000
75%      23.250000
max      39.000000
Name: updates, dtype: float64

In [25]:
# Worst-case update rate seen by a single 'pod' switch under the 'wve'
# workload: the largest per-switch update count, rescaled from num_events
# events to events_per_sec events.
# NOTE(review): the filter selects switch == 'pod' while the printed label
# says "spine"; confirm that both refer to the same switch tier.
max_updates = perSwitchUpdateCount.loc[
    (perSwitchUpdateCount['groupSizeDist'] == 'wve') &
    (perSwitchUpdateCount['switch'] == 'pod'), 'updates'].max()
# Multiply before dividing so the intermediate quotient is not rounded first.
print("spine switch updates with %s events per second: " % (events_per_sec)
      + str(max_updates * events_per_sec / num_events))

spine switch updates with 1000 events per seconds: 6.601
