In [63]:
import pandas as pd
import glob
import configparser
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import anderson

# ---- Plot styling: ggplot base style, then black-on-white overrides with large fonts ----
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (40, 20)
plt.rcParams.update({
    "lines.color": "black",
    "patch.edgecolor": "black",
    "text.color": "black",
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "grid.color": "gray",
    "figure.facecolor": "white",
    "figure.edgecolor": "white",
    "savefig.facecolor": "white",
    "savefig.edgecolor": "white",
    "font.size": 30,
    "xtick.labelsize":30,
    "ytick.labelsize":30,
    "lines.linewidth":1.,
    "legend.fontsize": 10,
    })

# ---- Locate the ns-3.41 checkout ----
# The path is resolved through the system `locate` database; the first hit whose
# path ends in "/ns-3.41" is used. The pipe is closed explicitly, and an empty
# result is reported with a readable error instead of a bare IndexError.
with os.popen('locate "ns-3.41" | grep /ns-3.41$') as _locate_pipe:
    _ns3_candidates = _locate_pipe.read().splitlines()
if not _ns3_candidates:
    raise FileNotFoundError(
        'Could not find an "ns-3.41" directory via `locate`; '
        'is ns-3.41 installed and the locate database up to date?'
    )
__ns3_path = _ns3_candidates[0]

# Fraction of rows drawn by sample_data() when it sub-samples a trace.
sample_rate = 0.01
confidenceValue = 1.96 # 95% confidence interval

In [64]:
# convert strings like "2Mbps" to float
def convert_to_float(x):
    """Convert a setting such as ``"2Mbps"`` or ``"3ms"`` to a plain float.

    Rates are normalised to Mbps (``Kbps`` is divided by 1000, ``Gbps`` is
    multiplied by 1000) and durations to milliseconds (``us`` is divided by
    1000). A value without a recognised unit suffix is parsed as-is.

    Parameters
    ----------
    x : str or number
        The raw value. Surrounding whitespace is ignored; values that are
        already numeric are simply passed through ``float``.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed.
    """
    # Already-numeric input has no unit to strip.
    if not isinstance(x, str):
        return float(x)

    text = x.strip()
    # (suffix, multiplier, divisor). Multiplication and division are kept
    # separate so results match an explicit "/ 1000" or "* 1000" exactly.
    unit_table = (
        ('Mbps', 1, 1),
        ('Kbps', 1, 1000),
        ('Gbps', 1000, 1),
        ('ms', 1, 1),
        ('us', 1, 1000),
    )
    for suffix, multiplier, divisor in unit_table:
        # Match the unit as a suffix, not anywhere inside the string.
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * multiplier / divisor
    return float(text)
    
def sample_data(data, sample_column, frac=None, max_attempts=None):
    """Draw a random sub-sample whose inter-arrival times look exponential.

    A fixed fraction of ``data`` is sampled uniformly at random and sorted by
    ``sample_column``. The gaps between consecutive sampled values are tested
    against an exponential distribution with the Anderson-Darling test; the
    draw is repeated until the statistic is below the third critical value
    returned by ``scipy.stats.anderson``.

    Other sampling strategies (picking the packet closest to Poisson arrival
    instants, or selecting packets from exponential draws) were prototyped
    here before and are not active.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to sample from.
    sample_column : str
        Column used for ordering and for the inter-arrival computation.
    frac : float, optional
        Fraction of rows to draw. Defaults to the module-level ``sample_rate``.
    max_attempts : int, optional
        Upper bound on the number of draws. ``None`` (default) retries without
        limit, as before.

    Returns
    -------
    pandas.DataFrame
        The accepted sample, sorted by ``sample_column``, with a fresh index.
        The first sorted row is not included because it has no predecessor
        and therefore no inter-arrival time.

    Raises
    ------
    ValueError
        If the sample is too small to yield any inter-arrival time.
    RuntimeError
        If ``max_attempts`` draws were made without passing the test.
    """
    if frac is None:
        frac = sample_rate

    attempts = 0
    while True:
        candidate = data.sample(frac=frac).sort_values(by=[sample_column])
        candidate['InterArrivalTime'] = candidate[sample_column].diff()
        # Only drop rows lacking an inter-arrival time; NaNs in unrelated
        # columns must not remove rows from the sample.
        candidate = candidate.dropna(subset=['InterArrivalTime']).reset_index(drop=True)
        if len(candidate) == 0:
            # The sample size is fixed by frac * len(data), so retrying cannot help.
            raise ValueError(
                'sample_data: sampling fraction {} of {} rows leaves no inter-arrival times'.format(frac, len(data))
            )

        anderson_statistic, anderson_critical_values, _ = anderson(candidate['InterArrivalTime'], 'expon')
        if anderson_statistic < anderson_critical_values[2]:
            return candidate.drop(columns=['InterArrivalTime'])

        attempts += 1
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                'sample_data: no sample passed the Anderson-Darling test in {} attempts'.format(attempts)
            )

def get_switch_samples_delays(flowIndicatorDf, switchDf):
    """Right-join two packet tables and back-fill missing timestamps.

    Every row of ``switchDf`` is kept. Where a row has no counterpart in
    ``flowIndicatorDf``, its ``ReceiveTime`` is taken from ``SampleTime`` and
    its ``SentTime`` from ``DepartTime``. The merged frame is therefore
    expected to contain all four of those columns.
    """
    packet_key = ['SourceIp', 'SourcePort', 'DestinationIp', 'DestinationPort', 'PayloadSize', 'SequenceNb']
    merged = flowIndicatorDf.merge(switchDf, on=packet_key, how='right')
    # Unmatched rows carry NaN in the left-hand timestamp columns.
    for target, fallback in (('ReceiveTime', 'SampleTime'), ('SentTime', 'DepartTime')):
        merged[target] = merged[target].fillna(merged[fallback])
    return merged

def switch_data(flowIndicatorDf, switchDf, sampling):
    """Keep the rows of ``switchDf`` whose packet key also appears in ``flowIndicatorDf``.

    When ``sampling`` is truthy the joined rows are additionally passed
    through ``sample_data`` using the ``ReceiveTime`` column.
    """
    packet_key = ['SourceIp', 'SourcePort', 'DestinationIp', 'DestinationPort', 'PayloadSize', 'SequenceNb']
    joined = flowIndicatorDf.merge(switchDf, on=packet_key, how='inner')
    if not sampling:
        return joined
    return sample_data(joined, 'ReceiveTime')

def intermediateLink_data(flowIndicatorDf, source, dest):
    """Pair each packet's ``SentTime`` at ``source`` with its ``ReceiveTime`` at ``dest``.

    Only packets present in ``flowIndicatorDf``, ``source`` and ``dest`` are
    kept (inner joins on the packet key). ``source`` contributes ``SentTime``
    and ``dest`` contributes ``ReceiveTime``.
    """
    packet_key = ['SourceIp', 'SourcePort', 'DestinationIp', 'DestinationPort', 'PayloadSize', 'SequenceNb']
    upstream = source.drop(columns=['ReceiveTime'])    # keeps the sender-side timestamp
    downstream = dest.drop(columns=['SentTime'])       # keeps the receiver-side timestamp
    return (
        flowIndicatorDf
        .merge(upstream, on=packet_key, how='inner')
        .merge(downstream, on=packet_key, how='inner')
    )

def get_delayMean(data):
    """Return the mean of ``|ReceiveTime - SentTime|``.

    Side effect: the per-row delay is stored in ``data['Delay']``.
    """
    per_packet_delay = (data['ReceiveTime'] - data['SentTime']).abs()
    data['Delay'] = per_packet_delay
    return data['Delay'].mean()

def get_delayStd(data):
    """Return the standard deviation (pandas default) of ``|ReceiveTime - SentTime|``.

    Side effect: the per-row delay is stored in ``data['Delay']``.
    """
    per_packet_delay = (data['ReceiveTime'] - data['SentTime']).abs()
    data['Delay'] = per_packet_delay
    return data['Delay'].std()

def get_statistics(data):
    """Summarise the delays in ``data`` as mean, standard deviation and row count.

    Returns a dict with keys ``'DelayMean'``, ``'DelayStd'`` and
    ``'sampleSize'``. The helpers it calls add a ``'Delay'`` column to ``data``.
    """
    return {
        'DelayMean': get_delayMean(data),
        'DelayStd': get_delayStd(data),
        'sampleSize': len(data),
    }

In [65]:
def ECNMC(endToEnd_delayMean, sumOfSegments_DelayMeans, endToEnd_delayStd, MinSampleSize, confidenceValue):
    """Check whether the segment means add up to the end-to-end mean.

    Returns True when ``|endToEnd_delayMean - sumOfSegments_DelayMeans|`` is
    at most ``confidenceValue * endToEnd_delayStd / sqrt(MinSampleSize)``,
    and False otherwise.
    """
    mean_gap = abs(endToEnd_delayMean - sumOfSegments_DelayMeans)
    tolerance = confidenceValue * (endToEnd_delayStd / np.sqrt(MinSampleSize))
    return bool(mean_gap <= tolerance)

def ECNMC_V2(endToEnd_delayMean, sumOfSegments_DelayMeans, maxEpsilon):
    """Check the relative gap between the end-to-end mean and the segment sum.

    Returns True when ``|endToEnd_delayMean - sumOfSegments_DelayMeans| /
    endToEnd_delayMean`` does not exceed ``maxEpsilon``, and False otherwise.
    """
    relative_gap = abs(endToEnd_delayMean - sumOfSegments_DelayMeans) / endToEnd_delayMean
    return bool(relative_gap <= maxEpsilon)
    
def check_single_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run the ``ECNMC`` check for one flow.

    The segment sum adds the delay means of all switches and then all
    intermediate links. The sample size passed to ``ECNMC`` is the smallest
    ``sampleSize`` among the switches only.
    """
    switch_entries = list(switches_statistics.values())
    link_entries = list(interLinks_statistics.values())

    segment_means = [entry['DelayMean'] for entry in switch_entries] + [entry['DelayMean'] for entry in link_entries]
    smallest_switch_sample = min(entry['sampleSize'] for entry in switch_entries)

    return ECNMC(
        endToEnd_statistics['DelayMean'],
        sum(segment_means),
        endToEnd_statistics['DelayStd'],
        smallest_switch_sample,
        confidenceValue,
    )
        
def check_single_delayConsistency_V2(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run the ``ECNMC_V2`` check for one flow.

    For every segment (switches first, then intermediate links) the relative
    half-width ``confidenceValue * std / (sqrt(sampleSize) * mean)`` is
    computed. The largest of these is the tolerance handed to ``ECNMC_V2``,
    together with the end-to-end mean and the sum of the segment means.
    """
    segments = list(switches_statistics.values()) + list(interLinks_statistics.values())

    segments_delayMeans = [segment['DelayMean'] for segment in segments]
    segments_delayStds = [segment['DelayStd'] for segment in segments]
    segments_sampleSizes = [segment['sampleSize'] for segment in segments]

    epsilons = [
        confidenceValue * (std / (np.sqrt(size) * mean))
        for mean, std, size in zip(segments_delayMeans, segments_delayStds, segments_sampleSizes)
    ]

    return ECNMC_V2(endToEnd_statistics['DelayMean'], sum(segments_delayMeans), max(epsilons))




def check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run both consistency checks for every flow in ``endToEnd_statistics``.

    Returns ``{'DominantAssumption': {flow: bool}, 'General': {flow: bool}}``,
    where the first entry comes from ``check_single_delayConsistency`` and the
    second from ``check_single_delayConsistency_V2``.
    """
    res = {'DominantAssumption': {}, 'General': {}}
    for flow, flow_endToEnd in endToEnd_statistics.items():
        flow_switches = switches_statistics[flow]
        flow_links = interLinks_statistics[flow]
        res['DominantAssumption'][flow] = check_single_delayConsistency(flow_endToEnd, flow_switches, flow_links, confidenceValue)
        res['General'][flow] = check_single_delayConsistency_V2(flow_endToEnd, flow_switches, flow_links, confidenceValue)
    return res

In [66]:
class AppKey:
    """Plain holder for the four fields that identify an application flow."""

    def __init__(self, sourceIp, sourcePort, destIp, destPort):
        # Source endpoint
        self.sourceIp, self.sourcePort = sourceIp, sourcePort
        # Destination endpoint
        self.destIp, self.destPort = destIp, destPort

In [67]:
# Load the simulation parameters from Parameters.config ([Settings] section).
config = configparser.ConfigParser()
config.read('Parameters.config')


def _setting(option):
    """Return the raw string stored under ``option`` in the [Settings] section."""
    return config.get('Settings', option)


# Link rates (convert_to_float normalises them to Mbps)
hostToTorLinkRate = convert_to_float(_setting('hostToTorLinkRate'))
torToAggLinkRate = convert_to_float(_setting('torToAggLinkRate'))
aggToCoreLinkRate = convert_to_float(_setting('aggToCoreLinkRate'))
# Link delays (convert_to_float normalises them to ms)
hostToTorLinkDelay = convert_to_float(_setting('hostToTorLinkDelay'))
torToAggLinkDelay = convert_to_float(_setting('torToAggLinkDelay'))
aggToCoreLinkDelay = convert_to_float(_setting('aggToCoreLinkDelay'))
# Traffic and timing parameters
pctPacedBack = convert_to_float(_setting('pctPacedBack'))
appDataRate = convert_to_float(_setting('appDataRate'))
duration = convert_to_float(_setting('duration'))
steadyStart = convert_to_float(_setting('steadyStart'))
steadyEnd = convert_to_float(_setting('steadyEnd'))
sampleRate = convert_to_float(_setting('sampleRate'))
experiments = int(_setting('experiments'))

# Echo every parameter; each tuple holds the positional arguments of one print call.
for _report_row in (
    ("hostToTorLinkRate: ", hostToTorLinkRate, " Mbps"),
    ("torToAggLinkRate: ", torToAggLinkRate, " Mbps"),
    ("aggToCoreLinkRate: ", aggToCoreLinkRate, " Mbps"),
    ("hostToTorLinkDelay: ", hostToTorLinkDelay, " ms"),
    ("torToAggLinkDelay: ", torToAggLinkDelay, " ms"),
    ("aggToCoreLinkDelay: ", aggToCoreLinkDelay, " ms"),
    ("pctPacedBack: ", pctPacedBack, " %"),
    ("appDataRate: ", appDataRate, " Mbps"),
    ("duration: ", duration, " s"),
    ("steadyStart: ", steadyStart, " s"),
    ("steadyEnd: ", steadyEnd, " s"),
    ("sampleRate", sampleRate),
    ("experiments: ", experiments),
):
    print(*_report_row)

hostToTorLinkRate:  50.0  Mbps
torToAggLinkRate:  50.0  Mbps
aggToCoreLinkRate:  50.0  Mbps
hostToTorLinkDelay:  3.0  ms
torToAggLinkDelay:  3.0  ms
aggToCoreLinkDelay:  3.0  ms
pctPacedBack:  0.8  %
appDataRate:  50.0  Mbps
duration:  10.0  s
steadyStart:  2.0  s
steadyEnd:  9.0  s
sampleRate 1000.0
experiments:  2


# Reading the Groundtruth

In [68]:
# Load the end-to-end traces of experiment 0, one DataFrame per flow.
# The flow name is the part of the file name before the first underscore.
file_paths = glob.glob('{}/scratch/Results/50Mbps/0/*_EndToEnd.csv'.format(__ns3_path))
endToEnd_dfs = {}
apps = []
print(file_paths)
for file_path in file_paths:
    df_name = file_path.rpartition('/')[2].partition('_')[0]
    df = pd.read_csv(file_path)
    # only packets that were actually received
    df = df[df['IsReceived'] == 1].reset_index(drop=True)
    # keep packets sent strictly inside the (steadyStart, steadyEnd) window;
    # the config values are in seconds and are scaled by 1e9 to match SentTime
    df = df[(df['SentTime'] > steadyStart * 1000000000) & (df['SentTime'] < steadyEnd * 1000000000)]
    df = df.drop(columns=['IsReceived'])
    print(len(df))
    endToEnd_dfs[df_name] = df

['/home/mahdi/Documents/NAL/ns-allinone-3.41/ns-3.41/scratch/Results/50Mbps/0/R0h1R1h1_EndToEnd.csv', '/home/mahdi/Documents/NAL/ns-allinone-3.41/ns-3.41/scratch/Results/50Mbps/0/R0h0R1h0_EndToEnd.csv']
16637
19090


In [69]:
# Load the per-switch traces of experiment 0, one DataFrame per switch.
# The switch name is the part of the file name before the first underscore.
file_paths = glob.glob('{}/scratch/Results/50Mbps/0/*_Switch.csv'.format(__ns3_path))
switch_dfs = {}

for file_path in file_paths:
    df_name = file_path.rpartition('/')[2].partition('_')[0]
    df = pd.read_csv(file_path)
    # only packets that the switch actually sent on
    df = df[df['IsSent'] == 1].reset_index(drop=True)
    # keep packets received strictly inside the (steadyStart, steadyEnd) window;
    # the config values are in seconds and are scaled by 1e9 to match ReceiveTime
    df = df[(df['ReceiveTime'] > steadyStart * 1000000000) & (df['ReceiveTime'] < steadyEnd * 1000000000)]
    df = df.drop(columns=['IsSent'])
    print(len(df))
    switch_dfs[df_name] = df

switch_dfs[list(switch_dfs.keys())[0]].head()

35771
35725


Unnamed: 0,SourceIp,SourcePort,DestinationIp,DestinationPort,SequenceNb,PayloadSize,ReceiveTime,SentTime
2981,10.1.2.1,49200,10.2.2.1,7419,144156,1448,8999595066,8999595066
2982,10.1.1.1,49538,10.2.1.1,4241,766,14,8999354746,8999468026
2984,10.1.2.1,49222,10.2.2.1,6164,636086,666,8999343866,8999343866
2985,10.1.1.1,49176,10.2.1.1,4502,187604,1442,8999176826,8999176826
2986,10.1.2.1,49440,10.2.2.1,7189,24042,359,8998456826,8998456826


# Reading the Samples

In [70]:
# Load the Poisson-sampler traces of experiment 0, one DataFrame per sampler.
# The sampler name is the part of the file name before the first underscore.
file_paths = glob.glob('{}/scratch/Results/50Mbps/0/*_PoissonSampler.csv'.format(__ns3_path))
samples_dfs = {}

for file_path in file_paths:
    df_name = file_path.rpartition('/')[2].partition('_')[0]
    df = pd.read_csv(file_path)
    # only samples whose packet actually departed
    df = df[df['IsDeparted'] == 1].reset_index(drop=True)
    # keep samples taken strictly inside the (steadyStart, steadyEnd) window;
    # the config values are in seconds and are scaled by 1e9 to match SampleTime
    df = df[(df['SampleTime'] > steadyStart * 1000000000) & (df['SampleTime'] < steadyEnd * 1000000000)]
    df = df.drop(columns=['IsDeparted'])
    print(len(df))
    samples_dfs[df_name] = df

samples_dfs[list(samples_dfs.keys())[0]].head()

6979
6937
7027


Unnamed: 0,SourceIp,SourcePort,DestinationIp,DestinationPort,SequenceNb,PayloadSize,SampleTime,DepartTime
393,10.1.2.1,49668,10.2.2.1,6825,1,1448,8996294801,9047092826
394,10.1.2.1,49663,10.2.2.1,6807,34753,1448,8992706890,9042878586
395,10.1.2.1,49200,10.2.2.1,7419,148500,411,8987852931,9036949306
396,10.1.2.1,49239,10.2.2.1,7269,700027,1448,8987453506,9036708986
397,10.1.2.1,49243,10.2.2.1,7287,713454,1448,8987191125,9036468666


# Intermediate links groundtruth statistics

In [71]:
# Delay statistics of the three links on each flow's path:
# source -> T0, T0 -> T1 and T1 -> dest.
interLinks_statistics = {}
for flow in endToEnd_dfs.keys():
    # packet identifiers of this flow, without any timestamps
    flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
    # link -> (frame supplying SentTime, frame supplying ReceiveTime)
    link_endpoints = {
        ('source', 'T0'): (endToEnd_dfs[flow], switch_dfs['T0']),
        ('T0', 'T1'): (switch_dfs['T0'], switch_dfs['T1']),
        ('T1', 'dest'): (switch_dfs['T1'], endToEnd_dfs[flow]),
    }
    interLinks_statistics[flow] = {
        link: get_statistics(intermediateLink_data(flow_packets, upstream, downstream))
        for link, (upstream, downstream) in link_endpoints.items()
    }

interLinks_statistics

{'R0h1R1h1': {('source', 'T0'): {'DelayMean': 3414553.4708924703,
   'DelayStd': 473713.91394338995,
   'sampleSize': 16628},
  ('T0', 'T1'): {'DelayMean': 3196272.830577261,
   'DelayStd': 79506.29668074592,
   'sampleSize': 16509},
  ('T1', 'dest'): {'DelayMean': 3196272.830577261,
   'DelayStd': 79506.29668074592,
   'sampleSize': 16509}},
 'R0h0R1h0': {('source', 'T0'): {'DelayMean': 3477290.1963527747,
   'DelayStd': 559034.3420664251,
   'sampleSize': 19083},
  ('T0', 'T1'): {'DelayMean': 3195216.6485229614,
   'DelayStd': 82632.2947406187,
   'sampleSize': 18923},
  ('T1', 'dest'): {'DelayMean': 3195216.6485229614,
   'DelayStd': 82632.2947406187,
   'sampleSize': 18923}}}

# Switches statistics

In [72]:
# Switch delay statistics estimated from samples.
#   'Overall'          - the Poisson-sampler traces joined with the switch traces
#   'PerTrafficStream' - each flow's packets at the switch, sub-sampled by sample_data
samples_statistics = {'Overall': {}, 'PerTrafficStream': {}}

for flow in endToEnd_dfs.keys():
    samples_statistics['Overall'][flow] = {
        'T0': get_statistics(get_switch_samples_delays(switch_dfs['T0'], samples_dfs['T0T1'])),
        'T1': get_statistics(get_switch_samples_delays(switch_dfs['T1'], samples_dfs['T1.R' + flow.split('R')[-1]])),
    }

    # packet identifiers of this flow, without any timestamps
    flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
    samples_statistics['PerTrafficStream'][flow] = {
        'T0': get_statistics(switch_data(flow_packets, switch_dfs['T0'], True)),
        'T1': get_statistics(switch_data(flow_packets, switch_dfs['T1'], True)),
    }

samples_statistics

{'Overall': {'R0h1R1h1': {'T0': {'DelayMean': 54537980.1488752,
    'DelayStd': 13694480.42715089,
    'sampleSize': 6979},
   'T1': {'DelayMean': 10431.808737725914,
    'DelayStd': 45075.75800796941,
    'sampleSize': 7027}},
  'R0h0R1h0': {'T0': {'DelayMean': 54537980.1488752,
    'DelayStd': 13694480.42715089,
    'sampleSize': 6979},
   'T1': {'DelayMean': 14348.865503820096,
    'DelayStd': 52920.105069117715,
    'sampleSize': 6937}}},
 'PerTrafficStream': {'R0h1R1h1': {'T0': {'DelayMean': 52573587.28484848,
    'DelayStd': 13835437.17511221,
    'sampleSize': 165},
   'T1': {'DelayMean': 18635.121951219513,
    'DelayStd': 55580.641689889504,
    'sampleSize': 164}},
  'R0h0R1h0': {'T0': {'DelayMean': 54380686.55789474,
    'DelayStd': 13006878.360653859,
    'sampleSize': 190},
   'T1': {'DelayMean': 27461.27659574468,
    'DelayStd': 65397.47181876928,
    'sampleSize': 188}}}}

In [73]:
# Switch delay statistics computed from the full (unsampled) switch traces.
#   'Overall'          - every packet seen by the switch
#   'PerTrafficStream' - only the packets that belong to the flow
groundtruth_statistics = {'Overall': {}, 'PerTrafficStream': {}}

for flow in endToEnd_dfs.keys():
    # NOTE: get_statistics adds a 'Delay' column to the frame it is given,
    # so the switch frames themselves are modified here.
    groundtruth_statistics['Overall'][flow] = {
        'T0': get_statistics(switch_dfs['T0']),
        'T1': get_statistics(switch_dfs['T1']),
    }

    # packet identifiers of this flow, without any timestamps
    flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
    groundtruth_statistics['PerTrafficStream'][flow] = {
        'T0': get_statistics(switch_data(flow_packets, switch_dfs['T0'], False)),
        'T1': get_statistics(switch_data(flow_packets, switch_dfs['T1'], False)),
    }

groundtruth_statistics

{'Overall': {'R0h1R1h1': {'T0': {'DelayMean': 54234348.80285514,
    'DelayStd': 13609224.596293403,
    'sampleSize': 35725},
   'T1': {'DelayMean': 28275.060803444132,
    'DelayStd': 68902.48939861961,
    'sampleSize': 35771}},
  'R0h0R1h0': {'T0': {'DelayMean': 54234348.80285514,
    'DelayStd': 13609224.596293403,
    'sampleSize': 35725},
   'T1': {'DelayMean': 28275.060803444132,
    'DelayStd': 68902.48939861961,
    'sampleSize': 35771}}},
 'PerTrafficStream': {'R0h1R1h1': {'T0': {'DelayMean': 54164268.459766656,
    'DelayStd': 13546179.823522119,
    'sampleSize': 16628},
   'T1': {'DelayMean': 26428.0186564904,
    'DelayStd': 66447.9473846724,
    'sampleSize': 16509}},
  'R0h0R1h0': {'T0': {'DelayMean': 54291891.6870513,
    'DelayStd': 13668349.575525953,
    'sampleSize': 19083},
   'T1': {'DelayMean': 29883.64212862654,
    'DelayStd': 70970.36885575454,
    'sampleSize': 18923}}}}

In [74]:
### ANOVA test for the delays of the segments
# temp = {}
# for flow in endToEnd_dfs.keys():
#     temp[flow] = {}
#     temp[flow]['T0'] = switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'])
#     temp[flow]['T0']['Delay'] = abs(temp[flow]['T0']['ReceiveTime'] - temp[flow]['T0']['SentTime'])

# # test whether, for T0, the delays of all flows come from the same distribution, using an ANOVA test
# from scipy.stats import f_oneway
# f_oneway(*[temp[flow]['T0']['Delay'] for flow in endToEnd_dfs.keys()])

In [75]:
# plot the delay distribution of Switch T0 and Sample T0
# fig, ax = plt.subplots(1, 2)
# l_df = get_switch_samples_delays(switch_dfs['T0'], samples_dfs['T0T1'])
# # l2_df = switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'])
# l2_df = switch_dfs['T0']
# sns.histplot(l_df['SentTime'] - l_df['ReceiveTime'], ax=ax[0])
# sns.histplot(l2_df['SentTime'] - l2_df['ReceiveTime'], ax=ax[1])
# ax[0].set_title('Sample T0')
# ax[1].set_title('Switch T0')
# plt.show()

In [76]:
# # plot the delay distribution of Switch T0 for all flows and the aggregated delay distribution
# fig, ax = plt.subplots()
# for flow in endToEnd_dfs.keys():
#     l_df = switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'])
#     sns.histplot(l_df['SentTime'] - l_df['ReceiveTime'], label=flow)
#     ax.set_title('Switch T0')
# # l_df = switch_dfs['T0']
# # sns.histplot(l_df['SentTime'] - l_df['ReceiveTime'], ax=ax[1])
# # ax[1].set_title('Switch T0')
# plt.show()

# Groundtruth delay mean and std

In [77]:
# Mean, standard deviation and packet count of the end-to-end delay of each flow.
# (A sampled variant was tried before: get_statistics(sample_data(endToEnd_dfs[flow], 'SentTime')).)
endToEnd_statistics = {}
for flow, flow_trace in endToEnd_dfs.items():
    endToEnd_statistics[flow] = get_statistics(flow_trace)

print(endToEnd_statistics)

{'R0h1R1h1': {'DelayMean': 63997046.37729158, 'DelayStd': 13563011.057140183, 'sampleSize': 16637}, 'R0h0R1h0': {'DelayMean': 64189111.16914615, 'DelayStd': 13692861.639190746, 'sampleSize': 19090}}


# End-to-End and Per-Segment Compatibility Check

In [78]:
# Run both consistency checks for every flow, using ground-truth and sampled
# switch statistics, at 'Overall' and 'PerTrafficStream' granularity.
results = {
    'Overall': {
        'groundtruth': check_all_delayConsistency(endToEnd_statistics, groundtruth_statistics['Overall'], interLinks_statistics, confidenceValue),
        'samples': check_all_delayConsistency(endToEnd_statistics, samples_statistics['Overall'], interLinks_statistics, confidenceValue),
    },
    'PerTrafficStream': {
        'groundtruth': check_all_delayConsistency(endToEnd_statistics, groundtruth_statistics['PerTrafficStream'], interLinks_statistics, confidenceValue),
        'samples': check_all_delayConsistency(endToEnd_statistics, samples_statistics['PerTrafficStream'], interLinks_statistics, confidenceValue),
    },
}

results

{'Overall': {'groundtruth': {'DominantAssumption': {'R0h1R1h1': True,
    'R0h0R1h0': True},
   'General': {'R0h1R1h1': True, 'R0h0R1h0': True}},
  'samples': {'DominantAssumption': {'R0h1R1h1': False, 'R0h0R1h0': True},
   'General': {'R0h1R1h1': True, 'R0h0R1h0': True}}},
 'PerTrafficStream': {'groundtruth': {'DominantAssumption': {'R0h1R1h1': True,
    'R0h0R1h0': True},
   'General': {'R0h1R1h1': True, 'R0h0R1h0': True}},
  'samples': {'DominantAssumption': {'R0h1R1h1': True, 'R0h0R1h0': True},
   'General': {'R0h1R1h1': True, 'R0h0R1h0': True}}}}

# Repeat sampling to check if the relation holds more than 95% of the time

In [82]:
import json


def _load_steady_state_frames(pattern, flag_column, time_column):
    """Read every CSV matching ``pattern`` into a dict of DataFrames.

    The dict key is the part of the file name before the first underscore.
    Only rows with ``flag_column == 1`` are kept, and of those only the rows
    whose ``time_column`` lies strictly inside the (steadyStart, steadyEnd)
    window. The config values are in seconds and are scaled by 1e9 to match
    the trace timestamps. ``flag_column`` is dropped from the result.
    """
    frames = {}
    for path in glob.glob(pattern):
        name = path.split('/')[-1].split('_')[0]
        frame = pd.read_csv(path)
        frame = frame[frame[flag_column] == 1].reset_index(drop=True)
        in_window = (frame[time_column] > steadyStart * 1000000000) & (frame[time_column] < steadyEnd * 1000000000)
        frames[name] = frame[in_window].drop(columns=[flag_column])
    return frames


# Dimensions of the result tables, in the order they appear in the output.
_scopes = ('Overall', 'PerTrafficStream')
_stat_sources = ('groundtruth', 'samples')
_criteria = ('DominantAssumption', 'General')

# rounds_results[scope][source][criterion][flow] counts the experiments in which
# the check passed. Flows are taken from the endToEnd_dfs already loaded above.
rounds_results = {
    scope: {
        source: {
            criterion: {flow: 0 for flow in endToEnd_dfs.keys()}
            for criterion in _criteria
        }
        for source in _stat_sources
    }
    for scope in _scopes
}

for experiment in range(experiments):
    experiment_dir = '{}/scratch/Results/50Mbps/{}'.format(__ns3_path, experiment)

    # Reading the ground truth and the samples of this experiment
    endToEnd_dfs = _load_steady_state_frames(experiment_dir + '/*_EndToEnd.csv', 'IsReceived', 'SentTime')
    switch_dfs = _load_steady_state_frames(experiment_dir + '/*_Switch.csv', 'IsSent', 'ReceiveTime')
    samples_dfs = _load_steady_state_frames(experiment_dir + '/*_PoissonSampler.csv', 'IsDeparted', 'SampleTime')

    # Intermediate links ground-truth statistics
    interLinks_statistics = {}
    for flow in endToEnd_dfs.keys():
        interLinks_statistics[flow] = {}
        interLinks_statistics[flow][('source', 'T0')] = get_statistics(intermediateLink_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), endToEnd_dfs[flow], switch_dfs['T0']))
        interLinks_statistics[flow][('T0', 'T1')] = get_statistics(intermediateLink_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'], switch_dfs['T1']))
        interLinks_statistics[flow][('T1', 'dest')] = get_statistics(intermediateLink_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T1'], endToEnd_dfs[flow]))

    # Sampled switch statistics
    samples_statistics = {'Overall': {}, 'PerTrafficStream': {}}
    for flow in endToEnd_dfs.keys():
        samples_statistics['Overall'][flow] = {}
        samples_statistics['Overall'][flow]['T0'] = get_statistics(get_switch_samples_delays(switch_dfs['T0'], samples_dfs['T0T1']))
        samples_statistics['Overall'][flow]['T1'] = get_statistics(get_switch_samples_delays(switch_dfs['T1'], samples_dfs['T1.R' + flow.split('R')[-1]]))

        samples_statistics['PerTrafficStream'][flow] = {}
        samples_statistics['PerTrafficStream'][flow]['T0'] = get_statistics(switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'], True))
        samples_statistics['PerTrafficStream'][flow]['T1'] = get_statistics(switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T1'], True))

    # Ground-truth switch statistics
    groundtruth_statistics = {'Overall': {}, 'PerTrafficStream': {}}
    for flow in endToEnd_dfs.keys():
        groundtruth_statistics['Overall'][flow] = {}
        groundtruth_statistics['Overall'][flow]['T0'] = get_statistics(switch_dfs['T0'])
        groundtruth_statistics['Overall'][flow]['T1'] = get_statistics(switch_dfs['T1'])

        groundtruth_statistics['PerTrafficStream'][flow] = {}
        groundtruth_statistics['PerTrafficStream'][flow]['T0'] = get_statistics(switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T0'], False))
        groundtruth_statistics['PerTrafficStream'][flow]['T1'] = get_statistics(switch_data(endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime']), switch_dfs['T1'], False))

    # End-to-end statistics
    endToEnd_statistics = {}
    for flow in endToEnd_dfs.keys():
        endToEnd_statistics[flow] = get_statistics(endToEnd_dfs[flow])

    # End-to-end and per-segment compatibility check
    results = {}
    results['Overall'] = {}
    results['PerTrafficStream'] = {}
    results['Overall']['groundtruth'] = check_all_delayConsistency(endToEnd_statistics, groundtruth_statistics['Overall'], interLinks_statistics, confidenceValue)
    results['Overall']['samples'] = check_all_delayConsistency(endToEnd_statistics, samples_statistics['Overall'], interLinks_statistics, confidenceValue)
    results['PerTrafficStream']['groundtruth'] = check_all_delayConsistency(endToEnd_statistics, groundtruth_statistics['PerTrafficStream'], interLinks_statistics, confidenceValue)
    results['PerTrafficStream']['samples'] = check_all_delayConsistency(endToEnd_statistics, samples_statistics['PerTrafficStream'], interLinks_statistics, confidenceValue)

    # Tally the passes of this experiment.
    for scope in _scopes:
        for source in _stat_sources:
            for criterion in _criteria:
                pass_counts = rounds_results[scope][source][criterion]
                for flow in endToEnd_dfs.keys():
                    if results[scope][source][criterion][flow]:
                        # .get() tolerates a flow that only shows up in a later experiment
                        pass_counts[flow] = pass_counts.get(flow, 0) + 1

# Save the pass counts as JSON in results/results.json, creating the folder if needed.
os.makedirs('results', exist_ok=True)
with open('results/results.json', 'w') as f:
    json.dump(rounds_results, f)
print(rounds_results)