In [10]:
import pandas as pd
import glob
import configparser
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import anderson

# Global matplotlib look: start from the ggplot style, then override colours,
# font sizes and line widths below.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (40, 20)
plt.rcParams.update({
    "lines.color": "black",
    "patch.edgecolor": "black",
    "text.color": "black",
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "grid.color": "gray",
    "figure.facecolor": "white",
    "figure.edgecolor": "white",
    "savefig.facecolor": "white",
    "savefig.edgecolor": "white",
    "font.size": 30,
    "xtick.labelsize":30,
    "ytick.labelsize":30,
    "lines.linewidth":1.,
    "legend.fontsize": 10,
    })

# Find the ns-3.41 directory by shelling out to `locate` and keeping the first
# path that ends in "/ns-3.41". If the command prints nothing, the `[0]` below
# raises IndexError.
__ns3_path = os.popen('locate "ns-3.41" | grep /ns-3.41$').read().splitlines()[0]
# Fraction used by the fixed-rate sampling option, which is currently commented
# out inside sample_data.
sample_rate = 0.01
confidenceValue = 1.96 # 95% confidence interval
# Rate of the exponential draws in sample_data (their mean is 1/poisson_sample_rate).
poisson_sample_rate = 10

In [11]:
# convert strings like "2Mbps" to float
def convert_to_float(x):
    """Convert a setting such as ``"2Mbps"`` or ``"3ms"`` to a float.

    Rates are normalised to Mbps (``Kbps`` is divided by 1000, ``Gbps`` is
    multiplied by 1000) and times to milliseconds (``us`` is divided by 1000).
    A value without one of these unit suffixes is parsed as a plain number.

    Parameters
    ----------
    x : str or number
        The raw value. Leading/trailing whitespace in a string is ignored.
        Non-string values are passed straight to ``float``.

    Returns
    -------
    float
        The numeric value in Mbps, milliseconds, or as-is when unit-less.

    Raises
    ------
    ValueError
        If the numeric part of the string cannot be parsed.
    """
    if not isinstance(x, str):
        return float(x)

    text = x.strip()
    # (suffix, multiplier, divisor): kept as separate multiply/divide steps so
    # the arithmetic is exactly ``value / 1000`` or ``value * 1000``.
    units = (
        ('Mbps', 1, 1),
        ('Kbps', 1, 1000),
        ('Gbps', 1000, 1),
        ('ms', 1, 1),
        ('us', 1, 1000),
    )
    for suffix, multiplier, divisor in units:
        # The unit must be a real suffix, because the slice below assumes so.
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * multiplier / divisor
    return float(text)
    
def sample_data(data, sample_column):
    """Randomly sub-sample ``data`` until the sample's gaps look exponential.

    Each attempt draws one exponential variate per row (mean
    ``1/poisson_sample_rate``) and keeps the rows whose draw lies within 1% of
    that mean. The kept rows are sorted by ``sample_column`` and the
    differences between consecutive ``sample_column`` values are tested
    against an exponential distribution with the Anderson-Darling test. The
    attempt is accepted when the statistic is below ``critical_values[2]``;
    otherwise a new sample is drawn.

    NOTE(review): because the 1% band is relative to the mean, the per-row
    keep probability does not depend on ``poisson_sample_rate``
    (exp(-0.99) - exp(-1.01), roughly 0.7%). Confirm this is intended.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to sample from; must contain ``sample_column``.
    sample_column : str
        Column whose consecutive differences are tested.

    Returns
    -------
    pandas.DataFrame
        The accepted sample, sorted by ``sample_column``, re-indexed from 0,
        with an extra all-True ``IsSample`` column. The first sampled row is
        dropped because it has no predecessor to take a difference with.

    Notes
    -----
    Reads the module-level ``poisson_sample_rate``. The loop has no attempt
    limit, so it keeps going until a sample passes the test.
    """
    mean_gap = 1/poisson_sample_rate
    while True:
        # option 1: sample data with a fixed rate
        # data_copy = data.sample(frac=sample_rate).sort_values(by=[sample_column])

        # option 2: sample data with a poisson process. Pick the closest packet to the arrival time
        # interArrivals = np.random.exponential(1/poisson_sample_rate, int(duration * poisson_sample_rate)) * 1000000000
        # interArrivals = np.cumsum(interArrivals)
        # interArrivals  = interArrivals + steadyStart * 1000000000
        # interArrivals = interArrivals[interArrivals > steadyStart * 1000000000]
        # interArrivals = interArrivals[interArrivals < steadyEnd * 1000000000]
        # data_copy = pd.DataFrame()
        # for i in range(len(interArrivals)):
        #     data_copy = pd.concat([data_copy, data.iloc[(data[sample_column] - interArrivals[i]).abs().argsort()[:1]]])

        # option 3: sample data with a poisson process. Pick the packets based on the exp distribution not the arrival time
        draws = np.random.exponential(mean_gap, len(data))
        keep = np.abs(draws - mean_gap) / mean_gap < 0.01
        sampled = data[keep].copy()
        sampled['IsSample'] = True
        sampled = sampled.sort_values(by=[sample_column])

        sampled['InterArrivalTime'] = sampled[sample_column].diff()
        # Only the first row's undefined gap should be removed; a bare dropna()
        # would also discard rows that have NaN in unrelated columns.
        sampled = sampled.dropna(subset=['InterArrivalTime']).reset_index(drop=True)

        statistic, critical_values, _ = anderson(sampled['InterArrivalTime'], 'expon')
        # Index 2 is the 5% significance level for 'expon' according to the
        # SciPy documentation for anderson().
        if statistic < critical_values[2]:
            return sampled.drop(columns=['InterArrivalTime'])


def switch_data(flowIndicatorDf, switchDf):
    """Join a flow's packets with one switch trace and return a random sample.

    The two frames are inner-joined on the six packet-identifying columns, so
    only packets present in both survive. The joined rows are then passed to
    ``sample_data`` with ``'SentTime'`` as the sampling column.
    """
    packet_key = [
        'SourceIp', 'SourcePort',
        'DestinationIp', 'DestinationPort',
        'PayloadSize', 'SequenceNb',
    ]
    matched = flowIndicatorDf.copy().merge(switchDf, on=packet_key, how='inner')
    return sample_data(matched, 'SentTime')

def intermediateLink_data(flowIndicatorDf, source, dest):
    """Pair each packet's ``SentTime`` at ``source`` with its ``ReceiveTime`` at ``dest``.

    ``source`` contributes its columns except ``ReceiveTime`` and ``dest`` its
    columns except ``SentTime``. Both are inner-joined to the flow's packets on
    the six packet-identifying columns, and the result is sampled by
    ``sample_data`` using ``'SentTime'``.
    """
    packet_key = [
        'SourceIp', 'SourcePort',
        'DestinationIp', 'DestinationPort',
        'PayloadSize', 'SequenceNb',
    ]
    departures = source.drop(columns=['ReceiveTime'])
    joined = flowIndicatorDf.copy().merge(departures, on=packet_key, how='inner')
    arrivals = dest.drop(columns=['SentTime'])
    joined = joined.merge(arrivals, on=packet_key, how='inner')
    return sample_data(joined, 'SentTime')

def get_delayMean(data):
    """Return the mean of ``|ReceiveTime - SentTime|`` over the rows of ``data``.

    Side effect: the per-row absolute difference is written to
    ``data['Delay']`` (the column is created or overwritten).
    """
    delay = (data['ReceiveTime'] - data['SentTime']).abs()
    data['Delay'] = delay
    return delay.mean()

def get_delayStd(data):
    """Return the standard deviation of ``|ReceiveTime - SentTime|`` over ``data``.

    Uses the pandas default for ``Series.std``. Side effect: the per-row
    absolute difference is written to ``data['Delay']`` (created or
    overwritten).
    """
    delay = (data['ReceiveTime'] - data['SentTime']).abs()
    data['Delay'] = delay
    return delay.std()

def get_statistics(data):
    """Summarise the delays in ``data`` as a dict.

    Keys: ``'DelayMean'`` and ``'DelayStd'`` (from ``get_delayMean`` and
    ``get_delayStd``, which also write ``data['Delay']``) and ``'sampleSize'``
    (the number of rows).
    """
    return {
        'DelayMean': get_delayMean(data),
        'DelayStd': get_delayStd(data),
        'sampleSize': len(data),
    }

In [12]:
def ECNMC(endToEnd_delayMean, sumOfSegments_DelayMeans, endToEnd_delayStd, MinSampleSize, confidenceValue):
    """Check whether the summed segment means fall inside the end-to-end margin.

    Returns True when ``|endToEnd_delayMean - sumOfSegments_DelayMeans|`` is at
    most ``confidenceValue * endToEnd_delayStd / sqrt(MinSampleSize)``, and
    False otherwise.
    """
    gap = abs(endToEnd_delayMean - sumOfSegments_DelayMeans)
    margin = confidenceValue * (endToEnd_delayStd / np.sqrt(MinSampleSize))
    # bool() keeps the result a plain Python bool rather than a numpy bool.
    return bool(gap <= margin)


def check_single_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run the ECNMC check for one flow.

    ``switches_statistics`` and ``interLinks_statistics`` map segment names to
    dicts holding ``'DelayMean'`` and ``'sampleSize'``. The segment means are
    summed, the smallest segment sample size is taken, and both are passed to
    ``ECNMC`` together with the flow's end-to-end ``'DelayMean'`` and
    ``'DelayStd'``.
    """
    switch_stats = list(switches_statistics.values())
    link_stats = list(interLinks_statistics.values())

    smallest_sample = min(
        [s['sampleSize'] for s in switch_stats] + [s['sampleSize'] for s in link_stats]
    )
    # Switch and link means are summed separately, then added together.
    segments_total = (
        sum([s['DelayMean'] for s in switch_stats])
        + sum([s['DelayMean'] for s in link_stats])
    )

    return ECNMC(
        endToEnd_statistics['DelayMean'],
        segments_total,
        endToEnd_statistics['DelayStd'],
        smallest_sample,
        confidenceValue,
    )
        



def check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run ``check_single_delayConsistency`` for every flow.

    All three statistics dicts are keyed by flow name; the flows iterated are
    the keys of ``endToEnd_statistics``. Returns a dict mapping each flow to
    the result of its check.
    """
    return {
        flow: check_single_delayConsistency(
            endToEnd_statistics[flow],
            switches_statistics[flow],
            interLinks_statistics[flow],
            confidenceValue,
        )
        for flow in endToEnd_statistics.keys()
    }

In [13]:
class AppKey:
    """Plain holder for a source IP/port and destination IP/port."""

    def __init__(self, sourceIp, sourcePort, destIp, destPort):
        # Store the four values unchanged as instance attributes.
        self.sourceIp, self.sourcePort = sourceIp, sourcePort
        self.destIp, self.destPort = destIp, destPort

In [14]:
# Load the simulation parameters from Parameters.config (section [Settings]).
config = configparser.ConfigParser()
config.read('Parameters.config')


def _read_setting(option):
    """Fetch ``option`` from the 'Settings' section and convert it with convert_to_float."""
    return convert_to_float(config.get('Settings', option))


hostToTorLinkRate = _read_setting('hostToTorLinkRate')
torToAggLinkRate = _read_setting('torToAggLinkRate')
aggToCoreLinkRate = _read_setting('aggToCoreLinkRate')
hostToTorLinkDelay = _read_setting('hostToTorLinkDelay')
torToAggLinkDelay = _read_setting('torToAggLinkDelay')
aggToCoreLinkDelay = _read_setting('aggToCoreLinkDelay')
pctPacedBack = _read_setting('pctPacedBack')
appDataRate = _read_setting('appDataRate')
duration = _read_setting('duration')
steadyStart = _read_setting('steadyStart')
steadyEnd = _read_setting('steadyEnd')

# Echo every parameter with its unit label.
for label, value, unit in (
    ("hostToTorLinkRate: ", hostToTorLinkRate, " Mbps"),
    ("torToAggLinkRate: ", torToAggLinkRate, " Mbps"),
    ("aggToCoreLinkRate: ", aggToCoreLinkRate, " Mbps"),
    ("hostToTorLinkDelay: ", hostToTorLinkDelay, " ms"),
    ("torToAggLinkDelay: ", torToAggLinkDelay, " ms"),
    ("aggToCoreLinkDelay: ", aggToCoreLinkDelay, " ms"),
    ("pctPacedBack: ", pctPacedBack, " %"),
    ("appDataRate: ", appDataRate, " Mbps"),
    ("duration: ", duration, " s"),
    ("steadyStart: ", steadyStart, " s"),
    ("steadyEnd: ", steadyEnd, " s"),
):
    print(label, value, unit)

hostToTorLinkRate:  50.0  Mbps
torToAggLinkRate:  50.0  Mbps
aggToCoreLinkRate:  50.0  Mbps
hostToTorLinkDelay:  3.0  ms
torToAggLinkDelay:  3.0  ms
aggToCoreLinkDelay:  3.0  ms
pctPacedBack:  0.8  %
appDataRate:  50.0  Mbps
duration:  60.0  s
steadyStart:  5.0  s
steadyEnd:  55.0  s


# Reading the Ground Truth

In [15]:
# Load every *_EndToEnd.csv trace; the part of the file name before the first
# underscore becomes the flow name.
file_paths = glob.glob('{}/scratch/Results/*_EndToEnd.csv'.format(__ns3_path))
endToEnd_dfs = {}
apps = []
print(file_paths)
for file_path in file_paths:
    df_name = os.path.basename(file_path).split('_')[0]
    df = pd.read_csv(file_path)
    # Keep only packets flagged as received.
    df = df[df['IsReceived'] == 1].reset_index(drop=True)
    # Keep packets sent strictly inside (steadyStart, steadyEnd); the factor
    # 1000000000 scales the bounds, given in seconds, to the SentTime column.
    in_window = (df['SentTime'] > steadyStart * 1000000000) & (df['SentTime'] < steadyEnd * 1000000000)
    df = df[in_window].drop(columns=['IsReceived'])
    print(len(df))
    endToEnd_dfs[df_name] = df

['/home/mahdi/Documents/ns-allinone-3.41/ns-3.41/scratch/Results/R0h1R1h1_EndToEnd.csv', '/home/mahdi/Documents/ns-allinone-3.41/ns-3.41/scratch/Results/R0h0R1h0_EndToEnd.csv']
94134
162535


In [16]:
# Load every *_Switch.csv trace; the part of the file name before the first
# underscore becomes the switch name.
file_paths = glob.glob('{}/scratch/Results/*_Switch.csv'.format(__ns3_path))
switch_dfs = {}

for file_path in file_paths:
    df_name = os.path.basename(file_path).split('_')[0]
    df = pd.read_csv(file_path)
    # Keep only packets flagged as sent.
    df = df[df['IsSent'] == 1].reset_index(drop=True)
    # Keep packets received strictly inside (steadyStart, steadyEnd); the factor
    # 1000000000 scales the bounds, given in seconds, to the ReceiveTime column.
    in_window = (df['ReceiveTime'] > steadyStart * 1000000000) & (df['ReceiveTime'] < steadyEnd * 1000000000)
    # Only the IsSent flag is dropped; all other columns are kept.
    df = df[in_window].drop(columns=['IsSent'])
    print(len(df))
    switch_dfs[df_name] = df

# Preview the first loaded switch trace.
next(iter(switch_dfs.values())).head()

256667
256621


Unnamed: 0,SourceIp,SourcePort,DestinationIp,DestinationPort,SequenceNb,PayloadSize,ReceiveTime,SentTime
12891,10.1.1.1,49896,10.2.1.1,4988,64909,1448,54999918353,55050882961
12892,10.1.1.1,50329,10.2.1.1,4752,191151,1448,54999678033,55050642641
12893,10.1.1.1,50331,10.2.1.1,4698,23079,1448,54999183473,55050148081
12894,10.1.1.1,50036,10.2.1.1,4381,1402445,1448,54998703793,55049499761
12895,10.1.2.1,49153,10.2.2.1,50000,115994001,1000,54998665401,55049331121


# Intermediate links statistics

In [17]:
# Delay statistics for the three links of each flow: source -> T0, T0 -> T1
# and T1 -> dest. Every link gets its own random sample.
interLinks_statistics = {}
for flow in endToEnd_dfs.keys():
    flow_df = endToEnd_dfs[flow]
    # Packet identifiers of the flow, without the end-to-end timestamps.
    flow_packets = flow_df.drop(columns=['SentTime', 'ReceiveTime'])
    links = (
        (('source', 'T0'), flow_df, switch_dfs['T0']),
        (('T0', 'T1'), switch_dfs['T0'], switch_dfs['T1']),
        (('T1', 'dest'), switch_dfs['T1'], flow_df),
    )
    interLinks_statistics[flow] = {
        link: get_statistics(intermediateLink_data(flow_packets, link_source, link_dest))
        for link, link_source, link_dest in links
    }

interLinks_statistics

{'R0h1R1h1': {('source', 'T0'): {'DelayMean': 3234728.4131736527,
   'DelayStd': 168688.35526223027,
   'sampleSize': 668},
  ('T0', 'T1'): {'DelayMean': 3184070.3511450384,
   'DelayStd': 30787.04290335104,
   'sampleSize': 655},
  ('T1', 'dest'): {'DelayMean': 3185205.1355206845,
   'DelayStd': 33021.96589960182,
   'sampleSize': 701}},
 'R0h0R1h0': {('source', 'T0'): {'DelayMean': 4817721.561739131,
   'DelayStd': 3971698.9390566046,
   'sampleSize': 1150},
  ('T0', 'T1'): {'DelayMean': 3198060.199335548,
   'DelayStd': 78401.92476448856,
   'sampleSize': 1204},
  ('T1', 'dest'): {'DelayMean': 3203143.5215946846,
   'DelayStd': 73652.43381161569,
   'sampleSize': 1204}}}

# Switches statistics

In [18]:
# Delay statistics inside each switch (T0, T1) for every flow, each computed
# on its own random sample.
switches_statistics = {}
for flow in endToEnd_dfs.keys():
    # Packet identifiers of the flow, without the end-to-end timestamps.
    flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
    switches_statistics[flow] = {
        switch: get_statistics(switch_data(flow_packets, switch_dfs[switch]))
        for switch in ('T0', 'T1')
    }

switches_statistics

{'R0h1R1h1': {'T0': {'DelayMean': 36380699.03930131,
   'DelayStd': 14563740.661017666,
   'sampleSize': 687},
  'T1': {'DelayMean': 740.0589970501475,
   'DelayStd': 9905.234818315397,
   'sampleSize': 678}},
 'R0h0R1h0': {'T0': {'DelayMean': 41978400.392255895,
   'DelayStd': 15550406.450109836,
   'sampleSize': 1188},
  'T1': {'DelayMean': 29864.924623115578,
   'DelayStd': 69585.09808001792,
   'sampleSize': 1194}}}

In [142]:
# free the switch_dfs
# switch_dfs = None

# Ground-Truth Delay Mean and Std

In [19]:
# Mean, standard deviation and sample size of the delay of each flow, computed
# over all of the flow's packets (no sampling).
endToEnd_statistics = {
    flow: get_statistics(endToEnd_dfs[flow])
    for flow in endToEnd_dfs.keys()
}

print(endToEnd_statistics)

{'R0h1R1h1': {'DelayMean': 45431745.190473154, 'DelayStd': 14634494.045921398, 'sampleSize': 94134}, 'R0h0R1h0': {'DelayMean': 54022334.55726459, 'DelayStd': 18523515.97545317, 'sampleSize': 162535}}


# End-to-End and Per-Segment Compatibility Check

In [20]:

# For every flow, test whether the sum of the sampled per-segment delay means
# lies within the end-to-end margin (see ECNMC); displays a {flow: bool} dict.
check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue)

{'R0h1R1h1': True, 'R0h0R1h0': True}

# Repeat sampling to check if the relation holds more than 95% of the time

In [21]:
# Number of rounds in which each flow passed the consistency check.
rounds_results = {}
rounds = 100
for flow in endToEnd_dfs.keys():
    rounds_results[flow] = 0

# The end-to-end statistics use every packet of a flow (no sampling), so they
# are identical in every round; compute them once instead of once per round.
endToEnd_statistics = {}
for flow in endToEnd_dfs.keys():
    endToEnd_statistics[flow] = get_statistics(endToEnd_dfs[flow])

for i in range(rounds):
    # Draw fresh samples for every link and every switch of every flow.
    interLinks_statistics = {}
    for flow in endToEnd_dfs.keys():
        # Packet identifiers of the flow, without the end-to-end timestamps.
        flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
        interLinks_statistics[flow] = {}
        interLinks_statistics[flow][('source', 'T0')] = get_statistics(intermediateLink_data(flow_packets, endToEnd_dfs[flow], switch_dfs['T0']))
        interLinks_statistics[flow][('T0', 'T1')] = get_statistics(intermediateLink_data(flow_packets, switch_dfs['T0'], switch_dfs['T1']))
        interLinks_statistics[flow][('T1', 'dest')] = get_statistics(intermediateLink_data(flow_packets, switch_dfs['T1'], endToEnd_dfs[flow]))

    switches_statistics = {}
    for flow in endToEnd_dfs.keys():
        flow_packets = endToEnd_dfs[flow].drop(columns=['SentTime', 'ReceiveTime'])
        switches_statistics[flow] = {}
        switches_statistics[flow]['T0'] = get_statistics(switch_data(flow_packets, switch_dfs['T0']))
        switches_statistics[flow]['T1'] = get_statistics(switch_data(flow_packets, switch_dfs['T1']))

    res = check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue)
    for flow in endToEnd_dfs.keys():
        # True counts as 1, False as 0.
        rounds_results[flow] += res[flow]

    print("Round {} Ended".format(i))

# Fraction of rounds in which each flow passed the check.
for flow in endToEnd_dfs.keys():
    print("Flow: ", flow, " Consistency: ", rounds_results[flow] / rounds)
    

Round 0 Ended
Round 1 Ended
Round 2 Ended
Round 3 Ended
Round 4 Ended
Round 5 Ended
Round 6 Ended
Round 7 Ended
Round 8 Ended
Round 9 Ended
Round 10 Ended
Round 11 Ended
Round 12 Ended
Round 13 Ended
Round 14 Ended
Round 15 Ended
Round 16 Ended
Round 17 Ended
Round 18 Ended
Round 19 Ended
Round 20 Ended
Round 21 Ended
Round 22 Ended
Round 23 Ended
Round 24 Ended
Round 25 Ended
Round 26 Ended
Round 27 Ended
Round 28 Ended
Round 29 Ended
Round 30 Ended
Round 31 Ended
Round 32 Ended
Round 33 Ended
Round 34 Ended
Round 35 Ended
Round 36 Ended
Round 37 Ended
Round 38 Ended
Round 39 Ended
Round 40 Ended
Round 41 Ended
Round 42 Ended
Round 43 Ended
Round 44 Ended
Round 45 Ended
Round 46 Ended
Round 47 Ended
Round 48 Ended
Round 49 Ended
Round 50 Ended
Round 51 Ended
Round 52 Ended
Round 53 Ended
Round 54 Ended
Round 55 Ended
Round 56 Ended
Round 57 Ended
Round 58 Ended
Round 59 Ended
Round 60 Ended
Round 61 Ended
Round 62 Ended
Round 63 Ended
Round 64 Ended
Round 65 Ended
Round 66 Ended
Round