In [1]:
import pandas as pd
import glob
import configparser
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import anderson

# Start from the ggplot theme, then override it in a single rcParams update:
# a large canvas, black-on-white colors, large fonts and thin lines.
plt.style.use('ggplot')
plt.rcParams.update({
    # canvas size
    "figure.figsize": (40, 20),
    # foreground colors
    "lines.color": "black",
    "patch.edgecolor": "black",
    "text.color": "black",
    # axes
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "grid.color": "gray",
    # figure background, on screen and when saved
    "figure.facecolor": "white",
    "figure.edgecolor": "white",
    "savefig.facecolor": "white",
    "savefig.edgecolor": "white",
    # text sizes and line width
    "font.size": 30,
    "xtick.labelsize": 30,
    "ytick.labelsize": 30,
    "lines.linewidth": 1.,
    "legend.fontsize": 10,
})

# Root of the ns-3 checkout whose scratch/Results directory holds the CSV traces.
# An explicit NS3_PATH environment variable takes precedence; otherwise the
# first `locate` hit whose path ends in /ns-3-dev is used.
__ns3_path = os.environ.get('NS3_PATH')
if not __ns3_path:
    # Close the pipe deterministically instead of leaking it.
    with os.popen('locate "ns-3-dev" | grep /ns-3-dev$') as _locate_pipe:
        _ns3_candidates = _locate_pipe.read().splitlines()
    if not _ns3_candidates:
        # Without this guard the lookup ends in an opaque IndexError.
        raise FileNotFoundError(
            "Could not find an 'ns-3-dev' directory with `locate`; "
            "set the NS3_PATH environment variable to the ns-3 checkout."
        )
    __ns3_path = _ns3_candidates[0]

# Fraction of rows drawn by each sampling pass.
sample_rate = 0.05
# Multiplier applied to the standard error in the consistency check.
confidenceValue = 1.96 # 95% confidence interval



In [2]:
# convert strings like "2Mbps" to float
def convert_to_float(x):
    """Parse a number with an optional unit suffix into a float.

    Data rates are normalized to Mbps and times to milliseconds:

    * ``"2Mbps"`` -> 2.0, ``"500Kbps"``/``"500kbps"`` -> 0.5,
      ``"1Gbps"`` -> 1000.0, ``"2000000bps"`` -> 2.0
    * ``"3ms"`` -> 3.0, ``"500us"`` -> 0.5, ``"1000000ns"`` -> 1.0
    * anything without a known suffix is passed to ``float`` unchanged.

    Parameters
    ----------
    x : str or number
        Value to convert. Surrounding whitespace is ignored.

    Returns
    -------
    float

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed by ``float``.
    """
    text = str(x).strip()
    # (suffix, numerator, denominator). The unit is matched as a real suffix,
    # so the characters stripped are always the ones that were matched.
    # Longer suffixes come first so the bare 'bps' cannot shadow 'Mbps' etc.
    unit_table = (
        ('Mbps', 1, 1),
        ('Kbps', 1, 1000),
        ('kbps', 1, 1000),
        ('Gbps', 1000, 1),
        ('bps', 1, 1000000),
        ('ms', 1, 1),
        ('us', 1, 1000),
        ('ns', 1, 1000000),
    )
    for suffix, numerator, denominator in unit_table:
        if text.endswith(suffix):
            return float(text[:-len(suffix)]) * numerator / denominator
    return float(text)
    
def sample_data(data, sample_rate, sample_column, max_attempts=None):
    """Draw a random subsample whose inter-arrival times look exponential.

    A fraction ``sample_rate`` of the rows is drawn and sorted by
    ``sample_column``. The differences between consecutive values of that
    column are tested with the Anderson-Darling test for the exponential
    distribution. A draw is accepted when the statistic is below
    ``critical_values[2]`` (the 5% level in SciPy's documented ordering for
    ``'expon'``); otherwise a new sample is drawn.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to sample from; it is not modified.
    sample_rate : float
        Fraction of rows drawn on every attempt (passed as ``frac``).
    sample_column : str
        Column used for sorting and for the inter-arrival times.
    max_attempts : int or None, optional
        Upper bound on the number of draws. ``None`` (the default) retries
        until a draw is accepted.

    Returns
    -------
    pandas.DataFrame
        The accepted sample, sorted by ``sample_column``, with a fresh
        0..n-1 index. Rows containing NaN are dropped, which always
        includes the first sorted row because it has no predecessor.

    Raises
    ------
    ValueError
        If a draw holds fewer than two rows, so no inter-arrival time exists
        and no draw could ever be accepted.
    RuntimeError
        If ``max_attempts`` draws were all rejected.
    """
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        drawn = data.sample(frac=sample_rate).sort_values(by=[sample_column])
        # The draw size is fixed by frac and len(data), so retrying cannot help.
        if len(drawn) < 2:
            raise ValueError(
                "sample_rate={} leaves {} row(s) out of {}; at least 2 are needed "
                "to compute inter-arrival times".format(sample_rate, len(drawn), len(data))
            )
        drawn['InterArrivalTime'] = drawn[sample_column].diff()
        drawn = drawn.dropna().reset_index(drop=True)
        result = anderson(drawn['InterArrivalTime'], 'expon')
        if result.statistic < result.critical_values[2]:
            # The helper column was only needed for the test.
            return drawn.drop(columns=['InterArrivalTime'])
    raise RuntimeError(
        "no sample passed the Anderson-Darling exponentiality test "
        "after {} attempt(s)".format(attempts)
    )

def switch_data(flowIndicatorDf, switchDf):
    """Join a flow's packet identifiers with one switch trace and subsample it.

    The two frames are inner-joined on the six packet-identifying columns.
    The result is passed to ``sample_data`` with the module-level
    ``sample_rate``, using 'SentTime' as the sampling column.
    """
    packet_keys = ['SourceIp', 'SourcePort', 'DestinationIp', 'DestinationPort', 'PayloadSize', 'SequenceNb']
    matched = pd.merge(flowIndicatorDf.copy(), switchDf, on=packet_keys, how='inner')
    return sample_data(matched, sample_rate, 'SentTime')

def intermediateLink_data(flowIndicatorDf, source, dest):
    """Build and subsample the per-packet records for the link source -> dest.

    'SentTime' is taken from ``source`` (its 'ReceiveTime' is dropped) and
    'ReceiveTime' from ``dest`` (its 'SentTime' is dropped). Both are
    inner-joined onto the flow's packet identifiers. The joined frame is then
    passed to ``sample_data`` with the module-level ``sample_rate``, using
    'SentTime' as the sampling column.
    """
    packet_keys = ['SourceIp', 'SourcePort', 'DestinationIp', 'DestinationPort', 'PayloadSize', 'SequenceNb']
    joined = flowIndicatorDf.copy()
    # Same join twice: first the sending side, then the receiving side.
    for side in (source.drop(columns=['ReceiveTime']), dest.drop(columns=['SentTime'])):
        joined = pd.merge(joined, side, on=packet_keys, how='inner')
    return sample_data(joined, sample_rate, 'SentTime')

def get_delayMean(data):
    """Return the mean absolute difference between 'ReceiveTime' and 'SentTime'.

    Side effect: the per-row value is stored in ``data['Delay']``.
    """
    elapsed = data['ReceiveTime'] - data['SentTime']
    data['Delay'] = elapsed.abs()
    return data['Delay'].mean()

def get_delayStd(data):
    """Return the standard deviation (pandas ``Series.std``) of the absolute
    difference between 'ReceiveTime' and 'SentTime'.

    Side effect: the per-row value is stored in ``data['Delay']``.
    """
    elapsed = data['ReceiveTime'] - data['SentTime']
    data['Delay'] = elapsed.abs()
    return data['Delay'].std()

def get_statistics(data):
    """Summarize a frame as its delay mean, delay std and row count.

    Returns a dict with the keys 'DelayMean', 'DelayStd' and 'sampleSize'.
    The delay helpers it calls add a 'Delay' column to ``data``.
    """
    return {
        'DelayMean': get_delayMean(data),
        'DelayStd': get_delayStd(data),
        'sampleSize': len(data),
    }

In [3]:
def ECNMC(endToEnd_delayMean, sumOfSegments_DelayMeans, endToEnd_delayStd, MinSampleSize, confidenceValue):
    """Check whether the summed segment means agree with the end-to-end mean.

    Returns True when the absolute gap between the two means is at most
    ``confidenceValue * endToEnd_delayStd / sqrt(MinSampleSize)``, and False
    otherwise.
    """
    gap = abs(endToEnd_delayMean - sumOfSegments_DelayMeans)
    tolerance = confidenceValue * (endToEnd_delayStd / np.sqrt(MinSampleSize))
    # bool() keeps the return a plain Python bool rather than a numpy one.
    return bool(gap <= tolerance)


def check_single_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Run the ECNMC check for a single flow.

    ``switches_statistics`` and ``interLinks_statistics`` map a segment to a
    dict holding 'DelayMean' and 'sampleSize'. Their delay means are summed
    and compared with the end-to-end 'DelayMean', using the end-to-end
    'DelayStd' and the smallest segment sample size.
    """
    switch_segments = list(switches_statistics.values())
    link_segments = list(interLinks_statistics.values())

    # The two groups are summed separately and then added.
    switch_total = sum([segment['DelayMean'] for segment in switch_segments])
    link_total = sum([segment['DelayMean'] for segment in link_segments])
    smallest_sample = min([segment['sampleSize'] for segment in switch_segments + link_segments])

    return ECNMC(
        endToEnd_statistics['DelayMean'],
        switch_total + link_total,
        endToEnd_statistics['DelayStd'],
        smallest_sample,
        confidenceValue,
    )
        



def check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue):
    """Print the single-flow consistency result for every flow.

    Flows are taken from the keys of ``endToEnd_statistics``; the other two
    mappings are indexed by the same flow names.
    """
    for flow, flow_statistics in endToEnd_statistics.items():
        verdict = check_single_delayConsistency(
            flow_statistics, switches_statistics[flow], interLinks_statistics[flow], confidenceValue
        )
        print("Flow: {}\n Result: {}".format(flow, verdict))

In [4]:
class AppKey:
    """Plain holder for an application's source and destination IP and port."""

    def __init__(self, sourceIp, sourcePort, destIp, destPort):
        # Source endpoint.
        self.sourceIp, self.sourcePort = sourceIp, sourcePort
        # Destination endpoint.
        self.destIp, self.destPort = destIp, destPort

In [5]:
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed. Fail here with a clear message instead of a later
# NoSectionError.
if not config.read('Parameters.config'):
    raise FileNotFoundError(
        "Could not read 'Parameters.config' from the working directory {}".format(os.getcwd())
    )


def _read_setting(name):
    """Return option ``name`` of the [Settings] section, parsed by convert_to_float."""
    return convert_to_float(config.get('Settings', name))


# Link rates.
hostToTorLinkRate = _read_setting('hostToTorLinkRate')
torToAggLinkRate = _read_setting('torToAggLinkRate')
aggToCoreLinkRate = _read_setting('aggToCoreLinkRate')
# Link delays.
hostToTorLinkDelay = _read_setting('hostToTorLinkDelay')
torToAggLinkDelay = _read_setting('torToAggLinkDelay')
aggToCoreLinkDelay = _read_setting('aggToCoreLinkDelay')
# Traffic and run parameters.
pctPacedBack = _read_setting('pctPacedBack')
appDataRate = _read_setting('appDataRate')
duration = _read_setting('duration')

print("hostToTorLinkRate: ", hostToTorLinkRate, " Mbps")
print("torToAggLinkRate: ", torToAggLinkRate, " Mbps")
print("aggToCoreLinkRate: ", aggToCoreLinkRate, " Mbps")
print("hostToTorLinkDelay: ", hostToTorLinkDelay, " ms")
print("torToAggLinkDelay: ", torToAggLinkDelay, " ms")
print("aggToCoreLinkDelay: ", aggToCoreLinkDelay, " ms")
print("pctPacedBack: ", pctPacedBack, " %")
print("appDataRate: ", appDataRate, " Mbps")
print("duration: ", duration, " s")

hostToTorLinkRate:  50.0  Mbps
torToAggLinkRate:  50.0  Mbps
aggToCoreLinkRate:  50.0  Mbps
hostToTorLinkDelay:  3.0  ms
torToAggLinkDelay:  3.0  ms
aggToCoreLinkDelay:  3.0  ms
pctPacedBack:  0.8  %
appDataRate:  50.0  Mbps
duration:  10.0  s


# Reading the ground truth

In [6]:
# Load every end-to-end trace; the dict key is the file-name prefix before the
# first underscore (e.g. 'R0h0R1h0').
file_paths = glob.glob('{}/scratch/Results/*_EndToEnd.csv'.format(__ns3_path))
endToEnd_dfs = {}
apps = []
print(file_paths)
for file_path in file_paths:
    df_name = file_path.split('/')[-1].split('_')[0]
    df = pd.read_csv(file_path)
    # Keep only packets that were actually received.
    df = df[df['IsReceived'] == 1].reset_index(drop=True)
    # Keep packets sent strictly between 2e9 and 9e9
    # (presumably nanoseconds, i.e. 2 s to 9 s; confirm against the trace writer).
    in_window = (df['SentTime'] > 2000000000) & (df['SentTime'] < 9000000000)
    df = df[in_window].drop(columns=['IsReceived'])
    print(len(df))
    endToEnd_dfs[df_name] = df

['/home/mahdi/Documents/NAL/ns-3-dev/scratch/Results/R0h1R1h1_EndToEnd.csv', '/home/mahdi/Documents/NAL/ns-3-dev/scratch/Results/R0h0R1h0_EndToEnd.csv']
20755
20754


In [7]:
# Load every switch trace; the dict key is the file-name prefix before the
# first underscore.
file_paths = glob.glob('{}/scratch/Results/*_Switch.csv'.format(__ns3_path))
switch_dfs = {}

for file_path in file_paths:
    df_name = file_path.split('/')[-1].split('_')[0]
    df = pd.read_csv(file_path)
    # Keep only packets the switch actually sent on.
    df = df[df['IsSent'] == 1].reset_index(drop=True)
    # Keep packets received by the switch strictly between 2e9 and 9e9
    # (presumably nanoseconds, i.e. 2 s to 9 s; confirm against the trace writer).
    in_window = (df['ReceiveTime'] > 2000000000) & (df['ReceiveTime'] < 9000000000)
    # Only the IsSent flag is dropped; every other column is kept.
    df = df[in_window].drop(columns=['IsSent'])
    print(len(df))
    switch_dfs[df_name] = df

# Preview the first loaded switch trace.
switch_dfs[list(switch_dfs)[0]].head()

41509
41508


Unnamed: 0,SourceIp,SourcePort,DestinationIp,DestinationPort,SequenceNb,PayloadSize,ReceiveTime,SentTime
3301,10.1.2.1,49153,10.2.2.1,50000,26536001,1000,8999938881,8999938881
3302,10.1.2.1,49153,10.2.2.1,50000,26535001,1000,8999770241,8999770241
3303,10.1.1.1,49153,10.2.1.1,50001,26536001,1000,8999601601,8999601601
3304,10.1.1.1,49153,10.2.1.1,50001,26535001,1000,8999432961,8999432961
3305,10.1.1.1,49153,10.2.1.1,50001,26534001,1000,8998927041,8998927041


# Intermediate links statistics

In [8]:
# Per flow, delay statistics of the three links on the path:
# source -> T0, T0 -> T1 and T1 -> dest.
interLinks_statistics = {}
for flow, endToEnd_df in endToEnd_dfs.items():
    # Packet identifiers of the flow, without its own timestamps.
    flow_packets = endToEnd_df.drop(columns=['SentTime', 'ReceiveTime'])
    # (link key, frame providing SentTime, frame providing ReceiveTime)
    hops = [
        (('source', 'T0'), endToEnd_df, switch_dfs['T0']),
        (('T0', 'T1'), switch_dfs['T0'], switch_dfs['T1']),
        (('T1', 'dest'), switch_dfs['T1'], endToEnd_df),
    ]
    interLinks_statistics[flow] = {
        hop: get_statistics(intermediateLink_data(flow_packets, sender, receiver))
        for hop, sender, receiver in hops
    }

interLinks_statistics

{'R0h1R1h1': {('source', 'T0'): {'DelayMean': 3173251.891891892,
   'DelayStd': 4665.689410293564,
   'sampleSize': 1036},
  ('T0', 'T1'): {'DelayMean': 3168640.0, 'DelayStd': 0.0, 'sampleSize': 1032},
  ('T1', 'dest'): {'DelayMean': 3168640.0,
   'DelayStd': 0.0,
   'sampleSize': 1032}},
 'R0h0R1h0': {('source', 'T0'): {'DelayMean': 3173018.378378378,
   'DelayStd': 4580.941708101073,
   'sampleSize': 1036},
  ('T0', 'T1'): {'DelayMean': 3168640.0, 'DelayStd': 0.0, 'sampleSize': 1032},
  ('T1', 'dest'): {'DelayMean': 3168640.0,
   'DelayStd': 0.0,
   'sampleSize': 1032}}}

# Switches statistics

In [9]:
# Per flow, delay statistics inside each switch on the path (T0, then T1).
switches_statistics = {}
for flow, endToEnd_df in endToEnd_dfs.items():
    # Packet identifiers of the flow, without its own timestamps.
    flow_packets = endToEnd_df.drop(columns=['SentTime', 'ReceiveTime'])
    switches_statistics[flow] = {
        switch: get_statistics(switch_data(flow_packets, switch_dfs[switch]))
        for switch in ('T0', 'T1')
    }

switches_statistics

{'R0h1R1h1': {'T0': {'DelayMean': 25232388.687258687,
   'DelayStd': 46791.722743545804,
   'sampleSize': 1036},
  'T1': {'DelayMean': 0.0, 'DelayStd': 0.0, 'sampleSize': 1032}},
 'R0h0R1h0': {'T0': {'DelayMean': 25235552.644787643,
   'DelayStd': 46265.73632645717,
   'sampleSize': 1036},
  'T1': {'DelayMean': 0.0, 'DelayStd': 0.0, 'sampleSize': 1032}}}

In [10]:
# Release the switch traces: rebinding the name to None drops this reference
# to the dict of DataFrames, so the memory can be reclaimed once nothing else
# refers to them. Cells that read switch_dfs must be run before this one.
switch_dfs = None

# Ground-truth delay mean and std

In [11]:
# Compute the delay mean, delay std and sample size of each flow from its
# full end-to-end trace. get_statistics also adds a 'Delay' column to each
# frame in endToEnd_dfs.
endToEnd_statistics = {
    flow: get_statistics(endToEnd_df)
    for flow, endToEnd_df in endToEnd_dfs.items()
}

print(endToEnd_statistics)

{'R0h1R1h1': {'DelayMean': 34744379.94796435, 'DelayStd': 46381.0474268288, 'sampleSize': 20755}, 'R0h0R1h0': {'DelayMean': 34745363.116507664, 'DelayStd': 46400.54762969484, 'sampleSize': 20754}}


# End-to-end and per-segment compatibility check

In [12]:

# For every flow, print whether the sum of the per-segment delay means
# (switches + intermediate links) agrees with the end-to-end delay mean,
# within confidenceValue standard errors.
check_all_delayConsistency(endToEnd_statistics, switches_statistics, interLinks_statistics, confidenceValue)

Flow: R0h1R1h1
 Result: True
Flow: R0h0R1h0
 Result: True
