# ARDDoS Data analysis for TFTP, SNMP and SSDP test traffic

In [25]:
# Generic
import os
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import math
import re
import ipaddress
import kaleido

# For showing progress bar of for loops
from progressbar import Bar, ETA, \
    AdaptiveETA, Percentage, \
    ProgressBar 
# Progress-bar layout reused by the byte-counting functions below:
# percentage, bar, ETA and adaptive ETA, separated by single spaces
widgets = [Percentage(),
            ' ', Bar(),
            ' ', ETA(),
            ' ', AdaptiveETA()]
# Module-level bar built from that layout; the get*Bytes functions below
# construct their own ProgressBar(widgets=widgets) for each run
pbar = ProgressBar(widgets=widgets)

# Own functions
from pcapreader import PcapReader
# Short alias so the functions below can call pcapToDf(path, True) directly
pcapToDf = PcapReader.pcapToDf

# For converting string to ip address in dataframe
from cyberpandas import to_ipaddress 
# For pretty printing dataframes
from tabulate import tabulate 
# For plotting
import plotly.express as px

# Functions

In [26]:
def refineLatexTableStringScientificPaper(latexTableString):
    r"""Post-process a LaTeX table string (as produced by DataFrame.to_latex in the
    cells below) for use in a paper.

    The string is edited purely by position, so it is expected to contain, in this
    order: a \caption, a \label, a header row with 'Level', a two-column
    \multicolumn{2}{l}{...} group and 'Amplification factor', a second header row
    with a \multicolumn{1}{l}{...} cell, and an \end{tabular}.

    Edits applied:
      * the first \multicolumn alignment letter is replaced by 'c';
      * the 18-character prefix of the second \multicolumn is removed, leaving '{...}';
      * 'Level' and 'Amplification factor' are wrapped in \multirow{2}{*}{...}
        (the latter is re-capitalised to 'Amplification Factor');
      * '\cline{2-5}' is inserted after the first row terminator;
      * 'tab:' is prefixed to the label;
      * the caption/label span is moved to directly after \end{tabular}.

    Parameters
    ----------
    latexTableString : str
        LaTeX source of the table.

    Returns
    -------
    str
        The rewritten LaTeX source. No validation is done: if an expected marker is
        missing, str.find returns -1 and the result is not meaningful.
    """
    # Raw strings are used for every LaTeX command: sequences such as "\m", "\c",
    # "\l" and "\e" are invalid escapes in normal string literals.
    multiColumn = r'\multicolumn'
    rowTerminator = '\\\\'
    endTabular = r'\end{tabular}'

    # Hacks for multiindex dataframe
    # r'\multicolumn{2}{' is 16 characters long, so offset 16 is the alignment letter
    multiColFirst = latexTableString.find(multiColumn)
    latexTableString = latexTableString[:multiColFirst+16] + 'c' + latexTableString[multiColFirst+17:]
    # r'\multicolumn{1}{l}' is 18 characters long; dropping it leaves only '{...}'
    startRemove = latexTableString.find(multiColumn, multiColFirst+1)
    latexTableString = latexTableString[:startRemove] + latexTableString[startRemove+18:]

    # Convert Level and Amplification factor to multirow (first occurrence only)
    for plainHeader, multirowHeader in (
            ('Level', r'\multirow{2}{*}{Level}'),
            ('Amplification factor', r'\multirow{2}{*}{Amplification Factor}')):
        headerIndex = latexTableString.find(plainHeader)
        latexTableString = latexTableString[:headerIndex] + multirowHeader + latexTableString[headerIndex+len(plainHeader):]

    # Adding a horizontal line for multindex table, right after the first row terminator
    firstRowEnd = latexTableString.find(rowTerminator) + len(rowTerminator)
    latexTableString = latexTableString[:firstRowEnd] + r'\cline{2-5}' + latexTableString[firstRowEnd:]

    # Find caption and label and put at the bottom
    captionIndex = latexTableString.find(r'\caption')
    labelIndex = latexTableString.find(r'\label')
    labelStart = latexTableString.find('{', labelIndex)
    # Put 'tab:' in front of the label text
    latexTableString = latexTableString[:labelStart+1] + 'tab:' + latexTableString[labelStart+1:]
    # The caption/label span ends at the closing brace of the label
    captionEndIndex = latexTableString.find('}', labelIndex+1) + 1
    endTableEndIndex = latexTableString.find(endTabular) + len(endTabular)
    return (latexTableString[:captionIndex]
            + latexTableString[captionEndIndex:endTableEndIndex]
            + latexTableString[captionIndex:captionEndIndex]
            + latexTableString[endTableEndIndex:])

In [27]:
## Collect all pcapng capture paths below a directory, grouped by role
def getFiles(baseDir):
    """Walk baseDir recursively and group the '.pcapng' files by role.

    A file belongs to a role when its file name (not its directory) contains
    'attacker', 'victim' or 'reflector'; a name containing several of these
    words is listed under each of them.

    Returns
    -------
    tuple of three lists of str
        (attacker paths, victim paths, reflector paths), each in os.walk order.
    """
    roles = ("attacker", "victim", "reflector")
    capturesByRole = {role: [] for role in roles}
    for folder, _subFolders, names in os.walk(baseDir):
        for name in names:
            # Only pcapng captures are of interest
            if not name.endswith(".pcapng"):
                continue
            fullPath = os.path.join(folder, name)
            for role in roles:
                if role in name:
                    capturesByRole[role].append(fullPath)
    return capturesByRole["attacker"], capturesByRole["victim"], capturesByRole["reflector"]

In [28]:
## Get attacker bytes sent
def getAttackBytes(attackerFilenames,Protocol,useCachedBytes=False):
    """Return the number of bytes sent by the attacker for each attack level.

    Parameters
    ----------
    attackerFilenames : list of str
        Attacker capture paths. The cache file lives two directory levels above
        the first entry, and the attack level is the first number found in each path.
    Protocol : str
        For "TFTP", "SSDP" and "SNMP" only rows whose 'Protocol' column equals
        this value are counted; for any other value every row is counted.
    useCachedBytes : bool, default False
        True  -> load the previously pickled result instead of parsing the captures.
        False -> parse every capture and (re)write the pickle.

    Returns
    -------
    pandas.DataFrame
        Columns 'Level' and 'Attacker Outbound', sorted by 'Level' when freshly computed.

    Raises
    ------
    FileNotFoundError
        If useCachedBytes is True and the pickle file does not exist.
    """
    print("Getting Attacker Bytes for " + Protocol + " from pcap files...")
    # The cache location is derived from this function's own file list, not from
    # whatever global capture list happens to be defined in the notebook.
    cacheDir = os.path.dirname(os.path.dirname(attackerFilenames[0]))
    picklePath = os.path.join(cacheDir, 'AttackerBytes_' + Protocol + '_' + '.pkl')

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # The pickle is a local cache written by this function; do not point it at untrusted files
        return pd.read_pickle(picklePath)

    pbar = ProgressBar(widgets=widgets)
    attackerBytes = []
    for attackFile in pbar(attackerFilenames):
        # Get attack level: first number in the path string = level
        level = int(re.search(r'\d+',attackFile).group(0))
        # assumes pcapToDf returns a DataFrame with 'Protocol' and 'Length' columns - TODO confirm
        attackerDf = pcapToDf(attackFile,True)
        if Protocol in ("TFTP", "SSDP", "SNMP"):
            # Keep only the packets of the protocol under test; summing their
            # lengths gives the total number of bytes sent by the attacker
            attackerDf = attackerDf.loc[attackerDf['Protocol'].isin([Protocol])]
        attackerBytesSent = attackerDf["Length"].sum()
        attackerBytes.append({'Level':level,'Attacker Outbound':attackerBytesSent})
    attackerBytes = pd.DataFrame(attackerBytes).sort_values('Level')
    # Save to pickle file for fast reloading
    attackerBytes.to_pickle(picklePath)
    return attackerBytes

In [29]:
## Get Victim bytes received
# NOTE(review): getVictimBytes is defined in three consecutive cells of this notebook;
# only the last definition that is executed takes effect. Keep one and delete the others.
def getVictimBytes(victimFilenames,Protocol,useCachedBytes=False):
    """Return the number of bytes received by the victim for each attack level.

    Parameters
    ----------
    victimFilenames : list of str
        Victim capture paths. The cache file lives two directory levels above
        the first entry, and the attack level is the first number found in each path.
    Protocol : str
        "TFTP", "SNMP" or "SSDP"; selects which packets are counted.
    useCachedBytes : bool, default False
        True  -> load the previously pickled result instead of parsing the captures.
        False -> parse every capture and (re)write the pickle.

    Returns
    -------
    pandas.DataFrame
        Columns 'Level' and 'Victim Inbound', sorted by 'Level' when freshly computed.

    Raises
    ------
    FileNotFoundError
        If useCachedBytes is True and the pickle file does not exist.
    ValueError
        If the captures have to be parsed and Protocol is not supported.
    """
    print("Getting Victim Bytes for " + Protocol + " from pcap files...")
    # The cache location is derived from this function's own file list, not from a
    # notebook global. The file name (no underscore after 'VictimBytes') is kept
    # as it was so that existing cache files are still found.
    cacheDir = os.path.dirname(os.path.dirname(victimFilenames[0]))
    picklePath = os.path.join(cacheDir, 'VictimBytes' + Protocol + '_' + '.pkl')

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # The pickle is a local cache written by this function; do not point it at untrusted files
        return pd.read_pickle(picklePath)

    # Fail early instead of hitting an unbound local inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "': expected 'TFTP', 'SNMP' or 'SSDP'")

    pbar = ProgressBar(widgets=widgets)
    victimBytes = []
    for victimFile in pbar(victimFilenames):
        # Get attack level: first number in the path string = level
        level = int(re.search(r'\d+',victimFile).group(0))
        # assumes pcapToDf returns a DataFrame with the columns used below - TODO confirm
        victimDf = pcapToDf(victimFile,True)
        if Protocol == "TFTP":
            # Count everything addressed to UDP destination port 50040
            # (per the original author: the port the TFTP data is transferred to on the victim)
            selected = victimDf.loc[(victimDf['UDP Destination Port'] == 50040)]
        elif Protocol == "SNMP":
            selected = victimDf.loc[victimDf['Protocol'].isin([Protocol])]
        else:
            # SSDP: skip packets sent to the multicast address 239.255.255.250
            selected = victimDf.loc[(victimDf['Protocol'] == Protocol) & (victimDf['Destination'] != ipaddress.ip_address('239.255.255.250'))]
        victimBytesReceived = selected["Length"].sum()
        victimBytes.append({'Level':level,'Victim Inbound':victimBytesReceived})
    victimBytes = pd.DataFrame(victimBytes).sort_values('Level')
    # Save to pickle file for fast reloading
    victimBytes.to_pickle(picklePath)
    return victimBytes

In [30]:
## Get Victim bytes received
# NOTE(review): getVictimBytes is defined in three consecutive cells of this notebook;
# only the last definition that is executed takes effect. Keep one and delete the others.
def getVictimBytes(victimFilenames,Protocol,useCachedBytes=False):
    """Return the number of bytes received by the victim for each attack level.

    Parameters
    ----------
    victimFilenames : list of str
        Victim capture paths. The cache file lives two directory levels above
        the first entry, and the attack level is the first number found in each path.
    Protocol : str
        "TFTP", "SNMP" or "SSDP"; selects which packets are counted.
    useCachedBytes : bool, default False
        True  -> load the previously pickled result instead of parsing the captures.
        False -> parse every capture and (re)write the pickle.

    Returns
    -------
    pandas.DataFrame
        Columns 'Level' and 'Victim Inbound', sorted by 'Level' when freshly computed.

    Raises
    ------
    FileNotFoundError
        If useCachedBytes is True and the pickle file does not exist.
    ValueError
        If the captures have to be parsed and Protocol is not supported.
    """
    print("Getting Victim Bytes for " + Protocol + " from pcap files...")
    # The cache location is derived from this function's own file list, not from a
    # notebook global. The file name (no underscore after 'VictimBytes') is kept
    # as it was so that existing cache files are still found.
    cacheDir = os.path.dirname(os.path.dirname(victimFilenames[0]))
    picklePath = os.path.join(cacheDir, 'VictimBytes' + Protocol + '_' + '.pkl')

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # The pickle is a local cache written by this function; do not point it at untrusted files
        return pd.read_pickle(picklePath)

    # Fail early instead of hitting an unbound local inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "': expected 'TFTP', 'SNMP' or 'SSDP'")

    pbar = ProgressBar(widgets=widgets)
    victimBytes = []
    for victimFile in pbar(victimFilenames):
        # Get attack level: first number in the path string = level
        level = int(re.search(r'\d+',victimFile).group(0))
        # assumes pcapToDf returns a DataFrame with the columns used below - TODO confirm
        victimDf = pcapToDf(victimFile,True)
        if Protocol == "TFTP":
            # Count everything addressed to UDP destination port 50040
            # (per the original author: the port the TFTP data is transferred to on the victim)
            selected = victimDf.loc[(victimDf['UDP Destination Port'] == 50040)]
        elif Protocol == "SNMP":
            selected = victimDf.loc[victimDf['Protocol'].isin([Protocol])]
        else:
            # SSDP: skip packets sent to the multicast address 239.255.255.250
            selected = victimDf.loc[(victimDf['Protocol'] == Protocol) & (victimDf['Destination'] != ipaddress.ip_address('239.255.255.250'))]
        victimBytesReceived = selected["Length"].sum()
        victimBytes.append({'Level':level,'Victim Inbound':victimBytesReceived})
    victimBytes = pd.DataFrame(victimBytes).sort_values('Level')
    # Save to pickle file for fast reloading
    victimBytes.to_pickle(picklePath)
    return victimBytes

In [31]:
## Get Victim bytes received
# NOTE(review): getVictimBytes is defined in three consecutive cells of this notebook;
# only the last definition that is executed takes effect. Keep one and delete the others.
def getVictimBytes(victimFilenames,Protocol,useCachedBytes=False):
    """Return the number of bytes received by the victim for each attack level.

    Parameters
    ----------
    victimFilenames : list of str
        Victim capture paths. The cache file lives two directory levels above
        the first entry, and the attack level is the first number found in each path.
    Protocol : str
        "TFTP", "SNMP" or "SSDP"; selects which packets are counted.
    useCachedBytes : bool, default False
        True  -> load the previously pickled result instead of parsing the captures.
        False -> parse every capture and (re)write the pickle.

    Returns
    -------
    pandas.DataFrame
        Columns 'Level' and 'Victim Inbound', sorted by 'Level' when freshly computed.

    Raises
    ------
    FileNotFoundError
        If useCachedBytes is True and the pickle file does not exist.
    ValueError
        If the captures have to be parsed and Protocol is not supported.
    """
    print("Getting Victim Bytes for " + Protocol + " from pcap files...")
    # The cache location is derived from this function's own file list, not from a
    # notebook global. The file name (no underscore after 'VictimBytes') is kept
    # as it was so that existing cache files are still found.
    cacheDir = os.path.dirname(os.path.dirname(victimFilenames[0]))
    picklePath = os.path.join(cacheDir, 'VictimBytes' + Protocol + '_' + '.pkl')

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # The pickle is a local cache written by this function; do not point it at untrusted files
        return pd.read_pickle(picklePath)

    # Fail early instead of hitting an unbound local inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "': expected 'TFTP', 'SNMP' or 'SSDP'")

    pbar = ProgressBar(widgets=widgets)
    victimBytes = []
    for victimFile in pbar(victimFilenames):
        # Get attack level: first number in the path string = level
        level = int(re.search(r'\d+',victimFile).group(0))
        # assumes pcapToDf returns a DataFrame with the columns used below - TODO confirm
        victimDf = pcapToDf(victimFile,True)
        if Protocol == "TFTP":
            # Count everything addressed to UDP destination port 50040
            # (per the original author: the port the TFTP data is transferred to on the victim)
            selected = victimDf.loc[(victimDf['UDP Destination Port'] == 50040)]
        elif Protocol == "SNMP":
            selected = victimDf.loc[victimDf['Protocol'].isin([Protocol])]
        else:
            # SSDP: skip packets sent to the multicast address 239.255.255.250
            selected = victimDf.loc[(victimDf['Protocol'] == Protocol) & (victimDf['Destination'] != ipaddress.ip_address('239.255.255.250'))]
        victimBytesReceived = selected["Length"].sum()
        victimBytes.append({'Level':level,'Victim Inbound':victimBytesReceived})
    victimBytes = pd.DataFrame(victimBytes).sort_values('Level')
    # Save to pickle file for fast reloading
    victimBytes.to_pickle(picklePath)
    return victimBytes

In [32]:
## Get Reflector bytes received and sent
def getReflectorBytes(reflectorFilenames,Protocol,useCachedBytes=False):
    """Return the bytes received and sent by the reflector for each attack level.

    Parameters
    ----------
    reflectorFilenames : list of str
        Reflector capture paths. The cache file lives two directory levels above
        the first entry, and the attack level is the first number found in each path.
    Protocol : str
        "TFTP", "SNMP" or "SSDP"; selects which packets count as inbound/outbound.
    useCachedBytes : bool, default False
        True  -> load the previously pickled result instead of parsing the captures.
        False -> parse every capture and (re)write the pickle.

    Returns
    -------
    pandas.DataFrame
        Columns 'Level', 'Reflector Inbound' and 'Reflector Outbound', sorted by
        'Level' when freshly computed.

    Raises
    ------
    Exception
        If useCachedBytes is True and the pickle file does not exist.
    ValueError
        If the captures have to be parsed and Protocol is not supported.
    """
    print("Getting Reflector Bytes for " + Protocol + " from pcap files...")
    picklePath = os.path.dirname(os.path.dirname(reflectorFilenames[0])) + '/ReflectorBytes_' + Protocol + '_' + '.pkl'

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise Exception("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # The pickle is a local cache written by this function; do not point it at untrusted files
        return pd.read_pickle(picklePath)

    # Fail early instead of hitting an unbound local inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "': expected 'TFTP', 'SNMP' or 'SSDP'")

    # SSDP multicast address; traffic sent to it is excluded from both SSDP counters
    ssdpMulticast = ipaddress.ip_address('239.255.255.250')
    pbar = ProgressBar(widgets=widgets)
    reflectorBytes = []
    for reflectorFile in pbar(reflectorFilenames):
        # Get attack level: first number in the path string = level
        level = int(re.search(r'\d+',reflectorFile).group(0))
        # assumes pcapToDf returns a DataFrame with the columns used below - TODO confirm
        reflectorDf = pcapToDf(reflectorFile,True)
        if Protocol == "TFTP":
            # Inbound = packets whose UDP source port is 50040, outbound = packets whose
            # UDP destination port is 50040 (presumably 50040 is the spoofed victim port
            # used by the attack tool - confirm)
            inboundRows = reflectorDf['UDP Source Port'] == 50040
            outboundRows = reflectorDf['UDP Destination Port'] == 50040
        elif Protocol == "SNMP":
            # Inbound = getBulkRequest packets, outbound = get-response packets
            isProtocol = reflectorDf['Protocol'] == Protocol
            inboundRows = isProtocol & (reflectorDf['Info'].str.contains("getBulkRequest"))
            outboundRows = isProtocol & (reflectorDf['Info'].str.contains("get-response"))
        else:
            # SSDP: inbound = non-multicast M-SEARCH packets,
            # outbound = non-multicast packets addressed to UDP destination port 50040
            unicastSsdp = (reflectorDf['Protocol'] == Protocol) & (reflectorDf['Destination'] != ssdpMulticast)
            inboundRows = unicastSsdp & (reflectorDf["Info"].str.contains("M-SEARCH"))
            outboundRows = unicastSsdp & (reflectorDf["UDP Destination Port"] == 50040)
        reflectorBytesReceived = reflectorDf.loc[inboundRows]["Length"].sum()
        reflectorBytesSent = reflectorDf.loc[outboundRows]["Length"].sum()
        reflectorBytes.append({'Level':level,'Reflector Inbound':reflectorBytesReceived,'Reflector Outbound':reflectorBytesSent})
    reflectorBytes = pd.DataFrame(reflectorBytes).sort_values('Level')
    # Save to pickle file for fast reloading
    reflectorBytes.to_pickle(picklePath)
    return reflectorBytes

In [33]:
## Data manipulation for presentation
def getStatDf(attackerBytes,victimBytes,reflectorBytes):
    """Combine the per-role byte counts into one table with two-level column headers.

    The three inputs are matched on their 'Level' column (not on their DataFrame
    index), so each output row describes a single attack level even when the inputs
    were built from capture lists in different orders.

    Parameters
    ----------
    attackerBytes : pandas.DataFrame
        Columns 'Level' and 'Attacker Outbound'.
    victimBytes : pandas.DataFrame
        Columns 'Level' and 'Victim Inbound'. Its rows (and row order) define the
        rows of the result.
    reflectorBytes : pandas.DataFrame
        Columns 'Level', 'Reflector Inbound' and 'Reflector Outbound'.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: 'Level', ('Victim','Inbound'), ('Reflector','Inbound'),
        ('Reflector','Outbound'), ('Attacker','Outbound'), 'Amplification factor',
        where the amplification factor is victim inbound / attacker outbound.
        Levels missing from the reflector or attacker input yield NaN in those columns.
    """
    # Line the three inputs up on 'Level'; a left merge keeps the victim rows and their order
    aligned = (
        victimBytes[['Level','Victim Inbound']]
        .merge(reflectorBytes[['Level','Reflector Inbound','Reflector Outbound']], on='Level', how='left')
        .merge(attackerBytes[['Level','Attacker Outbound']], on='Level', how='left')
    )
    # Keep the victim frame's index labels when the merge did not add rows
    if len(aligned) == len(victimBytes):
        aligned.index = victimBytes.index

    StatDf = pd.DataFrame(columns=pd.MultiIndex.from_tuples([("Victim", "Inbound"), ("Reflector", "Inbound"), ("Reflector", "Outbound"), ("Attacker", "Outbound")]))
    StatDf["Level"] = aligned['Level']
    StatDf[("Victim","Inbound")] = aligned['Victim Inbound']
    StatDf[("Reflector","Inbound")] = aligned['Reflector Inbound']
    StatDf[("Reflector","Outbound")] = aligned['Reflector Outbound']
    StatDf[("Attacker","Outbound")] = aligned['Attacker Outbound']
    StatDf['Amplification factor'] = StatDf[("Victim","Inbound")] / StatDf[("Attacker","Outbound")]
    # Move 'Level' to the front so it becomes the first table column
    column_to_move = StatDf.pop("Level")
    StatDf.insert(0, "Level", column_to_move)
    return StatDf

In [34]:
## Plotting
def PlotLevelAttackData(attackerBytes,victimBytes,reflectorBytes,Protocol):
    """Plot the four byte counters against attack level on a log-scaled y axis.

    The three inputs are merged on 'Level'; the figure is shown and also written to
    '<Protocol> Test Traffic.pdf' in the current working directory.

    Parameters
    ----------
    attackerBytes, victimBytes, reflectorBytes : pandas.DataFrame
        Frames sharing a 'Level' column and providing 'Attacker Outbound',
        'Victim Inbound', 'Reflector Inbound' and 'Reflector Outbound'.
    Protocol : str
        Used in the figure title and in the output file name.

    Returns
    -------
    None
    """
    # Get single index dataframe
    trafficByLevel = pd.merge(pd.merge(attackerBytes,victimBytes,on="Level"),reflectorBytes,on='Level')
    # Upper y limit: the largest numeric value rounded up to the next multiple of its
    # own power of ten (e.g. up to the nearest 100M); requires that maximum to be > 0
    maxN = trafficByLevel.select_dtypes(include=[np.number]).max().max()
    c = 10 ** int(math.log10(maxN)) # Same number of digits as max number
    yaxisRange = [10,math.ceil(maxN/c) * c]

    # Plot data
    fig = px.line(trafficByLevel,
                title=Protocol + " Test Traffic",
                x='Level',
                y=['Attacker Outbound','Victim Inbound', 'Reflector Inbound','Reflector Outbound'],
                log_y=True,
                range_y=yaxisRange,
                markers=True)
    tickFont = dict(size=16,color="black")
    axisTitleFont = dict(size=24,color="black")
    # Axis titles are styled through title.font; the older 'titlefont' axis attribute
    # is deprecated in plotly and rejected by recent releases
    fig.update_layout(
        yaxis=dict(
            tickfont=tickFont,
            title=dict(text="Bytes",font=axisTitleFont)),
        xaxis=dict(
            tickfont=tickFont,
            title=dict(text="Level",font=axisTitleFont)),
        title={'font': {'size': 36,'color':"black"}},
        legend = dict(font = dict(size = 20, color = "black",)),
        legend_title = dict(text="Legend",font = dict(size = 24,color="black"))
        )
    fig.update_traces(line=dict(width=3),marker=dict(size=10))
    fig.show()
    # NOTE(review): scale=-2 is kept as in the original; a negative scale factor looks
    # unintended - confirm against the plotly/kaleido documentation
    fig.write_image(Protocol + " Test Traffic.pdf", width=1920/2, height=1080/2,scale=-2,engine='kaleido')

# TFTP DATA OWN TOOL

In [35]:
# Collect the attacker/victim/reflector capture paths of the TFTP experiment
attackerFilenames,victimFilenames,reflectorFilenames = getFiles('./pcap_tftp_own_tool')
# The three capture lists must have the same length
assert len(attackerFilenames) == len(victimFilenames) == len(reflectorFilenames)
# Protocol selects the packet filters used by the get*Bytes functions
Protocol = "TFTP"
# True -> load the pickled byte counts instead of re-parsing the pcap files
# (the pickle files must already exist; set to False to rebuild them)
useCachedBytes = True

## Reading the data

In [36]:
# Per-level byte counts for each role; with useCachedBytes=True they are read from
# the pickle caches, otherwise the pcap files are parsed
attackerBytes = getAttackBytes(attackerFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
victimBytes = getVictimBytes(victimFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
reflectorBytes = getReflectorBytes(reflectorFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)

Getting Attacker Bytes for TFTP from pcap files...
Getting Victim Bytes for TFTP from pcap files...
Getting Reflector Bytes for TFTP from pcap files...


## Displaying the data in latex

In [37]:
# Build the combined statistics table and print it as LaTeX source
StatDf = getStatDf(attackerBytes,victimBytes,reflectorBytes)
latexTableString = StatDf.to_latex(column_format='ccrrrc',index=False,caption="TFTP DDoS test traffic",label="TFTP_Test_Traffic",position='H') # For getting the table into the report
# Header/caption clean-up for the paper layout
latexTableString = refineLatexTableStringScientificPaper(latexTableString)
print(latexTableString)

\begin{table}[H]
\centering

\begin{tabular}{ccrrrc}
\toprule
\multirow{2}{*}{Level} &  Victim & \multicolumn{2}{c}{Reflector} &  Attacker & \multirow{2}{*}{Amplification Factor} \\\cline{2-5}
      & Inbound &   Inbound & Outbound & {Outbound} \\
\midrule
    0 &   51540 &       900 &    51540 &       900 &            57.266667 \\
    1 &  515400 &      9000 &   515400 &      9000 &            57.266667 \\
    2 & 1823388 &     87600 &  1823388 &     87600 &            20.814932 \\
    3 & 2489146 &    715882 &  2489146 &    715800 &             3.477432 \\
    4 & 3998028 &   4201042 &  3998028 &   4200960 &             0.951694 \\
    5 & 3769330 &  10570740 &  3769330 &  10570740 &             0.356581 \\
    6 & 3181882 &  41926080 &  3082598 &  42887280 &             0.074192 \\
    7 & 3324972 &  87821842 &  3087582 & 113922480 &             0.029186 \\
    8 & 3164170 &  86906002 &  2908100 & 118607400 &             0.026678 \\
    9 & 3061506 &  88363740 &  2885362 & 117188340

## Plotting

In [38]:
# Show the log-scale traffic plot and export it as '<Protocol> Test Traffic.pdf'
PlotLevelAttackData(attackerBytes,victimBytes,reflectorBytes,Protocol)

# SNMP DATA OWN TOOL

In [39]:
# Collect the attacker/victim/reflector capture paths of the SNMP experiment
attackerFilenames,victimFilenames,reflectorFilenames = getFiles('./pcap_snmp_own_tool')
# The three capture lists must have the same length
assert len(attackerFilenames) == len(victimFilenames) == len(reflectorFilenames)
# Protocol selects the packet filters used by the get*Bytes functions
Protocol = "SNMP"
# True -> load the pickled byte counts instead of re-parsing the pcap files
# (the pickle files must already exist; set to False to rebuild them)
useCachedBytes = True

## Reading the data

In [40]:
# Per-level byte counts for each role; with useCachedBytes=True they are read from
# the pickle caches, otherwise the pcap files are parsed
attackerBytes = getAttackBytes(attackerFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
victimBytes = getVictimBytes(victimFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
reflectorBytes = getReflectorBytes(reflectorFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)

Getting Attacker Bytes for SNMP from pcap files...
Getting Victim Bytes for SNMP from pcap files...
Getting Reflector Bytes for SNMP from pcap files...


## Displaying the data in latex

In [41]:
# Build the combined statistics table and print it as LaTeX source
StatDf = getStatDf(attackerBytes,victimBytes,reflectorBytes)
latexTableString = StatDf.to_latex(column_format='ccrrrc',index=False,caption="SNMP DDoS test traffic",label="SNMP_Test_Traffic",position='H') # For getting the table into the report
# Header/caption clean-up for the paper layout
latexTableString = refineLatexTableStringScientificPaper(latexTableString)
print(latexTableString)

\begin{table}[H]
\centering

\begin{tabular}{ccrrrc}
\toprule
\multirow{2}{*}{Level} &   Victim & \multicolumn{2}{c}{Reflector} &  Attacker & \multirow{2}{*}{Amplification Factor} \\\cline{2-5}
      &  Inbound &   Inbound & Outbound & {Outbound} \\
\midrule
    0 &    21885 &      1140 &    21885 &      1140 &            19.197368 \\
    1 &   218850 &     11400 &   218850 &     11400 &            19.197368 \\
    2 &  2103878 &    109592 &  2103878 &    109592 &            19.197368 \\
    3 & 16054836 &    943388 & 16054836 &    943388 &            17.018275 \\
    4 & 15679873 &   6193392 & 15679873 &   6193392 &             2.531710 \\
    5 & 15285943 &  13757292 & 15285943 &  13757292 &             1.111116 \\
    6 & 14941619 &  61406936 & 14425133 &  63786952 &             0.234243 \\
    7 & 14597295 & 127481032 & 12757496 & 178241888 &             0.081896 \\
    8 & 14784047 & 129665424 & 13094525 & 177508260 &             0.083287 \\
    9 & 14862833 & 119897980 & 12318337

## Plotting

In [42]:
# Show the log-scale traffic plot and export it as '<Protocol> Test Traffic.pdf'
PlotLevelAttackData(attackerBytes,victimBytes,reflectorBytes,Protocol)

# SSDP DATA OWN TOOL

In [19]:
# Collect the attacker/victim/reflector capture paths of the SSDP experiment
attackerFilenames,victimFilenames,reflectorFilenames = getFiles('./pcap_ssdp_own_tool')
# The three capture lists must have the same length
assert len(attackerFilenames) == len(victimFilenames) == len(reflectorFilenames)
# Protocol selects the packet filters used by the get*Bytes functions
Protocol = "SSDP"
# True -> load the pickled byte counts instead of re-parsing the pcap files
# (the pickle files must already exist; set to False to rebuild them)
useCachedBytes = True

## Reading the data

In [20]:
# Per-level byte counts for each role; with useCachedBytes=True they are read from
# the pickle caches, otherwise the pcap files are parsed
attackerBytes = getAttackBytes(attackerFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
victimBytes = getVictimBytes(victimFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)
reflectorBytes = getReflectorBytes(reflectorFilenames,Protocol=Protocol,useCachedBytes=useCachedBytes)

Getting Attacker Bytes for SSDP from pcap files...
Getting Victim Bytes for SSDP from pcap files...
Getting Reflector Bytes for SSDP from pcap files...


## Displaying the data in latex

In [23]:
# Build the combined statistics table and print it as LaTeX source
StatDf = getStatDf(attackerBytes,victimBytes,reflectorBytes)
latexTableString = StatDf.to_latex(column_format='ccrrrc',index=False,caption="SSDP DDoS test traffic",label="SSDP_Test_Traffic",position='H') # For getting the table into the report
# Header/caption clean-up for the paper layout
latexTableString = refineLatexTableStringScientificPaper(latexTableString)
print(latexTableString)

\begin{table}[H]
\centering

\begin{tabular}{ccrrrc}
\toprule
\multirow{2}{*}{Level} &    Victim & \multicolumn{2}{c}{Reflector} &  Attacker & \multirow{2}{*}{Amplification Factor} \\\cline{2-5}
      &   Inbound &   Inbound & Outbound & {Outbound} \\
\midrule
    0 &      4305 &      2040 &     4305 &      2040 &             2.110294 \\
    1 &     43050 &     20400 &    43050 &     20400 &             2.110294 \\
    2 &    419307 &    198696 &   419307 &    198696 &             2.110294 \\
    3 &   3401237 &   1611736 &  3401237 &   1611736 &             2.110294 \\
    4 &  21642670 &  10255760 & 21642670 &  10255760 &             2.110294 \\
    5 &  50067724 &  22827056 & 48068195 &  23774432 &             2.105948 \\
    6 & 101404275 &  80468344 & 85919764 &  95273848 &             1.064345 \\
    7 & 104109537 & 118981640 & 53743907 & 266397072 &             0.390806 \\
    8 & 106204924 & 131557152 & 54328526 & 321798712 &             0.330035 \\
    9 & 105126378 & 14481484

## Plotting

In [24]:
# Show the log-scale traffic plot and export it as '<Protocol> Test Traffic.pdf'
PlotLevelAttackData(attackerBytes,victimBytes,reflectorBytes,Protocol)