# ARDDoS Data analysis for TFTP, SNMP and SSDP test traffic

In [None]:
# Generic
import os
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import math

# For showing progress bar of for loops
from progressbar import Bar, ETA, \
    AdaptiveETA, Percentage, \
    ProgressBar 
# Progress-bar layout: percentage, bar, ETA and adaptive ETA, separated by single spaces
widgets = [
    Percentage(), ' ',
    Bar(), ' ',
    ETA(), ' ',
    AdaptiveETA(),
]
# Module-level progress bar built from the layout above
pbar = ProgressBar(widgets=widgets)

# Own functions (PcapReader comes from the pcapreader module)
from pcapreader import PcapReader
# Module-level alias; the get*Bytes functions below call it as pcapToDf(<pcap path>, True)
pcapToDf = PcapReader.pcapToDf

# For converting string to ip address in dataframe
from cyberpandas import to_ipaddress 
# For pretty printing dataframes
from tabulate import tabulate 
# For plotting
import plotly.express as px

# Functions

In [None]:
def refineLatexTableStringScientificPaper(latexTableString):
    r"""Rework a LaTeX table string into the layout used in the paper.

    The string is edited in the steps below. Each step is skipped when its
    marker is not present, so text without any marker is returned unchanged.

    1. Multiindex header hacks: the character at offset 16 of the first
       ``\multicolumn`` is replaced by ``c`` and 18 characters are removed at
       the start of the second ``\multicolumn``.
    2. The first ``Level`` and the first ``Amplification factor`` are wrapped
       in ``\multirow{2}{*}{...}``; the latter is re-capitalised to
       ``Amplification Factor``.
    3. ``\cline{2-5}`` is inserted right after the first ``\\``.
    4. The ``\caption{...}`` command (up to its first ``}``) is moved to
       directly after the first ``\end{tabular}``.

    Parameters:
        latexTableString: LaTeX source of the table, e.g. the string produced
            by ``DataFrame.to_latex`` in the cells below.

    Returns:
        The edited LaTeX string.
    """
    multicolumn = '\\multicolumn'
    endTabular = '\\end{tabular}'
    rowBreak = '\\\\'

    # Hacks for multiindex dataframe
    firstMulti = latexTableString.find(multicolumn)
    if firstMulti != -1:
        # Offset 16 is the alignment letter when the command reads
        # '\multicolumn{N}{x}' with a one-character N (assumption kept from the original hack)
        alignAt = firstMulti + 16
        latexTableString = latexTableString[:alignAt] + 'c' + latexTableString[alignAt + 1:]
        secondMulti = latexTableString.find(multicolumn, firstMulti + 1)
        if secondMulti != -1:
            # 18 == len('\multicolumn{N}{x}'); whatever follows the command is kept
            latexTableString = latexTableString[:secondMulti] + latexTableString[secondMulti + 18:]

    # Convert Level and Amplification factor to multirow (first occurrence only)
    latexTableString = latexTableString.replace('Level', '\\multirow{2}{*}{Level}', 1)
    latexTableString = latexTableString.replace('Amplification factor', '\\multirow{2}{*}{Amplification Factor}', 1)

    # Adding a horizontal line for multiindex table, right after the first row terminator
    firstRowEnd = latexTableString.find(rowBreak)
    if firstRowEnd != -1:
        insertAt = firstRowEnd + len(rowBreak)
        latexTableString = latexTableString[:insertAt] + '\\cline{2-5}' + latexTableString[insertAt:]

    # Find caption and put it at the bottom, directly after the tabular environment.
    # NOTE: the caption is taken up to its first '}', so nested braces are not supported.
    captionStart = latexTableString.find('\\caption')
    captionClose = latexTableString.find('}', captionStart + 1) if captionStart != -1 else -1
    if captionClose != -1 and endTabular in latexTableString:
        captionEnd = captionClose + 1
        caption = latexTableString[captionStart:captionEnd]
        # Remove the caption first so the insertion index is computed on the final text
        latexTableString = latexTableString[:captionStart] + latexTableString[captionEnd:]
        tabularEnd = latexTableString.find(endTabular) + len(endTabular)
        latexTableString = latexTableString[:tabularEnd] + caption + latexTableString[tabularEnd:]
    return latexTableString

In [None]:
## Get All pcap filenames:
def getFiles(baseDir):
    """Collect the ``.pcapng`` files below ``baseDir``, grouped by role keyword.

    A file is assigned to a role when its file name (not its directory)
    contains "attacker", "victim" or "reflector"; a name containing several
    keywords lands in several lists.

    Parameters:
        baseDir: directory that is walked recursively with ``os.walk``.

    Returns:
        Tuple ``(attackerFilenames, victimFilenames, reflectorFilenames)`` of
        path lists, in the order ``os.walk`` yields the files.
    """
    pathsByRole = {"attacker": [], "victim": [], "reflector": []}
    for folder, _subdirs, names in os.walk(baseDir):
        for name in names:
            # Only pcapng captures are of interest
            if not name.endswith(".pcapng"):
                continue
            fullPath = os.path.join(folder, name)
            for role, bucket in pathsByRole.items():
                if role in name:
                    bucket.append(fullPath)
    return pathsByRole["attacker"], pathsByRole["victim"], pathsByRole["reflector"]

In [None]:
## Get attacker bytes sent
def getAttackBytes(attackerFilenames,Protocol,useCachedBytes=False):
    """Sum the bytes sent by the attacker for every attack level.

    Parameters:
        attackerFilenames: paths of the attacker pcap files. Each path is
            expected to contain 'level' followed by one character, which
            together form the level label (e.g. 'level3').
        Protocol: protocol name. For "TFTP", "SSDP" and "SNMP" only rows whose
            'Protocol' column equals it are counted; for any other value all
            rows of the capture are counted.
        useCachedBytes: when True, load the result from the pickle written by
            an earlier run instead of parsing the captures again.

    Returns:
        DataFrame with the columns 'Level' and 'Attacker Outbound', sorted by
        'Level'.

    Raises:
        FileNotFoundError: ``useCachedBytes`` is True but no pickle exists.
    """
    print("Getting Attacker Bytes for " + Protocol + " from pcap files...")
    # The cache lives two directory levels above the first attacker capture.
    # Derived from the function's own argument, not from a notebook global.
    picklePath = os.path.dirname(os.path.dirname(attackerFilenames[0])) + '/AttackerBytes_' + Protocol + '_' + '.pkl'

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # NOTE: unpickling executes code from the file; only load caches written by this notebook.
        return pd.read_pickle(picklePath)

    filterByProtocol = Protocol in ("TFTP", "SSDP", "SNMP")
    pbar = ProgressBar(widgets=widgets)
    rows = []
    for attackFile in pbar(attackerFilenames):
        # Get attack level: 'level' plus the single character that follows it
        start = attackFile.find('level')
        level = attackFile[start:start + len('level') + 1]
        attackerDf = pcapToDf(attackFile,True)
        if filterByProtocol:
            # Keeping only the packets of the protocol and summing their lengths
            # gives the total number of bytes sent by the attacker
            attackerDf = attackerDf.loc[attackerDf['Protocol'].isin([Protocol])]
        rows.append({'Level':level,'Attacker Outbound':attackerDf["Length"].sum()})
    attackerBytes = pd.DataFrame(rows).sort_values('Level')
    # Save to pickle file for fast reloading
    attackerBytes.to_pickle(picklePath)
    return attackerBytes

In [None]:
## Get Victim bytes received
def getVictimBytes(victimFilenames,Protocol,useCachedBytes=False):
    """Sum the attack bytes received by the victim for every attack level.

    Parameters:
        victimFilenames: paths of the victim pcap files. Each path is expected
            to contain 'level' followed by one character, which together form
            the level label (e.g. 'level3').
        Protocol: "TFTP", "SNMP" or "SSDP"; selects which packets are counted.
        useCachedBytes: when True, load the result from the pickle written by
            an earlier run instead of parsing the captures again.

    Returns:
        DataFrame with the columns 'Level' and 'Victim Inbound', sorted by
        'Level'.

    Raises:
        FileNotFoundError: ``useCachedBytes`` is True but no pickle exists.
        ValueError: the captures have to be parsed and ``Protocol`` is not one
            of the supported values.
    """
    print("Getting Victim Bytes for " + Protocol + " from pcap files...")
    # The cache lives two directory levels above the first victim capture.
    # Derived from the function's own argument, not from a notebook global.
    picklePath = os.path.dirname(os.path.dirname(victimFilenames[0])) + '/VictimBytes' + Protocol + '_' + '.pkl'

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise FileNotFoundError("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # NOTE: unpickling executes code from the file; only load caches written by this notebook.
        return pd.read_pickle(picklePath)

    # Fail before any capture is parsed instead of hitting an unbound variable inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "'; expected 'TFTP', 'SNMP' or 'SSDP'")

    pbar = ProgressBar(widgets=widgets)
    rows = []
    for victimFile in pbar(victimFilenames):
        # Get attack level: 'level' plus the single character that follows it
        start = victimFile.find('level')
        level = victimFile[start:start + len('level') + 1]
        victimDf = pcapToDf(victimFile,True)
        if Protocol == "TFTP":
            # In the victim pcap, filtering by UDP destination port 50040 gives the
            # tftp data transferred to the victim (port number per the original author's setup)
            victimBytesReceived = victimDf.loc[(victimDf['UDP Destination Port'] == 50040)]["Length"].sum()
        elif Protocol == "SNMP":
            victimBytesReceived = victimDf.loc[victimDf['Protocol'].isin([Protocol])]["Length"].sum()
        else:
            # SSDP: count SSDP packets whose destination is not the address 239.255.255.250
            victimBytesReceived = victimDf.loc[(victimDf['Protocol'] == Protocol) & (victimDf['Destination'] != to_ipaddress('239.255.255.250'))]["Length"].sum()
        rows.append({'Level':level,'Victim Inbound':victimBytesReceived})
    victimBytes = pd.DataFrame(rows).sort_values('Level')
    # Save to pickle file for fast reloading
    victimBytes.to_pickle(picklePath)
    return victimBytes

In [None]:
## Get Reflector bytes received and sent
def getReflectorBytes(reflectorFilenames,Protocol,useCachedBytes=False):
    """Sum the bytes received and sent by the reflector for every attack level.

    Parameters:
        reflectorFilenames: paths of the reflector pcap files. Each path is
            expected to contain 'level' followed by one character, which
            together form the level label (e.g. 'level3').
        Protocol: "TFTP", "SNMP" or "SSDP"; selects which packets are counted.
        useCachedBytes: when True, load the result from the pickle written by
            an earlier run instead of parsing the captures again.

    Returns:
        DataFrame with the columns 'Level', 'Reflector Inbound' and
        'Reflector Outbound', sorted by 'Level'.

    Raises:
        Exception: ``useCachedBytes`` is True but no pickle exists.
        ValueError: the captures have to be parsed and ``Protocol`` is not one
            of the supported values.
    """
    print("Getting Reflector Bytes for " + Protocol + " from pcap files...")
    # The cache lives two directory levels above the first reflector capture
    picklePath = os.path.dirname(os.path.dirname(reflectorFilenames[0])) + '/ReflectorBytes_' + Protocol + '_' + '.pkl'

    if useCachedBytes:
        if not os.path.exists(picklePath):
            raise Exception("Error no cached pickle file. Run the function with useCachedBytes=False to recalcuate the df, and create the pickle file '" + os.path.basename(picklePath) + "'" )
        # NOTE: unpickling executes code from the file; only load caches written by this notebook.
        return pd.read_pickle(picklePath)

    # Fail before any capture is parsed instead of hitting an unbound variable inside the loop
    if Protocol not in ("TFTP", "SNMP", "SSDP"):
        raise ValueError("Unsupported Protocol '" + str(Protocol) + "'; expected 'TFTP', 'SNMP' or 'SSDP'")

    pbar = ProgressBar(widgets=widgets)
    rows = []
    for reflectorFile in pbar(reflectorFilenames):
        # Get attack level: 'level' plus the single character that follows it
        start = reflectorFile.find('level')
        level = reflectorFile[start:start + len('level') + 1]
        reflectorDf = pcapToDf(reflectorFile,True)
        if Protocol == "TFTP":
            # In the reflector pcap, packets with UDP source port 50040 are counted as
            # inbound and packets with UDP destination port 50040 as outbound
            reflectorBytesReceived = reflectorDf.loc[(reflectorDf['UDP Source Port'] == 50040)]["Length"].sum()
            reflectorBytesSent = reflectorDf.loc[(reflectorDf['UDP Destination Port'] == 50040)]["Length"].sum()
        elif Protocol == "SNMP":
            # Requests ("getBulkRequest") are inbound, replies ("get-response") are outbound
            isProtocol = reflectorDf['Protocol'] == Protocol
            reflectorBytesReceived = reflectorDf.loc[isProtocol & (reflectorDf['Info'].str.contains("getBulkRequest"))]["Length"].sum()
            reflectorBytesSent = reflectorDf.loc[isProtocol & (reflectorDf['Info'].str.contains("get-response"))]["Length"].sum()
        else:
            # SSDP: ignore packets addressed to 239.255.255.250; "M-SEARCH" packets are
            # inbound, packets whose Info contains "HTTP" are outbound
            isUnicastSsdp = (reflectorDf['Protocol'] == Protocol) & (reflectorDf['Destination'] != to_ipaddress('239.255.255.250'))
            reflectorBytesReceived = reflectorDf.loc[isUnicastSsdp & (reflectorDf["Info"].str.contains("M-SEARCH"))]["Length"].sum()
            reflectorBytesSent = reflectorDf.loc[isUnicastSsdp & (reflectorDf["Info"].str.contains("HTTP"))]["Length"].sum()
        rows.append({'Level':level,'Reflector Inbound':reflectorBytesReceived,'Reflector Outbound':reflectorBytesSent})
    reflectorBytes = pd.DataFrame(rows).sort_values('Level')
    # Save to pickle file for fast reloading
    reflectorBytes.to_pickle(picklePath)
    return reflectorBytes

In [None]:
## Data manipulation for presentation
def getStatDf(attackerBytes,victimBytes,reflectorBytes):
    """Combine the per-role byte totals into one presentation table.

    Parameters:
        attackerBytes: frame with the columns 'Level' and 'Attacker Outbound'.
        victimBytes: frame with the columns 'Level' and 'Victim Inbound'.
        reflectorBytes: frame with the columns 'Level', 'Reflector Inbound'
            and 'Reflector Outbound'.

    Returns:
        DataFrame with two-level column headers: 'Level' first, then
        (Victim, Inbound), (Reflector, Inbound), (Reflector, Outbound),
        (Attacker, Outbound) and 'Amplification factor', which is
        Victim Inbound divided by Attacker Outbound.
    """
    def _orderedByLevel(df):
        # Column assignment below aligns on the index, and the inputs keep the index
        # they had before being sorted. Give every frame the same 0..n-1 index in
        # Level order so rows are paired by level rather than by discovery order.
        # (mergesort is stable, so already-sorted input keeps its row order.)
        return df.sort_values('Level', kind='mergesort').reset_index(drop=True)

    # assumes all three frames cover the same set of levels - TODO confirm against the capture layout
    attackerBytes = _orderedByLevel(attackerBytes)
    victimBytes = _orderedByLevel(victimBytes)
    reflectorBytes = _orderedByLevel(reflectorBytes)

    StatDf = pd.DataFrame(columns=pd.MultiIndex.from_tuples([("Victim", "Inbound"), ("Reflector", "Inbound"), ("Reflector", "Outbound"), ("Attacker", "Outbound")]))
    # strip('level') removes the characters l/e/v from both ends, leaving e.g. '1' from 'level1'
    StatDf["Level"] = victimBytes['Level'].apply(lambda x: x.strip('level'))
    StatDf[("Victim","Inbound")] = victimBytes['Victim Inbound']
    StatDf[("Reflector","Inbound")] = reflectorBytes['Reflector Inbound']
    StatDf[("Reflector","Outbound")] = reflectorBytes['Reflector Outbound']
    StatDf[("Attacker","Outbound")] = attackerBytes['Attacker Outbound']
    StatDf['Amplification factor'] = StatDf[("Victim","Inbound")] / StatDf[("Attacker","Outbound")]
    # Move 'Level' to the front of the table
    column_to_move = StatDf.pop("Level")
    StatDf.insert(0, "Level", column_to_move)
    return StatDf

# TFTP DATA OWN TOOL

In [None]:
# Settings for the TFTP run
Protocol = "TFTP"
useCachedBytes = False
# Locate the TFTP pcap files, split by the role named in each file name
(attackerFilenames,
 victimFilenames,
 reflectorFilenames) = getFiles('./pcap_tftp_own_tool')

## Reading the data

In [None]:
# Read the pcap files (or the cached pickles when useCachedBytes is True)
attackerBytes = getAttackBytes(
    attackerFilenames,
    Protocol=Protocol,
    useCachedBytes=useCachedBytes,
)
victimBytes = getVictimBytes(
    victimFilenames,
    Protocol=Protocol,
    useCachedBytes=useCachedBytes,
)
reflectorBytes = getReflectorBytes(
    reflectorFilenames,
    Protocol=Protocol,
    useCachedBytes=useCachedBytes,
)

## Displaying the data in latex

In [None]:
# Console alternative: print(tabulate(StatDf,headers='keys',tablefmt='fancy_grid',showindex=False))
StatDf = getStatDf(attackerBytes, victimBytes, reflectorBytes)
# Render as LaTeX for getting the table into the report, then apply the paper layout
latexTableString = refineLatexTableStringScientificPaper(
    StatDf.to_latex(
        column_format='ccrrrc',
        index=False,
        caption="TFTP DDoS test traffic",
        label="TFTP_Test_Traffic",
        position='H',
    )
)
print(latexTableString)

## Plotting

In [None]:
# Join the three per-role frames on Level to get one single-index dataframe
df1 = attackerBytes.merge(victimBytes, on="Level")
df2 = df1.merge(reflectorBytes, on='Level')
# Drop the 'level' text from the Level column, leaving only the level number
df2["Level"] = df2['Level'].map(lambda lvl: lvl.strip('level'))
# Upper axis bound: largest value in the frame, rounded up to a multiple of its
# leading power of ten (i.e. up to the nearest 100M for example)
maxN = df2.select_dtypes(include=[np.number]).max().max()
c = 10 ** int(math.log10(maxN))  # Same number of digits as max number
yaxisRange = [10, math.ceil(maxN / c) * c]

# Plot all four byte series against the level on a logarithmic y axis
fig = px.line(
    df2,
    title=Protocol + " Test Traffic",
    x='Level',
    y=[
        'Attacker Outbound',
        'Victim Inbound',
        'Reflector Inbound',
        'Reflector Outbound',
    ],
    log_y=True,
    range_y=yaxisRange,
    markers=True,
)
fig.show()

# SNMP DATA OWN TOOL

In [None]:
# Locate the SNMP pcap files, split by the role named in each file name
(attackerFilenames,
 victimFilenames,
 reflectorFilenames) = getFiles('./pcap_snmp_own_tool')

## Reading the data

In [None]:
# Read pcap files
# Each role needs its own reader: getStatDf expects 'Victim Inbound' from getVictimBytes
# and 'Reflector Inbound'/'Reflector Outbound' from getReflectorBytes, which
# getAttackBytes does not produce. Protocol and useCachedBytes are passed explicitly
# so every call is complete on its own.
attackerBytes = getAttackBytes(attackerFilenames,Protocol="SNMP",useCachedBytes=False)
victimBytes = getVictimBytes(victimFilenames,Protocol="SNMP",useCachedBytes=False)
reflectorBytes = getReflectorBytes(reflectorFilenames,Protocol="SNMP",useCachedBytes=False)

## Displaying the data in latex

In [None]:
# Console alternative: print(tabulate(StatDf,headers='keys',tablefmt='fancy_grid',showindex=False))
StatDf = getStatDf(attackerBytes, victimBytes, reflectorBytes)
# Render as LaTeX for getting the table into the report, then apply the paper layout
latexTableString = refineLatexTableStringScientificPaper(
    StatDf.to_latex(
        column_format='ccrrrc',
        index=False,
        caption="SNMP DDoS test traffic",
        label="SNMP_Test_Traffic",
        position='H',
    )
)
print(latexTableString)

# SSDP DATA OWN TOOL

In [None]:
# Locate the SSDP pcap files, split by the role named in each file name
(attackerFilenames,
 victimFilenames,
 reflectorFilenames) = getFiles('./pcap_ssdp_own_tool')

## Reading the data

In [None]:
# Read pcap files
# Each role needs its own reader: getStatDf expects 'Victim Inbound' from getVictimBytes
# and 'Reflector Inbound'/'Reflector Outbound' from getReflectorBytes, which
# getAttackBytes does not produce. Protocol and useCachedBytes are passed explicitly
# so every call is complete on its own.
attackerBytes = getAttackBytes(attackerFilenames,Protocol="SSDP",useCachedBytes=False)
victimBytes = getVictimBytes(victimFilenames,Protocol="SSDP",useCachedBytes=False)
reflectorBytes = getReflectorBytes(reflectorFilenames,Protocol="SSDP",useCachedBytes=False)

## Displaying the data in latex

In [None]:
#print(tabulate(StatDf,headers='keys',tablefmt='fancy_grid',showindex=False))
StatDf = getStatDf(attackerBytes,victimBytes,reflectorBytes)
# The label is SSDP-specific: reusing the SNMP table's label would define the same
# LaTeX label twice in the report and make references to either table ambiguous.
latexTableString = StatDf.to_latex(column_format='ccrrrc',index=False,caption="SSDP DDoS test traffic",label="SSDP_Test_Traffic",position='H') # For getting the table into the report
latexTableString = refineLatexTableStringScientificPaper(latexTableString)
print(latexTableString)