## Pandas Exercise
### Task: Use pandas to process and query the wireshark file you send in Chapter 6
#### 1) Load the Wireshark file that you used in Chapter 6 into a pandas DataFrame

In [1]:
import numpy as np 
import pandas as pd

# Load data from Wireshark
df = pd.read_csv("ws.csv")
df.index = df.index + 1
df.head()

Unnamed: 0,Unnamed: 1,Time,Source,Destination,Protocol,Length,Info
1,1,0.0,10.97.172.240,52.21.103.149,TCP,78,54831 > 443 [SYN] Seq=0 Win=65535 Len=0 MSS=...
2,2,0.082079,52.21.103.149,10.97.172.240,TCP,66,"443 > 54831 [SYN, ACK] Seq=0 Ack=1 Win=26883..."
3,3,0.082252,10.97.172.240,52.21.103.149,TCP,54,54831 > 443 [ACK] Seq=1 Ack=1 Win=262144 Len=0
4,4,0.083455,10.97.172.240,52.21.103.149,TLSv1.2,309,Client Hello
5,5,0.16465,52.21.103.149,10.97.172.240,TCP,54,443 > 54831 [ACK] Seq=1 Ack=256 Win=28160 Len=0


In [2]:
# Print the keys from the dictionaries
print(df.keys())

Index([' ', 'Time', 'Source', 'Destination', 'Protocol', 'Length', 'Info'], dtype='object')


#### 2) How many packets (rows) are there in the DataFrame


In [3]:
# Print the number of packet
num_packet = df.shape[0]
print('Number of packets: ' + str(num_packet))

Number of packets: 30922


#### 3) How many fields (columns) are there in the DataFrame ?

In [4]:
# Print the field (dictionary keys) in the DataFrame
print(df.keys())
num_field = df.shape[1]
print(num_field)

Index([' ', 'Time', 'Source', 'Destination', 'Protocol', 'Length', 'Info'], dtype='object')
7


#### 4) Remove the last column

In [5]:
# Remove the last column from DataFrame
df.drop('Info', axis=1, inplace=True)
df

Unnamed: 0,Unnamed: 1,Time,Source,Destination,Protocol,Length
1,1,0.000000,10.97.172.240,52.21.103.149,TCP,78
2,2,0.082079,52.21.103.149,10.97.172.240,TCP,66
3,3,0.082252,10.97.172.240,52.21.103.149,TCP,54
4,4,0.083455,10.97.172.240,52.21.103.149,TLSv1.2,309
5,5,0.164650,52.21.103.149,10.97.172.240,TCP,54
...,...,...,...,...,...,...
30918,30918,27.624493,10.97.172.240,157.240.1.18,TCP,54
30919,30919,27.632048,10.97.172.240,157.240.1.18,TLSv1.2,500
30920,30920,27.635130,157.240.1.18,10.97.172.240,TCP,54
30921,30921,27.635137,157.240.1.18,10.97.172.240,TLSv1.2,96


#### 5) Insert a row/column

In [6]:
new_row = df.loc[num_packet]
df.loc[num_packet+1,:] = new_row
new_col = df.loc[:, 'Time']*df.loc[:, 'Length']
name_col = 'Test'
df.insert(df.shape[1], name_col, new_col)
df

Unnamed: 0,Unnamed: 1,Time,Source,Destination,Protocol,Length,Test
1,1.0,0.000000,10.97.172.240,52.21.103.149,TCP,78.0,0.000000
2,2.0,0.082079,52.21.103.149,10.97.172.240,TCP,66.0,5.417214
3,3.0,0.082252,10.97.172.240,52.21.103.149,TCP,54.0,4.441608
4,4.0,0.083455,10.97.172.240,52.21.103.149,TLSv1.2,309.0,25.787595
5,5.0,0.164650,52.21.103.149,10.97.172.240,TCP,54.0,8.891100
...,...,...,...,...,...,...,...
30919,30919.0,27.632048,10.97.172.240,157.240.1.18,TLSv1.2,500.0,13816.024000
30920,30920.0,27.635130,157.240.1.18,10.97.172.240,TCP,54.0,1492.297020
30921,30921.0,27.635137,157.240.1.18,10.97.172.240,TLSv1.2,96.0,2652.973152
30922,30922.0,27.635210,10.97.172.240,157.240.1.18,TCP,54.0,1492.301340


#### 6) What is the maximum length of a packet ?

In [7]:
# Determine the maximum length of packet
df['Length'].max()

1434.0

#### 7) What is the mean length of TCP packets ?

In [8]:
# Average length of TCP packets
df['Length'].mean()

769.1684183293988

#### 8) Repeat the exercise 6.2 with pandas

In [9]:
import os

os.chdir("..")
module_path = os.path.abspath(os.getcwd())
print(module_path)
 
# import module_path
from ImportClass import ClassPacket

# Pipeline for filtering TCP flow
PacketObj = ClassPacket('ws.csv')
packetDict = PacketObj.openFile()
packetFlow = PacketObj.printFlow(packetDict)
tcpFlow = PacketObj.sameFlow(packetFlow)
tcpByte = PacketObj.newDict(tcpFlow)
print(tcpByte)

# Convert to pandas Dataframe
tcpDF = pd.DataFrame([[keys[0], keys[1], keys[2], keys[3], values] for keys, values in tcpByte.items()]) \
        .rename(columns={0:'IP source', 1:'IP destination', 2:'TCP source', 3:'TCP destination', 4:'Byte length'})
print(tcpDF)

D:\UCL\4th Year\Software for Network and Services Design\Lab
{('10.97.172.240', '52.21.103.149', 54831, 443): 510, ('52.21.103.149', '10.97.172.240', 443, 54831): 1608, ('10.97.172.240', '162.125.18.133', 53359, 443): 162, ('10.97.172.240', '162.125.18.133', 54122, 443): 162, ('10.97.172.240', '162.125.18.133', 54834, 443): 132, ('10.97.172.240', '162.125.18.133', 54835, 443): 132, ('10.97.172.240', '162.125.18.133', 54836, 443): 348, ('10.97.172.240', '162.125.18.133', 54837, 443): 360, ('162.125.18.133', '10.97.172.240', 443, 53359): 54, ('162.125.18.133', '10.97.172.240', 443, 54122): 54, ('162.125.18.133', '10.97.172.240', 443, 54834): 66, ('162.125.18.133', '10.97.172.240', 443, 54835): 66, ('162.125.18.133', '10.97.172.240', 443, 54836): 1662, ('162.125.18.133', '10.97.172.240', 443, 54837): 2375, ('10.97.172.240', '216.58.206.142', 54777, 443): 108, ('216.58.206.142', '10.97.172.240', 443, 54777): 108, ('10.97.172.240', '216.58.206.129', 54838, 443): 2226, ('216.58.206.129', '10