# We start by importing the relevant libraries

In [204]:
from scapy.all import *
import numpy as np
import graphviz
from graphviz import Source

## We load the pcap file and split it into Packets and Frames:

In [205]:
all_traffic = rdpcap("./pcap/network-traffic.pcap")
packet_list = []		# to store layer 3 traffic
frame_list = []			# to store layer 2 traffic
for packet in all_traffic:
	if (IP in packet):
		packet_list.append(packet)	
	else:
		frame_list.append(packet)

## We filter the frames, we are interested in source, destination and size

In [206]:
filtered_frames = []
for frame in frame_list:
	filtered_frames.append([frame['Ether'].src + ',' + frame['Ether'].dst, len(frame)])
matrix_ff = np.array(filtered_frames)		# matrix of filtered frames

## We filter the packets, we are interested in only source, destination and size

In [207]:
filtered_packets = []
for packet in packet_list:
	filtered_packets.append([packet['IP'].src + ',' + packet['IP'].dst, len(packet)])
matrix_fp = np.array(filtered_packets)		# matrix of filtered packets

## Identify the conversation (communication between end points)

In [208]:
ethernet_conversation = list(set(matrix_ff[:,0]))
size_ether = [0] * len(ethernet_conversation)
matrix_ethernet_conv = np.array([ethernet_conversation, size_ether])
matrix_ethernet_conv = np.transpose(matrix_ethernet_conv)

In [209]:
conversation_list = list(set(matrix_fp[:,0]))
size_list = [0] * len(conversation_list)
matrix_conversations = np.array([conversation_list, size_list])		# size_list gets casted to string =(
matrix_conversations = np.transpose(matrix_conversations)			# change first column to the IP addresses and second column to size

### Calculation of the conversation size

In [210]:
for i in range(len(matrix_ethernet_conv)):
	conversation_size = 0
	for j in range(len(matrix_ff[:,0])):
		if (matrix_ethernet_conv[i,0] == matrix_ff[j,0]):
			conversation_size += int(matrix_ff[j,1])
	matrix_ethernet_conv[i,1] = conversation_size

In [211]:
for i in range(len(matrix_conversations)):
	conversation_size = 0
	for j in range(len(matrix_fp[:,0])):
		if (matrix_conversations[i,0] == matrix_fp[j,0]):
			conversation_size += int(matrix_fp[j,1])
	matrix_conversations[i,1] = conversation_size

## We create three vectors for Source, Destination and ConversationSize

In [212]:
src_macs = []
dst_macs = []
con_macs = []
for i in range (len(matrix_ethernet_conv)):
	macs = matrix_conversations[i,0].split(",")
	src_macs.append(ips[0])
	dst_macs.append(ips[1])
	con_macs.append(matrix_conversations[i,1])

In [213]:
src_ips = []
dst_ips = []
con_siz = []
for i in range (len(matrix_conversations)):
	ips = matrix_conversations[i,0].split(",")
	src_ips.append(ips[0])
	dst_ips.append(ips[1])
	con_siz.append(matrix_conversations[i,1])

In [214]:
src_macs_copy = copy.deepcopy(src_macs)
dst_macs_copy = copy.deepcopy(dst_macs)

In [215]:
src_ips_copy = copy.deepcopy(src_ips)
dst_ips_copy = copy.deepcopy(dst_ips)

## We create a dictionary of the conversation with prefix PC<number>

In [216]:
mac_values = src_macs + dst_macs
mac_values = list(set(mac_values))

In [217]:
ip_values = src_ips + dst_ips
ip_values = list(set(ip_values))

In [218]:
pc_labels2 = []
for i in range (1, len(mac_values) + 1):
	prefix = "PC" + str(i)
	pc_labels2.append(prefix)
mac_dict = dict(zip(mac_values, pc_labels2))

In [219]:
pc_labels = []
for i in range (1, len(ip_values) + 1):
	prefix = "PC" + str(i)
	pc_labels.append(prefix)
conversation_dict = dict(zip(ip_values, pc_labels))

## We substitute IP values for their respective key

In [220]:
for item in src_macs:
	index = src_macs.index(item)
	src_macs[index] = mac_dict.get(item)
for item in dst_macs:
	index = dst_macs.index(item)
	dst_macs[index] = mac_dict.get(item)

In [221]:
for item in src_ips:
	index = src_ips.index(item)
	src_ips[index] = conversation_dict.get(item)
for item in dst_ips:
	index = dst_ips.index(item)
	dst_ips[index] = conversation_dict.get(item)

## We create the Digraph of IPv4 Conversations and Ethernet Conversations

In [222]:
dotether = graphviz.Digraph(comment='Ethernet Conversations')
dotether.engine = "twopi" #doctest: +ELLIPSIS
dotether #doctest: +ELLIPSIS
dotether.graph_attr['ranksep']='2'		#separation between center (twopi engine)
dotether.graph_attr['nodesep']='0.5'		#separation between arrows

In [223]:
dotipv4 = graphviz.Digraph(comment='IPv4 Conversations')
dotipv4.engine = "twopi" #doctest: +ELLIPSIS
dotipv4 #doctest: +ELLIPSIS
dotipv4.graph_attr['ranksep']='2'		#separation between center (twopi engine)
dotipv4.graph_attr['nodesep']='0.5'		#separation between arrows

### We first create the Nodes

In [224]:
for key, value in mac_dict.items():
	dotether.node(key, value)

In [225]:
for key, value in conversation_dict.items():
	dotipv4.node(key, value)

### We now add the edges

In [226]:
con_macs

['659', '1454', '2631', '15182']

In [227]:
con_siz_int2

[659, 1454, 2631, 15182]

In [228]:
B2

15182

In [229]:
a2

0.5008607037113544

In [230]:
b2

0.000757419265991875

In [231]:
mapped_con_size2

[1.0, 1.6021483164635406, 2.4936307925359773, 12.0]

#### First we create a linear map of the conversation size

In [232]:
con_siz_int2 = list(map(int, con_macs))
A2 = min(con_siz_int2)
B2 = max(con_siz_int2)
P2 = 1  # thinest edge
Q2 = 12  # thickest edge
b2 = (Q2 - P2) / (B2 - A2)
a2 = P2 - b2 * A2
mapped_con_size2 = [a2 + b2 * x for x in con_siz_int2]
str_map_con_size2 = list(map(str, mapped_con_size2))

In [233]:
con_siz_int = list(map(int, con_siz))
A = min(con_siz_int)
B = max(con_siz_int)
P = 1  # thinest edge
Q = 12  # thickest edge
b = (Q - P) / (B - A)
a = P - b * A
mapped_con_size = [a + b * x for x in con_siz_int]
str_map_con_size = list(map(str, mapped_con_size))

In [234]:
readable_size = []
for item in con_siz_int:
	units_size = 1
	counter = 0
	while (item / (1000 * units_size) > 1):
		units_size = units_size * 1000
		counter += 1
	output = item / units_size
	if (counter == 0):
		prefix = "B"
	elif (counter == 1):
		prefix = "KB"
	elif (counter == 2):
		prefix = "MB"
	elif (counter == 3):
		prefix = "GB"
	elif (counter == 4):
		prefix = "TB"
	if (counter == 0):
		read = str(int(output)) + " " + prefix
	else:
		read = "{:.2f}".format(output) + " " + prefix
	readable_size.append(read)

In [235]:
for i in range(len(src_macs_copy)):
	dotether.edge(src_macs_copy[i], dst_macs_copy[i], penwidth=str_map_con_size2[i])

In [236]:
for i in range(len(src_ips_copy)):
	dotipv4.edge(src_ips_copy[i], dst_ips_copy[i], penwidth=str_map_con_size[i])

### Rendering the image of the ipv4 conversation:

In [237]:
dotether.format = 'svg'
dotether.render('test-output/conversation-ether.gv')

'test-output\\conversation-ether.gv.svg'

In [238]:

dotipv4.format = 'svg'
dotipv4.render('test-output/conversation-ipv4.gv')

'test-output\\conversation-ipv4.gv.svg'

In [None]:
'test-output/conversation-ether.gv.svg'

In [239]:
'test-output/conversation-ipv4.gv.svg'

'test-output/conversation-ipv4.gv.svg'

## We make an create a dictionary of ocurrences (relevant for analysis of frequency)

In [240]:
key_occurrences = []
value_ocurrences = []
for key, value in conversation_dict.items():
	counter = 0
	key_occurrences.append(value)
	for item in src_ips_copy:
		counter = counter + 1 if (key == item) else counter
	value_ocurrences.append(counter)
occurrences_dict = dict(zip(key_occurrences, value_ocurrences))