## Setup

In [1]:
import datetime
from pathlib import Path

import networkx as nx
import numpy as np 
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd 
from plotly.subplots import make_subplots
from pyvis.network import Network
from scapy.all import rdpcap, IP, TCP, UDP, Raw
from convert_pcap_to_df import convert_pcap_to_df


%matplotlib inline

## Packet Capture File Overview

In [2]:
# Read the whole capture, then narrow it down to the packets that carry an IP layer.
amazon_ring_cap = rdpcap(filename="data/AmazonRing/ring.pcap")
pcap = amazon_ring_cap[IP]
# Bare expression so the notebook renders scapy's per-protocol summary of the IP subset.
pcap

<IP from ring.pcap: TCP:610 UDP:9266 ICMP:1 Other:0>

In [3]:
# Packet counts per category; the two derived entries are the remainders
# (IP packets that are neither TCP nor UDP, and packets without an IP layer).
pcap_len_summary = {
  'All': len(amazon_ring_cap),
  'IP': len(pcap[IP]),
  'TCP': len(pcap[TCP]),
  'UDP': len(pcap[UDP]),
}
pcap_len_summary['IP_Other'] = pcap_len_summary['IP'] - pcap_len_summary['TCP'] - pcap_len_summary['UDP']
pcap_len_summary['Other'] = pcap_len_summary['All'] - pcap_len_summary['IP']

In [4]:
# One-column frame ("key") of packet counts per category, largest first.
# 'IP_Other' is left out of the chart.
tmp_df = (
  pd.DataFrame(pcap_len_summary, index=["key"])
  .T
  .drop(['IP_Other'])
  .sort_values(by='key', ascending=False)
)
fig = px.bar(tmp_df, y="key", text=tmp_df['key'])

fig.update_layout(
  height=200,
  width=400,
  yaxis_title="Number of packets",
  xaxis_title=None,
  margin=dict(t=10, b=10),
)

# The export does not create missing directories, so make sure the target
# folder exists before writing (a fresh checkout may not contain it).
Path("assets").mkdir(parents=True, exist_ok=True)
fig.write_image("assets/ring_overview.pdf")
fig.show()

## Build or load DataFrame

In [9]:
analysis_pcap_path = Path("data/AmazonRing/ring.pcap")
analysis_pkl_path = Path("data/AmazonRing/ring.pkl")

# Use the pickled DataFrame as a cache: only convert the raw capture when no pickle exists yet.
# NOTE(review): read_pickle can execute arbitrary code — only load pickles produced locally.
if not analysis_pkl_path.is_file():
  df = convert_pcap_to_df(pcap_path=analysis_pcap_path, pkl_path=analysis_pkl_path)
else:
  df = pd.read_pickle(analysis_pkl_path.as_posix())

In [10]:
# Show three randomly chosen rows as a quick sanity check of the loaded frame.
df.sample(3)

Unnamed: 0,time,payload,IP_version,IP_ihl,IP_tos,IP_len,IP_id,IP_flags,IP_frag,IP_ttl,...,TCP_urgptr,UDP_sport,UDP_dport,UDP_len,UDP_chksum,Raw_load,IP_int_arr_time,IP_Burst_ix,IP_Burst_length,IP_Burst_avg_size
3313,2021-04-07 17:56:28.075859456,Raw,4,5,0,1242,770,0,0,128,...,,51504,22070,1222,28501,1214,0 days 00:00:00.003899136,805,2,866.67
346,2021-04-07 17:55:54.629710080,Raw,4,5,0,1242,9426,0,0,128,...,,51504,22070,1222,27431,1214,0 days 00:00:00.003066368,26,3,946.75
5344,2021-04-07 17:56:51.491474944,Raw,4,5,0,200,1310,0,0,128,...,,51506,44464,180,2749,172,0 days 00:00:00.001625344,0,0,0.0


## Analysis

#### Network Graph with unique connections

In [11]:
# Count the packets of every (source, destination) address pair and keep only pairs that actually occur
pair_counts = df[['IP_src', 'IP_dst']].groupby(['IP_src', 'IP_dst']).size()
connections = pair_counts.rename("count").reset_index()
connections = connections[connections['count'] != 0]

# Drop every pair that involves 0.0.0.0 or 255.255.255.255, on either side
excluded_addresses = ['0.0.0.0', '255.255.255.255']
involves_excluded = connections['IP_src'].isin(excluded_addresses) | connections['IP_dst'].isin(excluded_addresses)
connections = connections[~involves_excluded]
# Highest packet count of any remaining pair; used later to scale edge widths
max_count = connections['count'].max()
connections

Unnamed: 0,IP_src,IP_dst,count
11,18.197.187.162,192.168.24.221,3417
20,192.168.24.1,192.168.24.221,12
27,192.168.24.221,18.197.187.162,5844
28,192.168.24.221,192.168.24.1,12
30,192.168.24.221,213.239.239.166,1
32,192.168.24.221,3.213.247.79,128
33,192.168.24.221,34.196.207.230,50
34,192.168.24.221,52.20.55.110,85
35,192.168.24.221,52.87.138.226,55
38,213.239.239.166,192.168.24.221,1


In [12]:
# Turn the rows into plain (src, dst, count) tuples for networkx.
# Materialised as a list on purpose: itertuples() yields a one-shot iterator, so
# re-running the graph-building cell would otherwise silently produce an empty graph.
connections = list(connections.itertuples(index=False, name=None))

In [13]:
# Directed graph: one edge per (src, dst) pair, with the packet count stored as the edge weight.
G = nx.DiGraph()
G.add_weighted_edges_from(connections, weight='weight')

In [14]:
# Wrap the networkx graph in a pyvis network so it can be rendered as interactive HTML.
net = Network(notebook=True, directed=True)
net.from_nx(G)


In [15]:
# Draw each edge with a width proportional to its packet count (busiest pair = 5)
# and print the raw count as the edge label.
for edge in net.edges:
  weight = edge['weight']
  edge['width'] = (weight / max_count) * 5
  edge['label'] = weight

In [16]:
# Rendering options for the network view, passed to pyvis as a JavaScript-style
# `var options = {...}` snippet. They style node/edge label fonts, let edges
# inherit their colour, and select the "repulsion" physics solver.
# Interactive alternative (kept for reference): uncomment to tune the options in the browser.
# net.show_buttons(filter_=['physics', 'nodes', 'edges'])
options = """
var options = {
  "nodes": {
    "font": {
      "background": "rgba(255,125,104,0.77)"
    }
  },
  "edges": {
    "color": {
      "inherit": true
    },
    "scaling": {
      "max": 100
    },
    "font": {
      "size": 9,
      "background": "rgba(255,255,255,0.90)"
    },
    "smooth": {
      "forceDirection": "none"
    }
  },
  "physics": {
    "minVelocity": 0.75,
    "solver": "repulsion"
  }
}
"""
net.set_options(options)
# Write the interactive graph to an HTML file and display it in the notebook.
net.show('assets/network.html')

#### Network layer (IP)

##### Payload size per IP source address

In [17]:
# Sum the payload bytes sent by each source address, rank them descending,
# then express the totals in units of 2**20 bytes.
source_addresses = (
  df.groupby("IP_src")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
source_addresses['size'] = source_addresses['size'] / 2**20
source_addresses

Unnamed: 0,IP_src,size
3,192.168.24.221,2.99655
1,18.197.187.162,0.5384254
6,34.196.207.230,0.01620579
5,3.213.247.79,0.01071835
7,52.20.55.110,0.009614944
8,52.87.138.226,0.006874084
2,192.168.24.1,1.144409e-05
0,0.0.0.0,4.768372e-06
4,213.239.239.166,9.536743e-07


In [18]:
# Horizontal bar chart: one bar per source address, bar length = summed payload size.
fig = px.bar(
  source_addresses,
  x="size",
  y="IP_src",
  orientation='h',
  template="plotly",
  title="Source addresses: summed payload sizes (Bytes sent)",
  labels=dict(IP_src="IP address", size="Aggregated payload size (MB)"),
)
fig.show()

##### Payload size per IP destination address

In [19]:
# Sum the payload bytes received by each destination address, rank them descending,
# then express the totals in units of 2**20 bytes.
destination_addresses = (
  df.groupby("IP_dst")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
destination_addresses['size'] = destination_addresses['size'] / 2**20
destination_addresses

Unnamed: 0,IP_dst,size
0,18.197.187.162,2.908938
2,192.168.24.221,0.581851
5,3.213.247.79,0.03989506
8,52.87.138.226,0.03286171
6,34.196.207.230,0.01204777
7,52.20.55.110,0.002794266
1,192.168.24.1,1.144409e-05
4,255.255.255.255,4.768372e-06
3,213.239.239.166,9.536743e-07


In [20]:
# Horizontal bar chart: one bar per destination address, bar length = summed payload size.
fig = px.bar(
  destination_addresses,
  x="size",
  y="IP_dst",
  orientation='h',
  template="plotly",
  title="Destination addresses: summed payload sizes (Bytes Received)",
  labels=dict(IP_dst="IP address", size="Aggregated payload size (MB)"),
)
fig.show()

##### Distribution of payload sizes over time

In [21]:
# source_addresses is sorted by size (descending), so its first row is the heaviest sender.
top_src_addr = source_addresses['IP_src'].iloc[0]
# All packets sent by that address, reduced to the columns needed for plotting.
df_top_src_addr = df.loc[df['IP_src'] == top_src_addr, ['time', 'IP_src', 'IP_dst', 'Raw_load']]

In [22]:
# One trace per destination address, in order of first appearance:
# payload size over time for the packets sent there.
scatter_plots = [
  go.Scatter(x=sub_df['time'], y=sub_df['Raw_load'], name=address)
  for address in df_top_src_addr['IP_dst'].unique()
  for sub_df in [df_top_src_addr[df_top_src_addr['IP_dst'] == address]]
]

In [23]:
layout = go.Layout(
    title=f"IP packet sizes sent from {top_src_addr} over time",
    xaxis_title="Time",
    yaxis_title="Payload size (Bytes)",
    legend_title_text='Destination addresses',
)

fig = go.Figure(data=scatter_plots, layout=layout)
# Wide, short canvas with tight top/bottom margins for the exported PDF.
fig.update_layout(height=500, width=1800, margin=dict(t=10, b=10))

# The export does not create missing directories, so make sure the target
# folder exists before writing (a fresh checkout may not contain it).
Path("assets").mkdir(parents=True, exist_ok=True)
fig.write_image("assets/ring_top_src_sent.pdf")
fig.show()

#### Transport Layer

##### TCP Ports

In [24]:
# Sum the payload bytes per TCP source port, rank descending, and express in units of 2**10 bytes.
source_ports = (
  df.groupby("TCP_sport")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
source_ports['size'] = source_ports['size'] / 2**10
source_ports

Unnamed: 0,TCP_sport,size
1,443,34.609375
3,51721,33.397461
2,51164,17.615234
5,54739,16.929688
12,9998,9.845703
11,64975,6.979492
10,61707,6.307617
7,57308,5.357422
0,15063,3.959961
4,51821,2.469727


In [25]:
# Horizontal bar chart: one bar per TCP source port, bar length = summed payload size.
fig = px.bar(
  source_ports,
  x="size",
  y="TCP_sport",
  orientation='h',
  template="plotly",
  title="Source ports: summed payload sizes (Bytes sent)",
  labels=dict(TCP_sport="TCP Port", size="Aggregated payload size (kB)"),
)
fig.show()

In [26]:
# Sum the payload bytes per TCP destination port, rank descending, and express in units of 2**10 bytes.
destination_ports = (
  df.groupby("TCP_dport")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
destination_ports['size'] = destination_ports['size'] / 2**10
destination_ports

Unnamed: 0,TCP_dport,size
1,443,86.839844
7,57308,8.297852
11,64975,8.296875
8,57582,5.96875
4,51821,3.959961
6,55484,3.876953
3,51721,3.750977
2,51164,3.667969
5,54739,3.662109
10,61707,3.645508


In [27]:
# Horizontal bar chart: one bar per TCP destination port, bar length = summed payload size.
fig = px.bar(
  destination_ports,
  x="size",
  y="TCP_dport",
  orientation='h',
  template="plotly",
  title="Destination ports: summed payload sizes (Bytes Received)",
  labels=dict(TCP_dport="TCP port", size="Aggregated payload size (kB)"),
)
fig.show()

###### Distribution of TCP payload sizes over time

In [28]:
# source_ports is sorted by size (descending), so its first row is the busiest TCP source port.
top_src_port = source_ports.iloc[0]['TCP_sport']
# All packets sent from that port, reduced to the columns needed for plotting.
df_top_src_port = df.loc[df['TCP_sport'] == top_src_port, ['time', 'TCP_sport', 'TCP_dport', 'Raw_load']]

In [29]:
# One trace per TCP destination port, in order of first appearance:
# payload size over time for the packets sent there.
scatter_plots = [
  go.Scatter(x=sub_df['time'], y=sub_df['Raw_load'], name=port)
  for port in df_top_src_port['TCP_dport'].unique()
  for sub_df in [df_top_src_port[df_top_src_port['TCP_dport'] == port]]
]

In [30]:
# Titles and legend heading for the per-destination-port traces built above.
layout = go.Layout(
    legend_title_text='Destination ports',
    yaxis_title="Payload size (Bytes)",
    xaxis_title="Time",
    title=f"TCP packet sizes sent from {top_src_port} over time",
)

fig = go.Figure(layout=layout, data=scatter_plots)
fig.show()

##### UDP Ports

In [31]:
# Sum the payload bytes per UDP source port, rank descending, and express in units of 2**10 bytes.
source_ports = (
  df.groupby("UDP_sport")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
source_ports['size'] = source_ports['size'] / 2**10
source_ports

Unnamed: 0,UDP_sport,size
3,51504,2431.865234
4,51506,544.417969
2,44464,543.351562
1,22070,4.035156
9,68,0.010742
5,52461,0.005859
6,53,0.005859
8,67,0.005859
0,123,0.000977
7,59939,0.000977


In [32]:
# Horizontal bar chart: one bar per UDP source port, bar length = summed payload size.
fig = px.bar(
  source_ports,
  x="size",
  y="UDP_sport",
  orientation='h',
  template="plotly",
  title="Source ports: summed payload sizes (Bytes sent)",
  labels=dict(UDP_sport="UDP Port", size="Aggregated payload size (kB)"),
)
fig.show()

In [33]:
# Sum the payload bytes per UDP destination port, rank descending, and express in units of 2**10 bytes.
destination_ports = (
  df.groupby("UDP_dport")['Raw_load']
  .sum()
  .to_frame(name='size')
  .reset_index()
  .sort_values("size", ascending=False)
)
destination_ports['size'] = destination_ports['size'] / 2**10
destination_ports

Unnamed: 0,UDP_dport,size
1,22070,2431.865234
2,44464,544.417969
4,51506,543.351562
3,51504,4.035156
8,67,0.010742
5,52461,0.005859
6,53,0.005859
9,68,0.005859
0,123,0.000977
7,59939,0.000977


In [34]:
# Horizontal bar chart: one bar per UDP destination port, bar length = summed payload size.
fig = px.bar(
  destination_ports,
  x="size",
  y="UDP_dport",
  orientation='h',
  template="plotly",
  title="Destination ports: summed payload sizes (Bytes Received)",
  labels=dict(UDP_dport="UDP port", size="Aggregated payload size (kB)"),
)
fig.show()

###### Distribution of UDP payload sizes over time

In [35]:
# source_ports is sorted by size (descending), so its first row is the busiest UDP source port.
top_src_port = source_ports.iloc[0]['UDP_sport']
# All packets sent from that port, reduced to the columns needed for plotting.
df_top_src_port = df.loc[df['UDP_sport'] == top_src_port, ['time', 'UDP_sport', 'UDP_dport', 'Raw_load']]

In [36]:
# One trace per UDP destination port, in order of first appearance:
# payload size over time for the packets sent there.
scatter_plots = [
  go.Scatter(x=sub_df['time'], y=sub_df['Raw_load'], name=port)
  for port in df_top_src_port['UDP_dport'].unique()
  for sub_df in [df_top_src_port[df_top_src_port['UDP_dport'] == port]]
]

In [37]:
# Titles and legend heading for the per-destination-port traces built above.
layout = go.Layout(
    legend_title_text='Destination ports',
    yaxis_title="Payload size (Bytes)",
    xaxis_title="Time",
    title=f"UDP packet sizes sent from {top_src_port} over time",
)

fig = go.Figure(layout=layout, data=scatter_plots)
fig.show()

## Feature Engineering

### Inter-arrival time

In [38]:
# Show the three columns the inter-arrival-time feature is derived from.
df[['time', 'IP_src', 'IP_dst']]

Unnamed: 0,time,IP_src,IP_dst
0,2021-04-07 17:54:10.427264768,192.168.24.221,3.213.247.79
1,2021-04-07 17:54:10.522255104,192.168.24.221,3.213.247.79
2,2021-04-07 17:54:10.534473984,192.168.24.221,3.213.247.79
3,2021-04-07 17:54:10.631671552,192.168.24.221,3.213.247.79
4,2021-04-07 17:54:10.778831872,192.168.24.221,3.213.247.79
...,...,...,...
9872,2021-04-07 17:57:06.375312896,3.213.247.79,192.168.24.221
9873,2021-04-07 17:57:06.375767552,3.213.247.79,192.168.24.221
9874,2021-04-07 17:57:06.382616576,3.213.247.79,192.168.24.221
9875,2021-04-07 17:57:06.492804864,3.213.247.79,192.168.24.221


In [39]:
# Inter-arrival time: for every packet, the time elapsed since the previous packet
# that was sent to the same destination address (NaT for the first packet of each destination).
# Computed as a grouped diff over the time-ordered frame, so every row of df is kept.
df = df.sort_values("time", ascending=True)
df['IP_int_arr_time'] = df.groupby("IP_dst", observed=True)['time'].diff()
# Restore the original row order.
df = df.sort_index()
df[['time', 'IP_src', 'IP_dst', 'IP_int_arr_time']]


Unnamed: 0,time,IP_src,IP_dst,IP_int_arr_time
0,2021-04-07 17:54:10.427264768,192.168.24.221,3.213.247.79,NaT
1,2021-04-07 17:54:10.522255104,192.168.24.221,3.213.247.79,0 days 00:00:00.094990336
2,2021-04-07 17:54:10.534473984,192.168.24.221,3.213.247.79,0 days 00:00:00.012218880
3,2021-04-07 17:54:10.631671552,192.168.24.221,3.213.247.79,0 days 00:00:00.097197568
4,2021-04-07 17:54:10.778831872,192.168.24.221,3.213.247.79,0 days 00:00:00.147160320
...,...,...,...,...
9872,2021-04-07 17:57:06.375312896,3.213.247.79,192.168.24.221,0 days 00:00:00.000433152
9873,2021-04-07 17:57:06.375767552,3.213.247.79,192.168.24.221,0 days 00:00:00.000454656
9874,2021-04-07 17:57:06.382616576,3.213.247.79,192.168.24.221,0 days 00:00:00.006849024
9875,2021-04-07 17:57:06.492804864,3.213.247.79,192.168.24.221,0 days 00:00:00.110188288


In [40]:
# Stream analysis of the top source address: among its packets, describe() reports the
# most frequent value of each column in its 'top' row, i.e. the top source and its
# most common destination.
src_dst_pairs = df.loc[df['IP_src'] == top_src_addr, ['IP_src', 'IP_dst']]
top_src_addr, top_dst_addr = src_dst_pairs.describe().loc['top']
top_src_addr, top_dst_addr 

('192.168.24.221', '18.197.187.162')

In [41]:
# Packets of the single busiest stream (top source -> its most frequent destination),
# reduced to the columns used by the burst analysis.
is_top_stream = (df['IP_src'] == top_src_addr) & (df['IP_dst'] == top_dst_addr)
top_stream_df = df.loc[
  is_top_stream,
  ['time', 'payload', 'IP_src', 'IP_dst', 'IP_int_arr_time', 'Raw_load'],
].copy()  # explicit copy: later cells add columns to this frame, which would otherwise warn about writing to a slice
top_stream_df

Unnamed: 0,time,payload,IP_src,IP_dst,IP_int_arr_time,Raw_load
264,2021-04-07 17:55:53.220411648,NoPayload,192.168.24.221,18.197.187.162,NaT,1
265,2021-04-07 17:55:53.236104192,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.015692544,1
266,2021-04-07 17:55:53.240877824,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.004773632,1100
267,2021-04-07 17:55:53.331727616,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.090849792,1
268,2021-04-07 17:55:53.340790016,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.009062400,623
...,...,...,...,...,...,...
6103,2021-04-07 17:56:58.444066048,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.007608320,470
6104,2021-04-07 17:56:58.458676224,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.014610176,172
6105,2021-04-07 17:56:58.463372800,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.004696576,397
6106,2021-04-07 17:56:58.583078144,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.119705344,1


In [42]:
layout = go.Layout(
    title=f"Packet sizes of most frequent communication partners from {top_src_addr} to {top_dst_addr} over time",
    xaxis_title="Time",
    yaxis_title="Payload size (Bytes)",
)

# Single trace: payload size of every packet in the top stream over time.
fig = go.Figure(
  data=[go.Scatter(x=top_stream_df['time'], y=top_stream_df['Raw_load'])],
  layout=layout,
)
fig.update_layout(height=500, width=1800, margin=dict(t=50, b=10))

# The export does not create missing directories, so make sure the target
# folder exists before writing (a fresh checkout may not contain it).
Path("assets").mkdir(parents=True, exist_ok=True)
fig.write_image("assets/ring_top_comm.pdf")

fig.show()

### Bursts

In [43]:
# Only packets sent within this time interval will be considered for the current burst
BURST_TIME_THRESHOLD = datetime.timedelta(milliseconds=300)
# Only packets that have at least this many bytes will be considered for the current burst
# (110 % of the mean payload size, truncated to an integer)
BURST_SIZE_THRESHOLD = int(df['Raw_load'].mean() * 1.1)

In [44]:
# Show the first ten rows of the full frame before the burst features are computed.
df.head(10)

Unnamed: 0,time,payload,IP_version,IP_ihl,IP_tos,IP_len,IP_id,IP_flags,IP_frag,IP_ttl,...,TCP_urgptr,UDP_sport,UDP_dport,UDP_len,UDP_chksum,Raw_load,IP_int_arr_time,IP_Burst_ix,IP_Burst_length,IP_Burst_avg_size
0,2021-04-07 17:54:10.427264768,NoPayload,4,5,0,48,37821,0,0,128,...,0,,,,,1,NaT,0,0,0.0
1,2021-04-07 17:54:10.522255104,NoPayload,4,5,0,40,38067,0,0,128,...,0,,,,,1,0 days 00:00:00.094990336,0,0,0.0
2,2021-04-07 17:54:10.534473984,Raw,4,5,0,172,38118,0,0,128,...,0,,,,,132,0 days 00:00:00.012218880,0,0,0.0
3,2021-04-07 17:54:10.631671552,NoPayload,4,5,0,40,38237,0,0,128,...,0,,,,,1,0 days 00:00:00.097197568,0,0,0.0
4,2021-04-07 17:54:10.778831872,NoPayload,4,5,0,40,38413,0,0,128,...,0,,,,,1,0 days 00:00:00.147160320,0,0,0.0
5,2021-04-07 17:54:11.431710464,Raw,4,5,0,115,38651,0,0,128,...,0,,,,,75,0 days 00:00:00.652878592,0,0,0.0
6,2021-04-07 17:54:11.432216576,Raw,4,5,0,46,38892,0,0,128,...,0,,,,,6,0 days 00:00:00.000506112,0,0,0.0
7,2021-04-07 17:54:11.432881664,Raw,4,5,0,85,39074,0,0,128,...,0,,,,,45,0 days 00:00:00.000665088,0,0,0.0
8,2021-04-07 17:54:11.527110400,NoPayload,4,5,0,40,39249,0,0,128,...,0,,,,,1,0 days 00:00:00.094228736,0,0,0.0
9,2021-04-07 17:54:11.530748928,Raw,4,5,0,355,39318,0,0,128,...,0,,,,,315,0 days 00:00:00.003638528,0,0,0.0


In [45]:
def add_burst_ixs_to_df(df, time_threshold=None, size_threshold=None):
  """Label every packet of a stream with a burst index.

  The frame is walked in row order. Starting from an anchor packet, the scan
  skips forward over packets whose ``Raw_load`` is below ``size_threshold``
  until it reaches the next large packet (or the last row). If that packet
  follows the anchor within ``time_threshold``, the anchor, the skipped
  packets and the large packet all share the current burst index. Otherwise
  the anchor closes its burst and every skipped small packet receives an
  index of its own.

  Args:
    df: DataFrame with a ``time`` column (timestamps) and a ``Raw_load``
      column (payload sizes). It is modified in place.
    time_threshold: Maximum gap between anchor and next large packet for
      them to belong to the same burst. Defaults to the module-level
      ``BURST_TIME_THRESHOLD``.
    size_threshold: Minimum payload size for a packet to count as large.
      Defaults to the module-level ``BURST_SIZE_THRESHOLD``.

  Returns:
    The same ``df`` object with an added ``IP_Burst_ix`` column (dtype
    ``UInt32``). Indices start at 1; packets that share an index belong to
    the same burst.
  """
  if time_threshold is None:
    time_threshold = BURST_TIME_THRESHOLD
  if size_threshold is None:
    size_threshold = BURST_SIZE_THRESHOLD

  n_packets = len(df)
  # Pull both columns out once: per-row df.iloc[k][col] lookups build a whole
  # row Series on every access, which dominates the runtime of the loop.
  times = df['time'].tolist()
  sizes = df['Raw_load'].tolist()

  ixs = np.zeros(n_packets, dtype=np.uint32)
  burst_ix = 1
  i = 0

  while i < n_packets - 1:

    # Find next packet that is big enough to be considered relevant
    # (the last row always ends the scan, whatever its size)
    j = i + 1
    while j < n_packets - 1 and sizes[j] < size_threshold:
      j += 1

    if times[j] - times[i] < time_threshold:
      # Same burst for all packets within time threshold
      ixs[i: j+1] = burst_ix
    else:
      ixs[i] = burst_ix
      burst_ix += 1
      # Small payloads outside of the time threshold each get an index of their own
      for n in range(i+1, j):
        ixs[n] = burst_ix
        burst_ix += 1

    i = j

  # The loop stops once the last row becomes the anchor. If that row was not
  # absorbed into a burst it has no index yet, so give it the next free one
  # instead of leaving it at the 0 the array was initialised with.
  if n_packets and ixs[-1] == 0:
    ixs[-1] = burst_ix

  df['IP_Burst_ix'] = ixs
  df['IP_Burst_ix'] = df['IP_Burst_ix'].astype('UInt32')
  return df
  
# Label every packet of the top stream with its burst index (adds the 'IP_Burst_ix' column).
top_stream_df = add_burst_ixs_to_df(top_stream_df)

In [46]:
# Per-burst statistics, broadcast back onto every packet of the burst.
burst_sizes = top_stream_df.groupby(['IP_Burst_ix'])['IP_Burst_ix'].transform('size')
burst_means = top_stream_df.groupby(['IP_Burst_ix'])['Raw_load'].transform('mean')
# length 1 should equal a burst with two packets
top_stream_df['IP_Burst_length'] = burst_sizes - 1
top_stream_df['IP_Burst_avg_size'] = burst_means.round(decimals=2)

# Single-packet "bursts" are not bursts: zero out all three burst columns for them.
is_single_packet = top_stream_df['IP_Burst_length'] == 0
top_stream_df.loc[is_single_packet, ['IP_Burst_ix', 'IP_Burst_length', 'IP_Burst_avg_size']] = 0

# Re-number the remaining burst indices as consecutive category codes.
top_stream_df['IP_Burst_ix'] = top_stream_df['IP_Burst_ix'].astype('category').cat.codes


In [47]:
# Display the top stream together with its burst index, length and average size columns.
top_stream_df

Unnamed: 0,time,payload,IP_src,IP_dst,IP_int_arr_time,Raw_load,IP_Burst_ix,IP_Burst_length,IP_Burst_avg_size
264,2021-04-07 17:55:53.220411648,NoPayload,192.168.24.221,18.197.187.162,NaT,1,1,4,345.2
265,2021-04-07 17:55:53.236104192,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.015692544,1,1,4,345.2
266,2021-04-07 17:55:53.240877824,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.004773632,1100,1,4,345.2
267,2021-04-07 17:55:53.331727616,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.090849792,1,1,4,345.2
268,2021-04-07 17:55:53.340790016,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.009062400,623,1,4,345.2
...,...,...,...,...,...,...,...,...,...
6103,2021-04-07 17:56:58.444066048,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.007608320,470,13,724,655.58
6104,2021-04-07 17:56:58.458676224,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.014610176,172,13,724,655.58
6105,2021-04-07 17:56:58.463372800,Raw,192.168.24.221,18.197.187.162,0 days 00:00:00.004696576,397,13,724,655.58
6106,2021-04-07 17:56:58.583078144,NoPayload,192.168.24.221,18.197.187.162,0 days 00:00:00.119705344,1,13,724,655.58


In [48]:
layout = go.Layout(
    title=f"Packet sizes of most frequent communication partners from {top_src_addr} to {top_dst_addr} over time with overlay showing packet bursts",
    xaxis_title="Time",
    showlegend=False,
)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add payloads
fig.add_trace(
    go.Scatter(x=top_stream_df['time'], y=top_stream_df['Raw_load'], name="Payload"),
    secondary_y=False,
)

# Add burst id as a one-row heatmap behind the payload trace.
# Every colour stop is the same translucent yellow, except the lowest stop
# (burst index 0 = "no burst"), which is fully transparent.
# NOTE(review): assumes at least two distinct burst indices; with fewer the
# colour scale built here is empty or incomplete — confirm for other captures.
burst_color = 'rgba(246, 226, 127, 0.75)'
no_burst_color = 'rgba(255, 255, 255, 0)'
colorscale = []
for stop in np.linspace(0, 1, top_stream_df['IP_Burst_ix'].max()):
    colorscale.extend([[stop, burst_color], [stop, burst_color]])
colorscale[0][1] = no_burst_color
colorscale[1][1] = no_burst_color

colorbar = dict(tick0=0, dtick=1)
fig.add_trace(
    go.Heatmap(
        x=top_stream_df['time'],
        y=np.ones((len(top_stream_df))),
        z=top_stream_df['IP_Burst_ix'],
        name='Burst',
        colorscale=colorscale,
        colorbar=colorbar,
        showscale=False,
    ),
    secondary_y=True,
)

# Set y-axes titles; the burst axis only carries the overlay and stays hidden
fig.update_yaxes(title_text="Payload size (Bytes)", secondary_y=False)
fig.update_yaxes(title_text="Burst", secondary_y=True, visible=False)

# Add figure title, then size and margins
fig.update_layout(layout)
fig.update_layout(height=500, width=1800, margin=dict(t=50, b=10))

# The export does not create missing directories, so make sure the target
# folder exists before writing (a fresh checkout may not contain it).
Path("assets").mkdir(parents=True, exist_ok=True)
fig.write_image("assets/ring_top_comm_bursts.pdf")
fig.show()
