In [None]:
# Output mode used by save_or_show():
#   True  -> write every figure to figures/<name>.svg
#   False -> render every figure inline
EXPORT_FIGURES = False

# When True, the last cell zips the figures/ directory and downloads the archive.
DOWNLOAD_FIGURES = False

# Loading Data

In [None]:
# Download data from git repo
!wget -P data/ https://raw.githubusercontent.com/Cinbarker/CESE4060_Wireshark/main/processed_data/coffee_company.feather
!wget -P data/ https://raw.githubusercontent.com/Cinbarker/CESE4060_Wireshark/main/processed_data/anne_and_max.feather
!wget -P data/ https://raw.githubusercontent.com/Cinbarker/CESE4060_Wireshark/main/processed_data/ikea.feather
!pip install -U kaleido

In [None]:
import os
import shutil

import pandas as pd
import plotly.express as px
import statsmodels
import statsmodels.api as sm

def load_capture(path):
    """Read one processed capture and prepare it for the analysis cells.

    Steps:
      1. Load the feather file at ``path``.
      2. Convert ``Sniff_Timestamp`` to pandas datetimes.
      3. Drop rows whose ``Antenna_Signal_1`` or ``Antenna_Noise`` is exactly 0
         (outliers from the MacBook's Wi-Fi card).

    Returns:
        An independent DataFrame (``.copy()``), so that later cells can add
        columns to it without pandas' SettingWithCopyWarning.
    """
    capture = pd.read_feather(path)
    capture["Sniff_Timestamp"] = pd.to_datetime(capture["Sniff_Timestamp"])
    has_valid_power = (capture["Antenna_Signal_1"] != 0) & (capture["Antenna_Noise"] != 0)
    return capture.loc[has_valid_power].copy()


# Load dataframes (cc = coffee_company, am = anne_and_max, ik = ikea)
cc_df = load_capture("data/coffee_company.feather")
am_df = load_capture("data/anne_and_max.feather")
ik_df = load_capture("data/ikea.feather")

# BSSIDs (access-point MAC addresses) for each capture.
# They are used with .isin() to exclude the APs' own frames when counting devices.
bssids_cc = ["78:8a:20:d7:6c:53"]
bssids_am = ['d0:21:f9:bf:91:05', 'd2:21:f9:cf:91:05', 'd2:21:f9:af:91:05']
# The original list contained "f0:1d:2d:63:c4:ab" twice; the duplicate was removed
# (membership tests are unaffected).
bssids_ik = [
    "f0:1d:2d:63:e2:2b", "f0:1d:2d:63:e2:29", "00:27:e3:7d:cb:81", "f0:1d:2d:64:03:4b",
    "f0:1d:2d:64:03:49", "f0:1d:2d:63:f7:c9", "f0:1d:2d:63:f7:cb", "00:27:e3:82:00:cb",
    "00:27:e3:81:cc:2b", "00:27:e3:82:00:c9", "00:27:e3:81:d1:a1", "00:27:e3:86:3e:0b",
    "f0:1d:2d:65:d2:6b", "f0:1d:2d:65:d2:69", "f0:1d:2d:65:e6:49", "f0:1d:2d:65:e6:4b",
    "00:27:e3:81:cc:21", "00:27:e3:7d:b7:81", "f0:1d:2d:63:c4:a9", "f0:1d:2d:66:f2:0b",
    "f0:1d:2d:66:f2:09", "f0:1d:2d:63:c4:ab", "00:27:e3:81:ce:61", "00:27:e3:90:62:a1",
    "00:27:e3:90:64:c9", "f0:1d:2d:65:d2:61", "00:27:e3:7d:c9:a1", "00:27:e3:7d:c9:ab",
    "00:27:e3:81:99:cb", "f0:1d:2d:65:dc:c1", "00:27:e3:81:99:c9", "f0:1d:2d:63:cb:a9",
    "00:27:e3:90:64:cb", "f0:1d:2d:63:cb:ab", "f0:1d:2d:65:ce:81", "00:27:e3:82:01:61",
    "f0:1d:2d:64:02:e1", "f0:1d:2d:66:dc:a1", "f0:1d:2d:63:f7:c1", "f0:1d:2d:63:e2:21",
    "00:27:e3:82:00:c1", "f0:1d:2d:65:e6:41", "f0:1d:2d:63:c4:a1", "00:27:e3:86:3e:01",
    "f0:1d:2d:64:03:41", "f0:1d:2d:63:cc:81", "00:27:e3:81:fd:6b", "00:27:e3:81:d5:a1",
    "f0:1d:2d:63:cb:a1", "f0:1d:2d:65:ea:a1", "00:27:e3:81:c3:a1", "00:27:e3:81:fd:61",
    "f0:1d:2d:63:f1:01", "f0:1d:2d:65:e9:81", "f0:1d:2d:66:f2:01", "f0:1d:2d:63:cd:e1",
    "00:27:e3:7d:cc:81", "f0:1d:2d:65:e0:c1", "00:27:e3:81:d1:ab", "f0:1d:2d:63:fe:e1",
    "00:27:e3:7d:cb:8b", "f0:1d:2d:63:ff:41",
]

# Mapping from dataframe column names to readable axis/legend labels.
# Passed as `labels=` to the plotly express calls below.
labels = {
    "Antenna_Signal_1": "Antenna Signal [dBm]",
    "Antenna_Noise": "Antenna Noise [dBm]",
    "Unique_Devices": "Number of Devices",
    "Sniff_Timestamp": "Time",
    "Interval": "Time Interval",
    "DataRate": "Data Rate [Mb/s]",
    "FC_Retry": "FC Retry",
    # SNR is a power ratio, so its unit is dB (dBm is an absolute power level).
    "SNR": "SNR [dB]",
    "Avg_Noise": "Average Noise [dBm]",
    "Avg_Quality": "Average Quality",
    # Avg_SNR holds per-interval means; label it like the other Avg_* columns.
    "Avg_SNR": "Average SNR [dB]",
    "Number_Retries": "Number of Retries",
    "Total_Packets": "Total Packets",
    "Retry_Rate": "Retry Rate"
}

In [None]:
# Helper function to either show figures or save them as SVGs
def save_or_show(fig, name: str):
  """Export ``fig`` as an SVG file or display it, depending on ``EXPORT_FIGURES``.

  Args:
    fig: Figure object. It must provide ``update_layout`` and, depending on the
      mode, ``write_image`` (export) or ``show`` (display).
    name: Figure identifier. In export mode it is the file stem
      (``figures/<name>.svg``); in display mode it becomes the plot title.
  """
  if EXPORT_FIGURES:
    # Pure-Python replacement for the former `!mkdir -p figures`: it does not spawn
    # a shell per figure and also works where `mkdir -p` is unavailable.
    os.makedirs("figures", exist_ok=True)
    # Remove margins and make both backgrounds transparent for the exported file.
    fig.update_layout(
      margin=dict(l=0, r=0, b=0, t=0),
      paper_bgcolor='rgba(0,0,0,0)',
      plot_bgcolor='rgba(0,0,0,0)'
    )
    target = "figures/" + name + ".svg"
    fig.write_image(target)
    print("Saved to " + target)
  else:
    # Exported figures carry no title; inline figures use the name as title.
    fig.update_layout(title=name)
    fig.show()

# Processing Data and Generating Figures of Results

## Number of individual devices over time

In [None]:
# Bar chart of the number of unique transmitter addresses per 5-minute interval.
# For every capture:
#   1. drop frames transmitted by the capture's own access points (BSSIDs)
#   2. floor each timestamp to the start of its 5-minute interval
#   3. count the distinct transmitter addresses inside each interval
#   4. plot that count per interval
for capture_df, ap_bssids, figure_name in (
    (cc_df, bssids_cc, "unique_devices_over_time_cc"),
    (am_df, bssids_am, "unique_devices_over_time_am"),
    (ik_df, bssids_ik, "unique_devices_over_time_ik"),
):
    from_client = ~capture_df['Trans_Addr'].isin(ap_bssids)
    client_frames = capture_df.loc[from_client]
    client_frames = client_frames.assign(
        Interval=client_frames['Sniff_Timestamp'].dt.floor('5min')
    )
    per_interval = client_frames.groupby('Interval')['Trans_Addr'].nunique()
    device_counts = per_interval.reset_index(name='Unique_Devices')

    fig = px.bar(device_counts, x="Interval", y="Unique_Devices", labels=labels)
    fig.update_layout(showlegend=False)
    save_or_show(fig, figure_name)

## Antenna signal over time

In [None]:
# One scatter plot per capture: Antenna_Signal_1 of every frame against its timestamp.
for capture_df, figure_name in (
    (cc_df, "antenna_signal_vs_time_cc"),
    (am_df, "antenna_signal_vs_time_am"),
    (ik_df, "antenna_signal_vs_time_ik"),
):
    fig = px.scatter(capture_df, x="Sniff_Timestamp", y="Antenna_Signal_1", labels=labels)
    save_or_show(fig, figure_name)

## Antenna noise over time

In [None]:
# One scatter plot per capture: Antenna_Noise of every frame against its timestamp.
for capture_df, figure_name in (
    (cc_df, "antenna_noise_vs_time_cc"),
    (am_df, "antenna_noise_vs_time_am"),
    (ik_df, "antenna_noise_vs_time_ik"),
):
    fig = px.scatter(capture_df, x="Sniff_Timestamp", y="Antenna_Noise", labels=labels)
    save_or_show(fig, figure_name)

## Antenna noise vs signal

In [None]:
# One scatter plot per capture: Antenna_Noise (x) against Antenna_Signal_1 (y) per frame.
for capture_df, figure_name in (
    (cc_df, "antenna_noise_vs_signal_cc"),
    (am_df, "antenna_noise_vs_signal_am"),
    (ik_df, "antenna_noise_vs_signal_ik"),
):
    fig = px.scatter(capture_df, x="Antenna_Noise", y="Antenna_Signal_1", labels=labels)
    save_or_show(fig, figure_name)

## Antenna Noise and Antenna Signal over Time

In [None]:
# Horizontal legend, right-aligned and placed slightly above the plot area (y > 1).
legend_layout = {
    "orientation": "h",
    "yanchor": "bottom",
    "y": 1.02,
    "xanchor": "right",
    "x": 1,
}

# One line chart per capture with both power columns drawn against time.
for capture_df, figure_name in (
    (cc_df, "antenna_signal&noise_vs_time_cc"),
    (am_df, "antenna_signal&noise_vs_time_am"),
    (ik_df, "antenna_signal&noise_vs_time_ik"),
):
    fig = px.line(
        capture_df,
        x="Sniff_Timestamp",
        y=["Antenna_Signal_1", "Antenna_Noise"],
        labels=labels,
    )
    # Both traces share one axis, so it gets a common title.
    fig.update_layout(yaxis_title="Power [dBm]", legend=legend_layout)
    save_or_show(fig, figure_name)

## SNR over time

In [None]:
# Print the mean SNR of every capture
average_snr_cc = cc_df["SNR"].mean()
average_snr_am = am_df["SNR"].mean()
average_snr_ik = ik_df["SNR"].mean()
print("cc snr:", average_snr_cc)
print("am snr:", average_snr_am)
print("ik snr:", average_snr_ik)

# Line charts of SNR over time.
# The scatter cell further down saves its figures as "snr_over_time_<capture>", the
# names this cell used before, so in export mode these line charts were always
# overwritten. They now get their own "_line_" names; the scatter file names
# (and therefore the files that survived an export so far) stay as they were.
for capture_df, figure_name in (
    (cc_df, "snr_over_time_line_cc"),
    (am_df, "snr_over_time_line_am"),
    (ik_df, "snr_over_time_line_ik"),
):
    fig = px.line(capture_df, x="Sniff_Timestamp", y="SNR", labels=labels)
    save_or_show(fig, figure_name)

## SNR over time (scatter)

In [None]:
# One scatter plot per capture: SNR of every frame against its timestamp.
for capture_df, figure_name in (
    (cc_df, "snr_over_time_cc"),
    (am_df, "snr_over_time_am"),
    (ik_df, "snr_over_time_ik"),
):
    fig = px.scatter(capture_df, x="Sniff_Timestamp", y="SNR", labels=labels)
    save_or_show(fig, figure_name)

## SNR vs Retransmission Rate

In [None]:
# Retransmission ratio as a function of SNR, one scatter plot (with OLS trendline)
# per capture. For every distinct SNR value:
#   retries       = sum of FC_Retry over the frames at that SNR
#   transmissions = number of frames (non-null FC_Retry) at that SNR
#   Retry_Rate    = retries / transmissions
for capture_df, figure_name in (
    (cc_df, "snr_vs_retransmission_rate_cc"),
    (am_df, "snr_vs_retransmission_rate_am"),
    (ik_df, "snr_vs_retransmission_rate_ik"),
):
    retry_rate_per_snr = (
        capture_df.groupby("SNR")
        .agg(
            Retry_Rate=("FC_Retry", "sum"),            # still the raw retry sum here
            Total_Transmissions=("FC_Retry", "count"),
        )
        .reset_index()
        # turn the raw retry sum into the ratio
        .assign(Retry_Rate=lambda table: table["Retry_Rate"] / table["Total_Transmissions"])
    )
    fig = px.scatter(retry_rate_per_snr, x="SNR", y="Retry_Rate", labels=labels, trendline="ols")
    save_or_show(fig, figure_name)

## Summary statistics: retries, devices, transmissions, SNR and data rate

In [None]:
def summarize_capture(capture_df, ap_bssids, tag):
    """Add the 5-minute interval column, derive per-interval tables and print statistics.

    Steps:
      1. Add a "5_min_interval" column (timestamp floored to 5 minutes).
      2. Build the client-only frame by excluding the AP addresses in ``ap_bssids``.
      3. Count unique transmitter addresses per interval (client-only frame).
      4. Count transmissions and compute the retry ratio per interval (all frames).
      5. Print average / max / min of retry ratio, devices, transmissions, SNR
         and data rate.

    Args:
        capture_df: Capture with "Sniff_Timestamp", "Trans_Addr", "FC_Retry",
            "SNR" and "DataRate" columns.
        ap_bssids: Transmitter addresses to exclude from the device counts.
        tag: Short capture name ("am", "cc", "ik") used in the printed labels.

    Returns:
        Tuple ``(capture_df, filtered, device_counts, transmissions, retries,
        dev_retries)``. ``capture_df`` is a new frame that includes the
        "5_min_interval" column; ``filtered`` is an independent copy.
    """
    name = tag.upper()

    # .assign() returns a new frame instead of writing into a possibly sliced one.
    capture_df = capture_df.assign(
        **{"5_min_interval": capture_df["Sniff_Timestamp"].dt.floor("5min")}
    )
    # The interval column is inherited, so it is not recomputed on the slice.
    filtered = capture_df.loc[~capture_df["Trans_Addr"].isin(ap_bssids)].copy()

    device_counts = (
        filtered.groupby("5_min_interval")["Trans_Addr"].nunique()
        .reset_index(name="Unique_Devices")
    )

    retry_by_interval = capture_df.groupby("5_min_interval")["FC_Retry"]
    transmissions = retry_by_interval.count().reset_index(name="Count")
    # Retry ratio of each interval. max/min are taken over these values; the former
    # code took max/min of the single overall ratio, so all three numbers were equal.
    interval_retry_rate = retry_by_interval.sum() / retry_by_interval.count()

    retries = capture_df["FC_Retry"].sum()
    dev_retries = capture_df["FC_Retry"].count()

    # NOTE(review): the ratios are printed as fractions (no x100) although the
    # labels say "percentage".
    print(f"{tag} average percentage of retries over transmissions for {name}:", retries / dev_retries)
    print(f"{tag} max percentage of retries over transmissions per 5-min interval for {name}:", interval_retry_rate.max())
    print(f"{tag} min percentage of retries over transmissions per 5-min interval for {name}:", interval_retry_rate.min())
    print(f"{tag} average number of devices per 5-min interval for {name}:", device_counts["Unique_Devices"].mean())
    print(f"{tag} max number of devices per 5-min interval for {name}:", device_counts["Unique_Devices"].max())
    print(f"{tag} min number of devices per 5-min interval for {name}:", device_counts["Unique_Devices"].min())
    print(f"{tag} average number of transmissions per 5-min interval for {name}:", transmissions["Count"].mean())
    print(f"{tag} max number of transmissions per 5-min interval for {name}:", transmissions["Count"].max())
    print(f"{tag} min number of transmissions per 5-min interval for {name}:", transmissions["Count"].min())
    print(f"{tag} average SNR for {name}:", capture_df["SNR"].mean())
    print(f"{tag} max SNR for {name}:", capture_df["SNR"].max())
    print(f"{tag} min SNR for {name}:", capture_df["SNR"].min())
    print(f"{tag} average Data Rate for {name}:", capture_df["DataRate"].mean())
    print(f"{tag} max Data Rate for {name}:", capture_df["DataRate"].max())
    print(f"{tag} min Data Rate for {name}:", capture_df["DataRate"].min())

    return capture_df, filtered, device_counts, transmissions, retries, dev_retries


# The returned frames/tables are reused by the following cells
# (<tag>_df["5_min_interval"], <tag>_filtered, device_counts_<tag>).
(am_df, am_filtered, device_counts_am,
 transmissions_am, retries_am, dev_retries_am) = summarize_capture(am_df, bssids_am, "am")

(cc_df, cc_filtered, device_counts_cc,
 transmissions_cc, retries_cc, dev_retries_cc) = summarize_capture(cc_df, bssids_cc, "cc")

(ik_df, ik_filtered, device_counts_ik,
 transmissions_ik, retries_ik, dev_retries_ik) = summarize_capture(ik_df, bssids_ik, "ik")









## Number of Devices vs. SNR

In [None]:
# Number of devices vs. average SNR, per 5-minute interval:
# 1. Compute the average SNR per 5-minute interval (AP frames excluded)
# 2. Merge with the unique transmitter counts from the statistics cell above
# 3. Scatter plot of # of devices vs. average SNR with OLS trendline
# 4. Print R squared of the same linear fit
for tag, filtered_df, device_counts in (
    ("am", am_filtered, device_counts_am),
    ("cc", cc_filtered, device_counts_cc),
    ("ik", ik_filtered, device_counts_ik),
):
    avg_snr = filtered_df.groupby("5_min_interval")["SNR"].mean().reset_index(name="Avg_SNR")
    merged_SNR_df = pd.merge(device_counts, avg_snr, on="5_min_interval", how="inner")
    fig = px.scatter(merged_SNR_df, x="Unique_Devices", y="Avg_SNR", labels=labels, trendline="ols")
    save_or_show(fig, "snr_vs_devices_" + tag)

    # R squared of y = a + b*x. The constant (intercept) column is added for every
    # capture; previously only "am" had it, so the cc/ik values came from a
    # through-the-origin fit and did not describe the plotted trendline.
    x = sm.add_constant(merged_SNR_df["Unique_Devices"])
    y = merged_SNR_df["Avg_SNR"]
    model = sm.OLS(y, x)
    results = model.fit()
    print(results.rsquared)

## Number of Devices VS Retransmission Rates

In [None]:
# Number of devices vs. retry rate, per 5-minute interval (AP frames excluded):
# 1. Retry rate = sum of FC_Retry / number of frames, per interval
# 2. Merge with the unique transmitter counts from the statistics cell above
# 3. Scatter plot of # of devices vs. retry rate with OLS trendline
# 4. Print R squared of the same linear fit
for tag, filtered_df, device_counts in (
    ("am", am_filtered, device_counts_am),
    ("cc", cc_filtered, device_counts_cc),
    ("ik", ik_filtered, device_counts_ik),
):
    retry_by_interval = filtered_df.groupby("5_min_interval")["FC_Retry"]
    retry_rate = (retry_by_interval.sum() / retry_by_interval.count()).reset_index(name="Retry_Rate")
    merged_retry_df = pd.merge(device_counts, retry_rate, on="5_min_interval", how="inner")
    fig = px.scatter(merged_retry_df, x="Unique_Devices", y="Retry_Rate", labels=labels, trendline="ols")
    save_or_show(fig, "devices_vs_retry_rate_" + tag)

    # R squared of y = a + b*x. Without the constant column statsmodels fits a line
    # through the origin, whose R squared does not describe the plotted trendline.
    x = sm.add_constant(merged_retry_df["Unique_Devices"])
    y = merged_retry_df["Retry_Rate"]
    model = sm.OLS(y, x)
    results = model.fit()
    print(results.rsquared)


## Number of Devices vs Number of Transmissions

In [None]:
# Number of devices vs. number of transmissions, per 5-minute interval:
# 1. Count the packets per interval
# 2. Merge with the unique transmitter counts from the statistics cell above
# 3. Scatter plot of # of devices vs. total packets with OLS trendline
# 4. Print R squared of the same linear fit
#
# NOTE(review): as before, "am" counts the packets of ALL frames (am_df, AP frames
# included) while "cc" and "ik" count only client frames (*_filtered). Confirm
# whether this difference is intended before comparing the three plots.
for tag, packet_source_df, device_counts in (
    ("am", am_df, device_counts_am),
    ("cc", cc_filtered, device_counts_cc),
    ("ik", ik_filtered, device_counts_ik),
):
    total_packets = (
        packet_source_df.groupby("5_min_interval")["FC_Retry"].count()
        .reset_index(name="Total_Packets")
    )
    merged_packets_df = pd.merge(device_counts, total_packets, on="5_min_interval", how="inner")
    fig = px.scatter(merged_packets_df, x="Unique_Devices", y="Total_Packets", labels=labels, trendline="ols")
    save_or_show(fig, "devices_vs_total_packets_" + tag)

    # R squared of y = a + b*x. Without the constant column statsmodels fits a line
    # through the origin, whose R squared does not describe the plotted trendline.
    x = sm.add_constant(merged_packets_df["Unique_Devices"])
    y = merged_packets_df["Total_Packets"]
    model = sm.OLS(y, x)
    results = model.fit()
    print(results.rsquared)


## Frame Types

In [None]:
import pandas as pd
import plotly.express as px

# Collect one record per (capture, frame category) with the category's share of
# all frames in that capture.
data = []
for df, label in zip([am_df, cc_df, ik_df], ['am', 'cc', 'ik']):
    total_frames = len(df)
    frame_type = df["FC_Type"]
    frame_subtype = df["FC_Subtype"]
    is_mgmt = frame_type == 0   # Type 0: management
    is_ctrl = frame_type == 1   # Type 1: control
    is_data = frame_type == 2   # Type 2: data

    # (category label, row mask, always listed?) in stacking order.
    # Categories that are not "always listed" are only added when they occur.
    categories = [
        ("Beacon (0,8)", is_mgmt & (frame_subtype == 8), True),
        ("Probe Response (0,5)", is_mgmt & (frame_subtype == 5), True),
        ("Other Mgmt Frames (0,*)", is_mgmt & ~frame_subtype.isin([8, 5]), False),
        ("Control Frames (1,*)", is_ctrl, True),
        ("Data Frames (2,0)", is_data & (frame_subtype == 0), False),
        ("Null (no data) (2,4)", is_data & (frame_subtype == 4), False),
        ("QoS Data (2,8)", is_data & (frame_subtype == 8), False),
        ("QoS Null (no data) (2,12)", is_data & (frame_subtype == 12), False),
        ("Other Data Frames (2,*)", is_data & ~frame_subtype.isin([0, 4, 8, 12]), False),
    ]

    for category, mask, always_listed in categories:
        frame_count = int(mask.sum())
        if not always_listed and frame_count <= 0:
            continue
        # Guard against an empty capture (division by zero)
        if total_frames > 0:
            percentage = (frame_count / total_frames) * 100
        else:
            percentage = 0
        data.append({"DataFrame": label, "Frame Category": category, "Percentage": percentage})

# Long-format table: one row per capture and category
plot_df = pd.DataFrame(data)

# Fixed colour per category; categories without an entry use plotly's default colours.
category_colors = {
    "Beacon (0,8)": "#0C5DA5",
    "Probe Response (0,5)": "#DD2C00",
    "Null (no data) (2,4)": "#FF9500",
    "QoS Null (no data) (2,12)": "#9e9e9e",
    "Data Frames (2,0)": "#845B97",
    "Other Mgmt Frames (0,*)": "#474747",
    "QoS Data (2,8)": "#00B945",
    "Other Data Frames (2,*)": "yellow",
}

# Stacked bar chart: one bar per capture, one segment per frame category
fig = px.bar(
    plot_df,
    x="DataFrame",
    y="Percentage",
    color="Frame Category",
    barmode="stack",
    labels={"Percentage": "Percentage (%)", "DataFrame": "DataFrame"},
    color_discrete_map=category_colors,
)
fig.update_layout(legend_title="Frame (type,subtype)")
save_or_show(fig, "frame_types")

## Resolved hostnames

In [None]:
def plot_resolved_hostnames(df, title):
    """Draw a donut chart of how often each resolved host name occurs.

    Source and destination names are pooled: every non-null ``Src_Name`` and
    ``Dst_Name`` value counts as one occurrence of that host. Hosts with less
    than 1 % of all occurrences are combined into a single "Other" slice whose
    data rate is the occurrence-weighted mean of the combined hosts.

    Args:
        df: Capture with "Src_Name", "Dst_Name" and "DataRate" columns.
        title: Figure name passed on to ``save_or_show``.
    """
    df_filtered = df[["Src_Name", "Dst_Name", "DataRate"]].copy()
    # Long format: one row per (frame, host name); frames without a name are dropped
    df_filtered = df_filtered.melt(id_vars=["DataRate"], value_name="Host").dropna(subset=["Host"])

    # Occurrences and average data rate per host
    hostname_counts = df_filtered.groupby("Host").size().reset_index(name="Count")
    avg_data_rate = df_filtered.groupby("Host")["DataRate"].mean().reset_index(name="Avg_DataRate")
    hostname_counts = hostname_counts.merge(avg_data_rate, on="Host")

    # Share of each host in percent
    total = hostname_counts["Count"].sum()
    hostname_counts["Percent"] = hostname_counts["Count"] / total * 100

    # Combine hosts below 1 % into an "Other" category
    minor_hosts = hostname_counts[hostname_counts["Percent"] < 1]
    hostname_counts = hostname_counts[hostname_counts["Percent"] >= 1]
    # Only add the row when there is something to combine. Otherwise its count is 0
    # and the weighted mean below is 0/0, which used to add an "Other (nan Mbps)" row.
    if not minor_hosts.empty:
        minor_count = minor_hosts["Count"].sum()
        other = {
            "Host": "Other",
            "Count": minor_count,
            "Avg_DataRate": (minor_hosts["Count"] * minor_hosts["Avg_DataRate"]).sum() / minor_count,
            "Percent": minor_hosts["Percent"].sum(),
        }
        hostname_counts = pd.concat([hostname_counts, pd.DataFrame([other])], ignore_index=True)

    # Slice label: "<host> (<average data rate> Mbps)"
    hostname_counts["Label"] = (
        hostname_counts["Host"].astype(str)
        + " ("
        + hostname_counts["Avg_DataRate"].map("{:.2f}".format)
        + " Mbps)"
    )
    fig = px.pie(
        hostname_counts,
        names="Label",
        values="Count",
        hole=0.3,
    )

    fig.update_traces(textposition="inside", textinfo="percent+label")
    fig.update_layout(showlegend=False)
    save_or_show(fig, title)

# Host-name charts for the cc and ik captures
for capture_df, figure_name in (
    (cc_df, "resolved_hostnames_cc"),
    (ik_df, "resolved_hostnames_ik"),
):
    plot_resolved_hostnames(capture_df, figure_name)

## Site usage by devices over time

In [None]:
def plot_site_usage(df, bssids, site, title):
  """Bar chart of devices per 5-minute interval, coloured by how many used ``site``.

  Bar height is the number of unique transmitter addresses in the interval
  (addresses in ``bssids`` excluded). Bar colour is the number of those
  addresses that sent at least one frame whose source or destination host
  name contains ``site``.

  Args:
    df: Capture with "Trans_Addr", "Sniff_Timestamp", "Src_Name" and
      "Dst_Name" columns.
    bssids: Transmitter addresses to exclude.
    site: Host-name fragment, matched literally and case-insensitively.
    title: Figure name passed on to ``save_or_show``.
  """
  df_filtered = df.loc[~df['Trans_Addr'].isin(bssids)].copy()
  df_filtered["Interval"] = df_filtered["Sniff_Timestamp"].dt.floor("5min")
  device_counts = (df_filtered.groupby("Interval")["Trans_Addr"].nunique()
                   .reset_index(name="Unique_Devices"))
  # regex=False: `site` is a literal such as "1e100.net". As a regular expression
  # its "." would match any character (e.g. "1e100xnet").
  uses_site = (
      df_filtered["Src_Name"].str.contains(site, case=False, na=False, regex=False) |
      df_filtered["Dst_Name"].str.contains(site, case=False, na=False, regex=False)
  )
  site_counts = (df_filtered[uses_site]
                 .groupby("Interval")["Trans_Addr"].nunique().reset_index(name="Num Devices"))
  # Left merge keeps intervals in which no device used the site; they get 0 below.
  merged_counts = pd.merge(device_counts, site_counts, on="Interval", how="left")
  # Assign the result back: fillna(inplace=True) on a selected column is deprecated
  # and does not update the frame under pandas copy-on-write.
  merged_counts["Num Devices"] = merged_counts["Num Devices"].fillna(0)
  fig = px.bar(merged_counts, x="Interval", y="Unique_Devices", color="Num Devices",
               color_continuous_scale="Viridis", labels=labels)
  fig.update_layout(showlegend=True)
  save_or_show(fig, title)

# Host-name fragment whose usage is highlighted in the charts below.
# Other values listed in this cell for quick switching:
#   "chatgpt.com", "facebook.com", "google.com", "fbcdn.net",
#   "overleaf.com", "github.com"
site = "1e100.net"

for capture_df, ap_bssids, figure_name in (
    (cc_df, bssids_cc, "site_usage_cc"),
    (ik_df, bssids_ik, "site_usage_ik"),
):
    plot_site_usage(capture_df, ap_bssids, site, figure_name)

# Download Generated Figures

In [None]:
if DOWNLOAD_FIGURES:
  # Imported only when a download is requested: google.colab is available only
  # inside Colab, and an unconditional import makes this cell fail everywhere else
  # even though DOWNLOAD_FIGURES is False.
  from google.colab import files

  # Pure-Python replacement for `!zip -r figures.zip figures/`. It writes
  # figures.zip from scratch, so files left in an older archive are not carried over.
  shutil.make_archive("figures", "zip", root_dir=".", base_dir="figures")
  files.download("figures.zip")