In [None]:
import json
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sn
import plotly.express as px
from mpl_toolkits.mplot3d import axis3d

from quic_info2 import Quic_info

In [None]:
# Load the measurement table; the path is relative to the working directory.
spurious_csv_path = os.path.join("data", "spurious.csv")
df = pd.read_csv(spurious_csv_path)

In [None]:
# Bare expression: the notebook renders the loaded frame as this cell's output.
df

In [None]:
# Box plot of the raw "spurious" column.
# The y axis is linear (no log scale is applied), so the label names the
# plotted quantity instead of claiming a log scale.
plt.boxplot(df["spurious"].values)
plt.ylabel("Number of spurious retransmissions")
plt.title("Number of spurious")

In [None]:
# Empirical CDF of the "spurious" column.
# After sorting, the i-th sample (1-based) has i of the n samples at or below
# it, so its cumulative probability is i / n and the largest sample reaches 1.
sorted_values = np.sort(df["spurious"].values)
cdf = np.arange(1, len(sorted_values) + 1) / len(sorted_values)

plt.plot(sorted_values, cdf)
plt.title("Number of spurious")
plt.xlabel("Spurious number")
plt.ylabel("Cumulative Probability")
plt.grid()


In [None]:
# Group the frame by ISP once instead of re-filtering it for every ISP.
# sort=False keeps the ISPs in order of first appearance, which is the order
# the stable sort below falls back to when two ISPs have the same count.
by_isp = df.groupby("isp", sort=False)

# (isp, number of rows) pairs; reused by later cells.
isp_count = list(by_isp.size().items())

# Parallel lists: data[i] holds the "spurious" values of ISP names[i].
data = []
names = []

for isp, count in sorted(isp_count, key=lambda pair: pair[1], reverse=True):
    # ISPs with fewer than 5 rows are left out of the per-ISP plot.
    if count < 5:
        continue
    print(isp, count)
    data.append(by_isp["spurious"].get_group(isp).to_numpy())
    names.append(isp)

In [None]:
# Horizontal box plot with one box per entry of `data`, labelled from `names`.
fig = plt.figure(figsize=(8, 12))
isp_axes = fig.gca()
isp_axes.boxplot(data, vert=False)
# Boxes sit at positions 1..len(names); attach the ISP label to each position.
isp_axes.set_yticks(list(range(1, len(names) + 1)))
isp_axes.set_yticklabels(names)
isp_axes.set_xlabel("Number of spurious")
isp_axes.set_title("Spurious retransmission for ISP")
isp_axes.grid()

In [None]:
# Pairwise correlation of the selected columns, drawn as an annotated heatmap.
corr_columns = [
    "dist",
    "spurious",
    "loss",
    "ping_as_rpm",
    "ratio_dl_rpm",
    "ratio_ul_rpm",
]
corr = df.loc[:, corr_columns].corr()
sn.heatmap(data=corr, annot=True)

In [None]:
# Percentage of losses that were spurious, one value per row.
# Rows with no loss are skipped because the ratio is undefined for them.
# Computed column-wise instead of iterating over the rows one by one.
rows_with_loss = df.loc[df["loss"] > 0]
spurious_percentage = (
    100 * rows_with_loss["spurious"] / rows_with_loss["loss"]
).tolist()

In [None]:
# Box plot of the per-row spurious percentage, drawn on the current axes.
pct_axes = plt.gca()
pct_axes.boxplot(spurious_percentage)
pct_axes.set_title("Percentage of spurious retransmission among losses")
pct_axes.set_ylabel("Percentage (%)")

In [None]:
# Empirical CDF of the spurious percentage.
# After sorting, the i-th sample (1-based) has i of the n samples at or below
# it, so its cumulative probability is i / n and the largest sample reaches 1.
sorted_values = np.sort(spurious_percentage)
cdf = np.arange(1, len(sorted_values) + 1) / len(sorted_values)

plt.plot(sorted_values, cdf)
plt.title("Spurious percentage")
plt.xlabel("Spurious percentage among losses (%)")
plt.ylabel("Cumulative Probability")
plt.grid()


In [None]:
# Histogram of the per-row spurious percentage.
# Reads `spurious_percentage` directly: `sorted_values` is bound by two
# different cells, and a histogram does not need sorted input anyway.
plt.hist(spurious_percentage, bins=100)
plt.xlabel("Spurious retransmission rate among losses (%)")
plt.ylabel("Number")
# Log-scaled counts, so sparsely populated bins remain visible.
plt.yscale("log")
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)
plt.savefig("output/spurious_distribution.pdf")

In [None]:
# Percentage of losses that were spurious, stored as a new column.
# Computed column-wise; rows whose loss is 0 get 0 rather than the result of a
# division by zero.
spurious_share = 100 * df["spurious"] / df["loss"]
df["spurious_rate"] = spurious_share.where(df["loss"] != 0, 0)

# Show the rows with loss_rate above 0.8, highest loss_rate first.
df.loc[df["loss_rate"] > .8].sort_values(by="loss_rate", ascending=False)


In [None]:
# Rows whose ISP name contains the literal substring "NZ".
# na=False counts a missing ISP as "no match", keeping the mask purely boolean
# (a mask containing NaN cannot be used for row selection).
df.loc[df["isp"].str.contains("NZ", regex=False, na=False)]

In [None]:
# Parallel lists: spurious_data[i] holds the spurious percentages of ISP
# spurious_names[i].
spurious_data = []
spurious_names = []

# The percentage is only defined for rows with at least one loss; compute it
# once for the whole frame instead of row by row inside the ISP loop.
rows_with_loss = df.loc[df["loss"] > 0]
loss_spurious_pct = 100 * rows_with_loss["spurious"] / rows_with_loss["loss"]

for isp, count in sorted(isp_count, key=lambda pair: pair[1], reverse=True):
    # ISPs with fewer than 5 rows are left out.
    if count < 5:
        continue
    cur_spurious_percentage = loss_spurious_pct[rows_with_loss["isp"] == isp].tolist()
    # An ISP whose rows all have zero loss has nothing to plot.
    if cur_spurious_percentage:
        spurious_data.append(cur_spurious_percentage)
        spurious_names.append(isp)


In [None]:
# Horizontal box plot with one box per entry of `spurious_data`, labelled from
# `spurious_names`.
fig = plt.figure(figsize=(8, 5))
pct_isp_axes = fig.gca()
pct_isp_axes.boxplot(spurious_data, vert=False)
# Boxes sit at positions 1..len(spurious_names); label each with its ISP.
pct_isp_axes.set_yticks(list(range(1, len(spurious_names) + 1)))
pct_isp_axes.set_yticklabels(spurious_names)
pct_isp_axes.set_xlabel("Spurious percentage (%)")
pct_isp_axes.set_title("Percentage of spurious retransmission among losses")
pct_isp_axes.grid()

In [None]:
# Outlier removal: keep only the rows where every selected column lies within
# 3 standard deviations of that column's mean.
outlier_columns = ["loss_rate", "ratio_dl_rpm", "ratio_ul_rpm"]
dfbis = df[outlier_columns]
z_scores = ((dfbis - dfbis.mean()) / dfbis.std()).abs()
within_three_sigma = z_scores.lt(3).all(axis=1)
dfbis = dfbis[within_three_sigma]

In [None]:
# Side-by-side scatter plots of the two ratio columns against loss_rate, using
# the outlier-filtered frame and a shared y axis.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))
for panel, ratio_column, panel_title in (
    (ax1, "ratio_dl_rpm", "download"),
    (ax2, "ratio_ul_rpm", "upload"),
):
    panel.scatter(dfbis["loss_rate"], dfbis[ratio_column], alpha=.2)
    panel.set_title(panel_title)
    panel.grid()

fig.supxlabel("Loss rate (%)")
# The y axis is shared, so only the left panel carries the label.
ax1.set_ylabel("Factor of latency increase")

plt.subplots_adjust(wspace=0.02, hspace=0.2, top=0.8, bottom=0.15)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)
plt.savefig("output/increase_loss.pdf")

In [None]:
# Same pair of scatter plots as the unrestricted figure, but with the x axis
# limited to the range [-0.005, 0.1] on both panels.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))
for panel, ratio_column, panel_title in (
    (ax1, "ratio_dl_rpm", "download"),
    (ax2, "ratio_ul_rpm", "upload"),
):
    panel.scatter(dfbis["loss_rate"], dfbis[ratio_column], alpha=.2)
    panel.set_title(panel_title)
    panel.grid()
    panel.set_xlim(xmin=-.005, xmax=.1)

fig.supxlabel("Loss rate (%)")
# The y axis is shared, so only the left panel carries the label.
ax1.set_ylabel("Factor of latency increase")

plt.subplots_adjust(wspace=0.08, hspace=0.2, top=0.8, bottom=0.15)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)
plt.savefig("output/increase_loss_limit.pdf")


In [None]:
# For every distinct "dist" value (ascending), the mean loss_rate and the mean
# spurious_rate of the rows with that value.
dists = sorted(df["dist"].unique())
rows_per_dist = [df.loc[df["dist"] == dist_value] for dist_value in dists]
data = [np.mean(rows["loss_rate"].values) for rows in rows_per_dist]
data2 = [np.mean(rows["spurious_rate"].values) for rows in rows_per_dist]


In [None]:
# Both per-"dist" means share one axis, so each line is labelled and a legend
# is drawn to tell them apart.
plt.plot(dists, data, label="Mean loss_rate")
plt.plot(dists, data2, label="Mean spurious_rate")
plt.xlabel("dist")
plt.legend()


In [None]:
# Scatter of spurious_rate against loss_rate, with the x axis limited to
# [-0.1, 1].
plt.scatter(df["loss_rate"], df["spurious_rate"], alpha=.2)
plt.xlim(xmax=1, xmin=-0.1)
plt.xlabel("Loss rate (%)")
plt.ylabel("Spurious retransmission rate among losses (%)")
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("output", exist_ok=True)
plt.savefig("output/spurious_loss.pdf")

In [None]:
# Scatter of the download and upload ratio columns against spurious_rate.
# The title and axis labels describe what is actually plotted here: x is
# spurious_rate and y is the ratio columns, not loss rate vs. spurious rate.
plt.scatter(df["spurious_rate"], df["ratio_dl_rpm"], alpha=.2, label="download")
plt.scatter(df["spurious_rate"], df["ratio_ul_rpm"], alpha=.2, label="upload")
# NOTE(review): this limit matches the one used on the loss-rate scatter plot;
# confirm that restricting spurious_rate to at most 1 is intended here.
plt.xlim(xmax=1, xmin=-0.1)
plt.title("Latency increase against spurious retransmission rate\n(spurious rate up to 1%)")
plt.xlabel("Spurious retransmission rate among losses (%)")
plt.ylabel("Factor of latency increase")
plt.legend()
