In [None]:
import json
import os
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
import folium
import ipinfo
import numpy as np

In [None]:
def get_df(folders):
    """Load the measurement table stored in ``data/delta.csv``.

    Args:
        folders: Not read by this function; kept so existing call sites
            that pass a list of folder names keep working.

    Returns:
        pandas.DataFrame: The parsed contents of ``data/delta.csv``
        (path resolved relative to the current working directory).
    """
    csv_path = os.path.join("data", "delta.csv")
    return pd.read_csv(csv_path)

In [None]:
# Load the raw measurement table used by every cell below.
df = get_df(["memoire-vps-output", "memoire-vps-output2"])

In [None]:
# Display the loaded measurement table.
df

In [None]:
# Summary statistics of the numeric columns of the raw table.
df.describe()

In [None]:
# Per-test table with derived metrics:
#   delta_*_rpm = ping_as_rpm - loaded RPM
#   ratio_*_rpm = ping_as_rpm / loaded RPM
# The derived columns use plain column arithmetic rather than a row-wise
# DataFrame.apply: same values, no Python-level call per row, and a zero
# denominator follows pandas/NumPy float division instead of depending on how
# each row happens to be boxed.
delta_df = pd.DataFrame({
    "ip": df["ip"],
    "isp": df["isp"],
    "dist": df["dist"],
    "ping": df["ping"],
    "ping_rpm": df["ping_as_rpm"],
    "dl_bw": df["dl_bw"],
    "ul_bw": df["ul_bw"],
    "dl_rpm": df["dl_rpm"],
    "delta_dl_rpm": df["ping_as_rpm"] - df["dl_rpm"],
    "ratio_dl_rpm": df["ping_as_rpm"] / df["dl_rpm"],
    "ul_rpm": df["ul_rpm"],
    "delta_ul_rpm": df["ping_as_rpm"] - df["ul_rpm"],
    "ratio_ul_rpm": df["ping_as_rpm"] / df["ul_rpm"],
})


In [None]:
# Display the table of derived delta/ratio metrics.
delta_df

In [None]:
# Summary statistics of the numeric columns of the derived-metrics table.
delta_df.describe()

In [None]:
# Outlier filter: restrict to the numeric metric columns, then keep only the
# rows whose absolute z-score is below 3 in every one of those columns.
dfbis = delta_df.loc[:, [
    "ping", "ping_rpm",
    "dl_bw", "ul_bw",
    "delta_dl_rpm", "delta_ul_rpm",
    "ratio_dl_rpm", "ratio_ul_rpm",
]]
z_scores = ((dfbis - dfbis.mean()) / dfbis.std()).abs()
dfbis = dfbis.loc[(z_scores < 3).all(axis=1)]

In [None]:
def make_cdf(values, title, xlabel):
    """Plot the empirical CDF of ``values`` on the current pyplot axes.

    Args:
        values: One-dimensional sequence of samples.
        title: Not drawn at the moment (no title is set on the plot); the
            parameter is kept so existing callers keep working.
        xlabel: Label for the x axis.

    Returns:
        None. The curve is drawn on the current figure; the x axis is
        clipped to start at 0 and a grid is enabled.
    """
    sorted_values = np.sort(values)
    n = len(sorted_values)
    # Empirical CDF: the i-th smallest sample (1-based) has cumulative
    # probability i / n, so the curve ends exactly at 1 for the largest
    # sample. (arange(n) / n would stop at (n - 1) / n.)
    cdf = np.arange(1, n + 1) / n

    plt.plot(sorted_values, cdf)
    plt.xlabel(xlabel)
    plt.ylabel("Cumulative Probability")

    plt.xlim(xmin=0)
    plt.grid()


In [None]:
# CDF of the download RPM delta, written to a PDF.
make_cdf(
    values=delta_df["delta_dl_rpm"].to_numpy(),
    xlabel="Latency estimation (in RPM lost)",
    title="CDF of the estimated latency on loaded download",
)
plt.savefig("output/delta_dl.pdf")

In [None]:
# CDF of the download RPM ratio: saved once over the full x range, then again
# with the x axis capped at 25.
make_cdf(
    values=delta_df["ratio_dl_rpm"].to_numpy(),
    xlabel="Factor of increase",
    title="CDF of the estimated factor of latency increase on loaded download",
)
plt.savefig("output/ratio_dl.pdf")
plt.xlim(xmax=25)
plt.savefig("output/ratio_dl_limit.pdf")

In [None]:
# CDF of the upload RPM delta, written to a PDF.
make_cdf(
    values=delta_df["delta_ul_rpm"].to_numpy(),
    xlabel="Latency estimation (in RPM lost)",
    title="CDF of the estimated latency on loaded upload",
)
plt.savefig("output/delta_ul.pdf")

In [None]:
# CDF of the upload RPM ratio: saved once over the full x range, then again
# with the x axis capped at 25.
# The title now describes the plotted quantity (the increase factor), matching
# the wording used for the download ratio plot; it previously repeated the
# title of the upload delta plot.
make_cdf(
    delta_df["ratio_ul_rpm"].values,
    title="CDF of the estimated factor of latency increase on loaded upload",
    xlabel="Factor of increase"
    )
plt.savefig("output/ratio_ul.pdf")
plt.xlim(xmax=25)
plt.savefig("output/ratio_ul_limit.pdf")

In [None]:
# Print every distinct ISP name, one per line, in sorted order.
isp_names_sorted = sorted(delta_df["isp"].unique())
for isp in isp_names_sorted:
    print(isp)

In [None]:
# Minimum number of tests an ISP needs in order to be included in the
# per-ISP listing and plots below.
MIN_NUMBER_OF_TESTS = 5

In [None]:
# (isp, number_of_tests) pairs, in order of first appearance of each ISP.
# A single grouped count replaces one full boolean scan of the frame per ISP.
# Rows with a missing ISP are not grouped (the equality-based count could only
# ever report 0 for them, since NaN never compares equal).
isp_count = [
    (isp, int(count))
    for isp, count in delta_df.groupby("isp", sort=False).size().items()
]

# Most-tested ISPs first; ties are ordered by name, descending.
for isp, count in sorted(isp_count, key=lambda x: (x[1], x[0]), reverse=True):
    if count >= MIN_NUMBER_OF_TESTS:
        print(f"{isp} -> {count}")

In [None]:
# For each ISP with at least MIN_NUMBER_OF_TESTS tests (most-tested first),
# collect its upload and download RPM ratios as arrays, plus its name.
# The three lists are index-aligned.
delta_percent_upload = []
delta_percent_download = []
names = []

ranked_isps = sorted(isp_count, key=lambda pair: (pair[1], pair[0]), reverse=True)
for isp, count in ranked_isps:
    if count < MIN_NUMBER_OF_TESTS:
        continue
    isp_rows = delta_df.loc[delta_df["isp"] == isp]
    delta_percent_upload.append(isp_rows["ratio_ul_rpm"].values)
    delta_percent_download.append(isp_rows["ratio_dl_rpm"].values)
    names.append(isp)

In [None]:
# Side-by-side horizontal box plots (upload | download) of the RPM ratio per
# ISP, sharing the y axis, with a log-scaled x axis.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(20, 7))

# Box i (1-based) belongs to names[i - 1].
tick_positions = list(range(1, len(names) + 1))
panels = (
    (ax1, delta_percent_upload, "upload"),
    (ax2, delta_percent_download, "download"),
)
for ax, samples, panel_title in panels:
    ax.boxplot(samples, vert=False)
    ax.set_yticks(tick_positions, names)
    ax.set_title(panel_title)
    ax.grid()

# The formatter is installed after set_xscale, as in the per-axis sequence it
# replaces. It prints ticks in plain decimal notation with just enough
# decimals for values below 1 (0.1 -> "0.1", 10 -> "10").
for ax in (ax1, ax2):
    ax.set_xscale('log')
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(
        lambda y, pos: ('{{:.{:1d}f}}'.format(int(np.maximum(-np.log10(y), 0)))).format(y)
    ))

fig.supxlabel("Factor (log scale)")
fig.suptitle(f"Factor of latency increase for ISP who have at least {MIN_NUMBER_OF_TESTS} tests")

plt.subplots_adjust(wspace=0.01, hspace=0)
plt.show()

In [None]:
# Mean and (population) standard deviation of the upload/download RPM ratio
# for every distinct distance below the cutoff, in increasing distance order.
# All five lists are index-aligned.
dists = []
means_bb_upload = []
std_bb_upload = []
means_bb_download = []
std_bb_download = []

for dist in sorted(delta_df["dist"].unique()):
    # Written as "not <" so that a NaN distance is skipped as well.
    if not dist < 100_000_000_000:
        continue
    rows_at_dist = delta_df.loc[delta_df["dist"] == dist]
    bb_uploads = rows_at_dist["ratio_ul_rpm"].values
    bb_downloads = rows_at_dist["ratio_dl_rpm"].values

    dists.append(dist)
    means_bb_upload.append(np.mean(bb_uploads))
    std_bb_upload.append(np.std(bb_uploads))
    means_bb_download.append(np.mean(bb_downloads))
    std_bb_download.append(np.std(bb_downloads))

In [None]:
# Mean RPM ratio as a function of distance: upload on top, download below.
fig, (ax1, ax2) = plt.subplots(2, 1)

ax1.plot(dists, means_bb_upload, label="upload")
ax2.plot(dists, means_bb_download, label="download")
# label= alone draws nothing; without a legend the two panels cannot be
# told apart.
ax1.legend()
ax2.legend()

# The plotted values are ratios (factors), not percentages.
fig.suptitle("Factor of latency increase with distance")
fig.supylabel("Factor of latency increase")
fig.supxlabel("Relative distance from the server (Km)")

plt.show()

In [None]:
# Rows for the ViaSat ISP (the trailing space is part of the value matched).
delta_df[delta_df["isp"].eq("ViaSat,Inc., US ")]

In [None]:
# Summary statistics restricted to the ViaSat rows.
delta_df[delta_df["isp"].eq("ViaSat,Inc., US ")].describe()

In [None]:
# Export the ViaSat rows to CSV.
delta_df[delta_df["isp"].eq("ViaSat,Inc., US ")].to_csv("output/plance.csv")

In [None]:
# RPM ratio against estimated goodput on the outlier-filtered data:
# download on the left, upload on the right, shared y axis.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))

panels = (
    (ax1, "dl_bw", "ratio_dl_rpm", "download"),
    (ax2, "ul_bw", "ratio_ul_rpm", "upload"),
)
for ax, bw_col, ratio_col, panel_title in panels:
    ax.scatter(dfbis[bw_col], dfbis[ratio_col], alpha=.2)
    ax.set_title(panel_title)
    ax.grid()

ax1.set_ylabel("Factor of latency increase")
fig.supxlabel("Estimated goodput (Mbps)")

plt.subplots_adjust(wspace=0.02, hspace=0.2, top=0.85, bottom=0.15)
plt.savefig("output/increase_over_bw.pdf")

In [None]:
# Same goodput scatter as the unrestricted version, zoomed in on the low
# goodput range (download: up to 100, upload: up to 50).
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))
ax1.scatter(dfbis["dl_bw"], dfbis["ratio_dl_rpm"], alpha=.2)
ax2.scatter(dfbis["ul_bw"], dfbis["ratio_ul_rpm"], alpha=.2)
# Plain string: there is nothing to interpolate, so no f-prefix is needed.
fig.suptitle("Factor of latency increase with goodput (without outlier)")
fig.supxlabel("Estimated goodput (Mbps)")
ax1.set_ylabel("Factor of latency increase")
ax1.set_title("download")
ax2.set_title("upload")
ax1.grid()
ax2.grid()

# One set_xlim per axis. An earlier ax2.set_xlim(xmin=0.9) was removed because
# the call below overrode it immediately, so it never affected the figure.
# The lower bound sits slightly below 0 so points at 0 are not clipped.
ax1.set_xlim(xmin=-0.2, xmax=100)
ax2.set_xlim(xmin=-0.2, xmax=50)

plt.subplots_adjust(
    wspace=0.05,
    hspace=0.2,
    top=0.85,
    bottom=0.15
)
# plt.savefig("output/increase_over_bw_limit.pdf")


In [None]:
# Summary statistics of the goodput and RPM-ratio columns (unfiltered data).
delta_df[["dl_bw", "ul_bw", "ratio_dl_rpm", "ratio_ul_rpm"]].describe()


In [None]:
# Stand-alone scatter of the upload RPM ratio against upload goodput
# (outlier-filtered data).
upload_goodput = dfbis["ul_bw"]
upload_ratio = dfbis["ratio_ul_rpm"]
plt.scatter(upload_goodput, upload_ratio, alpha=.2)
plt.grid()
plt.ylabel("Factor of latency increase")
plt.xlabel("Estimated goodput (Mbps)")
plt.show()

In [None]:
# RPM ratio against idle ping on the outlier-filtered data:
# download on the left, upload on the right, shared y axis.
fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))

panels = (
    (ax1, "ratio_dl_rpm", "download"),
    (ax2, "ratio_ul_rpm", "upload"),
)
for ax, ratio_col, panel_title in panels:
    ax.scatter(dfbis["ping"], dfbis[ratio_col], alpha=.2)
    ax.set_title(panel_title)
    ax.grid()

ax1.set_ylabel("Factor of latency increase")
fig.supxlabel("Idle ping (ms)")

plt.subplots_adjust(wspace=0.05, hspace=0.2, top=0.85, bottom=0.15)
plt.savefig("output/increase_over_idle.pdf")