In [None]:
import pandas as pd
import sys
import os
import seaborn as sns
import holoviews as hv
from IPython.core.display import display, HTML
from holoviews import opts, dim, Palette

# Activate the Bokeh plotting backend for HoloViews.
hv.extension("bokeh")

# Notebook-wide default plot options, one entry per element/container type.
opts.defaults(
    opts.Bars(tools=["hover"], xrotation=45),
    opts.BoxWhisker(
        box_fill_color=Palette("Category20"),
        width=700,
        xrotation=30,
    ),
    opts.Curve(tools=["hover"], width=700),
    opts.GridSpace(shared_yaxis=True),
    opts.Scatter(
        alpha=0.5,
        cmap="Set1",
        color=Palette("Category20"),
        height=500,
        # Marker size is the 'growth' dimension plus a constant offset of 5.
        size=dim("growth") + 5,
        tools=["hover"],
        width=700,
    ),
    opts.NdOverlay(legend_position="left"),
)

# Defines upper bound of ping for the network speed analysis.
upper_acceptable_ping_bound = 10

# Initializes the figures path in webpage for the diagram output.
# os.makedirs (instead of os.mkdir) also creates the "./webpage" parent when it
# is missing, so a fresh checkout does not fail with FileNotFoundError here.
if not os.path.isdir("./webpage/figures"):
    os.makedirs("./webpage/figures", exist_ok=True)
    print("Path 'figures' created successfully")
else:
    print("Path 'figures' initialized")

def numOutlierCount(attribute):
    """Count and report the IQR-based outliers of a numeric column.

    A value is treated as an outlier when it lies more than 1.5 times the
    interquartile range above the 75% quantile or below the 25% quantile.
    The absolute outlier count and its share of the non-null values (a plain
    ratio, not multiplied by 100) are printed.

    Parameters
    ----------
    attribute
        Column to inspect; must support ``quantile``, ``count``, comparison
        and boolean-mask indexing (e.g. a pandas Series).

    Returns
    -------
    The absolute number of outliers (high and low combined).
    """
    lower_quartile = attribute.quantile(0.25)
    upper_quartile = attribute.quantile(0.75)
    whisker = 1.5 * (upper_quartile - lower_quartile)

    too_high = attribute > upper_quartile + whisker
    too_low = attribute < lower_quartile - whisker

    outlier_total = attribute[too_high].count() + attribute[too_low].count()
    outlier_share = outlier_total / attribute.count()

    # print() stringifies its arguments, so the output matches "label" + str(value).
    print("Ausreißer Absolut: ", outlier_total, sep="")
    print("Anzahl Ausreißer prozentual: ", outlier_share, sep="")
    return outlier_total
def inspect_outliers(df,attribute):
    """Render the rows of ``df`` whose ``attribute`` value is an IQR outlier.

    Two HTML tables are displayed: first the rows below
    ``q25 - 1.5 * IQR`` ("Min-Outliers"), then the rows above
    ``q75 + 1.5 * IQR`` ("Max-Outliers").

    Parameters
    ----------
    df
        Frame whose rows are shown; it is indexed with a boolean mask built
        from ``attribute``.
    attribute
        Column used to decide which rows are outliers (e.g. ``df["max"]``).

    Returns
    -------
    None. The tables are emitted through ``display``.
    """
    lower_quartile = attribute.quantile(0.25)
    upper_quartile = attribute.quantile(0.75)
    spread = upper_quartile - lower_quartile

    rows_below = df[attribute < (lower_quartile - spread * 1.5)]
    rows_above = df[attribute > (upper_quartile + spread * 1.5)]

    # Heading markup is paired with the frame that is rendered right after it.
    sections = (
        ("</br><h2>Min-Outliers</h2>", rows_below),
        ("<hr></br><h2>Max-Outliers</h2>", rows_above),
    )
    for heading_markup, outlier_rows in sections:
        display(HTML(heading_markup))
        display(HTML(outlier_rows.to_html()))
    
# Data Import
# Reads the ping and speed-test measurements (presumably written by the
# network test that the message below refers to) and parses their timestamps.
measurement_date_format = "%d.%m.%Y %H:%M:%S"

try:
    df_ping = pd.read_csv("Data/ping_test.csv", index_col=0)
    df_speed_test = pd.read_csv("Data/speed_test.csv", index_col=0)
except FileNotFoundError:
    # Only a missing file is reported as "no data yet"; any other failure
    # (malformed CSV, permissions, ...) now surfaces with its real traceback
    # instead of being hidden behind this message.
    print("Error while searching for files. Please perform network-test first.")
    sys.exit(0)

# Parsed outside the try block so that a missing "date" column or an unexpected
# timestamp format is not misreported as a missing file.
df_ping["date"] = pd.to_datetime(df_ping["date"], format=measurement_date_format)
df_speed_test["date"] = pd.to_datetime(df_speed_test["date"], format=measurement_date_format)

# Basic Data Wrangling

In [None]:
# Print the (rows, columns) shape of the ping data, then show its first rows
# as the cell output.
print(df_ping.shape)
df_ping.head()


In [None]:
# Print the (rows, columns) shape of the speed-test data, then show its first
# rows as the cell output.
print(df_speed_test.shape)
df_speed_test.head()

In [None]:
# Largest value in the "ping" column of the speed-test data.
df_speed_test["ping"].max()



In [None]:
# Summary of the ping data: largest "max", smallest "min" and mean of "avg".
# NOTE(review): this cell runs before the rows with max == 99999 are filtered
# out further down, so these figures may include them; confirm that is intended.
print(
    f"The maximal Ping time has been {df_ping['max'].max()} ms.\n"
    f"The minimal ping time has been {df_ping['min'].min()} ms. \n"
    f"The mean ping time has been {df_ping['avg'].mean()} ms. "
)

In [None]:
# Rows whose "max" column equals 99999 are counted as issues; the mask is built
# once and reused for both the selection and its complement.
ping_issue_mask = df_ping["max"] == 99999
df_ping_issues = df_ping[ping_issue_mask]
print("There are {} issues in the analysis of the network.".format(len(df_ping_issues)))

# Filter issues from eg. sockets
df_ping = df_ping[~ping_issue_mask]

# Ping Times in ms with extreme outliers

In [None]:
# Marker style for measurements above the acceptable ping bound.
ping_gt_10_opts = opts.Scatter(color="red", size=10)

# Rows whose maximal ping exceeds the acceptable bound; selected once and
# reused for both scatter coordinates.
ping_above_bound = df_ping[df_ping["max"] > upper_acceptable_ping_bound]

# Line through every maximal ping measurement.
all_max_ping_curve = hv.Curve(
    (df_ping["date"], df_ping["max"]), "Date", "Ping in ms"
).opts(title="All Max. Ping Times in ms")

# Red markers on top of the line for the measurements above the bound.
ping_above_bound_markers = hv.Scatter(
    (ping_above_bound["date"], ping_above_bound["max"]), "Date", "Max_Ping_Time"
).opts(ping_gt_10_opts)

fig_ping_times_with_extreme_outlieres = all_max_ping_curve * ping_above_bound_markers

# Save newly generated plot
hv.save(
    fig_ping_times_with_extreme_outlieres,
    os.path.join("webpage", "figures", "fig_ping_times_with_extreme_outliers.html"),
    backend="bokeh",
)
fig_ping_times_with_extreme_outlieres

In [None]:
# Show the rows of df_ping whose "max" value lies outside the 1.5 * IQR fences
# (rendered as two HTML tables: min-outliers and max-outliers).
inspect_outliers(df_ping,df_ping["max"])

# Ping Times in ms without extreme outliers

In [None]:
# Marker style for pings above the acceptable bound but below the 1000 ms cut-off.
ping_gt_10_lt_1000_opts = opts.Scatter(color="red", size=10)

# Both conditions are evaluated against the full frame and combined with `&`.
# Chaining two boolean selections (s[mask_a][mask_b]) would apply a full-length
# mask to an already filtered Series and depend on pandas realigning the key.
ping_below_cutoff = df_ping["max"] < 1000
ping_above_bound = df_ping["max"] > upper_acceptable_ping_bound

df_ping_below_cutoff = df_ping[ping_below_cutoff]
df_ping_highlighted = df_ping[ping_above_bound & ping_below_cutoff]

# Line through all maximal pings under the cut-off, with red markers on those
# that still exceed the acceptable bound. The scatter uses this cell's own
# options object so the cell does not depend on a variable from another cell.
fig_ping_times_without_extreme_outliers = hv.Curve(
    (df_ping_below_cutoff["date"], df_ping_below_cutoff["max"]),
    "Date",
    "Ping in ms",
).opts(title="All Max. Ping Times in ms without extreme outlieres") * hv.Scatter(
    (df_ping_highlighted["date"], df_ping_highlighted["max"]),
    "Date",
    "Max_Ping_Time",
).opts(ping_gt_10_lt_1000_opts)

# Save newly generated plot
hv.save(
    fig_ping_times_without_extreme_outliers,
    os.path.join("webpage", "figures", "fig_ping_times_without_extreme_outliers.html"),
    backend="bokeh",
)
fig_ping_times_without_extreme_outliers

In [None]:
# The latency bound under which network speedtest is performing is defined in the network_test.py
# NOTE(review): the plot title reads "Ping below {} ms" but the value inserted
# is the smallest recorded ping; confirm that min() rather than max() is intended.
pingbound_network_test = df_speed_test["ping"].min()

# NOTE(review): the name says "3k" while the threshold used below is 30000.
downstream_below_3k_opts = opts.Scatter(color="red", size=10)

# Speed tests whose downstream value is under 30000; selected once and reused
# for both scatter coordinates.
slow_downstream_rows = df_speed_test[df_speed_test["downstream"] < 30000]

# Line through every downstream measurement.
downstream_curve = hv.Curve(
    (df_speed_test["date"], df_speed_test["downstream"]), "Date", "Downstream"
).opts(title="Network Speed when Ping below {} ms".format(pingbound_network_test))

# Red markers for the measurements under the threshold.
slow_downstream_markers = hv.Scatter(
    (slow_downstream_rows["date"], slow_downstream_rows["downstream"])
).opts(downstream_below_3k_opts)

# Overlay order: curve, markers, then the horizontal threshold line.
fig_network_speeds_under_upper_bound = (
    downstream_curve * slow_downstream_markers * hv.HLine(30000, label="30K Line")
)

# Save newly generated plot
hv.save(
    fig_network_speeds_under_upper_bound,
    os.path.join("webpage", "figures", "fig_network_speeds_under_upper_bound.html"),
    backend="bokeh",
)
fig_network_speeds_under_upper_bound