In [1]:
import pandas as pd
import sys
import os
import seaborn as sns
import holoviews as hv
from IPython.core.display import display, HTML
from holoviews import opts, dim, Palette

import configparser
# Load the analysis thresholds from config_a.ini in the working directory.
# ConfigParser.read() silently skips a file it cannot find, so a missing
# config_a.ini only shows up as a KeyError in the look-ups below.
config = configparser.ConfigParser()
config.read('config_a.ini')

# The thresholds are read from the DEFAULT section; configparser hands them
# out as text, so each one is converted to float.
default_section = config['DEFAULT']
upper_acceptable_ping_bound, upper_ping_issue_bound, acceptable_network_speed = (
    float(default_section[option])
    for option in (
        'upper_acceptable_ping_bound',
        'upper_ping_issue_bound',
        'acceptable_network_speed',
    )
)

# NOTE: an error was observed at this point after an unrelated typo at the end
# of the file had been fixed; the cause has not been tracked down yet.
# Activate the bokeh plotting backend for holoviews.
hv.extension('bokeh')

# Default plot options per element type.
# NOTE(review): the Scatter default sizes points by a 'growth' dimension that
# none of the data frames shown in this notebook contain; the Scatter plots
# below override size explicitly. Confirm whether this default is still wanted.
opts.defaults(
    opts.Bars(
        xrotation=45,
        tools=['hover'],
    ),
    opts.BoxWhisker(
        width=700,
        xrotation=30,
        box_fill_color=Palette('Category20'),
    ),
    opts.Curve(
        width=700,
        tools=['hover'],
    ),
    opts.GridSpace(
        shared_yaxis=True,
    ),
    opts.Scatter(
        width=700,
        height=500,
        color=Palette('Category20'),
        size=dim('growth')+5,
        tools=['hover'],
        alpha=0.5,
        cmap='Set1',
    ),
    opts.NdOverlay(
        legend_position='left',
    ),
)


# Make sure the output directory for the exported diagrams exists.
# os.makedirs also creates a missing "webpage" parent directory, which a plain
# os.mkdir would reject with FileNotFoundError.
figures_path = "./webpage/figures"
if not os.path.isdir(figures_path):
    os.makedirs(figures_path, exist_ok=True)
    print("Path 'figures' created successfully")
else:
    print("Path 'figures' initialized")

def numOutlierCount(attribute):
    """Count the IQR outliers of a pandas Series and print a short report.

    A value counts as an outlier when it lies more than 1.5 times the
    interquartile range above the 75 % quantile or below the 25 % quantile.

    Args:
        attribute: pandas Series with the values to inspect.

    Returns:
        The absolute number of outliers (high and low side combined).
    """
    lower_quartile = attribute.quantile(0.25)
    upper_quartile = attribute.quantile(0.75)
    spread = upper_quartile - lower_quartile

    above_fence = attribute > upper_quartile + 1.5 * spread
    below_fence = attribute < lower_quartile - 1.5 * spread
    outliers_total = attribute[above_fence].count() + attribute[below_fence].count()

    # Share of outliers among the non-null values; printed as a fraction,
    # it is not multiplied by 100.
    outlier_share = outliers_total / attribute.count()

    print("Ausreißer Absolut: " + str(outliers_total))
    print("Anzahl Ausreißer prozentual: " + str(outlier_share))
    return outliers_total
def inspect_outliers(df, attribute):
    """Display the rows of ``df`` whose ``attribute`` value is an IQR outlier.

    Two HTML tables are rendered in the notebook: first the rows below
    ``q25 - 1.5 * IQR`` ("Min-Outliers"), then the rows above
    ``q75 + 1.5 * IQR`` ("Max-Outliers").

    Args:
        df: DataFrame whose rows are shown.
        attribute: Series used to decide which rows are outliers; it is used
            as a boolean mask on ``df``.

    Returns:
        None. The tables are only displayed.
    """
    first_quartile = attribute.quantile(0.25)
    third_quartile = attribute.quantile(0.75)
    iqr = third_quartile - first_quartile
    lower_fence = first_quartile - iqr * 1.5
    upper_fence = third_quartile + iqr * 1.5

    # Heading markup paired with the rows that belong under it.
    sections = (
        ("</br><h2>Min-Outliers</h2>", df[attribute < lower_fence]),
        ("<hr></br><h2>Max-Outliers</h2>", df[attribute > upper_fence]),
    )
    for heading, rows in sections:
        display(HTML(heading))
        display(HTML(rows.to_html()))
    
# Data Import
# Both CSV files use their first column as row index. Only a missing file is
# reported with the "perform network-test first" hint; any other problem
# (e.g. an unexpected date format) is raised with its real traceback instead of
# being hidden behind that message.
try:
    df_ping = pd.read_csv("Data/ping_test.csv", index_col=0)
    df_speed_test = pd.read_csv("Data/speed_test.csv", index_col=0)
except FileNotFoundError as missing_file:
    print("Error while searching for files. Please perform network-test first.")
    print("Missing file: {}".format(missing_file.filename))
    # Non-zero exit status so that automated runs notice the failure.
    sys.exit(1)

# The "date" column is stored as day.month.year hour:minute:second text.
df_ping["date"] = pd.to_datetime(df_ping["date"], format="%d.%m.%Y %H:%M:%S")
df_speed_test["date"] = pd.to_datetime(df_speed_test["date"], format="%d.%m.%Y %H:%M:%S")

Path 'figures' initialized


# Basic Data Wrangling

In [2]:
# Show the dimensions of the ping data and its first rows as a sanity check.
print(df_ping.shape)
df_ping.head()


(19200, 5)


Unnamed: 0,date,min,max,avg,url
0,2019-08-29 17:29:15,4.77,5.0,4.9,www.google.com
1,2019-08-29 17:29:45,4.89,5.02,4.96,www.google.com
2,2019-08-29 17:30:15,4.68,6.6,5.29,www.google.com
3,2019-08-29 17:30:45,4.92,5.06,4.97,www.google.com
4,2019-08-29 17:31:15,4.69,5.07,4.79,www.google.com


In [3]:
# Show the dimensions of the speed test data and its first rows as a sanity check.
print(df_speed_test.shape)
df_speed_test.head()

(127, 8)


Unnamed: 0,address,date,downstream,ping,serverState,sponsor,upstream,your_isp
0,Germany,2019-09-28 12:26:30,57025.071289,8,,LeaseWeb,9320.640625,Deutsche Telekom AG
1,Germany,2019-09-28 12:36:24,44199.220703,8,,Gemnet LLC,9027.516602,Deutsche Telekom AG
2,Germany,2019-09-28 12:50:50,56974.939453,8,,LeaseWeb,9215.407227,Deutsche Telekom AG
3,Germany,2019-09-28 12:52:17,56734.995117,7,,LeaseWeb,10075.504883,Deutsche Telekom AG
4,Germany,2019-09-28 13:53:43,57052.379883,11,,LeaseWeb,10503.844727,Deutsche Telekom AG


In [4]:
# Measurements whose ping equals the configured issue bound are treated as
# failed measurements: report how many there are, then drop them.
ping_is_issue = df_ping["max"] == upper_ping_issue_bound
df_ping_issues = df_ping[ping_is_issue]
print("There are {} issues in the analysis of the ping.".format(len(df_ping_issues)))

speed_is_issue = df_speed_test["ping"] == upper_ping_issue_bound
df_speed_test_issues = df_speed_test[speed_is_issue]
print("There are {} issues in the analysis of the network speed.".format(len(df_speed_test_issues)))

# Filter issues from eg. sockets: keep only the rows that did not hit the bound
df_ping = df_ping[~ping_is_issue]
df_speed_test = df_speed_test[~speed_is_issue]

There are 0 issues in the analysis of the ping.
There are 0 issues in the analysis of the network speed.


In [5]:
# Largest ping value recorded in the speed test data.
df_speed_test["ping"].max()

90

In [6]:
# Summarise the ping data: largest "max", smallest "min" and the mean of "avg".
highest_ping = df_ping["max"].max()
lowest_ping = df_ping["min"].min()
mean_ping = df_ping["avg"].mean()
print(
    "The maximal Ping time has been {} ms.\nThe minimal ping time has been {} ms. \nThe mean ping time has been {} ms. "
    .format(highest_ping, lowest_ping, mean_ping))

The maximal Ping time has been 2000.0 ms.
The minimal ping time has been 3.54 ms. 
The mean ping time has been 5.948969270833327 ms. 


# Ping Times in ms with extreme outliers

In [7]:
# Curve of every measured maximum ping over time.
fig_all_max_ping = hv.Curve(
    (df_ping["date"], df_ping["max"]),
    "Date",
    "Ping in ms",
    label="All messured pings",
)

# Red markers for the measurements above the acceptable ping bound.
ping_over_bound = df_ping["max"] > upper_acceptable_ping_bound
highlight_label = "Highlight pings over {} ms".format(str(upper_acceptable_ping_bound))
fig_dot_over_upper_bound = hv.Scatter(
    (df_ping["date"][ping_over_bound], df_ping["max"][ping_over_bound]),
    "Date",
    "Max_Ping_Time",
    label=highlight_label,
).opts(opts.Scatter(color="red", size=10))

# Overlay of both elements.
fig_ping_times_with_extreme_outliers = (
    fig_all_max_ping * fig_dot_over_upper_bound
).opts(
    legend_position="top_left",
    title="All Max. Ping Times in ms",
    padding=0.05,
)

# Save the newly generated plot as a standalone HTML file for the webpage.
outliers_plot_file = os.path.join(
    "webpage", "figures", "fig_ping_times_with_extreme_outliers.html")
hv.save(fig_ping_times_with_extreme_outliers, outliers_plot_file, backend='bokeh')
fig_ping_times_with_extreme_outliers

In [8]:
# Show the rows of the ping data whose "max" value is an IQR outlier.
inspect_outliers(df_ping,df_ping["max"])

Unnamed: 0,date,min,max,avg,url
37,2019-08-29 17:47:46,4.7,4.81,4.78,www.google.com
46,2019-08-29 17:52:16,4.76,4.83,4.8,www.google.com
122,2019-08-29 18:30:18,4.65,4.83,4.75,www.google.com
123,2019-08-29 18:30:48,4.56,4.79,4.72,www.google.com
218,2019-08-29 19:18:20,4.74,4.8,4.77,www.google.com
262,2019-08-29 19:40:21,4.69,4.77,4.73,www.google.com
306,2019-08-29 20:02:22,4.66,4.8,4.76,www.google.com
359,2019-08-29 20:28:54,4.66,4.75,4.7,www.google.com
391,2019-08-29 20:44:55,4.72,4.81,4.77,www.google.com
396,2019-08-29 20:47:25,4.64,4.78,4.72,www.google.com


Unnamed: 0,date,min,max,avg,url
2,2019-08-29 17:30:15,4.68,6.6,5.29,www.google.com
101,2019-08-29 18:19:47,4.57,10.66,6.62,www.google.com
124,2019-08-29 18:31:18,4.64,8.61,5.71,www.google.com
167,2019-08-29 18:52:49,19.91,57.01,36.2,www.google.com
230,2019-08-29 19:24:21,4.62,11.99,6.5,www.google.com
264,2019-08-29 19:41:21,4.64,7.19,5.39,www.google.com
279,2019-08-29 19:48:52,4.85,6.66,5.39,www.google.com
355,2019-08-29 20:26:54,4.94,8.49,6.96,www.google.com
358,2019-08-29 20:28:24,4.85,7.93,5.66,www.google.com
426,2019-08-30 11:59:50,26.95,28.36,27.45,www.google.com


# Ping Times in ms without extreme outliers

In [9]:
# Pings at or above this value (in ms) count as extreme outliers and are left
# out of this plot.
extreme_ping_bound = 1000

# Build each mask once and combine them with "&". Chaining two boolean look-ups
# would apply a mask built on the full frame to an already filtered Series.
below_extreme = df_ping["max"] < extreme_ping_bound
highlighted = (df_ping["max"] > upper_acceptable_ping_bound) & below_extreme

# Curve of all maximum pings below the extreme-outlier bound.
fig_ping_without_extreme_outliers = hv.Curve(
    (df_ping.loc[below_extreme, "date"], df_ping.loc[below_extreme, "max"]),
    "Date",
    "Ping in ms",
    label="All ping times less then {} ms".format(extreme_ping_bound))

# Red markers for pings above the acceptable bound but below the extreme bound.
fig_ping_highlight_max = hv.Scatter(
    (df_ping.loc[highlighted, "date"], df_ping.loc[highlighted, "max"]),
    "Date",
    "Max_Ping_Time",
    label="Highlight pings over {} ms".format(str(upper_acceptable_ping_bound))
).opts(color="red", size=10)

fig_ping_times_without_extreme_outliers = (
    fig_ping_without_extreme_outliers * fig_ping_highlight_max
).opts(
    title="All Max. Ping Times in ms without extreme outlieres",
    legend_position="top_left",
    padding=0.05)

# Save the newly generated plot as a standalone HTML file for the webpage.
hv.save(fig_ping_times_without_extreme_outliers,
        os.path.join("webpage", "figures",
                     "fig_ping_times_without_extreme_outliers.html"),
        backend='bokeh')
fig_ping_times_without_extreme_outliers

In [17]:
# The latency bound under which network speedtest is performing is defined in the network_test.py

# NOTE(review): the smallest measured ping is used as the "ping below ..." bound
# in the labels; confirm whether the bound from network_test.py was intended.
pingbound_network_test = df_speed_test["ping"].min()

# Bring downstream and upstream to the same unit as acceptable_network_speed
# (labelled mbit/s) before comparing or plotting them. Previously only the
# downstream curve was divided by 1000, while the highlight threshold, the
# highlighted points and the upstream curve used the raw values.
# NOTE(review): assumes the CSV stores kbit/s - confirm against network_test.py.
downstream_mbit = df_speed_test["downstream"] / 1000
upstream_mbit = df_speed_test["upstream"] / 1000
below_acceptable_speed = downstream_mbit < acceptable_network_speed

# Downstream speed over time.
fig_network_speed_below_pingbound = hv.Curve(
    (df_speed_test["date"], downstream_mbit),
    "Date",
    "Network Speed",
    label="Messured downlink speed when ping below {} ms".format(
        str(pingbound_network_test)))

# Red markers for downstream measurements below the acceptable speed.
# The label reports the speed threshold (it used to print the ping bound).
fig_highlight_below_acceptable_network_speed = hv.Scatter(
    (df_speed_test["date"][below_acceptable_speed],
     downstream_mbit[below_acceptable_speed]),
    "Date",
    "Network Speed",
    label="Highlight downstream speed below {} mbit/s".format(
        str(acceptable_network_speed))).opts(color="red", size=10)

# Horizontal reference line at the acceptable speed.
fig_horizontal_marker = hv.HLine(
    acceptable_network_speed,
    label="Acceptable network speed at {} mbit/s".format(
        str(acceptable_network_speed))).opts(color="black")

# Upstream speed over time.
fig_upstream_below_ping_bound = hv.Curve(
    (df_speed_test["date"], upstream_mbit),
    "Date",
    "Network Speed",
    label="Messured uplink when ping below {} ms".format(
        str(pingbound_network_test))).opts(color="purple")

fig_network_speeds_under_upper_bound = (
    fig_network_speed_below_pingbound *
    fig_highlight_below_acceptable_network_speed *
    fig_upstream_below_ping_bound *
    fig_horizontal_marker
).opts(
    title="Network Speed when Ping below {} ms".format(pingbound_network_test),
    legend_position="top_left",
    padding=0.05)

# Save the newly generated plot as a standalone HTML file for the webpage.
hv.save(fig_network_speeds_under_upper_bound,
        os.path.join("webpage", "figures",
                     "fig_network_speeds_under_upper_bound.html"),
        backend='bokeh')
fig_network_speeds_under_upper_bound

In [11]:
# Bin the average ping into ten quantile-based bins. assign() returns a new
# frame, so the column is not written into a frame that is itself a filtered
# slice (which makes pandas emit SettingWithCopyWarning).
df_ping = df_ping.assign(qcut=pd.qcut(df_ping["avg"], 10))

In [12]:
# Count the "avg" values that fall into each "qcut" bin.
df_ping.groupby("qcut").agg({"avg":["count"]}).reset_index()

Unnamed: 0_level_0,qcut,avg
Unnamed: 0_level_1,Unnamed: 1_level_1,count
0,"(4.699, 5.28]",1934
1,"(5.28, 5.34]",2136
2,"(5.34, 5.37]",2125
3,"(5.37, 5.4]",1861
4,"(5.4, 5.44]",1899
5,"(5.44, 5.49]",1664
6,"(5.49, 5.61]",1879
7,"(5.61, 5.77]",1873
8,"(5.77, 5.93]",2008
9,"(5.93, 537.42]",1821


In [13]:
# Test