### Read data from influxdb

In [None]:
from data_exploration import *

In [None]:
# Connect to InfluxDB. The helper returns two handles; `client_df` is the one
# handed to get_all_data() in the next cell (presumably a DataFrame-oriented
# client -- confirm in data_exploration).
client, client_df = connect_to_influxdb()

In [None]:
# Load the measurements into the notebook-wide frame `df`. The widget option
# lists and both button callbacks below read from this global.
df = get_all_data(client_df)

### Calculate summary statistics by time period for a selected device

In [None]:
def show_data(ev):
    """Button callback: plot and summarise results for the selected device.

    Clears the cell output, redraws the control row, then for each test type
    ("iperf" and "speedtest") filters the global ``df`` by the selected
    device, measurement type and time period. Non-empty selections are added
    to an overlaid percentage histogram and their summary table is displayed.
    For UPLOAD/DOWNLOAD measurements a dotted red line marks the configured
    limit and a t-test column is filled in when the sample is larger than
    ``test_size``.

    Args:
        ev: The click event passed by the button; not used.
    """
    clear_output(wait=True)
    display(Box(children=[device_name1, measurement_type1, time_period1, show_button1]))

    # Use this section's own time-period dropdown. Reading the map section's
    # `time_period2` here ignored the user's choice and failed with NameError
    # when the map widgets cell had not been executed yet.
    subset = df.copy()
    if time_period1.value == "1month":
        subset = get_offset_date(subset, 1)
    elif time_period1.value == "6months":
        subset = get_offset_date(subset, 6)

    measurement = measurement_type1.value
    fig = go.Figure()
    for test_type in ["iperf", "speedtest"]:
        df1 = subset[(subset["test_type"] == test_type)
                     & (subset["SK_PI"] == device_name1.value)
                     & (subset["MES_TYPE"] == measurement)]
        stat = pd.DataFrame()
        if not df1.empty:
            fig.add_trace(go.Histogram(x=df1["result"], histnorm='percent',
                                       name=measurement + " " + test_type,
                                       marker_color=colors_iperf_speedtest[test_type],
                                       xbins=dict(size=10)))
            fig.update_layout(xaxis_title_text='Mbps')
            if measurement in ["UPLOAD", "DOWNLOAD"]:
                threshold = limits[measurement]
                # Vertical marker at the limit; y spans 0-100 because the
                # histogram is normalised to percent.
                fig.add_shape(go.layout.Shape(type="line", x0=threshold, y0=0,
                                              x1=threshold, y1=100,
                                              name=str(threshold) + "Mbps",
                                              line=dict(color="red", dash="dot")))
                stat = summary_stat(df1, threshold)
                stat["t-test"] = "N/A"
                # Only rows with more than `test_size` samples get a t-test;
                # the rest keep the "N/A" placeholder.
                devices = stat[(stat["size"] > test_size)]["SK_PI"]
                for device in devices:
                    # df1 is already restricted to the selected device.
                    stat.loc[stat["SK_PI"] == device, "t-test"] = get_ttest_device(df1["result"], threshold)
            else:
                stat = summary_stat(df1)
                # Other measurement types override the default Mbps axis label.
                fig.update_layout(xaxis_title_text='Miliseconds')
        if not stat.empty:
            print(test_type + " summary:")
            display(stat)

    # Overlay the two test types semi-transparently so both stay visible.
    fig.update_layout(barmode='overlay')
    fig.update_traces(opacity=0.5)
    fig.update_layout(title_text='Data distribution - ' + measurement,
                      yaxis_title_text='Percentage of tests')
    fig.show()

In [None]:
# Controls for the per-device view.

# Device selector: the distinct SK_PI values found in the data, sorted.
device_name1 = widgets.Dropdown(
    options=df['SK_PI'].sort_values().unique(),
    description='Device number: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Measurement selector: the distinct MES_TYPE values, in order of appearance.
measurement_type1 = widgets.Dropdown(
    options=df['MES_TYPE'].unique(),
    description='Measurement type: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Time window selector; show_data handles '1month' and '6months' specially.
time_period1 = widgets.Dropdown(
    options=['1month', '6months', 'all'],
    description='Time period: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Clicking the button runs show_data, which redraws this cell's output.
show_button1 = widgets.Button(description="Show Data", button_style='info')
show_button1.on_click(show_data)

display(
    Box(children=[device_name1, measurement_type1, time_period1, show_button1])
)

### Calculate summary statistics by time period for all devices and draw a map

In [None]:
## helper function
def show_map(ev):
    """Button callback: tabulate per-device statistics and plot them on a map.

    Clears the cell output and redraws the control row, filters the global
    ``df`` by the selected time period, test type and measurement type, and
    displays the resulting summary table with friendlier column names. The
    summary is then joined with the device coordinates file (when it exists)
    and drawn as a map coloured by the chosen statistic.

    Args:
        ev: The click event passed by the button; not used.
    """
    clear_output(wait=True)
    display(Box(children=[measurement_type2, test_type2, color_by2, time_period2, show_button2]))

    measurement = measurement_type2.value
    test_kind = test_type2.value

    # Map the dropdown label to a month offset; 'all' has no entry, so the
    # data is left unrestricted.
    months_back = {"1month": 1, "6months": 6}.get(time_period2.value)
    data = df.copy()
    if months_back is not None:
        data = get_offset_date(data, months_back)

    wanted = (data["test_type"] == test_kind) & (data["MES_TYPE"] == measurement)
    data = data[wanted]

    if measurement not in ["UPLOAD", "DOWNLOAD"]:
        stat = summary_stat(data)
    else:
        threshold = limits[measurement]
        stat = summary_stat(data, threshold)
        stat["t-test"] = "N/A"
        # Only devices with more than `test_size` samples get a t-test.
        big_enough = stat.loc[stat["size"] > test_size, "SK_PI"]
        for device in big_enough:
            device_results = data.loc[data["SK_PI"] == device, "result"]
            stat.loc[stat["SK_PI"] == device, "t-test"] = get_ttest_device(device_results, threshold)

    # The displayed table is a renamed copy; `stat` keeps its original
    # column names for the merge below.
    shown = stat.rename(columns={
        'SK_PI': 'device_number',
        'size': 'number_of_tests',
        'MES_TYPE': 'measurement',
    })
    shown["test_type"] = test_kind
    display(shown)

    # Without a coordinates file, fall back to an empty frame with the
    # expected columns so the inner merge yields no rows.
    if path.exists(coordinates_path):
        coords = pd.read_csv(coordinates_path)
    else:
        coords = pd.DataFrame(columns=["device_number", "name", "lon", "lat"])
    located = pd.merge(stat, coords, left_on="SK_PI", right_on="device_number", how="inner")

    title = 'Devices colored by ' + color_by2.value + ' ' + measurement
    iplot(get_fig_map(located, color_by2.value, title), validate=False)

In [None]:
# Controls for the all-devices map view.

# Measurement selector: the distinct MES_TYPE values, in order of appearance.
measurement_type2 = widgets.Dropdown(
    options=df['MES_TYPE'].unique(),
    description='Measurement type: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Test-type selector: the distinct test_type values found in the data.
test_type2 = widgets.Dropdown(
    options=df['test_type'].unique(),
    description='Test type: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Statistic that show_map passes to get_fig_map for colouring the map.
color_by2 = widgets.Dropdown(
    options=["mean", "median", "std", "min", "max"],
    description='Color by: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Time window selector; show_map handles '1month' and '6months' specially.
time_period2 = widgets.Dropdown(
    options=['1month', '6months', 'all'],
    description='Time period: ',
    style={'description_width': 'initial'},
    disabled=False,
)

# Clicking the button runs show_map, which redraws this cell's output.
show_button2 = widgets.Button(description="Show Data", button_style='info')
show_button2.on_click(show_map)

display(
    Box(children=[measurement_type2, test_type2, color_by2, time_period2, show_button2])
)