In [1]:
import os
import csv

outputs_dir = '../speedtest/outputs'


def load_speedtest_outputs(directory):
    """Merge every CSV file in `directory` into one column-oriented dict.

    Each file's first row is treated as its header. The result maps each
    header name to a list of string cells, one entry per data row across all
    files. All lists always have the same length: a cell is '' when the
    row's file does not have that column or the row is shorter than its
    header.

    Parameters
    ----------
    directory : str
        Directory whose regular files are read as CSV.

    Returns
    -------
    dict[str, list[str]]
        Column name -> cell values, in the order the rows were read.
    """
    columns = {}
    n_rows = 0
    # Sort so row and column order do not depend on the arbitrary order
    # in which os.listdir returns entries.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        # Skip sub-directories (e.g. .ipynb_checkpoints); open() would fail.
        if not os.path.isfile(path):
            continue
        # newline='' is what the csv module asks for on files it reads.
        with open(path, 'r', newline='') as csvfile:
            reader = csv.reader(csvfile)
            headers = next(reader, None)
            if not headers:
                # Empty file: there is no header row to align against.
                continue
            # Back-fill columns seen for the first time so they line up
            # with the rows already collected from earlier files.
            for header in headers:
                if header not in columns:
                    columns[header] = [''] * n_rows
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; not a data row.
                    continue
                cells = dict(zip(headers, row))
                # Append exactly one cell to every column so the lists
                # stay the same length even for short rows.
                for key, column in columns.items():
                    column.append(cells.get(key, ''))
                n_rows += 1
    return columns


speedtest_data = load_speedtest_outputs(outputs_dir)

In [2]:
# Preview the first five collected values of every column.
for column_name, column_values in speedtest_data.items():
    print((column_name, column_values[:5]))

In [3]:
import numpy as np
import pandas as pd
from dateutil.parser import parse as dateparser
from datetime import timedelta

# Turn the column-oriented dict into a row-oriented 2-D array: one row per
# measurement, one column per header name.
names = list(speedtest_data.keys())
values = np.array([speedtest_data[key] for key in names]).transpose()

df = pd.DataFrame(values, columns=names, index=range(len(values)))

# Every cell is still a string here. The loading cell pads missing columns
# with '', and float('') raises ValueError, so use pd.to_numeric with
# errors='coerce': unparseable cells become NaN instead of aborting the cell.
# astype(float) keeps the dtype float even when every value is integral.
for numeric_column in ('download_bits', 'upload_bits', 'latency_ms'):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce').astype(float)

def add_date_info_to_df(df):
    """Add parsed date/time columns to `df` in place.

    Reads the 'time' column, parses each value with `dateparser`, and
    writes these columns:

    - 'datetime': the parsed value.
    - 'datetime_by_hour': 'datetime' with minute, second and microsecond
      set to zero.
    - 'date': the date part of 'datetime'.
    - 'time': the time-of-day part of 'datetime'.

    Note that 'time' is overwritten: afterwards it holds time objects, not
    the original text, so this is meant to be called once per freshly built
    frame.

    Returns None; the frame is modified in place.
    """
    df['datetime'] = df['time'].apply(dateparser)
    df['datetime_by_hour'] = df['datetime'].apply(lambda t: t.replace(minute=0, second=0, microsecond=0))
    df['date'] = df['datetime'].apply(lambda t: t.date())
    # Must come last: it replaces the column that 'datetime' was parsed from.
    df['time'] = df['datetime'].apply(lambda t: t.time())

# Mutates df in place: adds 'datetime', 'datetime_by_hour' and 'date', and
# replaces 'time' with the time-of-day of the parsed value.
add_date_info_to_df(df)

In [4]:
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [5]:
def _hourly_extreme(frame, column, how):
    """Aggregate `column` per 'datetime_by_hour' bucket.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'datetime_by_hour' and `column`. Not modified.
    column : str
        Name of the column to aggregate.
    how : str
        Aggregation name passed to GroupBy.agg, e.g. 'max' or 'min'.

    Returns
    -------
    (per_hour, per_row)
        per_hour is a Series indexed by 'datetime_by_hour' with one value
        per bucket. per_row is a two-column frame ('datetime_by_hour',
        `column`) with one row per row of `frame`, each carrying its
        bucket's aggregated value.
    """
    per_hour = frame.groupby(['datetime_by_hour'])[column].agg(how)
    per_row = (
        frame[['datetime_by_hour']]
        .set_index(['datetime_by_hour'])
        # assign aligns per_hour on the index, broadcasting each bucket's
        # value to every row that falls in that bucket.
        .assign(**{column: per_hour})
        .reset_index()
    )
    return per_hour, per_row


# Best (highest) download and upload throughput and best (lowest) latency
# observed in each hour.
max_download_bits_by_hour, df_max_download_bits_by_hour = _hourly_extreme(df, 'download_bits', 'max')
max_upload_bits_by_hour, df_max_upload_bits_by_hour = _hourly_extreme(df, 'upload_bits', 'max')
min_latency_ms_by_hour, df_min_latency_ms_by_hour = _hourly_extreme(df, 'latency_ms', 'min')

In [10]:
import math

def produce_plot_with_datetime_by_hour(df, y_axis, days, y_axis_label=None, y_multiplier=1):
    """Scatter-plot one column of `df` against time, with one x tick per day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'datetime_by_hour' column whose values expose
        `.timestamp()`, and the `y_axis` column. A copy is plotted; the
        argument is not modified.
    y_axis : str
        Name of the column to plot on the y axis.
    days : int
        Daily x ticks are drawn at local midnight from `days` days ago
        through today. This only controls the ticks, not which rows are
        plotted.
    y_axis_label : str, optional
        Label for the y axis. Defaults to `y_axis`.
    y_multiplier : number, optional
        Factor applied to the values before plotting (e.g. 1 / 1e6 to show
        bits as megabits).

    Returns None; the figure is shown with plt.show().
    """
    # Without a label the plotted column would be named None.
    if y_axis_label is None:
        y_axis_label = y_axis

    df_ = df.copy()
    df_['Date / Time'] = df_['datetime_by_hour'].apply(lambda d: d.timestamp())
    # Scale the data itself rather than rewriting tick labels afterwards:
    # labels set with set_yticklabels are fixed text and go stale as soon as
    # the tick locations change.
    df_[y_axis_label] = df_[y_axis] * y_multiplier

    fig, ax = plt.subplots(figsize=(16,9))
    df_.plot(x='Date / Time', y=y_axis_label, kind='scatter', ax=ax)

    # Zero the microseconds too so the tick sits exactly on midnight.
    midnight_today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    # Step with calendar days instead of a fixed 86400 seconds so every tick
    # stays on local midnight across daylight-saving changes.
    tick_days = [midnight_today - timedelta(days=back) for back in range(days, -1, -1)]
    ax.set_xticks([day.timestamp() for day in tick_days])
    ax.set_xticklabels([day.strftime("%Y-%m-%d") for day in tick_days])
    plt.show()

In [11]:
# Download speed over time: hourly maximum, ticks for the last 3 days,
# bits converted to megabits for display.
produce_plot_with_datetime_by_hour(
    df=df_max_download_bits_by_hour,
    y_axis='download_bits',
    days=3,
    y_axis_label='Download Speed (Mb/s)',
    y_multiplier=1 / 1e6,
)

In [12]:
# Upload speed over time: hourly maximum, ticks for the last 3 days,
# bits converted to megabits for display.
produce_plot_with_datetime_by_hour(
    df=df_max_upload_bits_by_hour,
    y_axis='upload_bits',
    days=3,
    y_axis_label='Upload Speed (Mb/s)',
    y_multiplier=1 / 1e6,
)

In [13]:
# Latency over time: hourly minimum, ticks for the last 3 days, values
# shown unscaled in milliseconds.
produce_plot_with_datetime_by_hour(
    df=df_min_latency_ms_by_hour,
    y_axis='latency_ms',
    days=3,
    y_axis_label='Latency (ms)',
    y_multiplier=1,
)