In [1]:
import json
import pandas as pd
import seaborn as sb
import numpy as np

In [2]:
def load_json_data(file_path):
    """Read a JSON document from disk and return the parsed object.

    Args:
        file_path: Path of the JSON file to read (anything ``open`` accepts).

    Returns:
        Whatever ``json.load`` produces for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [3]:
def flatten_data(data):
    """Flatten the nested per-site metrics into a list of long-format records.

    Every record is a dict with the keys ``'website'``, ``'metric'`` and
    ``'value'``.

    * Entries of a site's ``'navigationTiming'`` mapping become one record
      each, named ``navigationTiming_<sub_metric>``. Excluded sub-metrics and
      sub-metrics whose value is ``None`` are left out.
    * ``'dnsLookupTimes'`` is dropped entirely.
    * Every other metric is copied through unchanged (including ``None``).

    Args:
        data: Mapping with a ``'sites'`` key that maps each website to a
            mapping of metric name -> value.

    Returns:
        list[dict]: The flattened records, in input iteration order.

    Raises:
        KeyError: If ``data`` has no ``'sites'`` key.
    """
    # NOTE(review): the comparison is case-sensitive and 'domloading' is all
    # lowercase, while the Navigation Timing attribute is spelled
    # 'domLoading' -- confirm which spelling the input files use.
    submetrics_to_disregard = {'domloading', 'navigationStart'}
    flattened_data = []

    for website, metrics in data['sites'].items():
        for metric, value in metrics.items():
            if metric == 'dnsLookupTimes':
                continue

            if metric != 'navigationTiming':
                flattened_data.append({
                    'website': website,
                    'metric': metric,
                    'value': value
                })
                continue

            # A site may carry no timing mapping at all (JSON null); skip it
            # the same way individual None sub-values are skipped below,
            # instead of failing on ``None.items()``.
            if value is None:
                continue

            for sub_metric, sub_value in value.items():
                if sub_value is None or sub_metric in submetrics_to_disregard:
                    continue
                flattened_data.append({
                    'website': website,
                    'metric': f'navigationTiming_{sub_metric}',
                    'value': sub_value
                })

    return flattened_data

In [4]:
def calculate_avg_and_mean(data, w_or_wo_ublock):
    """Print and save the per-metric mean and median of the flattened data.

    Despite the name, the two statistics computed are the mean and the
    median of the ``'value'`` column, grouped by ``'metric'``.

    Args:
        data: Nested metrics mapping, passed straight to ``flatten_data``.
        w_or_wo_ublock: Label inserted into the output file name
            ``stats_<label>.csv`` (written relative to the working directory).

    Returns:
        None. The summary table is printed and written to CSV.
    """
    records = pd.DataFrame(flatten_data(data))
    values_by_metric = records.groupby('metric')['value']
    summary_stats = values_by_metric.agg(['mean', 'median'])

    print(summary_stats)
    summary_stats.to_csv(f'stats_{w_or_wo_ublock}.csv')

In [5]:
# Load the two result files (going by the file names: one run with uBlock,
# one without -- presumably the same crawl; confirm against the data source).
data_w_ublock = load_json_data('../json_files/5000_with_ublock.json')
data_wo_ublock = load_json_data('../json_files/filtered_data_no_ublock.json')

# Summarise each dataset in turn; the label ends up in the CSV file name.
for run_label, run_data in (('w_ublock', data_w_ublock), ('wo_ublock', data_wo_ublock)):
    calculate_avg_and_mean(run_data, run_label)



                                                     mean      median
metric                                                               
estimatedTBT                                 14006.630941   8640.1728
fcp                                          12342.971946   8533.5040
navigationTiming_connectEnd                   7034.457333   4516.7570
navigationTiming_connectStart                 4282.386004   2400.0480
navigationTiming_domComplete                 23336.077890  14400.2880
navigationTiming_domContentLoadedEventEnd    14159.040293   9316.8530
navigationTiming_domContentLoadedEventStart  14134.695349   9300.1860
navigationTiming_domInteractive              12909.753621   8633.5060
navigationTiming_domainLookupEnd              4243.681747   2366.7140
navigationTiming_domainLookupStart            4243.681747   2366.7140
navigationTiming_fetchStart                   4243.681747   2366.7140
navigationTiming_loadEventEnd                23344.384902  14400.2880
navigationTiming_loa