In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Months to visualize, as "YYYY-MM" labels: February of each year from 2019 to 2023.
# 2024-02 is currently excluded; raise the upper bound to 2025 to include it.
dates_to_visualize = [f"{year}-02" for year in range(2019, 2024)]

In [3]:
# Directory with the exported metric files; the stems end in "_<YYYY-MM>".
nel_config_dir = Path("../data/httparchive_metrics/nel_config")

# Path.glob yields entries in filesystem order, which is not guaranteed to be
# the same on every platform. Sorting keeps the listing below, and everything
# derived from it, reproducible.
available_data_files = sorted(nel_config_dir.glob("*.parquet"))

# Keep only the files whose trailing "_<date>" stem component is a selected month.
used_data_files = [
    data_file
    for data_file in available_data_files
    if data_file.stem.split("_")[-1] in dates_to_visualize
]
used_data_files

[WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2023-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2023-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2019-02.parquet'),
 WindowsPath('../dat

### Failure fraction configuration over months


In [4]:
# Select the failure-fraction files: the metric name is everything in the
# file stem before the last underscore (the part after it is the month).
ff_files = [
    data_file
    for data_file in used_data_files
    if data_file.stem.rpartition("_")[0] == 'failure_fraction'
]
ff_files

[WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2023-02.parquet')]

### Aggregate results to visualize

In [5]:
from results.result_utils import concat_data_from_files

# Load all selected failure-fraction files into one frame and store the
# fraction column as pandas' nullable Float32 dtype for the numeric
# comparisons done when binning.
ff_data = concat_data_from_files(ff_files).astype({'nel_failure_fraction': 'Float32'})
ff_data

Unnamed: 0,date,nel_failure_fraction,domain_count,domain_percent
0,2019-02,0.00001,223,60.270270
1,2019-02,0.001,1,0.270270
2,2019-02,0.01,1,0.270270
3,2019-02,0.1,3,0.810811
4,2019-02,0.5,2,0.540541
...,...,...,...,...
14,2023-02,0.3,1,0.000051
15,2023-02,0.9,5,0.000253
16,2023-02,0.1,2,0.000101
17,2023-02,0.0001,1,0.000051


In [7]:
from results.result_utils import get_first_or_0, date_to_text_format

# Failure-fraction values that get a result row of their own. Every pair of
# neighbouring values additionally gets a row for the open interval strictly
# between them, so labels and filters are derived from this single list.
FF_BIN_EDGES = [0.00, 0.01, 0.05, 0.10, 0.25, 0.50, 1.00]


def make_ff_bins(edges):
    """Build the ordered bin specification for the failure-fraction table.

    Args:
        edges: Ascending list of fraction values that each get an exact-match bin.

    Returns:
        A list of ``(label, lower, upper)`` tuples. Exact-match bins have
        ``upper`` set to ``None`` and a label like ``'0.05'``; interval bins
        cover ``lower < value < upper`` and have a label like ``'0.05-0.10'``.
        The order is: exact, interval, exact, interval, ..., last exact.
    """
    bins = []
    for lower, upper in zip(edges, edges[1:]):
        bins.append((f"{lower:.2f}", lower, None))
        bins.append((f"{lower:.2f}-{upper:.2f}", lower, upper))
    bins.append((f"{edges[-1]:.2f}", edges[-1], None))
    return bins


def count_domains_in_bin(month_ff_data, lower, upper=None):
    """Sum ``domain_count`` over the rows of one month that fall into one bin.

    Args:
        month_ff_data: Rows of ``ff_data`` belonging to a single date.
        lower: Exact value to match when ``upper`` is ``None``; otherwise the
            exclusive lower bound of the interval.
        upper: Exclusive upper bound of the interval, or ``None`` for an
            exact-match bin.

    Returns:
        Whatever ``get_first_or_0`` returns for the per-date sum of
        ``domain_count`` over the matching rows.
    """
    fraction = month_ff_data['nel_failure_fraction']
    if upper is None:
        in_bin = fraction == lower
    else:
        in_bin = (fraction > lower) & (fraction < upper)

    # The frame holds a single date, so the grouped sum has at most one entry;
    # get_first_or_0 turns that (possibly empty) result into a scalar.
    per_date_total = month_ff_data[in_bin].groupby(['date'])['domain_count'].sum()
    return get_first_or_0(per_date_total)


ff_bins = make_ff_bins(FF_BIN_EDGES)

# One row per bin; one column per visualized month is added below.
ff_result = pd.DataFrame({}, index=[label for label, _, _ in ff_bins])

for date in dates_to_visualize:
    month_ff_data = ff_data[ff_data['date'] == date]

    # NOTE: rows with a fraction outside [0, 1] match none of the bins and are
    # therefore not counted in any row.
    month_data_col = [
        count_domains_in_bin(month_ff_data, lower, upper)
        for _, lower, upper in ff_bins
    ]

    ff_result[date_to_text_format(date)] = month_data_col

ff_result

Unnamed: 0,Feb 2019,Feb 2020,Feb 2021,Feb 2022,Feb 2023
0.00,0,1,0,0,0
0.00-0.01,224,298,365,461,3093
0.01,1,108277,87,151,149
0.01-0.05,0,0,0,1,6
0.05,0,4,1272,1325,1343
0.05-0.10,0,0,0,0,0
0.10,3,273,185,17991,20931
0.10-0.25,0,2,4,16,78
0.25,0,0,6,6,5
0.25-0.50,0,0,0,0,2
