In [4]:
from pathlib import Path

import pandas as pd

In [3]:
# Months (YYYY-MM) to visualize: every February from 2019 through 2023.
# These strings are matched against the "_<date>" suffix of the data file names.
# 2024-02 is currently excluded; use range(2019, 2025) to include it.
dates_to_visualize = [f"{year}-02" for year in range(2019, 2024)]

In [5]:
# All metric files are parquet files named "<metric>_<YYYY-MM>.parquet".
# Sorted explicitly: the order in which glob() yields directory entries depends on
# the filesystem, and the downstream cells should not depend on that.
available_data_files = sorted(Path("../data/httparchive_metrics/nel_config").glob("*.parquet"))

# Keep only the files whose trailing "_<date>" stem component is a selected month.
used_data_files = [
    file
    for file in available_data_files
    if file.stem.rsplit("_", 1)[-1] in dates_to_visualize
]
used_data_files

[WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/failure_fraction_2023-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/include_subdomains_2023-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2019-02.parquet'),
 WindowsPath('../dat

### Max age configuration over months


In [6]:
# Pick out the max_age files: everything in the stem before the last "_"
# (i.e. the metric name without the "_<date>" suffix) must equal 'max_age'.
# rpartition yields an empty metric name when the stem has no "_" at all,
# instead of silently chopping off the stem's last character.
ma_files = [file for file in used_data_files if file.stem.rpartition("_")[0] == 'max_age']
ma_files

[WindowsPath('../data/httparchive_metrics/nel_config/max_age_2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_config/max_age_2023-02.parquet')]

### Aggregate results to visualize

In [20]:
from results.result_utils import concat_data_from_files

# One frame covering all selected months
# (columns shown below: date, nel_max_age, domain_count, domain_percent).
ma_data = concat_data_from_files(ma_files)

# max_age is expressed in seconds and is later compared for exact equality with
# landmark values such as 31536000 (365d) and 63072000 (730d). Float32 has a
# 24-bit significand, so integers above 2**24 (16,777,216) are not all representable
# and a neighbouring value could be rounded onto a landmark. Float64 keeps every
# integer up to 2**53 exact while still being a nullable float dtype.
ma_data['nel_max_age'] = ma_data['nel_max_age'].astype('Float64')

ma_data

Unnamed: 0,date,nel_max_age,domain_count,domain_percent
0,2019-02,0.0,1,0.270270
1,2019-02,300.0,2,0.540541
2,2019-02,360.0,1,0.270270
3,2019-02,3600.0,228,61.621622
4,2019-02,86400.0,5,1.351351
...,...,...,...,...
30,2023-02,10886400.0,24,0.001214
31,2023-02,31536000.0,838,0.042379
32,2023-02,31556952.0,4,0.000202
33,2023-02,31557600.0,3,0.000152


In [23]:
from results.result_utils import get_first_or_0, date_to_text_format

# Landmark max_age values (label, seconds). Each landmark gets its own row for
# exact matches, and each pair of neighbouring landmarks gets a row for the open
# interval strictly between them.
max_age_landmarks = [
    ('0', 0),
    ('1h', 3600),
    ('1d', 86400),
    ('7d', 604800),
    ('30d', 2592000),
    ('365d', 31536000),
    ('730d', 63072000),
]

# Expand the landmarks into the ordered bucket list (label, lower, upper):
#   '0', '0-1h', '1h', '1h-1d', '1d', ..., '365d-730d', '730d'
# lower == upper marks an exact-match bucket; otherwise the bucket is the open
# interval (lower, upper).
# NOTE: values below 0 or above 730d fall into no bucket and are not counted.
max_age_buckets = []
previous_landmark = None
for label, seconds in max_age_landmarks:
    if previous_landmark is not None:
        previous_label, previous_seconds = previous_landmark
        max_age_buckets.append((f'{previous_label}-{label}', previous_seconds, seconds))
    max_age_buckets.append((label, seconds, seconds))
    previous_landmark = (label, seconds)

# One row per bucket; one column per visualized month is added below.
ma_result = pd.DataFrame({}, index=[label for label, _, _ in max_age_buckets])

for date in dates_to_visualize:
    month_ma_data = ma_data[ma_data['date'] == date].copy()
    month_max_age = month_ma_data['nel_max_age']

    month_data_col = []
    for _, lower, upper in max_age_buckets:
        if lower == upper:
            in_bucket = month_max_age == lower
        else:
            in_bucket = (month_max_age > lower) & (month_max_age < upper)

        # Total domain count of the bucket for this month; the grouped sum is empty
        # when no row matches, which get_first_or_0 is used to turn into a number.
        next_val = month_ma_data[in_bucket].groupby(['date'])['domain_count'].sum()
        month_data_col.append(get_first_or_0(next_val))

    ma_result[date_to_text_format(date)] = month_data_col

ma_result

Unnamed: 0,Feb 2019,Feb 2020,Feb 2021,Feb 2022,Feb 2023
0,1,10,9,6,6
0-1h,3,45,229,348,3112
1h,228,302,1575,2107,3221
1h-1d,0,333,0,106,6463
1d,5,52,1524,1567,2510
1d-7d,1,6,9,27,78
7d,9,74,781595,949224,1932957
7d-30d,2,105,119,138,138
30d,30,108417,222443,18076,28002
30d-365d,4,10,118,21,29
