In [7]:
from pathlib import Path

import pandas as pd

In [8]:
pd.options.display.float_format = '{:.2f}'.format

In [9]:
dates_to_visualize = [
    "2019-02",
    "2020-02",
    "2021-02",
    "2022-02",
    "2023-02",
    # "2024-02",
]

In [10]:
available_data_files = list(Path("../data/httparchive_metrics/nel_collector_provider_usage").glob("*.parquet"))

used_data_files = list(filter(lambda file: file.stem in dates_to_visualize, available_data_files))
used_data_files

[WindowsPath('../data/httparchive_metrics/nel_collector_provider_usage/2019-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_collector_provider_usage/2020-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_collector_provider_usage/2021-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_collector_provider_usage/2022-02.parquet'),
 WindowsPath('../data/httparchive_metrics/nel_collector_provider_usage/2023-02.parquet')]

### Aggregate result to visualize


In [14]:
from results.result_utils import date_to_text_format, concat_data_from_files

result = concat_data_from_files(used_data_files)

result['date_formatted'] = result['date'].map(date_to_text_format)
result

Unnamed: 0,date,providers,as_primary,share_as_primary,as_secondary,share_as_secondary,among_fallback,date_formatted
0,2019-02,report-uri.com,318.00,85.95,0.00,0.00,0.00,Feb 2019
1,2019-02,3gl.net,21.00,5.68,0.00,0.00,0.00,Feb 2019
2,2019-02,uriports.com,9.00,2.43,0.00,0.00,0.00,Feb 2019
3,2019-02,gvt2.com,5.00,1.35,5.00,100.00,8.00,Feb 2019
4,2019-02,seloc.club,5.00,1.35,0.00,0.00,0.00,Feb 2019
...,...,...,...,...,...,...,...,...
148,2023-02,loaney.in,1.00,0.00,0.00,0.00,0.00,Feb 2023
149,2023-02,loaney.es,1.00,0.00,0.00,0.00,0.00,Feb 2023
150,2023-02,skgeodesy.sk,1.00,0.00,0.00,0.00,0.00,Feb 2023
151,2023-02,ijs.si,0.00,0.00,3.00,0.70,0.00,Feb 2023


### Top 5 Providers per month


In [15]:
result[result.index < 5][['date_formatted', 'providers', 'as_primary', 'share_as_primary']]

Unnamed: 0,date_formatted,providers,as_primary,share_as_primary
0,Feb 2019,report-uri.com,318.0,85.95
1,Feb 2019,3gl.net,21.0,5.68
2,Feb 2019,uriports.com,9.0,2.43
3,Feb 2019,gvt2.com,5.0,1.35
4,Feb 2019,seloc.club,5.0,1.35
0,Feb 2020,shopifycloud.com,108255.0,98.77
1,Feb 2020,report-uri.com,623.0,0.57
2,Feb 2020,powerboutique.net,254.0,0.23
3,Feb 2020,linkedin.com,102.0,0.09
4,Feb 2020,fastly-insights.com,79.0,0.07


### NEL Collector Providers employed by N domains


In [16]:
from results.result_utils import get_first_or_0

employment_result = pd.DataFrame({}, index=[
    '1',
    '2',
    '3-10',
    '11-100',
    '101-1K',
    'More',
])

for date in dates_to_visualize:
    month_collector_data = result[result['date'] == date].copy()
    
    month_data_col = []
    
    # month_result['1']
    next_val = month_collector_data[month_collector_data['as_primary'] == 1].count()
    month_data_col.append(get_first_or_0(next_val))
    
    # month_result['2'] 
    next_val = month_collector_data[month_collector_data['as_primary'] == 2].count()
    month_data_col.append(get_first_or_0(next_val))

    # month_result['3-10']
    next_val = month_collector_data[(month_collector_data['as_primary'] >= 3) & (month_collector_data['as_primary'] >= 10)].count()
    month_data_col.append(get_first_or_0(next_val))
    
    # month_result['3-10']
    next_val = month_collector_data[(month_collector_data['as_primary'] >= 11) & (month_collector_data['as_primary'] >= 100)].count()
    month_data_col.append(get_first_or_0(next_val))
    
    # month_result['3-10']
    next_val = month_collector_data[(month_collector_data['as_primary'] >= 101) & (month_collector_data['as_primary'] >= 1000)].count()
    month_data_col.append(get_first_or_0(next_val))
    
    # month_result['3-10']
    next_val = month_collector_data[month_collector_data['as_primary'] >= 1001].count()
    month_data_col.append(get_first_or_0(next_val))
    
    
    employment_result[date_to_text_format(date)] = month_data_col
    
employment_result

Unnamed: 0,Feb 2019,Feb 2020,Feb 2021,Feb 2022,Feb 2023
1,3,19,52,63,81
2,2,4,10,20,14
3-10,2,12,19,29,31
11-100,1,4,9,12,18
101-1K,0,1,4,3,9
More,0,1,4,3,9
