## List of all web connectivity measurements

### Testing with one country

In [1]:
import requests
import pandas as pd
# Daily web_connectivity counts for Burma (probe_cc=MM), split by category code.
# The query is passed via `params` so requests builds and encodes the URL.
resp = requests.get(
    'https://api.ooni.io/api/v1/aggregation',
    params={
        'probe_cc': 'MM',
        'test_name': 'web_connectivity',
        'since': '2023-07-01',
        'until': '2024-07-01',
        'axis_x': 'measurement_start_day',
        'axis_y': 'category_code',
    },
    timeout=300,  # never hang the kernel forever on a stalled connection
)
# Stop here on an HTTP error rather than failing later on a missing 'result' key.
resp.raise_for_status()
j = resp.json()
# One DataFrame row per entry of the response's 'result' list.
df = pd.json_normalize(j, record_path='result')

In [2]:
print(df.head())

   anomaly_count category_code  confirmed_count  failure_count  \
0              2          ALDR                0              0   
1             43          ANON                0             12   
2              0          COMT                0              0   
3              0          DATE                0              0   
4              0           GMB                0              1   

   measurement_count measurement_start_day  ok_count  
0                  6            2023-07-01         4  
1                212            2023-07-01       157  
2                  5            2023-07-01         5  
3                  5            2023-07-01         5  
4                 13            2023-07-01        12  


### Measurements by category for all countries

In [3]:
import requests
import pandas as pd
# Countries to query, given as the codes sent in the `probe_cc` parameter.
# In list order: Burma, Cambodia, Hong Kong (China), India, Indonesia,
# Malaysia, Philippines, Thailand, Vietnam.
cc_list = ['MM', 'KH', 'HK', 'IN', 'ID', 'MY', 'PH', 'TH', 'VN']
def get_webconnectivity(probe_cc, since='2023-07-01', until='2024-07-01', timeout=300):
    """Fetch daily web_connectivity counts per category code for one country.

    NOTE: `get_webconnectivity` is defined again further down in this notebook
    with a different return shape; this version is only in effect until those
    later cells run.

    Parameters
    ----------
    probe_cc : str
        Country code sent to the API as `probe_cc`; also written to the
        `probe_cc` column of the returned frame.
    since, until : str
        Passed through to the API's `since` / `until` parameters. The defaults
        are the dates this notebook originally hard-coded.
    timeout : float
        Seconds requests waits on the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's `result` list, plus `probe_cc`.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    resp = requests.get(
        'https://api.ooni.io/api/v1/aggregation',
        params={
            'probe_cc': probe_cc,
            'test_name': 'web_connectivity',
            'since': since,
            'until': until,
            'axis_x': 'measurement_start_day',
            'axis_y': 'category_code',
        },
        timeout=timeout,
    )
    # Surface HTTP failures here instead of as a KeyError on 'result' below.
    resp.raise_for_status()

    df = pd.json_normalize(resp.json(), record_path='result')
    # Tag the rows so frames from several countries can be concatenated.
    df['probe_cc'] = probe_cc
    return df

# Fetch every country in cc_list, then stack the per-country frames row-wise.
# concat keeps each frame's own index, so index labels repeat across countries.
df_list = [pd.DataFrame(get_webconnectivity(country)) for country in cc_list]

df_all = pd.concat(df_list)

In [4]:
df_all.to_csv('webconnectivity_2024_04_23.csv')

In [5]:
print(df_all.head())

   anomaly_count category_code  confirmed_count  failure_count  \
0              2          ALDR                0              0   
1             43          ANON                0             12   
2              0          COMT                0              0   
3              0          DATE                0              0   
4              0           GMB                0              1   

   measurement_count measurement_start_day  ok_count probe_cc  
0                  6            2023-07-01         4       MM  
1                212            2023-07-01       157       MM  
2                  5            2023-07-01         5       MM  
3                  5            2023-07-01         5       MM  
4                 13            2023-07-01        12       MM  


### Aggregation by domain and ASN

In [1]:
import requests
import pandas as pd
# Daily web_connectivity counts for Cambodia (probe_cc=KH), split by domain.
# The query is passed via `params` so requests builds and encodes the URL.
resp = requests.get(
    'https://api.ooni.io/api/v1/aggregation',
    params={
        'probe_cc': 'KH',
        'test_name': 'web_connectivity',
        'since': '2023-07-01',
        'until': '2024-07-01',
        'axis_x': 'measurement_start_day',
        'axis_y': 'domain',
    },
    timeout=300,  # never hang the kernel forever on a stalled connection
)
# Stop here on an HTTP error rather than failing later on a missing 'result' key.
resp.raise_for_status()
j = resp.json()
# One DataFrame row per entry of the response's 'result' list.
df_date_domain = pd.json_normalize(j, record_path='result')

In [3]:
len(df_date_domain['domain'].unique())

1940

In [4]:
import requests
import pandas as pd
# Same Cambodia (probe_cc=KH) query as the date/domain one, but without `axis_x`:
# only `axis_y=domain` is sent.
resp = requests.get(
    'https://api.ooni.io/api/v1/aggregation',
    params={
        'probe_cc': 'KH',
        'test_name': 'web_connectivity',
        'since': '2023-07-01',
        'until': '2024-07-01',
        'axis_y': 'domain',
    },
    timeout=300,  # never hang the kernel forever on a stalled connection
)
# Stop here on an HTTP error rather than failing later on a missing 'result' key.
resp.raise_for_status()
j = resp.json()
# One DataFrame row per entry of the response's 'result' list.
df_domain = pd.json_normalize(j, record_path='result')

In [5]:
len(df_domain['domain'].unique())

1940

In [6]:
df_domain.to_csv('2024_kh_domains.csv')

In [7]:
df_date_domain['measurement_start_day'] = pd.to_datetime(df_date_domain['measurement_start_day'])


In [8]:
df_date_domain['quarter'] = df_date_domain['measurement_start_day'].dt.to_period('Q')

In [9]:
print(df_date_domain.head())

   anomaly_count  confirmed_count              domain  failure_count  \
0              0                0  4genderjustice.org              0   
1              0                0        76crimes.com              0   
2              0                0           7day.news              0   
3              0                0            9gag.com              0   
4              0                0          abc.go.com              0   

   measurement_count measurement_start_day  ok_count quarter  
0                  1            2023-07-01         1  2023Q3  
1                  1            2023-07-01         1  2023Q3  
2                  1            2023-07-01         1  2023Q3  
3                  1            2023-07-01         1  2023Q3  
4                  1            2023-07-01         1  2023Q3  


In [10]:
# Number of distinct domains measured in each calendar quarter.
# as_index=False keeps `quarter` as an ordinary column of the result.
# NOTE(review): df_date_domain is requested above with probe_cc=KH, yet the
# output recorded under this cell equals the MM rows of the multi-country
# table later in the notebook. Confirm which country this frame held when
# the cell was last run.
df_domain_summary = (
    df_date_domain
    .groupby(['quarter'], as_index=False)
    .agg(domain_count=('domain', 'nunique'))
)

In [11]:
print(df_domain_summary.head())

  quarter  domain_count
0  2023Q3          2447
1  2023Q4          2468
2  2024Q1          2476
3  2024Q2          2501


In [12]:
import requests
import pandas as pd
# Daily web_connectivity counts for Burma (probe_cc=MM), split by probe ASN.
# The query is passed via `params` so requests builds and encodes the URL.
resp = requests.get(
    'https://api.ooni.io/api/v1/aggregation',
    params={
        'probe_cc': 'MM',
        'test_name': 'web_connectivity',
        'since': '2023-07-01',
        'until': '2024-07-01',
        'axis_x': 'measurement_start_day',
        'axis_y': 'probe_asn',
    },
    timeout=300,  # never hang the kernel forever on a stalled connection
)
# Stop here on an HTTP error rather than failing later on a missing 'result' key.
resp.raise_for_status()
j = resp.json()
df_date_asn = pd.json_normalize(j, record_path='result')
# Parse the day strings so the calendar quarter can be derived from them.
df_date_asn['measurement_start_day'] = pd.to_datetime(df_date_asn['measurement_start_day'])
df_date_asn['quarter'] = df_date_asn['measurement_start_day'].dt.to_period('Q')
# The request is for a single country, so the label is constant.
df_date_asn['probe_cc'] = 'MM'

In [13]:
# Number of distinct probe ASNs seen per quarter. probe_cc is part of the
# group key so it survives as a label column in the result.
df_asn_summary = (
    df_date_asn
    .groupby(['quarter', 'probe_cc'], as_index=False)
    .agg(asn_count=('probe_asn', 'nunique'))
)

In [14]:
print(df_asn_summary.head())

  quarter probe_cc  asn_count
0  2023Q3       MM         24
1  2023Q4       MM         30
2  2024Q1       MM         23
3  2024Q2       MM         34


In [15]:
# Put the ASN and domain counts side by side, one row per quarter.
# The join key is `quarter` alone: df_domain_summary has no probe_cc column, so
# this merge cannot detect the two inputs describing different countries.
# NOTE(review): df_date_asn is requested with probe_cc=MM while df_date_domain is
# requested with probe_cc=KH. Verify both summaries refer to the same country.
df_quarter = pd.merge(df_asn_summary, df_domain_summary, on='quarter', how='inner')


In [16]:
print(df_quarter.head())

  quarter probe_cc  asn_count  domain_count
0  2023Q3       MM         24          2447
1  2023Q4       MM         30          2468
2  2024Q1       MM         23          2476
3  2024Q2       MM         34          2501


In [17]:
import requests
import pandas as pd
# Countries to query, given as the codes sent in the `probe_cc` parameter.
# In list order: Burma, Cambodia, Hong Kong (China), India, Indonesia,
# Malaysia, Philippines, Thailand, Vietnam.
# (Timor Leste is not queried: 'TL' is not in this list.)
cc_list = ['MM', 'KH', 'HK', 'IN', 'ID', 'MY', 'PH', 'TH', 'VN']
def _fetch_daily_aggregation(probe_cc, axis_y, since, until, timeout):
    """Return one country's per-day aggregation along `axis_y`, with `quarter` and `probe_cc` columns added."""
    resp = requests.get(
        'https://api.ooni.io/api/v1/aggregation',
        params={
            'probe_cc': probe_cc,
            'test_name': 'web_connectivity',
            'since': since,
            'until': until,
            'axis_x': 'measurement_start_day',
            'axis_y': axis_y,
        },
        timeout=timeout,
    )
    # Surface HTTP failures here instead of as a KeyError on 'result' below.
    resp.raise_for_status()

    df = pd.json_normalize(resp.json(), record_path='result')
    # Parse the day strings so the calendar quarter can be derived from them.
    df['measurement_start_day'] = pd.to_datetime(df['measurement_start_day'])
    df['quarter'] = df['measurement_start_day'].dt.to_period('Q')
    df['probe_cc'] = probe_cc
    return df


def get_webconnectivity(probe_cc, since='2023-07-01', until='2024-07-01', timeout=300):
    """Summarise one country's web_connectivity measurements per calendar quarter.

    Two requests are made, one split by probe ASN and one split by domain; the
    ASN request is sent first. Their quarterly summaries are joined on `quarter`.

    NOTE: this replaces the earlier `get_webconnectivity` defined in this
    notebook and is itself replaced by a later definition of the same name.

    Parameters
    ----------
    probe_cc : str
        Country code sent to the API as `probe_cc`; also the value of the
        `probe_cc` column in the result.
    since, until : str
        Passed through to the API's `since` / `until` parameters. The defaults
        are the dates this notebook originally hard-coded.
    timeout : float
        Seconds requests waits on the server, applied to each request.

    Returns
    -------
    pandas.DataFrame
        One row per quarter with columns `quarter`, `probe_cc`, `asn_count`,
        `domain_count`, `ok_count`, `anomaly_count`, `confirmed_count`,
        `failure_count` and `measurement_count`. The count totals are summed
        from the domain-split response.

    Raises
    ------
    requests.HTTPError
        If either request is answered with an error status.
    """
    df_date_asn = _fetch_daily_aggregation(probe_cc, 'probe_asn', since, until, timeout)
    # probe_cc is part of the group key so it survives as a label column.
    df_asn_summary = df_date_asn.groupby(['quarter', 'probe_cc'], as_index=False).agg(
        asn_count=('probe_asn', 'nunique'),
    )

    df_date_domain = _fetch_daily_aggregation(probe_cc, 'domain', since, until, timeout)
    df_domain_summary = df_date_domain.groupby(['quarter'], as_index=False).agg(
        domain_count=('domain', 'nunique'),
        ok_count=('ok_count', 'sum'),
        anomaly_count=('anomaly_count', 'sum'),
        confirmed_count=('confirmed_count', 'sum'),
        failure_count=('failure_count', 'sum'),
        measurement_count=('measurement_count', 'sum'),
    )

    # Inner join: only quarters present in both summaries are kept.
    return df_asn_summary.merge(df_domain_summary, on='quarter', how='inner')

# Build the quarterly summary for every country in cc_list and stack the results.
# concat keeps each frame's own index, so index labels repeat across countries.
df_list = [pd.DataFrame(get_webconnectivity(country)) for country in cc_list]

df_all_2 = pd.concat(df_list)

In [18]:
print(df_all_2.head())

  quarter probe_cc  asn_count  domain_count  ok_count  anomaly_count  \
0  2023Q3       MM         24          2447    160413          19496   
1  2023Q4       MM         30          2468    126593          12464   
2  2024Q1       MM         23          2476    163588          36490   
3  2024Q2       MM         34          2501    130490          35709   
0  2023Q3       KH         25          1809    650022          21921   

   confirmed_count  failure_count  measurement_count  
0             1499           9790             191198  
1             1130           5370             145557  
2              336           9274             209688  
3              230           6243             172672  
0             1941          16741             690625  


In [19]:
df_all_2.to_csv('webconnectivity_2024-04-23_2.csv')

In [20]:
#year
import requests
import pandas as pd
# Countries to query, given as the codes sent in the `probe_cc` parameter.
# In list order: Burma, Cambodia, Hong Kong (China), India, Indonesia,
# Malaysia, Philippines, Thailand, Vietnam.
cc_list = ['MM', 'KH', 'HK', 'IN', 'ID', 'MY', 'PH', 'TH', 'VN']
def get_webconnectivity(probe_cc, since='2023-07-01', until='2024-07-01', timeout=300):
    """Summarise one country's web_connectivity measurements over the whole window.

    A single request is made, split by domain and probe ASN, and collapsed
    into one summary row for the country.

    NOTE: this replaces the earlier definitions of `get_webconnectivity` in
    this notebook and returns a different shape from them.

    Parameters
    ----------
    probe_cc : str
        Country code sent to the API as `probe_cc`; also the value of the
        `probe_cc` column in the result.
    since, until : str
        Passed through to the API's `since` / `until` parameters. The defaults
        are the dates this notebook originally hard-coded.
    timeout : float
        Seconds requests waits on the server before giving up.

    Returns
    -------
    pandas.DataFrame
        A single row with columns `probe_cc`, `domain_count`, `asn_count`,
        `ok_count`, `anomaly_count`, `confirmed_count`, `failure_count` and
        `measurement_count`.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    resp_asn = requests.get(
        'https://api.ooni.io/api/v1/aggregation',
        params={
            'probe_cc': probe_cc,
            'test_name': 'web_connectivity',
            'since': since,
            'until': until,
            'axis_x': 'domain',
            'axis_y': 'probe_asn',
        },
        timeout=timeout,
    )
    # Surface HTTP failures here instead of as a KeyError on 'result' below.
    resp_asn.raise_for_status()

    df = pd.json_normalize(resp_asn.json(), record_path='result')
    # Constant label; grouping on it collapses the frame to one row while
    # as_index=False keeps probe_cc as an ordinary column.
    df['probe_cc'] = probe_cc
    df_summary = df.groupby(['probe_cc'], as_index=False).agg(
        domain_count=('domain', 'nunique'),
        asn_count=('probe_asn', 'nunique'),
        ok_count=('ok_count', 'sum'),
        anomaly_count=('anomaly_count', 'sum'),
        confirmed_count=('confirmed_count', 'sum'),
        failure_count=('failure_count', 'sum'),
        measurement_count=('measurement_count', 'sum'),
    )

    return df_summary

# Build the one-row summary for every country in cc_list and stack them.
# Each per-country frame has the single index label 0, and concat keeps it,
# so every row of df_summary_year carries index 0.
df_list = [pd.DataFrame(get_webconnectivity(country)) for country in cc_list]

df_summary_year = pd.concat(df_list)

In [21]:
df_summary_year

Unnamed: 0,probe_cc,domain_count,asn_count,ok_count,anomaly_count,confirmed_count,failure_count,measurement_count
0,MM,2610,49,581084,104159,3195,30677,719115
0,KH,1940,31,2290548,85645,6489,57487,2440169
0,HK,2558,81,1711583,50097,156,57002,1818838
0,IN,12827,326,6682469,561119,220140,262161,7725889
0,ID,2524,156,2801780,162058,218891,114496,3297225
0,MY,2384,33,6206747,213388,172840,181849,6774824
0,PH,2056,41,2196872,66627,1202,62043,2326744
0,TH,2350,43,3459947,119007,10676,138295,3727925
0,VN,2296,23,3817362,282731,1799,122524,4224416


In [22]:
df_summary_year.to_csv('ooni_wc_summary_total_2024_Apr.csv')