# This is a downloader tool to shortcut downloads of radar and ooni data set

In [7]:
import requests
import json
import pandas as pd
from pathlib import Path

Below is a list of countries that are not covered by OONI dataset

In [143]:
ex_cc_list = ['VG', 'KN', 'ER', 'PW', 'TC', 'FM', 'SX', 'PN', 'DM', 'BL', 'GS', 'MS', 'AI', 'VU', 'TK', 'CX', 'GI', 'NF', 'WF', 'AQ', 'NR'
              'AX', 'SH', 'KP', 'EH', 'GW', 'UM', 'VA', 'PM', 'BV', 'GQ', 'MH', 'WS', 'ZA', 'SM', 'AS', 'FK', 'TF', 'NU', 'KI', 'VI', 'IO',
               'SJ', 'BM', 'HM', 'TO', 'TV', 'MF','CK', 'MP','NR', 'AX']

Use standard country list formated as alpha 2 and 3

In [144]:
cc_filepath = "/Users/coes3/phd-labs/fraganal/data_source/cc_alpha2_3.csv"
cc_df = pd.read_csv(cc_filepath)
t_cc = cc_df['alpha-2'].to_list()

Remove ex_cc_list from the standard country list and use the result for later queries and remove(pop) the 'nan' entry as a result of difference operation

In [145]:
cc = list(set(t_cc).difference(ex_cc_list))
cc.pop(0)

nan

## Function to download protocol version distribution dataset from Cloudflare Radar, specify protocol version(ip_version or tls_version) and country for result

In [22]:
def download_radar(proto, cc):
    url = f"https://api.cloudflare.com/client/v4/radar/http/timeseries/{proto}?dateStart=2022-01-01&dateEnd=2022-12-31&location={cc}&format=csv"
    payload = {}
    headers = {
      'X-Auth-Email': 'edsland@gmail.com',
      'X-Auth-Key': '9e244d85618b64e8e9024105880368e0c3f9c',
      'Content-Type': 'application/json'
    }
    
    try:
        response = requests.request("GET", url, headers=headers, data=payload)
        response.raise_for_status()
        filename = f'{proto}_2022_{cc}.csv'
        filepath = f'./data_source/radar/{proto}/'
        with open(f"{filepath}{filename}", "w") as f:
            f.write(response.text)
            f.close()
           
    except requests.exceptions.HTTPError as errh:
        print ("Http Error:",errh)


Test function with below example

In [2]:
proto = 'tls_version'
#ip_version
cc = 'BR'

download_radar(proto, cc)

Download ip_version data set for all countries in cc list

In [35]:
proto = 'ip_version'

for i in cc:
    download_radar(proto, i)

## Function to download OONI Web Connectivity dataset from OONI, you can specify a number of parameters including country, category_code, time grain, etc. More info here:
API DOC: https://api.ooni.io/apidocs/

In [146]:
def download_ooni_wc(cc):
    url = f"https://api.ooni.io/api/v1/aggregation?probe_cc={cc}&test_name=web_connectivity&since=2022-01-01&until=2022-12-31&time_grain=day&axis_x=category_code&axis_y=measurement_start_day&format=CSV&download=true"
    payload = {}
    headers = {}
    
    try:
        response = requests.request("GET", url, headers=headers, data=payload)
        response.raise_for_status()
        filename = f'ooni_agg_wc_2022_{cc}.csv'
        filepath = f'./data_source/ooni/wc/'
        with open(f"{filepath}{filename}", "w") as f:
            f.write(response.text)
            f.close()
           
    except requests.exceptions.HTTPError as errh:
        print ("Http Error:",errh)


Test function with below example

In [147]:
download_ooni_wc('UG')

Download webconnectivity dataset for all countries in cc list

In [None]:
for i in cc:
    download_ooni_wc(i)

## Function to download OONI Circumvention Tool availability dataset from OONI, you can specify a number of parameters including country, test names, time grain, etc. More info here:
API DOC: https://api.ooni.io/apidocs/

In [2]:
def download_ooni_cir(cc):
    url = f"https://api.ooni.io/api/v1/aggregation?probe_cc={cc}&test_name=torsf,tor,stunreachability,psiphon,riseupvpn&since=2022-01-01&until=2022-12-31&time_grain=day&axis_x=test_name&axis_y=measurement_start_day&format=CSV&download=true"
    payload = {}
    headers = {}
    
    try:
        response = requests.request("GET", url, headers=headers, data=payload)
        response.raise_for_status()
        filename = f'ooni_agg_cir_2022_{cc}.csv'
        filepath = f'./data_source/ooni/cir/'
        with open(f"{filepath}{filename}", "w") as f:
            f.write(response.text)
            f.close()
           
    except requests.exceptions.HTTPError as errh:
        print ("Http Error:",errh)

Test function with below example

In [20]:
download_ooni_cir('SD')

Get country list from wc run and use this as cc_list for circumvention tool run, not neccessary if cc is correct

In [8]:
def get_cc_list(path):
    files = Path(path).glob('*.csv')
    cc_list = []

    for i in files:
        data = i.name.split('_')[-1].strip('.csv')
        cc_list.append(data)
    return cc_list

In [9]:
mypath = "/Users/coes3/phd-labs/fraganal/data_source/ooni/wc/"
cc_list = get_cc_list(mypath)

In [11]:
cc_list.remove('nan')

Download circumvention tool dataset for all countries in cc_list

In [13]:
for i in cc_list:
    download_ooni_cir(i)

Http Error: 400 Client Error: BAD REQUEST for url: https://api.ooni.io/api/v1/aggregation?probe_cc=SD&test_name=torsf,tor,stunreachability,psiphon,riseupvpn&since=2022-01-01&until=2022-12-31&time_grain=day&axis_x=test_name&axis_y=measurement_start_day&format=CSV&download=true
