# 1. Imports

In [1]:
import os
import glob
import numpy as np
import pandas as pd
from datetime import timedelta, date
import requests

# 2. Load data

## 2.1 Load LACNIC delegation files

In [2]:
path_to_lacnic_delegation_files = '../../data/processed/delegation_files/lacnic/asns'
# List every entry in the delegation directory and sort the paths
# lexicographically, so the last element is the greatest file name.
delegation_file_a = np.sort(
    np.array(glob.glob('%s/*' % path_to_lacnic_delegation_files))
)
# Display the path that the following cells treat as the latest snapshot.
delegation_file_a[-1]

'../../data/processed/delegation_files/lacnic/asns/2019_07_01'

In [3]:
# Labels given to the pipe-separated fields of the delegation file.
delegation_column_names = [
    'rir', 'cc', 'resource', 'asn',
    'void', 'allocation_date', 'status', 'hash',
]
# Read the last file of the sorted listing.
lacnic_latest_df = pd.read_csv(
    delegation_file_a[-1],
    sep='|',
    names=delegation_column_names,
)
lacnic_latest_df.head()

Unnamed: 0,rir,cc,resource,asn,void,allocation_date,status,hash
0,lacnic,*,asn,*,10171,summary,,
1,lacnic,MX,asn,278,1,19890331,allocated,31986.0
2,lacnic,AR,asn,676,1,19900523,allocated,65335.0
3,lacnic,BR,asn,1251,1,19991112,allocated,99087.0
4,lacnic,MX,asn,1292,1,19910524,allocated,66696.0


### Delegation file data adaptation

In [4]:
def datime2epoch(dt):
    """Convert a ``YYYYMMDD`` allocation date into seconds since 1970-01-01.

    Parameters
    ----------
    dt : str, int or float
        Date written as eight digits (year, month, day). Numeric values are
        accepted as well, because a purely numeric column may be parsed as
        numbers instead of text.

    Returns
    -------
    float or int
        Seconds elapsed between 1970-01-01 and the given date, or ``-1`` when
        the value carries no date: the ``'summary'`` marker, NaN, ``None`` or
        an empty string.

    Raises
    ------
    ValueError
        If the value is none of the markers above and its first eight
        characters do not form a valid calendar date.
    """
    if dt is None:
        return -1
    # Work on the textual form so int/float inputs can be sliced like strings.
    dt_text = str(dt).strip()
    if dt_text in ('summary', 'nan', ''):
        return -1
    allocation_day = date(
        int(dt_text[0:4]),
        int(dt_text[4:6]),
        int(dt_text[6:8]),
    )
    return (allocation_day - date(1970, 1, 1)).total_seconds()

In [5]:
# Order the rows by allocation date.
lacnic_latest_df = lacnic_latest_df.sort_values('allocation_date', ascending=True)
# Add the allocation date expressed as seconds since 1970-01-01
# (-1 for rows without a date).
lacnic_latest_df['epoch'] = lacnic_latest_df['allocation_date'].apply(datime2epoch)
# Drop the non-ASN row (its 'asn' field is '*'); the shapes printed before and
# after make the number of removed rows visible.
print(lacnic_latest_df.shape)
# .copy() makes the filtered frame independent, so the column assignment below
# writes to a real frame instead of a slice (avoids SettingWithCopyWarning).
lacnic_latest_df = lacnic_latest_df.loc[
    lacnic_latest_df['asn'] != '*'
].copy()
print(lacnic_latest_df.shape)
# Cast ASN to int so it can be matched against the AS-REL data.
# The intermediate float cast is kept from the original code.
lacnic_latest_df['asn'] = lacnic_latest_df['asn'].values.astype(float).astype(int)

(10172, 9)
(10171, 9)


## 2.2 Load CAIDA's AS-REL files

In [6]:
path_to_asrel_files = '../../data/raw/asrel'
# List every entry in the AS-REL directory, sorted lexicographically by path.
asrel_file_a = np.sort(
    np.array(glob.glob('%s/*' % path_to_asrel_files))
)

In [7]:
# Maps a date string (taken from the file name) to the list of distinct ASes
# that appear as provider or customer in that AS-REL file.
active_ases_dict = {}
for file_name in asrel_file_a:
    # The date is the part of the base name before the first dot.
    date_str = os.path.basename(file_name).split('.')[0]
    try:
        # '#' lines are comments; the data lines are pipe-separated.
        p2c_df = pd.read_csv(
            file_name,
            comment='#',
            header=None,
            sep='|',
            compression='bz2'
        )
        # Raises ValueError if the file does not have exactly three columns.
        p2c_df.columns = ['provider', 'customer', 'type']
    except (OSError, EOFError, ValueError):
        # Only unreadable, corrupt, empty or malformed files are skipped;
        # KeyboardInterrupt and programming errors are no longer swallowed.
        print('No data available for %s' % date_str)
        continue
    # An AS counts as active when it appears on either side of a relationship.
    active_ases_dict[date_str] = list(
        set(p2c_df['provider'].values.tolist()).union(
            p2c_df['customer'].values.tolist()
        )
    )

No data available for 20140101
No data available for 20140201
No data available for 20140301
No data available for 20140501
No data available for 20150301
No data available for 20150401


# 3. LatAm in numbers

In [8]:
# Date key = last path component of the latest delegation file, underscores removed.
latest_delegation_path = delegation_file_a[-1]
date_str = latest_delegation_path.rsplit('/', 1)[-1].replace('_', '')
active_ases_latest = active_ases_dict[date_str]
print('Total number of active ASes in %s: %s' % (date_str, len(active_ases_latest)))

Total number of active ASes in 20190701: 65438


In [9]:
# Date key = last path component of the latest delegation file, underscores removed.
date_str = delegation_file_a[-1].rsplit('/', 1)[-1].replace('_', '')
# One row per delegated ASN, so the column length is the delegation count.
delegated_ases_cnt = lacnic_latest_df['asn'].size
print('Delegated ASes in LatAm in %s: %s' % (date_str, delegated_ases_cnt))

Delegated ASes in LatAm in 20190701: 10171


In [10]:
# Date key = last path component of the latest delegation file, underscores removed.
date_str = delegation_file_a[-1].rsplit('/', 1)[-1].replace('_', '')
# True for delegated ASNs that also appear in the AS-REL data of that date.
is_active = lacnic_latest_df['asn'].isin(active_ases_dict[date_str])
active_ases_cnt = lacnic_latest_df.loc[is_active, 'asn'].values.size
print('Delegated & Active ASes in LatAm in %s: %s' % (date_str, active_ases_cnt))

Delegated & Active ASes in LatAm in 20190701: 8661


In [11]:
# Date key = last path component of the latest delegation file, underscores removed.
date_str = delegation_file_a[-1].rsplit('/', 1)[-1].replace('_', '')
# The activity mask does not depend on the country, so it is built once.
is_active = lacnic_latest_df['asn'].isin(active_ases_dict[date_str])
for cc in ['BR', 'AR', 'CL']:
    in_country = lacnic_latest_df['cc'] == cc
    active_ases_cnt = lacnic_latest_df.loc[is_active & in_country, 'asn'].values.size
    print('Delegated & Active ASes in %s in %s: %s' % (cc, date_str, active_ases_cnt))

Delegated & Active ASes in BR in 20190701: 6458
Delegated & Active ASes in AR in 20190701: 791
Delegated & Active ASes in CL in 20190701: 241


# 4. RIPE Atlas: Looking for active probes in LatAm

We would like to thank Malte Hasen (temporary intern at the University of Buenos Aires) for providing this piece of code.

In [12]:
def ripe_filter(html_data):
    """Extract ``(country_code, id)`` pairs from a decoded probe-list response.

    Parameters
    ----------
    html_data : dict
        Decoded JSON document with a ``'results'`` list. Each entry must
        provide ``'asn_v4'``, ``'status'`` (a dict with a ``'name'`` key),
        ``'country_code'`` and ``'id'``.

    Returns
    -------
    list of tuple
        One ``(country_code, id)`` tuple, in input order, for every probe that
        has a truthy ``asn_v4`` and whose status name is either
        ``'Connected'`` or ``'Disconnected'``.
    """
    accepted_status_names = ('Connected', 'Disconnected')
    return [
        (probe['country_code'], probe['id'])
        for probe in html_data['results']
        # The status is only inspected for probes that report an IPv4 ASN.
        if probe['asn_v4'] and probe['status']['name'] in accepted_status_names
    ]

In [13]:
# Generic query parameters accepted by the API:
# https://atlas.ripe.net/docs/api/v2/manual/overview/generic_query_parameters.html
# Reference for the probe-list request and all of its parameters:
# https://atlas.ripe.net/docs/api/v2/reference/#!/probes/Probe_List_GET

# Probes inside a latitude/longitude bounding box, filtered by status=1.
URL_LATAM = (
    "https://atlas.ripe.net/api/v2/probes/"
    "?latitude__gte=-54.8&latitude__lte=32.6"
    "&longitude__gte=-117.1&longitude__lte=-34.8"
    "&status=1"
    "&page_size=500"
)
# Same status filter without any geographic restriction.
URL_WOLRDWIDE = "https://atlas.ripe.net/api/v2/probes/?status=1"
# Template: the %s placeholder takes a country code.
URL_cc = (
    "https://atlas.ripe.net/api/v2/probes/"
    "?status=1"
    "&country_code=%s"
    "&page_size=500"
)

In [14]:
# A timeout keeps a stalled connection from blocking the notebook forever, and
# raise_for_status() turns an HTTP error into an immediate, explicit failure
# instead of a confusing KeyError in a later cell.
latam_response = requests.get(url=URL_LATAM, timeout=30)
latam_response.raise_for_status()
html_data_latam = latam_response.json()

worldwide_response = requests.get(url=URL_WOLRDWIDE, timeout=30)
worldwide_response.raise_for_status()
html_data_worldwide = worldwide_response.json()

In [15]:
# 'count' is read from each decoded response and reported as the number of probes.
latam_probe_cnt = html_data_latam['count']
print("print connected RIPE Atlas probes in LatAm: %d" % (latam_probe_cnt,))
worldwide_probe_cnt = html_data_worldwide['count']
print("print connected RIPE Atlas probes in the World: %d" % (worldwide_probe_cnt,))

print connected RIPE Atlas probes in LatAm: 387
print connected RIPE Atlas probes in the World: 10180


In [16]:
# Probe counts for a few reference countries, for comparison.
for cc in ['US', 'AU', 'DE', 'JP', 'ZA']:
    # Timeout: never hang on a stalled connection. raise_for_status(): fail
    # loudly on an HTTP error rather than on a missing 'count' key.
    cc_response = requests.get(url=URL_cc % cc, timeout=30)
    cc_response.raise_for_status()
    html_data_cc = cc_response.json()
    print("print connected RIPE Atlas probes in %s: %d" % (cc, html_data_cc['count']))

print connected RIPE Atlas probes in US: 1191
print connected RIPE Atlas probes in AU: 164
print connected RIPE Atlas probes in DE: 1386
print connected RIPE Atlas probes in JP: 181
print connected RIPE Atlas probes in ZA: 100


In [17]:
# Keep only the probes accepted by ripe_filter, as (country code, probe id)
# pairs, and load them into a two-column DataFrame.
latam_probe_records = ripe_filter(html_data_latam)
ripe_probes_latam_country_location_df = pd.DataFrame(
    latam_probe_records,
    columns=['cc', 'id'],
)

In [18]:
# Number of probes per country code, sorted ascending so that tail() shows
# the countries with the most probes.
count_bycountry_df = (
    ripe_probes_latam_country_location_df
    .groupby('cc')['id']
    .count()
    .reset_index()
    .sort_values('id')
)
count_bycountry_df.tail()

Unnamed: 0,cc,id
22,MX,26
7,CL,28
0,AR,37
5,BR,69
31,US,88
