# 1. Import

In [1]:
import os
import glob
from datetime import timedelta, date

import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
%matplotlib inline

# 2. Functions

## 2.1 Get latest delegation file

In [2]:
def get_latest_delegation_file(
    rir, path_to_files = '../../data/processed/delegation_files/%s/asns'):
    path_to_rir_delegation_files = path_to_files % rir
    delegation_file_a = np.array(glob.glob(path_to_rir_delegation_files + '/*_07_*'))
    delegation_file_a = np.sort(delegation_file_a)
    return delegation_file_a[-1]

## 2.2 datetime transformation

In [3]:
def datime2epoch(dt):
    """
    Thi is an example script.

    It seems that it has to have THIS docstring with a summary line, a blank line
    and sume more text like here. Wow.
    """
    # cast dt into string in case it isn't 
    dt = str(dt)
    if dt != 'summary':
        if str(dt) != 'nan':
            if '_' in dt:
                YYYY, mm, dd = dt.split('_')
            else:
                YYYY = dt[0:4]
                mm = dt[4:6]
                dd = dt[6:8]
            return (
                date(
                    int(YYYY),
                    int(mm),
                    int(dd)
                ) - date(1970 ,1, 1)
            ).total_seconds()
        else:
            return -1
    else:
        return -1

## 2.3 Get all ASes on paths

In [4]:
def get_visible_ases(bgp_table_dump_df, ixp_asn):
    """
    Thi is an example script.

    It seems that it has to have THIS docstring with a summary line, a blank line
    and sume more text like here. Wow.
    """
    # create set of IXP members
    members_set = set()
    # Loop
    for as_path_str in bgp_table_dump_df.drop_duplicates(
        'as-path')['as-path'].values:
        as_path_list = as_path_str.split(',')
        # if AS-PATH contains more than one AS
        # If not, the IXP member is directly annoucing 
        # its routes to the monitor
        if len(as_path_list) > 1:
            # Removes path through HE (AS6939 from the path)
            if int(as_path_list[0]) != 6939 and int(as_path_list[1]) != 6939:
            # Checks if the route server is the once announcing the path
            # If so, remove IXP ASN from path
            # If not, it is direct member
                if int(as_path_list[0]) == ixp_asn:
                    init_val = 1
                else:
                    init_val = 0
                for i in range(1, len(as_path_list)):
                        members_set.add(int(as_path_list[i]))
        else:
            members_set.add(int(as_path_str))
    return members_set

## 2.4 Compute customer cone

In [5]:
def get_customer_cone(df, ixp_asn):
    """
    Thi is an example script.

    It seems that it has to have THIS docstring with a summary line, a blank line
    and sume more text like here. Wow.
    """
    # create set of IXP members
    output_list = []
    members_set = get_visible_ases(df, ixp_asn)
    for asn in members_set:
        tmp_set=set()
        # Look for paths that contains `asn` on them
        if ixp_asn != None:
            all_paths_a = df.loc[
                (
                    df['as-path'].str.startswith("%s,"% ixp_asn)
                ) &
                (
                    (df['as-path'].str.contains(",%s,"% asn)) | 
                    (df['as-path'].str.endswith(",%s"% asn))
                )]['as-path'].values
        else:
            all_paths_a = df.loc[
                (
                    (df['as-path'].str.contains("%s,"% asn)) | 
                    (df['as-path'].str.contains(",%s,"% asn)) | 
                    (df['as-path'].str.endswith(",%s"% asn))
                )]['as-path'].values
        # Interate over candidate paths
        for path_str in all_paths_a:
            # Turn path into an array
            path_a = np.array(path_str.split(',')).astype(int)
            # Find `asn` downstream ASes.
            # Those are the ones trail on the path
            asn_location = np.where(path_a == asn)[0]
            # It may have a loop that not have been filtered before
            if len(asn_location) == 1:
                # Add downstream ASes to the customer cone set
                for i in range(int(asn_location), int(len(path_a))):
                    tmp_set.add(path_a[i])
        # appends results to the list
        output_list.append((asn, len(tmp_set)))
    return output_list

# 3. Find out largest transit providers at IXPs

In [6]:
ixp_under_analysis_list = [
#     # Frankfurt (DE-CIX)
#     'fra',
#     # Paris (Frace-IX)
#     'cdg',
#     # Bangkok (BNKIX)
    'bkk',
#     # Johanesburg (JINX)
#     'jnb',
    # IX.br Sao Paulo
    'saopaulo',
    # CABASE-BUE
    'eze',
    # PIT Chile
    'scl'
]

In [7]:
ixp_rir_dict = {
    # Frankfurt (DE-CIX)
    'fra': 'ripe',
    # Paris (Frace-IX)
    'cdg': 'ripe',
    # Bangkok (BNKIX)
    'bkk': 'apnic',
    # Johanesburg (JINX)
    'jnb': 'afrinic',
    # IX.br Sao Paulo
    'saopaulo': 'lacnic',
    # CABASE-BUE
    'eze': 'lacnic',
    # PIT Chile
    'scl': 'lacnic'
}

In [8]:
ixp_cc_dict = {
#     # Frankfurt (DE-CIX)
#     'fra': 'DE',
#     # Paris (Frace-IX)
#     'cdg': 'FR',
    # Bangkok (BNKIX)
    'bkk': 'TH',
    # Johanesburg (JINX)
    'jnb': 'ZA',
    # IX.br Sao Paulo
    'saopaulo': 'BR',
    # CABASE-BUE
    'eze': 'AR',
    # PIT Chile
    'scl': 'CL'
}

In [9]:
ixp_asn_dict = {
    'eze': 11058,
    'scl': 61522,
    # We do not care about this now
    'fra': None,
    # We do not care about this now
    'cdg': None,
    # We do not care about this now
    'bkk': None,
    # We do not care about this now
    'jnb': None,
    # We do not care about this now
    'saopaulo': None,
}

In [10]:
PATH_TO_BGP_DUMPS = '../../data/processed/ribs/v4'
ixp_bgp_dumps_dirs_a = np.array(glob.glob(PATH_TO_BGP_DUMPS + '/*_07_*'))

In [11]:
# create output data frame
customer_cone_size_df = pd.DataFrame(
    [], 
    columns=['asn', 'customer_cones_size',
             'iata_code', 'date']
)
# Iterates over the IXPs under analysis
for ixp in ixp_under_analysis_list:
    # get july BGP table dump
    ixp_bgp_table_dump_files_a = np.array(glob.glob(
        PATH_TO_BGP_DUMPS + '/' + ixp + '/*_07_*'))
    # sort BGP table dumps by ASCENDING DATE
    ixp_bgp_table_dump_files_a = np.sort(ixp_bgp_table_dump_files_a)
#     # Loop over each BGP table dump (month-to-month analysis)
#     for bgp_table_dump_file in ixp_bgp_table_dump_files_a:
    bgp_table_dump_file = ixp_bgp_table_dump_files_a[-1]
    # get snapshot's date --> DATETIME
    date_datetime = datime2epoch(bgp_table_dump_file.split('/')[-1])
    # get snapshot's date --> STR WITHOUT _
    date_no_spaces_str = bgp_table_dump_file.split('/')[-1].replace('_','')
    # get snapshot's date --> STR 
    date_str = bgp_table_dump_file.split('/')[-1]
    # open pre-processed bgp table dump
    bgp_table_dump_df = pd.read_csv(
        bgp_table_dump_file,
        header='infer',
        sep=','
    )
    # Remove NaN path if exists
    bgp_table_dump_df = bgp_table_dump_df.loc[
        bgp_table_dump_df['as-path'].notnull()
    ]
    # find customer cones ASes from BGP table dump
    # We filter out paths that contains HE
    customer_cone_size_list = get_customer_cone(
        bgp_table_dump_df.loc[
            (~bgp_table_dump_df['as-path'].str.contains(',6939,')) &
            (~bgp_table_dump_df['as-path'].str.startswith('6939,'))
        ],
        ixp_asn_dict[ixp]
    )
    # create DF with members' set
    tmp_df = pd.DataFrame(
        customer_cone_size_list,
        columns=['asn', 'customer_cones_size']
    )
    tmp_df['iata_code'] = np.repeat(ixp, tmp_df.shape[0])
    tmp_df['date'] =  np.repeat(date_str, tmp_df.shape[0])
    customer_cone_size_df = pd.DataFrame.append(customer_cone_size_df, tmp_df)


In [12]:
customer_cone_size_df = customer_cone_size_df.sort_values(['iata_code', 'customer_cones_size'])
for iata_code in customer_cone_size_df.drop_duplicates('iata_code')['iata_code'].values:
    print('------------- %s ---------------' % iata_code)
    display(
        customer_cone_size_df.loc[
            customer_cone_size_df['iata_code'] == iata_code
        ].tail(10)[['asn', 'customer_cones_size']]
    )

------------- bkk ---------------


Unnamed: 0,asn,customer_cones_size
181,137557,7
87,9891,10
156,7470,12
105,45758,20
124,45796,27
215,38794,38
4,4621,47
220,45458,50
70,4750,82
114,45265,144


------------- eze ---------------


Unnamed: 0,asn,customer_cones_size
94,18678,38
312,11014,44
119,16814,55
34,262195,67
587,11664,81
257,19037,82
325,7049,100
435,52361,113
644,3549,219
448,52376,254


------------- saopaulo ---------------


Unnamed: 0,asn,customer_cones_size
5533,28368,153
5243,23106,161
3652,267613,168
5260,25933,201
5039,53062,202
5506,28329,207
5798,61832,209
4364,7049,218
283,262589,381
4503,16735,903


------------- scl ---------------


Unnamed: 0,asn,customer_cones_size
42,16629,25
47,18747,37
318,3549,41
14,262195,47
283,21838,48
392,14259,52
159,19228,57
199,52280,70
35,22661,87
164,7004,88


# 4. Looking into Level3's footprint in AR and CL

In [13]:
# Load LACNIC delegation file
lacnic_latest_df = pd.read_csv(
    get_latest_delegation_file('lacnic'),
    names=[
        'rir',
        'cc',
        'resource',
        'asn',
        'void',
        'allocation_date',
        'status',
        'hash'
    ],
    sep='|'
)

In [14]:
# sort values
lacnic_latest_df = lacnic_latest_df.sort_values('allocation_date', ascending=True)
# Remove rows that do not contains ASNs
lacnic_latest_df = lacnic_latest_df.loc[
    lacnic_latest_df['asn'] != '*'
]
# Adds epoch column
lacnic_latest_df['epoch'] = lacnic_latest_df['allocation_date'].apply(datime2epoch)
# cast ASN to int to then use with BGP table dump data frame
lacnic_latest_df['asn'] = lacnic_latest_df['asn'].values.astype(int)

In [15]:
lacnic_latest_df.head()

Unnamed: 0,rir,cc,resource,asn,void,allocation_date,status,hash,epoch
76,lacnic,CL,asn,6240,1,19870101,allocated,1282.0,536457600.0
65,lacnic,MX,asn,6063,1,19870101,allocated,76631.0,536457600.0
75,lacnic,PA,asn,6193,1,19870101,allocated,44882.0,536457600.0
67,lacnic,MX,asn,6065,1,19870101,allocated,55014.0,536457600.0
1,lacnic,MX,asn,278,1,19890331,allocated,31986.0,607305600.0


In [16]:
# Level3's ASes in AR and CL
level3_ases_dict = {
    "eze": 3549,
    "scl": 21838
}

In [17]:
# Iterates over the IXPs under analysis
for ixp in ['eze', 'scl']:
    # create a set for CC
    cc_set = set()
    # get all BGP table dumps
    ixp_bgp_table_dump_files_a = np.array(glob.glob(
        PATH_TO_BGP_DUMPS + '/' + ixp + '/*'))
    # sort BGP table dumps by ASCENDING DATE
    ixp_bgp_table_dump_files_a = np.sort(ixp_bgp_table_dump_files_a)
    #
    bgp_table_dump_file = ixp_bgp_table_dump_files_a[-1]
    # get snapshot's date --> DATETIME
    date_datetime = datime2epoch(bgp_table_dump_file.split('/')[-1])
    # get snapshot's date --> STR WITHOUT _
    date_no_spaces_str = bgp_table_dump_file.split('/')[-1].replace('_','')
    # get snapshot's date --> STR 
    date_str = bgp_table_dump_file.split('/')[-1]
    # open pre-processed bgp table dump
    bgp_table_dump_df = pd.read_csv(
        bgp_table_dump_file,
        header='infer',
        sep=','
    )
    # Remove NaN path if exists
    bgp_table_dump_df = bgp_table_dump_df.loc[
        bgp_table_dump_df['as-path'].notnull()
    ]
    # create DF with members' set
    all_paths_a = bgp_table_dump_df.loc[
        (
            bgp_table_dump_df['as-path'].str.startswith("%s,"% ixp_asn_dict[ixp])
        ) &
        (
            (bgp_table_dump_df['as-path'].str.contains(",%s,"% level3_ases_dict[ixp])) | 
            (bgp_table_dump_df['as-path'].str.endswith(",%s"% level3_ases_dict[ixp]))
    )]['as-path'].values
    # Interate over all paths
    for path_str in all_paths_a:
        # Turn path into an array
        path_a = np.array(path_str.split(',')).astype(int)
        # Find `asn` downstream ASes.
        # Those are the ones trail on the path
        asn_location = np.where(path_a == level3_ases_dict[ixp])[0]
        # It may have a loop that not have been filtered before
        if len(asn_location) == 1:
            # Add downstream ASes to the customer cone set
            for i in range(int(asn_location), int(len(path_a))):
                cc_set.add(path_a[i])
    print("AS nationalies of Level3's cc in %s" % ixp)
    display(
        lacnic_latest_df.loc[
            lacnic_latest_df['asn'].isin(list(cc_set))
        ].groupby('cc')['asn'].count().reset_index()
    )


AS nationalies of Level3's cc in eze


Unnamed: 0,cc,asn
0,AR,209
1,UY,4


AS nationalies of Level3's cc in scl


Unnamed: 0,cc,asn
0,AR,1
1,BR,1
2,CL,31
