# 1. Imports

In [1]:
import os
import glob
import numpy as np
import pandas as pd
from datetime import timedelta, date
import matplotlib.pyplot as pl
%matplotlib inline
from aggregate6 import aggregate

# 2. Connected ASes to the IXPs

In [2]:
# Root directory of the RIB dumps; each entry directly below it is later
# treated as one IXP (the entry name is used as the IXP identifier).
path_to_ixp_bgp_dumps_dirs = '../../data/processed/ribs/v4'
# Collect every entry found directly under the root directory
ixp_bgp_dumps_dirs_a = np.asarray(
    glob.glob(path_to_ixp_bgp_dumps_dirs + '/*')
)

In [3]:
# ASN of each IXP, keyed by the IXP identifier (IATA code).
# A value of -1 means the ASN was not found, or the IXP may not have one.
ixp_asn_dict = dict(
    lpb=52499,
    asu=264722,
    cor=52374,
    nqn=52294,
    pss=52404,
    eze=11058,
    scl=61522,
    gye=27919,
    ros=52324,
    mex=18592,
    sjo=52385,
    bze=-1,  # not found, or it may not have one
    pap=28056,
    # The IXP runs within the Honduran state-owned provider
    # Hondutel (AS7727)
    tgu=7727,
    kin=-1,  # not found, or it may not have one
    pos=-1,  # not found, or it may not have one
    saopaulo=26162,
)

In [4]:
# Identifiers of the IXP locations that the loops below leave out of the
# analysis because they are not in Latin America.
foreing_ixp_location_list = [
    'fra',  # Frankfurt (DE-CIX)
    'cdg',  # Paris (France-IX)
    'bkk',  # Bangkok (BKNIX)
    'jnb',  # Johannesburg (JINX)
]

In [5]:
# Count, for every LatAm IXP, the distinct ASes seen as IXP members in its
# most recent BGP table dump. Result: output_list of (ixp_name, member_count).
output_list = []
for path_to_ixp_bgp_dumps_files in ixp_bgp_dumps_dirs_a:
    # extract IXP name (actually it is its IATA code); basename() copes with
    # the platform's path separator, unlike a hard-coded split('/')
    ixp_name = os.path.basename(str(path_to_ixp_bgp_dumps_files))
    # Exclude non-LatAm IXPs from the analysis
    if ixp_name in foreing_ixp_location_list:
        continue
    # get all BGP table dumps for this IXP, sorted by file name
    # (assumes file names sort in ascending date order)
    ixp_bgp_table_dump_files_a = np.sort(
        np.array(glob.glob(path_to_ixp_bgp_dumps_files + '/*'))
    )
    # An IXP directory without any dump used to raise IndexError on [-1]
    if len(ixp_bgp_table_dump_files_a) == 0:
        print('No BGP table dump found for %s; skipping' % ixp_name)
        continue
    # Load the last (most recent) BGP table dump. 'as-path' is forced to
    # str: a dump in which every path holds a single AS would otherwise be
    # parsed as an integer column and break the .split(',') below.
    bgp_table_dump_df = pd.read_csv(
        ixp_bgp_table_dump_files_a[-1],
        header='infer',
        sep=',',
        dtype={'as-path': str}
    )
    # Remove NaN path if exists
    bgp_table_dump_df = bgp_table_dump_df.loc[
        bgp_table_dump_df['as-path'].notnull()
    ]
    # ASN of this IXP; -1 (the "unknown" marker already used in
    # ixp_asn_dict) never matches a real ASN, so IXPs missing from the
    # dict are handled like IXPs without a known ASN instead of raising
    # KeyError.
    route_server_asn = ixp_asn_dict.get(ixp_name, -1)
    # create set of IXP members
    members_set = set()
    for as_path_str in bgp_table_dump_df['as-path'].unique():
        as_path_list = as_path_str.split(',')
        first_asn = int(as_path_list[0])
        if len(as_path_list) > 1 and first_asn == route_server_asn:
            # The route server is the one announcing the path:
            # the member is the AS right after the IXP ASN
            members_set.add(int(as_path_list[1]))
        else:
            # Single-AS path, or a path not starting with the IXP ASN:
            # the first AS is a member announcing directly to the monitor
            members_set.add(first_asn)
    # Append results (# of members) for this IXP to a list
    output_list.append((ixp_name, len(members_set)))

In [6]:
# Tabulate the (IXP, member count) pairs and rank IXPs from the largest
# to the smallest number of members.
member_df = (
    pd.DataFrame(output_list, columns=['iata_code', 'as-cnt'])
    .sort_values(by='as-cnt', ascending=False)
)

In [7]:
# Display the per-IXP member counts (sorted by 'as-cnt', descending)
member_df

Unnamed: 0,iata_code,as-cnt
4,saopaulo,1156
10,eze,127
6,scl,72
12,sjo,28
8,nqn,23
15,cor,23
11,asu,15
9,pss,14
0,ros,14
1,lpb,9


# 3. Originated address space by connected networks

In [8]:
def prefix_addr_space(prefix):
    """
    Return the number of IPv4 addresses covered by a CIDR prefix.

    Parameters
    ----------
    prefix : str
        IPv4 prefix in CIDR notation, e.g. ``'192.0.2.0/24'``. Only the
        part after the last ``/`` (the prefix length) is inspected.

    Returns
    -------
    int
        ``2 ** (32 - length)``; e.g. 256 for a /24 and 1 for a /32.

    Raises
    ------
    ValueError
        If the prefix length is not an integer, or is outside 0-32
        (for instance when an IPv6 prefix is passed by mistake).
    """
    mask = int(prefix.split('/')[-1])
    # Without this check a length above 32 would silently yield a
    # fractional float (2 ** negative) and corrupt any running total.
    if not 0 <= mask <= 32:
        raise ValueError(
            'invalid IPv4 prefix length in %r' % (prefix,)
        )
    return 2 ** (32 - mask)

In [33]:
# Compute, for every LatAm IXP, the amount of IPv4 address space originated
# by its members according to the most recent BGP table dump.
# Result: output_list of (ixp_name, ip_count).
output_list = []
for path_to_ixp_bgp_dumps_files in ixp_bgp_dumps_dirs_a:
    # extract IXP name (actually it is its IATA code); basename() copes with
    # the platform's path separator, unlike a hard-coded split('/')
    ixp_name = os.path.basename(str(path_to_ixp_bgp_dumps_files))
    # Exclude non-LatAm IXPs from the analysis
    if ixp_name in foreing_ixp_location_list:
        continue
    # get all BGP table dumps for this IXP, sorted by file name
    # (assumes file names sort in ascending date order)
    ixp_bgp_table_dump_files_a = np.sort(
        np.array(glob.glob(path_to_ixp_bgp_dumps_files + '/*'))
    )
    # An IXP directory without any dump used to raise IndexError on [-1]
    if len(ixp_bgp_table_dump_files_a) == 0:
        print('No BGP table dump found for %s; skipping' % ixp_name)
        continue
    # Load the last (most recent) BGP table dump. 'as-path' is forced to
    # str: a dump in which every path holds a single AS would otherwise be
    # parsed as an integer column and break the .split(',') below.
    bgp_table_dump_df = pd.read_csv(
        ixp_bgp_table_dump_files_a[-1],
        header='infer',
        sep=',',
        dtype={'as-path': str}
    )
    # Remove NaN path if exists
    bgp_table_dump_df = bgp_table_dump_df.loc[
        bgp_table_dump_df['as-path'].notnull()
    ]
    # ASN of this IXP; -1 (the "unknown" marker already used in
    # ixp_asn_dict) never matches a real ASN, so IXPs missing from the
    # dict are handled like IXPs without a known ASN instead of raising
    # KeyError.
    route_server_asn = ixp_asn_dict.get(ixp_name, -1)
    # create set of prefixes originated by IXP members
    prefix_set = set()
    # Plain column iteration: only two columns are needed, so the per-row
    # Series objects built by iterrows() are avoided
    for as_path_str, prefix in zip(
        bgp_table_dump_df['as-path'], bgp_table_dump_df['prefix']
    ):
        as_path_list = as_path_str.split(',')
        # One-AS path: the member announces its own route to the monitor
        announced_directly = len(as_path_list) == 1
        # Two-AS path starting with the IXP ASN: same, via the route server
        announced_via_ixp = (
            len(as_path_list) == 2
            and int(as_path_list[0]) == route_server_asn
        )
        if announced_directly or announced_via_ixp:
            prefix_set.add(prefix)
    # Merge the prefixes so that address space is not counted twice
    aggregated_prefix_list = aggregate(list(prefix_set))
    ip_cnt = 0
    for prefix in aggregated_prefix_list:
        # library aggregate6 has a bug and sometimes overaggregates some
        # prefixes, so anything as short as a /8 is discarded.
        # NOTE: this also drops a legitimately announced /8.
        if int(prefix.split('/')[-1]) > 8:
            ip_cnt += prefix_addr_space(prefix)
    # Append results (# of addresses) for this IXP to a list
    output_list.append((ixp_name, ip_cnt))

In [34]:
# Tabulate the (IXP, address count) pairs and rank IXPs from the largest
# to the smallest originated address space.
announced_addr_space_df = (
    pd.DataFrame(output_list, columns=['iata_code', 'ip-cnt'])
    .sort_values(by='ip-cnt', ascending=False)
)

In [35]:
# Display the per-IXP address counts (sorted by 'ip-cnt', descending)
announced_addr_space_df

Unnamed: 0,iata_code,ip-cnt
4,saopaulo,26013440
6,scl,19419136
10,eze,7854080
11,asu,1514648
14,mex,795136
12,sjo,400896
7,pos,195584
5,tgu,130560
13,pap,102400
15,cor,79872
