# 1. Imports

In [1]:
import os
import glob

import numpy as np
import pandas as pd
from aggregate6 import aggregate

# 2. Address space in LG dumps

Brito _el at._ [1] observed that partial BGP dumps at LGs in Sao Paulo (sp) and Curitiba (pr) due to export filters. We would like to compare BGP table dumps from LG and RV in Sao Paulo. Major discrepancies would suggest that LG keeps applying export filters

In [2]:
def prefix_addr_space(prefix):
    """
    Compute prefix address space.

    Get mask and then compute prefix addr space.
    """
    mask = prefix.split('/')[-1]
    return 2 ** (32 - int(mask))

## 2.1 Originated address space by connected networks

Originated address space by connected networks

In [3]:
PATH_LG_DUMPS = '../../data/processed/lg-ribs/v4'
ixp_bgp_dumps_dirs_a = np.array(glob.glob(PATH_LG_DUMPS + '/*'))

In [4]:
output_list = []
for dir_name in ixp_bgp_dumps_dirs_a:
    # get LG name
    ixp_name = dir_name.split('/')[-1]
    # All LG BGP dumps from that LG
    bgp_file_a = glob.glob(PATH_LG_DUMPS + '/' + ixp_name + '/*')
    bgp_file_a = np.sort(bgp_file_a)
    # Exclude non-LatAm IXPs from the analysis
    if ixp_name in ('sp', 'pr'):
        # Load BGP table dump
        bgp_table_dump_df = pd.read_csv(
            bgp_file_a[-2],
            header='infer',
            sep=','
        )
        # Remove NaN path if exists
        bgp_table_dump_df = bgp_table_dump_df.loc[
            bgp_table_dump_df['as-path'].notnull()
        ]
        # create set of announced prefixes
        prefix_set = set()
        # Loop
        for index, row in bgp_table_dump_df.iterrows():
            as_path_str = row['as-path']
            prefix = row['prefix']
            as_path_list = as_path_str.split(',')
            # if AS-PATH contains more than one AS
            # If not, the IXP member is directly annoucing 
            # its routes to the monitor
            if len(as_path_list) == 1:
                prefix_set.add(prefix)
        # 
        aggregated_prefix_list = aggregate(list(prefix_set))
        ip_cnt = 0
        for prefix in aggregated_prefix_list:
            network, mask = prefix.split('/')
            # library aggregate6 has a bug and sometimes overaggregates some prefixes
            if int(mask) > 8:
                ip_cnt += prefix_addr_space(prefix)
        # Append results (# of membs) for this IXP to a list
        output_list.append((ixp_name, ip_cnt))

In [5]:
members_addr_space_df = pd.DataFrame(
    output_list,
    columns=['iata_code', 'ip-cnt']
)
# sort values
members_addr_space_df = members_addr_space_df.sort_values('ip-cnt', ascending = False)

In [6]:
members_addr_space_df

Unnamed: 0,iata_code,ip-cnt
1,sp,2550528
0,pr,13056


## 2.2 Announced address space

Originated address space by connected networks + customer cones

In [7]:
output_list = []
for dir_name in ixp_bgp_dumps_dirs_a:
    # get LG name
    ixp_name = dir_name.split('/')[-1]
    # All LG BGP dumps from that LG
    bgp_file_a = glob.glob(PATH_LG_DUMPS + '/' + ixp_name + '/*')
    bgp_file_a = np.sort(bgp_file_a)
    # Exclude non-LatAm IXPs from the analysis
    if ixp_name in ('sp', 'pr'):
        # Load BGP table dump
        bgp_table_dump_df = pd.read_csv(
            bgp_file_a[-2],
            header='infer',
            sep=','
        )
        # Remove NaN path if exists
        bgp_table_dump_df = bgp_table_dump_df.loc[
            bgp_table_dump_df['as-path'].notnull()
        ]
        # create set of announced prefixes
        prefix_set = set()
        # Loop
        for index, row in bgp_table_dump_df.iterrows():
            as_path_str = row['as-path']
            prefix = row['prefix']
            as_path_list = as_path_str.split(',')
            # if AS-PATH contains more than one AS
            # If not, the IXP member is directly annoucing 
            # its routes to the monitor
            if '6939' not in as_path_list:
                prefix_set.add(prefix)
        # 
        aggregated_prefix_list = aggregate(list(prefix_set))
        ip_cnt = 0
        for prefix in aggregated_prefix_list:
            network, mask = prefix.split('/')
            # library aggregate6 has a bug and sometimes overaggregates some prefixes
            if int(mask) > 8:
                ip_cnt += prefix_addr_space(prefix)
        # Append results (# of membs) for this IXP to a list
        output_list.append((ixp_name, ip_cnt))

In [8]:
announced_addr_space_df = pd.DataFrame(
    output_list,
    columns=['iata_code', 'ip-cnt']
)
# sort values
announced_addr_space_df = announced_addr_space_df.sort_values('ip-cnt', ascending = False)

In [9]:
announced_addr_space_df

Unnamed: 0,iata_code,ip-cnt
1,sp,5439232
0,pr,13464


# 3. Looking for differences in Google annoncements

We look for prefixes announced by Google (AS15169) in both datasets (RV and LG). 

## 3.1 Sao Paulo's latest LG dump

In [10]:
IXP_NAME = 'sp'
PATH_LG_DUMPS = '../../data/processed/lg-ribs/v4'

In [11]:
bgp_file_a = glob.glob(PATH_LG_DUMPS + '/' + IXP_NAME + '/' + '2019_07_*')[-1]

In [12]:
lg_dump_df = pd.read_csv(
    bgp_file_a,
    header='infer',
    sep=','
)
lg_dump_df.head()

Unnamed: 0,prefix,as-path
0,1.0.4.0/22,693948263880356203
1,1.0.4.0/22,69393804023969
2,1.0.4.0/22,6939465123969
3,1.0.4.0/22,69394788
4,1.0.4.0/22,2254823596


## 3.2 Sao Paulo's latest RV dump

In [13]:
IXP_NAME = 'saopaulo'
PATH_RV_DUMPS = '../../data/processed/ribs/v4'

In [14]:
bgp_file_a = glob.glob(PATH_RV_DUMPS + '/' + IXP_NAME + '/' + '2019_07_*')[-1]

In [15]:
rv_dump_df = pd.read_csv(
    bgp_file_a,
    header='infer',
    sep=','
)
rv_dump_df.head()

Unnamed: 0,prefix,as-path
0,1.0.4.0/22,693948263880356203
1,1.0.4.0/22,693948263880356203
2,1.0.4.0/22,693948263880356203
3,1.0.4.0/24,693948263880356203
4,1.0.4.0/24,693948263880356203


## 3.3 Compare entries

In [16]:
google_rv = rv_dump_df.loc[
    (rv_dump_df['as-path'].str.startswith('15169')) |
    (rv_dump_df['as-path'] == '15169')
]['prefix'].size
google_lg = lg_dump_df.loc[
    (lg_dump_df['as-path'].str.startswith('15169')) |
    (lg_dump_df['as-path'] == '15169')
]['prefix'].size
print("# of prefixes announced by Google visible in RV dataset: %d" % google_rv)
print("# of prefixes announced by Google visible in LG dataset: %d" % google_lg)

# of prefixes announced by Google visible in RV dataset: 498
# of prefixes announced by Google visible in LG dataset: 32


# References

[1] Brito, Samuel Henrique Bucke, et al. "Dissecting the largest national ecosystem of public internet exchange points in brazil." _International Conference on Passive and Active Network Measurement_. Springer, Cham, 2016.