In [4]:
import pandas as pd
import numpy as np
from datetime import datetime
import time
#import matplotlib.pyplot as plt
import gzip
import json
#from tabulate import tabulate
import bz2
#import os
from tqdm import tqdm

# Load the Data

## Helper functions and variables

In [5]:
def delta_date(x):
    return (str_to_date(x['end_date']) - str_to_date(x['start_date'])).days + 1

In [6]:
def str_to_date(d):
    return datetime.strptime(d, "%Y-%m-%d")

In [7]:
datapath="irrs/"

In [8]:
def overlap_db(db):
    timeless_overlap = db.merge(bgp, left_on='route', right_on='prefix', suffixes=('_route', '_prefix'))
    timeless_overlap['end_date'] = pd.to_datetime(timeless_overlap['end_date'], format="%Y-%m-%d")
    timeless_overlap['start_date'] = pd.to_datetime(timeless_overlap['start_date'], format="%Y-%m-%d")
    timeless_overlap['start']=pd.to_datetime(timeless_overlap['start'], unit='s').dt.strftime('%Y-%m-%d')
    timeless_overlap['end']=pd.to_datetime(timeless_overlap['end'], unit='s').dt.strftime('%Y-%m-%d')
    actual_overlap = timeless_overlap[(timeless_overlap.end_date > timeless_overlap.start)&(timeless_overlap.start_date < timeless_overlap.end)]
    actual_overlap['asn']='AS'+actual_overlap['asn'].astype(str)
    return actual_overlap

## Load BGP

In [9]:
bgp = pd.read_csv('pfx2as_2021_2023_merged.csv.gz', names=['prefix', 'asn', 'start', 'end'], delimiter=' ',low_memory=False)
bgp['duration'] = bgp['end'] - bgp['start']
bgpagg = bgp.groupby(['prefix', 'asn']).agg({'duration':'sum'}).reset_index()
bgpagg['asn']='AS'+bgpagg['asn'].astype(str)

## Load Radb

In [10]:
radb = pd.read_json(datapath+'radb/radb.route.json.gz', lines=True)
radb['lifetime'] = radb.apply(delta_date, axis=1)
radb_overlap=overlap_db(radb)
radbagg = radb_overlap.groupby(['route', 'origin']).agg({'lifetime':'sum'}).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  actual_overlap['asn']='AS'+actual_overlap['asn'].astype(str)


## Load Altdb

In [11]:
altdb = pd.read_json(datapath+'altdb/altdb.route.json.gz', lines=True)
altdb['lifetime'] = altdb.apply(delta_date, axis=1)
altdb_overlap=overlap_db(altdb)
altdbagg = altdb_overlap.groupby(['route', 'origin']).agg({'lifetime':'sum'}).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  actual_overlap['asn']='AS'+actual_overlap['asn'].astype(str)


# Get routes in BGP and inconsistent

## Import inconsistent prefixes

In [12]:
radb_cover=pd.read_csv('radb_cover.csv')
radb_inconsistent=list(radb_cover.nonauth.unique())
len(radb_inconsistent)

150402

In [13]:
altdb_cover=pd.read_csv('altdb_cover.csv')
altdb_inconsistent=list(altdb_cover.nonauth.unique())
len(altdb_inconsistent)

1206

## Appear in BGP and inconsistent

In [14]:
bgp_inconsistent_radb=radb_overlap[radb_overlap['prefix'].isin(radb_inconsistent)]
print(bgp_inconsistent_radb.prefix.nunique(),"appear in BGP and radb and are inconsistent.")

56803 appear in BGP and radb and are inconsistent.


In [15]:
bgp_inconsistent_altdb=altdb_overlap[altdb_overlap['prefix'].isin(altdb_inconsistent)]
print(bgp_inconsistent_altdb.prefix.nunique(),"appear in BGP and altdb and are inconsistent.")

619 appear in BGP and altdb and are inconsistent.


# Get full overlap

## RADB

In [16]:
df_in_b_r=radb_overlap[radb_overlap['route'].isin(bgp_inconsistent_radb.prefix.unique())]
route_r_b=df_in_b_r.groupby('route').agg({'asn':set,'origin':set}).reset_index()
route_r_b['Fullmatch']=(route_r_b['asn']==route_r_b['origin']) #python discards order when evaluating set equality
print(route_r_b[route_r_b['Fullmatch']]['route'].nunique(),"prefixes between BGP and RADB have a full overlap.")

3002 prefixes between BGP and RADB have a full overlap.


## ALTDB

In [17]:
df_in_b_a=altdb_overlap[altdb_overlap['route'].isin(bgp_inconsistent_altdb.prefix.unique())]
route_a_b=df_in_b_a.groupby('route').agg({'asn':set,'origin':set}).reset_index()
route_a_b['Fullmatch']=(route_a_b['asn']==route_a_b['origin']) #python discards order when evaluating set equality
print(route_a_b[route_a_b['Fullmatch']]['route'].nunique(),"prefixes between BGP and ALTDB have a full overlap.")

130 prefixes between BGP and ALTDB have a full overlap.


# Get partial overlap

## RADB

In [18]:
route_r_b['intersec'] = route_r_b.apply(lambda x: set(x['asn']).intersection(set(x['origin'])), axis=1)
route_r_b['partial_match']=(route_r_b['intersec']!=set())
partial=route_r_b[(~route_r_b['Fullmatch']) & (route_r_b['partial_match'])]
print(partial['route'].nunique(),"prefixes between BGP and RADB have a partial overlap.")

32838 prefixes between BGP and RADB have a partial overlap.


## ALTDB

In [19]:
route_a_b['intersec'] = route_a_b.apply(lambda x: set(x['asn']).intersection(set(x['origin'])), axis=1)
route_a_b['partial_match']=(route_a_b['intersec']!=set())
partial=route_a_b[(~route_a_b['Fullmatch']) & (route_a_b['partial_match'])]
print(partial['route'].nunique(),"prefixes between BGP and ALTDB have a partial overlap.")

177 prefixes between BGP and ALTDB have a partial overlap.


# Get no overlap

## RADB

In [20]:
route_r_b['no_overlap']=(route_r_b['intersec']==set())
print(route_r_b[route_r_b['no_overlap']]['route'].nunique(),"prefixes between BGP and RADB have no overlap.")

20963 prefixes between BGP and RADB have no overlap.


## ALTDB

In [21]:
route_a_b['no_overlap']=(route_a_b['intersec']==set())
print(route_a_b[route_a_b['no_overlap']]['route'].nunique(),"prefixes between BGP and ALTDB have no overlap.")

312 prefixes between BGP and ALTDB have no overlap.


# Altdb final numbers

In [22]:
perc=str(round(bgp_inconsistent_altdb.prefix.nunique()*100/len(altdb_inconsistent),2))+"% "
fraction="("+str(bgp_inconsistent_altdb.prefix.nunique())+"/"+str(len(altdb_inconsistent))+")"
print(perc+fraction+" Appear in BGP and inconsistent.")

51.33% (619/1206) Appear in BGP and inconsistent.


In [23]:
perc=str(round(route_a_b[route_a_b['Fullmatch']]['route'].nunique()*100/bgp_inconsistent_altdb.prefix.nunique(),2))+"% "
fraction="("+str(route_a_b[route_a_b['Fullmatch']]['route'].nunique())+"/"+str(bgp_inconsistent_altdb.prefix.nunique())+")"
print(perc+fraction+" full overlap.")

21.0% (130/619) full overlap.


In [24]:
perc=str(round(route_a_b[route_a_b['no_overlap']]['route'].nunique()*100/bgp_inconsistent_altdb.prefix.nunique(),2))+"% "
fraction="("+str(route_a_b[route_a_b['no_overlap']]['route'].nunique())+"/"+str(bgp_inconsistent_altdb.prefix.nunique())+")"
print(perc+fraction+" no overlap.")

50.4% (312/619) no overlap.


In [25]:
perc=str(round(route_a_b[(~route_a_b['Fullmatch']) & (route_a_b['partial_match'])]['route'].nunique()*100/bgp_inconsistent_altdb.prefix.nunique(),2))+"% "
fraction="("+str(route_a_b[(~route_a_b['Fullmatch']) & (route_a_b['partial_match'])]['route'].nunique())+"/"+str(bgp_inconsistent_altdb.prefix.nunique())+")"
print(perc+fraction+" partial overlap.")

28.59% (177/619) partial overlap.


## Irregular route objects

In [49]:
# BEN FILL THIS IN

## Confirming total

In [26]:
fm=route_a_b[route_a_b['Fullmatch']]['route'].nunique()
no=route_a_b[route_a_b['no_overlap']]['route'].nunique()
pm=route_a_b[(~route_a_b['Fullmatch']) & (route_a_b['partial_match'])]['route'].nunique()
fm+no+pm==bgp_inconsistent_altdb.prefix.nunique()

True

# Radb final numbers

In [27]:
perc=str(round(bgp_inconsistent_radb.prefix.nunique()*100/len(radb_inconsistent),2))+"% "
fraction="("+str(bgp_inconsistent_radb.prefix.nunique())+"/"+str(len(radb_inconsistent))+")"
print(perc+fraction+" Appear in BGP and inconsistent.")

37.77% (56803/150402) Appear in BGP and inconsistent.


In [28]:
perc=str(round(route_r_b[route_r_b['no_overlap']]['route'].nunique()*100/bgp_inconsistent_radb.prefix.nunique(),2))+"% "
fraction="("+str(route_r_b[route_r_b['no_overlap']]['route'].nunique())+"/"+str(bgp_inconsistent_radb.prefix.nunique())+")"
print(perc+fraction+" no overlap.")

36.9% (20963/56803) no overlap.


In [29]:
perc=str(round(route_r_b[route_r_b['Fullmatch']]['route'].nunique()*100/bgp_inconsistent_radb.prefix.nunique(),2))+"% "
fraction="("+str(route_r_b[route_r_b['Fullmatch']]['route'].nunique())+"/"+str(bgp_inconsistent_radb.prefix.nunique())+")"
print(perc+fraction+" full overlap.")

5.28% (3002/56803) full overlap.


In [30]:
perc=str(round(route_r_b[(~route_r_b['Fullmatch']) & (route_r_b['partial_match'])]['route'].nunique()*100/bgp_inconsistent_radb.prefix.nunique(),2))+"% "
fraction="("+str(route_r_b[(~route_r_b['Fullmatch']) & (route_r_b['partial_match'])]['route'].nunique())+"/"+str(bgp_inconsistent_radb.prefix.nunique())+")"
print(perc+fraction+" partial overlap.")

57.81% (32838/56803) partial overlap.


## irregular route objects

In [50]:
# BEN FILL THIS IN

## Confirming total

In [31]:
fm=route_r_b[route_r_b['Fullmatch']]['route'].nunique()
no=route_r_b[route_r_b['no_overlap']]['route'].nunique()
pm=route_r_b[(~route_r_b['Fullmatch']) & (route_r_b['partial_match'])]['route'].nunique()
fm+no+pm==bgp_inconsistent_radb.prefix.nunique()

True