In [1]:
import requests
import re
import pandas as pd

# create asn-country mapping

In [2]:
URL = 'http://bgp.potaroo.net/cidr/autnums.html'
res = requests.get(URL)
lines = res.text.split('\n')

In [3]:
regex = re.compile(r'(?<=AS)[0-9]+')

In [4]:
rows = []

for line in lines:
    m1 = regex.search(line)
    if m1 is None:
        continue
    asn = m1.group()
    m2 = line.find('</a>')+5
    inst = line[m2:-4]
    natl = line[-2:]
    row = {'asn':asn, 'institution':inst, 'country':natl}
    rows.append(row)

df_as = pd.DataFrame(rows).set_index('asn')

In [5]:
df_as.head(3)

Unnamed: 0_level_0,country,institution
asn,Unnamed: 1_level_1,Unnamed: 2_level_1
1,US,"LVLT-1 - Level 3 Parent, LLC"
2,US,UDEL-DCN - University of Delaware
3,US,MIT-GATEWAYS - Massachusetts Institute of Tech...


In [6]:
df_as.to_csv('asn_country_institution.tsv', sep='\t', encoding='utf-8')

# read asn-country mapping

In [7]:
df_as = pd.read_csv('asn_country_institution.tsv', sep='\t', encoding='utf-8')

In [8]:
df_as.head(3)

Unnamed: 0,asn,country,institution
0,1,US,"LVLT-1 - Level 3 Parent, LLC"
1,2,US,UDEL-DCN - University of Delaware
2,3,US,MIT-GATEWAYS - Massachusetts Institute of Tech...


# read country-continent mapping

In [9]:
df_cc = pd.read_csv('country_continent.tsv', sep='\t', index_col=None, converters={'country':str, 'continent':str})

In [10]:
df_cc.head(3)

Unnamed: 0,country,continent
0,AD,EU
1,AE,AS
2,AF,AS


# create asn-cc mapping
cc = country/continent

* AF = Africa
* AS = Asia
* EU = Europe
* NA = North America
* SA = South America
* OC = Oceania
* AN = Antarctica

In [11]:
df_as = pd.merge(df_as,df_cc)

In [12]:
df_as.head(3)

Unnamed: 0,asn,country,institution,continent
0,1,US,"LVLT-1 - Level 3 Parent, LLC",
1,2,US,UDEL-DCN - University of Delaware,
2,3,US,MIT-GATEWAYS - Massachusetts Institute of Tech...,


In [13]:
df_as.groupby(['continent']).size().to_frame('count')

Unnamed: 0_level_0,count
continent,Unnamed: 1_level_1
AF,1705
AS,14112
EU,31840
,30127
OC,3216
SA,7297


In [14]:
df_as['cc'] = df_as['country']+'/'+df_as['continent']

In [15]:
cc = df_as[['asn','cc']].set_index('asn')['cc'].to_dict()

In [16]:
cc[17579]

'KR/AS'

# modify bgp info with cc-mapping

In [17]:
f = open('sh_ip_bgp.txt','r')
lines = f.read().split('\n')
f.close()

In [20]:
new_lines = []
toggle = False
for line in lines:
    if toggle == False:
        if 'LocPrf' in line:
            toggle = True
        new_lines.append(line)
        continue
    if len(line) < 63:
        new_lines.append(line)
        continue
    path = line[63:].split()
    asn = [int(a) for a in path if a.isdigit()]
    asn_cc = []
    for a in asn:
        if a in cc:
            ac = '%s(%s)'%(a,cc[a])
        else:
            ac = a
        asn_cc.append(str(ac))
    asn_cc.append(path[-1])
    if line[61] == ' ':
        line = line[:64] + ' '.join(asn_cc)
    else:
        line = line[:61] + ' '.join(asn_cc)
    new_lines.append(line)

In [21]:
# before modified
lines[20:23]

[' * i 1.0.128.0/19     aaa.bbb.198.23           0    100      0 9318 38040 23969 i',
 ' *                    ccc.ddd.244.87                         0 9318 38040 23969 i',
 ' *>                   ccc.ddd.244.88                         0 9318 38040 23969 i']

In [22]:
# after cc-mapping applied
new_lines[20:23]

[' * i 1.0.128.0/19     aaa.bbb.198.23           0    100      9318(KR/AS) 38040(TH/AS) 23969(TH/AS) i',
 ' *                    ccc.ddd.244.87                         9318(KR/AS) 38040(TH/AS) 23969(TH/AS) i',
 ' *>                   ccc.ddd.244.88                         9318(KR/AS) 38040(TH/AS) 23969(TH/AS) i']

In [23]:
f = open('sh_ip_bgp_cc.txt','w')
f.write('\n'.join(new_lines))
f.close()