In [1]:
import sys
print(sys.version)

3.7.5 (default, Nov  7 2019, 10:50:52) 
[GCC 8.3.0]


In [2]:
import pybgpstream as pbs
import time
from datetime import datetime
from collections import defaultdict
from typing import Dict, Set
import pickle


# AMS-IX
collectors = ["route-views.amsix"]

In [3]:
# Find a time where rib dumps occurred in this range
startSearchTime = "2020-01-07 00:00:00 UTC"
endSearchTime = "2020-01-07 23:59:59 UTC"

def getFirstRibDumpTime():
    fullDayRibStream = pbs.BGPStream(
        from_time=startSearchTime,
        until_time=endSearchTime,
        collectors=collectors,
        record_type="ribs"
    )
    for rec in fullDayRibStream.records():
        for elem in rec:
            if 'next-hop' in elem.fields:
                return rec.time
            
startTime = int(getFirstRibDumpTime())
endTimeShort = startTime + 60
endTimeLong = startTime + 60 * 15
print("Found RIB dumps at time %d" % startTime)

Found RIB dumps at time 1578355200


In [4]:
ribStream = pbs.BGPStream(
        collectors=collectors,
        record_type="ribs"
    )
ribStream.add_interval_filter(startTime, endTimeShort)

updateStream = pbs.BGPStream(
        collectors=collectors,
        record_type="updates"
    )
updateStream.add_interval_filter(startTime, endTimeLong)

In [5]:
def extractMatrix(stream):
    prefixSets = defaultdict(set)
    for rec in stream.records():
        for elem in rec:
            # if the record is a route, it will have 'next-hop' in the fields
            if 'next-hop' in elem.fields:

                prefix = elem.fields['prefix']

                announcer = elem.fields['as-path'].split(" ")[0]

                prefixSets[prefix].add(announcer)

    return prefixSets
        
matrix = extractMatrix(ribStream)

In [6]:
print("%d rows in matrix" % len(matrix))
print("First 100 rows:")
for prefix, ases in list(matrix.items())[:10]:
    print(prefix, str(list(ases)))

907499 rows in matrix
First 100 rows:
0.0.0.0/0 ['51088', '61955']
1.0.0.0/24 ['267613', '39120', '12859', '61955', '1140', '1103', '39591', '51088', '57695', '293', '50763', '5394', '24875', '31019', '58511', '6830', '12779', '34968', '20953']
1.0.4.0/22 ['267613', '39120', '12859', '61955', '1140', '1103', '39591', '51088', '57695', '293', '50763', '5394', '24875', '31019', '58511', '6830', '12779', '34968', '20953']
1.0.4.0/24 ['267613', '39120', '12859', '61955', '1140', '1103', '39591', '51088', '57695', '293', '50763', '5394', '24875', '31019', '58511', '6830', '12779', '34968', '20953']
1.0.5.0/24 ['267613', '39120', '12859', '61955', '1140', '1103', '39591', '51088', '57695', '293', '50763', '5394', '24875', '31019', '58511', '6830', '12779', '34968', '20953']
1.0.6.0/24 ['267613', '39120', '12859', '61955', '1140', '1103', '39591', '51088', '57695', '293', '50763', '5394', '24875', '31019', '58511', '6830', '12779', '34968', '20953']
1.0.7.0/24 ['267613', '39120', '12859', '61

In [7]:
def extractUpdates(stream):
    updates = []
    for rec in stream.records():
        for elem in rec:
            if len(elem.fields) > 0:
                if elem.type in ['A', 'W']:
                    announcer = elem.fields.get('as-path', None)
                    if announcer is None:
                        announcer = elem.peer_asn
                    else:
                        announcer = announcer.split()[0]
                    updateType = '+' if elem.type == 'A' else '-'
                    updateTuple = (rec.time, announcer, updateType, elem.fields['prefix'])
                    updates.append(updateTuple)
                #print(elem.type)
                #print(elem.fields)
    return updates
updates = extractUpdates(updateStream)

In [8]:
print("%d updates scraped." % len(updates))
print("First 100 updates:")
for update in updates[:10]:
    print(update)

297854 updates scraped.
First 100 updates:
(1578355200.031364, '267613', '+', '209.163.124.0/24')
(1578355200.031383, '267613', '+', '202.95.201.0/24')
(1578355200.084798, '1103', '+', '109.65.126.0/24')
(1578355200.08482, '1103', '+', '131.221.139.0/24')
(1578355200.08482, '1103', '+', '131.221.136.0/24')
(1578355200.08482, '1103', '+', '131.221.137.0/24')
(1578355200.08482, '1103', '+', '131.221.138.0/24')
(1578355200.084843, '1103', '+', '170.0.108.0/22')
(1578355200.092825, '39591', '+', '2a00:ad87:4600::/48')
(1578355200.092859, 39591, '-', '131.221.137.0/24')


In [9]:
pickle_filename = "bgpstream_matrix_and_updates.pickle"
print("Saving matrix and updates to pickle", pickle_filename)

obj = {'matrix': matrix, 'updates':updates}
with open(pickle_filename, 'wb') as fp:
    pickle.dump(obj, fp)
print("Done.")

Saving matrix and updates to pickle bgpstream_matrix_and_updates.pickle
Done.
