In [1]:
from sqlalchemy import create_engine
engine = create_engine('postgres://meshmon:meshmon@localhost', echo=True)

In [2]:
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

In [3]:
import enum
from sqlalchemy import Column, Enum, Integer, String

In [4]:
# https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Enum
# > [...] the values of the Python Enum, here indicated as integers, are not used;
# the value of each enum can therefore be any kind of Python object
# whether or not it is persistable.
class NetObjectType(enum.Enum):
    AS = enum.auto()
    IX = enum.auto()

In [5]:
class NetObject(Base):
    __tablename__ = "net_objects"
    
    id = Column(Integer, primary_key=True)
    type = Column(Enum(NetObjectType), nullable=False)
    name = Column(String, nullable=False)
    number = Column(Integer)

In [6]:
NetObject.__table__

Table('net_objects', MetaData(bind=None), Column('id', Integer(), table=<net_objects>, primary_key=True, nullable=False), Column('type', Enum('AS', 'IX', name='netobjecttype'), table=<net_objects>, nullable=False), Column('name', String(), table=<net_objects>, nullable=False), Column('number', Integer(), table=<net_objects>), schema=None)

In [7]:
from sqlalchemy.orm import sessionmaker

In [8]:
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()

In [11]:
# Load objects from database
objects = {NetObjectType.AS: {}, NetObjectType.IX: {}}

In [12]:
for row in session.query(NetObject).all():
    objects[row.type][row.name] = row.id

2020-05-16 15:11:38,636 INFO sqlalchemy.engine.base.Engine SELECT net_objects.id AS net_objects_id, net_objects.type AS net_objects_type, net_objects.name AS net_objects_name, net_objects.number AS net_objects_number 
FROM net_objects
2020-05-16 15:11:38,637 INFO sqlalchemy.engine.base.Engine {}


In [121]:
objects

NameError: name 'objects' is not defined

In [3]:
import sys
sys.path.append('/home/maxmouchet/Clones/fetchmesh/')

In [8]:
from pathlib import Path
from tqdm import tqdm

In [9]:
from fetchmesh.asn import ASNDB
from fetchmesh.peeringdb import PeeringDB
from fetchmesh.io import AtlasRecordsReader
from fetchmesh.transformers import TracerouteFlatIPTransformer, TracerouteMapASNTransformer, TracerouteMapIXTransformer

In [10]:
asndb = ASNDB.from_file('/home/maxmouchet/Clones/goasn/rib.20200516.0800.bz2.txt')
asntree = asndb.radix_tree()

In [11]:
peeringdb = PeeringDB.from_api()
ixtree = peeringdb.radix_tree()

In [16]:
traceroute_dataset = Path('/home/maxmouchet/Clones/thesis/datasets/traceroute_v4_1577833200_1578351600-pairs')

In [32]:
files = list(traceroute_dataset.glob("*.ndjson.zst"))

In [35]:
from collections import defaultdict

In [37]:
# Here we make the assumption that ASN are
# always different from IXP names. This sounds
# reasonable but we may do something more robust.

In [83]:
from math import ceil
# Find the smallest multiple of `x` not smaller than `n`.
closest_multiple = lambda x, n: x * ceil(n/x)

In [109]:
from itertools import chain

In [119]:
class CountWorker:
    def __init__(self, transformers, resolution=3600):
        self.transformers = transformers
        self.resolution = resolution
        
    def do(self, file):
        as_counts = defaultdict(int)
        ix_counts = defaultdict(int)
        with AtlasRecordsReader(file, transformers = self.transformers) as rdr:
            for record in rdr:
                time = closest_multiple(self.resolution, record['timestamp'])
                for x in filter(lambda x: x, chain.from_iterable(record['asn'])):
                    as_counts[(x, time)] += 1
                for x in filter(lambda x: x, chain.from_iterable(record['ix'])):
                    ix_counts[(x, time)] += 1
        return as_counts, ix_counts

In [120]:
worker = CountWorker(transformers, 3600)
worker.do(files[0])

(defaultdict(int,
             {(202422, 1577836800): 4,
              (7922, 1577836800): 16,
              (17054, 1577836800): 12,
              (202422, 1577840400): 4,
              (7922, 1577840400): 16,
              (17054, 1577840400): 12,
              (202422, 1577844000): 4,
              (7922, 1577844000): 16,
              (17054, 1577844000): 12,
              (202422, 1577847600): 4,
              (7922, 1577847600): 16,
              (17054, 1577847600): 12,
              (202422, 1577851200): 4,
              (7922, 1577851200): 16,
              (17054, 1577851200): 12,
              (202422, 1577854800): 4,
              (7922, 1577854800): 16,
              (17054, 1577854800): 12,
              (202422, 1577858400): 4,
              (7922, 1577858400): 16,
              (17054, 1577858400): 12,
              (202422, 1577862000): 4,
              (7922, 1577862000): 16,
              (17054, 1577862000): 12,
              (202422, 1577865600): 4,
              (

In [85]:
transformers = [
            TracerouteMapASNTransformer(asntree),
            TracerouteMapIXTransformer(ixtree),
            TracerouteFlatIPTransformer(as_set = True, drop_dup = True, drop_late = True, extras_fields=('asn', 'ix'))
        ]

In [46]:
r['timestamp']

1577833664

In [124]:
# path = Path('/home/maxmouchet/Clones/thesis/datasets/traceroute_v4_1577833200_1578351600-pairs/10098612_6333.ndjson.zst')
transformers = [
    TracerouteMapASNTransformer(asntree),
    TracerouteMapIXTransformer(ixtree),
    TracerouteFlatIPTransformer(as_set = True, drop_dup = True, drop_late = True, extras_fields=('asn', 'ix'))
]
paths = []

for file in tqdm(files):
    as_counts = defaultdict(int)
    ix_counts = defaultdict(int)
    worker.do(file)
#     with AtlasRecordsReader(file, transformers=transformers) as rdr:
#         for record in rdr:
#             for asns, ixs in zip(record['asn'], record['ix']):
#                 for asn in asns:
#                     as_counts[asn] += 1
#                 for ix in ixs:
#                     ix_counts[ix] += 1

  0%|          | 64/16814 [00:07<33:08,  8.42it/s]Traceback (most recent call last):
  File "<ipython-input-119-cf127afada55>", line 10, in do
    for record in rdr:
  File "/home/maxmouchet/Clones/fetchmesh/fetchmesh/transformers/record.py", line 17, in __call__
    return self.transform(record)
  File "/home/maxmouchet/Clones/fetchmesh/fetchmesh/transformers/record.py", line 41, in transform
    new_record["result"][i]["result"][j]["asn"] = asn
KeyboardInterrupt
  0%|          | 64/16814 [00:07<30:57,  9.02it/s]


KeyboardInterrupt: 

In [42]:
as_counts

defaultdict(int, {None: 960, 12414: 1920, 1103: 2400})

In [43]:
ix_counts

defaultdict(int, {None: 4800, 'AMS-IX': 480})

In [20]:
len(list(traceroute_dataset.glob('*.zst')))

16814

In [9]:
paths

[]

In [21]:
paths[0]['result'][0]['result'][0]['from'] = '80.249.208.42'

In [22]:
 TracerouteFlatIPTransformer(as_set = True, drop_dup = True, drop_late = True, extras_fields=('asn', 'ix')).transform(TracerouteMapIXTransformer(ixtree).transform(paths[0]))

{'timestamp': 1577833986,
 'from': '80.67.163.251',
 'src_addr': '80.67.163.251',
 'dst_addr': '203.174.85.106',
 'paris_id': 7,
 'hops': [{'80.249.208.42', '80.67.163.243'},
  {'80.231.79.69'},
  {'80.231.153.177'},
  {'129.250.8.1'},
  {'129.250.5.39'},
  {'129.250.4.133'},
  {'129.250.4.24'},
  {'129.250.2.26'},
  {'129.250.7.65'},
  {'129.250.3.130'},
  {'129.250.7.85'},
  {'116.51.26.114'},
  {None},
  {'203.174.85.106'}],
 'asn': [{None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None}],
 'ix': [{'AMS-IX', None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None},
  {None}]}

In [20]:
for path in paths:
    for ixs in path["ix"]:
        if len(ixs - {None}) > 0:
            print(ixs)

In [47]:
type(asntree)

radix.Radix

In [30]:
len(paths)

576

In [42]:
for path in paths:
    path["asn"] = []
    for hop in path["hops"]:
        for addr in hop:
            if addr:
                m = asntree.search_best(addr)
                if m:
                    path["asn"].append(m.data)

In [43]:
paths

[{'timestamp': 1577833804,
  'from': '94.136.2.190',
  'src_addr': '94.136.2.190',
  'dst_addr': '203.174.85.106',
  'paris_id': 13,
  'hops': [{'94.136.2.189'},
   {'94.136.0.190'},
   {'94.136.1.10'},
   {'94.136.0.3'},
   {'212.69.163.185'},
   {'83.167.55.51'},
   {'83.167.55.208'},
   {'64.125.29.237'},
   {'64.125.30.254'},
   {'64.125.29.54'},
   {'64.125.29.59'},
   {'64.125.29.80'},
   {'64.125.29.17'},
   {'64.125.29.118'},
   {None},
   {None},
   {'64.125.29.19'},
   {'64.125.31.219'},
   {'64.125.27.189'},
   {'64.125.35.190'},
   {'203.208.183.45'},
   {'203.208.153.246'},
   {'203.208.177.110'},
   {None},
   {'203.174.85.106'}],
  'asn': [{'origins': [8218]},
   {'origins': [8218]},
   {'origins': [8218]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {'origins': [6461]},
   {

In [29]:
asntree.search_best(None).data

TypeError: No address specified (use 'address' or 'packed')

In [15]:
paths

[{'timestamp': 1577833804,
  'from': '94.136.2.190',
  'src_addr': '94.136.2.190',
  'dst_addr': '203.174.85.106',
  'paris_id': 13,
  'hops': [{'94.136.2.189'},
   {'94.136.0.190'},
   {'94.136.1.10'},
   {'94.136.0.3'},
   {'212.69.163.185'},
   {'83.167.55.51'},
   {'83.167.55.208'},
   {'64.125.29.237'},
   {'64.125.30.254'},
   {'64.125.29.54'},
   {'64.125.29.59'},
   {'64.125.29.80'},
   {'64.125.29.17'},
   {'64.125.29.118'},
   {None},
   {None},
   {'64.125.29.19'},
   {'64.125.31.219'},
   {'64.125.27.189'},
   {'64.125.35.190'},
   {'203.208.183.45'},
   {'203.208.153.246'},
   {'203.208.177.110'},
   {None},
   {'203.174.85.106'}]},
 {'timestamp': 1577834698,
  'from': '94.136.2.190',
  'src_addr': '94.136.2.190',
  'dst_addr': '203.174.85.106',
  'paris_id': 14,
  'hops': [{'94.136.2.189'},
   {'94.136.0.190'},
   {'94.136.1.10'},
   {'94.136.0.3'},
   {'212.69.163.185'},
   {'83.167.55.51'},
   {'83.167.55.208'},
   {'64.125.29.237'},
   {'64.125.30.254'},
   {'64.125.29