In [145]:
import itertools
from dataclasses import asdict, dataclass, field, replace
from datetime import datetime
from typing import Iterator, List, Tuple

import pandas as pd
from ripe.atlas.cousteau import AtlasResultsRequest
from ripe.atlas.sagan import DnsResult
from tqdm.contrib.concurrent import thread_map

In [4]:
# Arguments for the results request: the measurement id and the start/stop
# bounds of the time window to fetch.
kwargs = dict(
    msm_id=8310237,
    start=datetime(2024, 6, 1),
    stop=datetime(2024, 7, 1),
)


In [141]:
@dataclass
class AtlasResults:
    """Flat, mutable record describing one sub-result of a raw result entry.

    The first five fields are required and identify the raw entry; the
    remaining fields carry per-sub-result details and default to empty values.
    """

    # Required identity of the raw entry.
    msm_id: int
    probe_id: int
    origin: str
    ts: int
    entry_idx: int  # position of the raw entry in the fetched results

    # Optional per-sub-result details.
    sub_id: int = 0
    time: int = 0
    src_addr: str = ""
    dst_addr: str = ""
    error: str = ""
    proto: str = ""
    af: int = 0
    # default_factory gives every instance its own list.
    answers: List[dict] = field(default_factory=list)
    rt: float = 0.0

    def dict(self, **kwargs):
        """Return the record as a plain dict; keyword arguments go to ``asdict``."""
        return asdict(self, **kwargs)

In [146]:
def result2dataclass(t: Tuple[int, dict]) -> Iterator[dict]:
    """Flatten one raw result entry into one dict per ``resultset`` item.

    This is a generator function: calling it does no work until the returned
    iterator is consumed (e.g. with ``list(...)``).

    Args:
        t: ``(entry_idx, raw)`` pair, where ``entry_idx`` is the position of
            the entry in the fetched results and ``raw`` is the raw result
            dict. ``raw`` must contain ``msm_id``, ``prb_id``, ``timestamp``
            and ``from``.

    Yields:
        ``AtlasResults(...).dict()`` for every item of ``raw['resultset']``.
        Entries without a ``resultset`` key yield nothing.

    Raises:
        KeyError: if one of the required top-level keys is missing.
    """
    entry_idx, raw = t

    # Read the required keys up front so a malformed entry fails immediately,
    # whether or not it has any sub-results.
    base = dict(
        msm_id=raw['msm_id'],
        probe_id=raw['prb_id'],
        origin=raw['from'],
        ts=raw['timestamp'],
        entry_idx=entry_idx,
    )

    for sub in raw.get('resultset') or ():
        # A failed sub-result may carry no 'result' payload at all.
        payload = sub.get('result') or {}

        # Build a fresh record per sub-result: reusing one mutable record let
        # 'answers'/'rt' of an earlier sub-result leak into a later one that
        # had no payload, and re-emitted stale rows when 'proto' or 'time'
        # was missing.
        record = AtlasResults(
            **base,
            sub_id=sub.get('subid', 0),
            time=sub.get('time', 0),
            src_addr=sub.get('src_addr', ''),
            # Prefer the destination name when present; fall back to the
            # address, and to '' (not None) so the field stays a string.
            dst_addr=sub.get('dst_name', sub.get('dst_addr', '')),
            # Passed through unchanged.
            # NOTE(review): the raw 'error' value may be a dict rather than a
            # string -- confirm against the Atlas result format.
            error=sub.get('error', ''),
            proto=sub.get('proto', ''),
            af=sub.get('af', 0),
            answers=payload.get('answers', []),
            rt=payload.get('rt', 0.0),
        )
        yield record.dict()

In [5]:
# Request the results of the measurement for the window configured in kwargs.
# create() returns a (success flag, payload) pair; the payload is kept in
# memory and indexed/sliced as a sequence by the cells that follow.
is_success, results = AtlasResultsRequest(**kwargs).create()

In [6]:
# Show whether the results request reported success.
is_success

True

In [7]:
# Number of raw result entries returned for the requested window.
len(results)

8759198

In [147]:
# Spot-check the flattening on the first ten raw entries: print the index of
# each entry followed by the list of rows it yields (one per resultset item).
for idx, r in enumerate(results[:10]):
    # result2dataclass is a generator, so list() is needed to drain it.
    l = list(result2dataclass((idx, r)))
    print(idx, l)


0 [{'msm_id': 8310237, 'probe_id': 1000095, 'origin': '31.187.64.140', 'ts': 1717200028, 'entry_idx': 0, 'sub_id': 1, 'time': 1717200028, 'src_addr': '31.187.64.140', 'dst_addr': '8.8.8.8', 'error': '', 'proto': 'UDP', 'af': 4, 'answers': [{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google.com', 'RDATA': ['173.194.169.104']}, {'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google.com', 'RDATA': ['edns0-client-subnet 31.187.64.0/24']}], 'rt': 5.686}, {'msm_id': 8310237, 'probe_id': 1000095, 'origin': '31.187.64.140', 'ts': 1717200028, 'entry_idx': 0, 'sub_id': 2, 'time': 1717200029, 'src_addr': '31.187.64.140', 'dst_addr': '8.8.4.4', 'error': '', 'proto': 'UDP', 'af': 4, 'answers': [{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google.com', 'RDATA': ['173.194.169.104']}, {'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google.com', 'RDATA': ['edns0-client-subnet 31.187.64.0/24']}], 'rt': 4.397}]
1 [{'msm_id': 8310237, 'probe_id': 1000748, 'origin': '23.157.112.122', 'ts': 1717200024, 'entry_idx': 1, 'sub_id': 1, 'time': 

In [148]:
# Flatten the first 100 raw entries into a single list of row dicts.
#
# result2dataclass is a generator function, so calling it only creates a
# generator; each worker therefore drains it with list() so the flattening
# actually runs inside the pool instead of later on the main thread.
# The enumerated sample is materialized into a list so thread_map can read its
# length and show real progress rather than "0it".
ar = list(
    itertools.chain.from_iterable(
        thread_map(
            lambda t: list(result2dataclass(t)),
            list(enumerate(results[0:100])),
            max_workers=8,
        )
    )
)

0it [00:00, ?it/s]

In [149]:
# Tabulate the flattened rows: one DataFrame row per dict in ar.
pd.DataFrame(ar)

Unnamed: 0,msm_id,probe_id,origin,ts,entry_idx,sub_id,time,src_addr,dst_addr,error,proto,af,answers,rt
0,8310237,1000095,31.187.64.140,1717200028,0,1,1717200028,31.187.64.140,8.8.8.8,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",5.686
1,8310237,1000095,31.187.64.140,1717200028,0,2,1717200029,31.187.64.140,8.8.4.4,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",4.397
2,8310237,1000748,23.157.112.122,1717200024,1,1,1717200024,2605:c640:dc1:10c8::3,2001:4860:4860::8844,,UDP,6,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",19.133
3,8310237,1000967,24.213.13.3,1717200018,2,1,1717200018,127.0.0.1,127.0.0.11,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",109.688
4,8310237,1001259,95.95.207.179,1717200022,3,1,1717200022,127.0.0.1,127.0.0.1,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",17.857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,8310237,27667,76.39.65.164,1717200006,97,1,1717200006,2603:6000:81f0:a4d0:f6f2:6dff:fe5d:cc08,2603:6000:81f0:a4d0::1,,UDP,6,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",29.504
180,8310237,27667,76.39.65.164,1717200006,97,2,1717200007,192.168.1.212,192.168.1.1,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",28.702
181,8310237,285,83.191.164.214,1717200024,98,1,1717200024,172.29.164.254,172.29.164.1,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",28.187
182,8310237,2874,76.136.206.217,1717200054,99,1,1717200054,10.1.10.18,75.75.75.75,,UDP,4,"[{'TYPE': 'TXT', 'NAME': 'o-o.myaddr.l.google....",67.483


In [8]:
# Inspect the raw structure of the first result entry.
results[0]

{'fw': 5080,
 'mver': '2.6.2',
 'lts': 42098889,
 'resultset': [{'time': 1717200028,
   'lts': 42098889,
   'subid': 1,
   'submax': 2,
   'dst_addr': '8.8.8.8',
   'dst_port': '53',
   'af': 4,
   'src_addr': '31.187.64.140',
   'proto': 'UDP',
   'result': {'rt': 5.686,
    'size': 116,
    'abuf': 'uPOBgAABAAIAAAAAA28tbwZteWFkZHIBbAZnb29nbGUDY29tAAAQAAHADAAQAAEAAAA8ABAPMTczLjE5NC4xNjkuMTA0wAwAEAABAAAAPAAjImVkbnMwLWNsaWVudC1zdWJuZXQgMzEuMTg3LjY0LjAvMjQ=',
    'ID': 47347,
    'ANCOUNT': 2,
    'QDCOUNT': 1,
    'NSCOUNT': 0,
    'ARCOUNT': 0,
    'answers': [{'TYPE': 'TXT',
      'NAME': 'o-o.myaddr.l.google.com',
      'RDATA': ['173.194.169.104']},
     {'TYPE': 'TXT',
      'NAME': 'o-o.myaddr.l.google.com',
      'RDATA': ['edns0-client-subnet 31.187.64.0/24']}]}},
  {'time': 1717200029,
   'lts': 42098890,
   'subid': 2,
   'submax': 2,
   'dst_addr': '8.8.4.4',
   'dst_port': '53',
   'af': 4,
   'src_addr': '31.187.64.140',
   'proto': 'UDP',
   'result': {'rt': 4.397,
    'si

In [28]:
# Exploration: list the attributes available on the sagan-parsed form of the
# first result entry.
dir(DnsResult.get(results[0]))

['ACTION_FAIL',
 'ACTION_IGNORE',
 'ACTION_WARN',
 'PROTOCOL_ICMP',
 'PROTOCOL_MAP',
 'PROTOCOL_TCP',
 'PROTOCOL_UDP',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_handle_error',
 '_handle_malformation',
 '_is_property_name',
 '_on_error',
 '_on_malformation',
 'build_responses',
 'bundle',
 'calculate_median',
 'clean_protocol',
 'created',
 'created_timestamp',
 'ensure',
 'error_message',
 'firmware',
 'get',
 'group_id',
 'is_error',
 'is_malformed',
 'keys',
 'measurement_id',
 'origin',
 'probe_id',
 'raw_data',
 'responses',
 'responses_total',
 'seconds_since_sync',
 

In [29]:
# Exploration: list the attributes available on the first item of the parsed
# result's `responses`.
dir(DnsResult.get(results[0]).responses[0])

['ACTION_FAIL',
 'ACTION_IGNORE',
 'ACTION_WARN',
 'PROTOCOL_ICMP',
 'PROTOCOL_MAP',
 'PROTOCOL_TCP',
 'PROTOCOL_UDP',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__nonzero__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abuf',
 '_get_buf',
 '_handle_error',
 '_handle_malformation',
 '_is_property_name',
 '_on_error',
 '_on_malformation',
 '_parse_buf',
 '_qbuf',
 'abuf',
 'af',
 'clean_protocol',
 'destination_address',
 'ensure',
 'error_message',
 'is_error',
 'is_malformed',
 'keys',
 'protocol',
 'qbuf',
 'raw_data',
 'response_id',
 'response_size',
 'response_time',
 'source_address']

In [44]:
# Exploration: show the repr of the first parsed response object.
DnsResult.get(results[0]).responses[0]

<ripe.atlas.sagan.dns.Response at 0x70eb247810>