In [7]:
import copy
import json
import os
import subprocess
import time
from uuid import uuid4

import matplotlib as plt
import numpy as np
import pandas as pd
import requests
from dns.resolver import Resolver, NXDOMAIN, NoNameservers, Timeout, NoAnswer, query
from tqdm import tqdm

%matplotlib inline

In [2]:
# The API key is a secret and must not be stored in the notebook: it is read
# from the RIPE_ATLAS_API_KEY environment variable (empty string when unset).
# NOTE(review): the key that used to be hard-coded here has been published
# with the notebook and should be revoked.
atlas_api_key = os.environ.get('RIPE_ATLAS_API_KEY', '')
url_dns_measurements_create = 'https://atlas.ripe.net:443/api/v2/measurements/dns/'
url_dns_measurements_get = 'https://atlas.ripe.net:443/api/v2/measurements/dns/'

newline = '\n'

# Figure size used in this notebook; the previous (6, 4) assignment was
# immediately overwritten and has been removed.
figsize = (15, 10)

# Range of measurement ids used to filter the measurement listing requests.
min_meas_id = 8759930
max_meas_id = 8770979

headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}

In [24]:
def write_list(fn, data):
    '''Replace the contents of the file fn with the items of data, one per line.

    Every item is concatenated with the module-level newline string, so the
    items are expected to be strings.
    '''
    with open(fn, 'w') as handle:
        handle.writelines(item + newline for item in data)
        
def append_list(fn, data):
    '''Add the items of data to the end of the file fn, one per line.

    Existing content is kept. Every item is concatenated with the
    module-level newline string, so the items are expected to be strings.
    '''
    with open(fn, 'a') as handle:
        handle.writelines(item + newline for item in data)
    
def read_list(fn):
    '''Read the file fn and return its lines as a list.

    The module-level newline character is stripped from both ends of every line.
    '''
    entries = []
    with open(fn, 'r') as handle:
        for raw in handle:
            entries.append(raw.strip(newline))
    return entries
    
def write_json(fn, data):
    '''Serialise data as JSON and store it in the file fn, replacing its contents.'''
    with open(fn, 'w') as handle:
        serialised = json.dumps(data)
        handle.write(serialised)
            
def read_json(fn):
    '''Parse the JSON document stored in the file fn and return the decoded value.'''
    with open(fn, 'r') as handle:
        text = handle.read()
        return json.loads(text)

In [25]:
def write_data(fn, data):
    """Write data to data/<fn>.json, backing up the previous contents first.

    If data/<fn>.json can be read, its contents are first copied to a
    time-stamped file under data/backup/. The backup is best effort: when the
    previous file or the backup directory is missing, or the previous file is
    not valid JSON, the backup is skipped and the new data is still written.

    fn   -- base name of the data file, without directory or extension
    data -- JSON-serialisable value to store
    """
    current_path = "data/{}.json".format(fn)

    try:
        previous = read_json(current_path)
        backup_path = "data/backup/{}.json ".format(fn) + time.ctime().replace(' ', '-')
        write_json(backup_path, previous)
    except (OSError, ValueError):
        # OSError: nothing to back up or nowhere to put it.
        # ValueError: the previous file does not contain valid JSON.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        pass

    write_json(current_path, data)

In [None]:
# Load the TLD list: one record {'tld': <lower-cased name>} per line of data/tlds.
data = []

with open('data/tlds', 'r') as f:
    for line_number, line in enumerate(f):
        # The first line of the file is skipped; it is not treated as a TLD.
        if line_number == 0:
            continue

        # strip() instead of line[:-1]: the last line may lack a trailing
        # newline, in which case slicing would cut off a real character.
        tld = line.strip().lower()

        if tld:
            data.append({'tld': tld})

In [None]:
# Test tld set
# data = [{'tld': 'nl'}, {'tld': 'audi'}]

In [29]:
def find_nxdomain(tld, max_tries = 3):
    '''Return a random domain under tld for which the resolver reports NXDOMAIN.

    A UUID-based label is generated and looked up with query(). The name is
    returned only when the lookup raises NXDOMAIN. When the name resolves, or
    the lookup fails with NoNameservers, Timeout or NoAnswer (none of which
    show that the name does not exist), a new name is tried.

    tld       -- the top-level domain to generate the name under
    max_tries -- maximum number of names to try

    Returns the domain name, or None if no NXDOMAIN was observed within
    max_tries attempts. Other exceptions raised by query() propagate.
    '''
    for _ in range(max_tries):
        domain = '{}.{}'.format(uuid4(), tld)

        try:
            query(domain)
        except NXDOMAIN:
            return domain
        except (NoNameservers, Timeout, NoAnswer):
            # Inconclusive lookup: retry with a fresh name.
            continue

    return None

def find_nxdomain_wildcard(tld, max_tries = 3):
    '''Return a random domain under tld whose SOA lookup is answered by the TLD itself.

    A UUID-based name is queried with
    `dig soa +noall +authority +noidn <name>`. The name is accepted when the
    first line printed by dig starts with tld.

    dig is run through subprocess with an argument list, so the function also
    works outside IPython and the name is never interpreted by a shell.

    tld       -- the top-level domain to generate the name under
    max_tries -- maximum number of names to try

    Returns the domain name, or None when no attempt produced such a line
    (including the case where dig prints nothing at all).
    '''
    for _ in range(max_tries):
        domain = '{}.{}'.format(uuid4(), tld)

        completed = subprocess.run(
            ['dig', 'soa', '+noall', '+authority', '+noidn', domain],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True)
        response = completed.stdout.splitlines()

        # dig prints nothing when there is no authority section to show.
        if response and response[0].startswith(tld):
            return domain

    return None

In [None]:
# Look up a random UUID-based name under every TLD; when the resolver raises
# NXDOMAIN the name is stored under datum['domain'], every other outcome is
# only reported.
for datum in data:
    domain = '{}.{}'.format(uuid4(), datum['tld'])

    try:
        query(domain)
    except NXDOMAIN:
        datum['domain'] = domain
    except NoNameservers:
        print(datum['tld'], 'NO NAMESERVERS')
    except Timeout:
        print(datum['tld'], 'TIME OUT')
    except NoAnswer:
        print(datum['tld'], 'NO ANSWER')
    else:
        print(datum['tld'], 'DOMAIN EXISTS')

In [None]:
# Same lookup as before, restricted to the TLDs that do not have a 'domain'
# entry yet.
for datum in data:
    if 'domain' in datum:
        continue

    domain = '{}.{}'.format(uuid4(), datum['tld'])

    try:
        query(domain)
    except NXDOMAIN:
        datum['domain'] = domain
    except NoNameservers:
        print(datum['tld'], 'NO NAMESERVERS')
    except Timeout:
        print(datum['tld'], 'TIME OUT')
    except NoAnswer:
        print(datum['tld'], 'NO ANSWER')
    else:
        print(datum['tld'], 'DOMAIN EXISTS')

In [None]:
# wildcard check
# For every TLD that still has no test domain, ask dig for the SOA of a random
# name and accept the name when the printed authority section starts with the TLD.
for item in data:
    if 'domain' not in item:
        domain = str(uuid4()) + '.' + item['tld']
        print(domain)

        # Argument list instead of shell=True on a concatenated string; run()
        # waits for dig to finish and returns its output as text, so the
        # comparison below no longer depends on the repr of a bytes object.
        completed = subprocess.run(['dig', 'soa', '+noall', '+authority', domain],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   universal_newlines=True)
        out = completed.stdout

        print(out)

        if out.startswith(item['tld']):
            item['domain'] = domain

In [None]:
# Shallow copy: data_2 is a new container, but its elements are the same
# objects as the elements of data.
data_2 = data.copy()

In [None]:
# data = [{'tld': 'nl'}]
for datum in data:
    if not 'domain' in datum:
        print(datum)

In [None]:
df = pd.DataFrame(data)
df.loc[df.domain.isnull()].tld

In [None]:
# Template for a measurement request body. "definitions" and "probes" start
# out empty; copies of this dict get them filled in elsewhere in the notebook.
# NOTE(review): "bill_to" holds a personal e-mail address that is stored with
# the notebook — confirm this is intended before sharing.
payload = {
    "bill_to": "bickerkards@gmail.com",
    "is_oneoff": True,
    "definitions": [],
    "probes": []
}

# Base DNS measurement definition. Elsewhere in the notebook copies of it get
# their own "query_type", "query_argument" and "description".
definition = {
    "af":4,
    "query_class":"IN",
    "query_type":"A",
    "query_argument": "nlnetlabs.nl",
    "description":"Test getting probes",
    "use_probe_resolver":True,
    "resolve_on_probe":False,
    "set_nsid_bit":True,
    "protocol":"UDP",
    "udp_payload_size":512,
    "retry":0,
    "skip_dns_check":False,
    "include_qbuf":False,
    "include_abuf":True,
    "prepend_probe_id":False,
    "set_rd_bit":False,
    "set_do_bit":False,
    "set_cd_bit":False,
    "type":"dns",
    "is_public":True
}

In [None]:
# Ids of the probes requested for every measurement.
probe_ids = [
    10262, 10287, 11040, 11429, 12515, 12873, 12956, 13623, 13728, 13769,
    13788, 13799, 13804, 13805, 13810, 14237, 26057, 14564, 15156, 14691,
    15594, 15799, 4205, 18131, 18195, 18691, 19326, 19740, 20111, 20353,
    20493, 20531, 20621, 21003, 21035, 21122, 21251, 21345, 21703, 22286,
    22695, 23031, 23085, 28240, 27972, 23697, 24807, 25011, 25148, 25323,
    26936, 26378, 26627, 4155, 26823, 28355, 30676, 4829, 29006, 29183,
    29405, 30225, 30324, 31201, 19306, 19634, 6025, 11660, 22388, 25182,
    4123, 3812, 20923, 14384, 12389,
]

# Single probe selection: the ids joined with ", " plus how many are requested.
probes = [
    dict(
        value=', '.join(str(probe_id) for probe_id in probe_ids),
        type="probes",
        requested=len(probe_ids),
    )
]

In [None]:
# Build the request bodies: the TLDs are processed in batches of step_size and
# every TLD contributes two definitions, an NS query for the TLD ("caching")
# followed by an SOA query for its test domain ("measuring").
payloads = []
step_size = 50

for i in range(0, len(data), step_size):
    definitions = []

    for datum in data[i:i + step_size]:
        # TLDs for which no test domain was found cannot be measured; skipping
        # them avoids a KeyError on datum['domain'].
        if 'domain' not in datum:
            print(datum['tld'], 'SKIPPED, NO TEST DOMAIN')
            continue

        definition_caching = definition.copy()
        definition_caching['query_type'] = "NS"
        definition_caching['query_argument'] = datum['tld']
        definition_caching['description'] = "caching " + datum['tld']
        definitions.append(definition_caching)

        definition_measuring = definition.copy()
        definition_measuring['query_type'] = "SOA"
        definition_measuring['query_argument'] = datum['domain']
        definition_measuring['description'] = "measuring " + datum['tld']
        definitions.append(definition_measuring)

    # A batch in which every TLD was skipped would yield a request without
    # definitions.
    if not definitions:
        continue

    new_payload = payload.copy()
    new_payload['probes'] = probes
    new_payload['definitions'] = definitions

    payloads.append(new_payload)

In [None]:
measurement_ids = []
measurement_responses = []
url = url_dns_measurements_create + '?key=' + atlas_api_key

In [None]:
with open('data/payloads', 'r') as f:
    payloads = json.loads(f.read())

In [None]:
# measurement_ids.append(requests.post(url, data = json.dumps(payloads[0]), headers = headers))
# [x['description'] for x in payloads[1]['definitions']]
# request.status_code
# measurement_ids
# len(payloads)
# request.json()
payloads[23]['definitions'][-1]

In [None]:
# split into 3
# split into 3
# Submit the payloads from index 23 onwards and collect what the API returns.
# NOTE(review): assumes a successful response body is a JSON list — confirm.
for payload in payloads[23:]:

    request = requests.post(url, data = json.dumps(payload), headers = headers)
    print(request.status_code)

    # A 400 response is retried, after a five minute wait, until it is accepted.
    while request.status_code == 400:
        print(request.json())
        time.sleep(300)
        request = requests.post(url, data = json.dumps(payload), headers = headers)
        print(request.status_code)

    # Extend in place; the previous `+= measurement_ids + ...` appended a copy
    # of the whole list on every iteration.
    measurement_ids += request.json()

In [None]:
# Print the listing entry of every submitted measurement.
# (loop variable renamed so that the builtin id() is not shadowed)
for measurement_id in measurement_ids:
    print(requests.get(url_dns_measurements_get + '?id__in=' + str(measurement_id)).json())

In [None]:
# with open('data/measurement_ids', 'w') as f:
#     f.write(json.dumps(measurement_ids))

In [None]:
temp_url = url_dns_measurements_get + '?id__lte=' + str(max(measurement_ids)) + '&id__gte=' + str(min(measurement_ids)) + '&description__startswith=measuring&mine=true'

In [None]:
temp_url = url_dns_measurements_get + '?id__lte=' + str(max_meas_id) + '&id__gte=' + str(min_meas_id) + '&description__startswith=measuring&mine=true'

In [None]:
# # measurements = [requests.get(url_dns_measurements_get + '?id__in=' + str(id)).json() for id in measurement_ids]
# measurements = []
# pbar = tqdm(total=len(measurement_ids))

# for id in measurement_ids:
#     measurements.append(requests.get(url_dns_measurements_get + '?id__in=' + str(id)).json()) 
        
#     pbar.update(1)
# pbar.close()

In [None]:
# xyz = probe_ids.copy()

# for probe_idsd in [r['prb_id'] for r in request]:
#     xyz.remove(probe_id)

In [None]:
# Lower-cased lines of data/tlds, with newline characters stripped.
tlds = []

with open('data/tlds') as f:
#     next(f)

    tlds.extend(line.strip('\n').lower() for line in f)

In [None]:
tld_timeouts

In [None]:
# Walk through all pages of the measurement listing and collect, per TLD, the
# response times reported by the probes, plus error counts per TLD and per probe.
#   results[tld]      -> list of 'rt' values
#   tld_timeouts[tld] -> number of result entries without a 'result' key
#   xprobes[prb_id]   -> number of such entries per probe
# NOTE(review): tld_timeouts is not created in this cell and must already exist.
results = {}
xprobes = {}

# Named has_next rather than next, so the builtin next() is not shadowed.
has_next = True
measurements = requests.get(temp_url).json()

while has_next:
    for result in measurements['results']:
        parts = result['description'].split()

        # Only descriptions of the form "<type> <tld>" can be unpacked.
        if len(parts) != 2:
            continue

        measurement_type, tld = parts

        if measurement_type == 'measuring':
            request = requests.get(result['result']).json()

            for probe in request:
                for result2 in probe['resultset']:
                    if 'result' in result2:
                        results.setdefault(tld, []).append(result2['result']['rt'])
                    else:
                        print(tld, probe['prb_id'], result2['error'])

                        tld_timeouts[tld] = tld_timeouts.get(tld, 0) + 1
                        xprobes[probe['prb_id']] = xprobes.get(probe['prb_id'], 0) + 1

    if measurements['next']:
        print('\n' + measurements['next'].split('=')[-1] + '\n')
        measurements = requests.get(measurements['next']).json()
    else:
        has_next = False
        print('Done')

In [10]:
# Create the output directories for the downloaded results. os.makedirs also
# creates missing parents and, with exist_ok=True, re-running the cell is harmless.
for directory in ('data/atlas/ns', 'data/atlas/soa'):
    os.makedirs(directory, exist_ok=True)

In [11]:
temp_url = url_dns_measurements_get + '?id__lte=' + str(max_meas_id) + '&id__gte=' + str(min_meas_id) + '&mine=true'

In [35]:
# Download the results of every listed measurement and store them, together
# with the listing entry, in one JSON file per TLD.
# The first word of the description selects the output directory.
output_dirs = {'measuring': 'data/atlas/soa', 'caching': 'data/atlas/ns'}

next_result = True
measurements = requests.get(temp_url).json()
pbar = tqdm(total=1531)

try:
    while next_result:
        for result in measurements['results']:
            parts = result['description'].split()

            if len(parts) == 2:
                measurement_type, tld = parts

                # The results are only fetched for the two known types.
                if measurement_type in output_dirs:
                    request = requests.get(result['result']).json()

                    # Copy of the listing entry with the result URL replaced
                    # by the downloaded results.
                    temp = copy.deepcopy(result)
                    temp['result'] = request

                    path = '{}/{}.json'.format(output_dirs[measurement_type], tld.upper())
                    with open(path, 'w') as f:
                        f.write(json.dumps(temp))

            pbar.update(1)

        if measurements['next']:
            measurements = requests.get(measurements['next']).json()
        else:
            next_result = False
            print('Done')
finally:
    # Close the bar once, after the last page (or on interruption); it used to
    # be closed at the end of the first page.
    pbar.close()


  0%|          | 0/1531 [00:00<?, ?it/s][A
  0%|          | 1/1531 [00:00<05:10,  4.92it/s][A
  0%|          | 2/1531 [00:00<05:05,  5.00it/s][A
  0%|          | 3/1531 [00:00<04:56,  5.15it/s][A
  0%|          | 4/1531 [00:00<04:41,  5.42it/s][A
  0%|          | 5/1531 [00:03<27:53,  1.10s/it][A
  0%|          | 6/1531 [00:04<21:40,  1.17it/s][A
  0%|          | 7/1531 [00:04<19:31,  1.30it/s][A
  1%|          | 8/1531 [00:05<16:34,  1.53it/s][A
  1%|          | 9/1531 [00:05<13:47,  1.84it/s][A
  1%|          | 10/1531 [00:05<11:49,  2.14it/s][A
  1%|          | 11/1531 [00:05<09:56,  2.55it/s][A
  1%|          | 12/1531 [00:06<08:49,  2.87it/s][A
  1%|          | 13/1531 [00:06<07:36,  3.33it/s][A
  1%|          | 14/1531 [00:06<07:00,  3.60it/s][A
  1%|          | 15/1531 [00:06<06:32,  3.86it/s][A
  1%|          | 16/1531 [00:07<05:58,  4.23it/s][A
  1%|          | 17/1531 [00:07<05:58,  4.23it/s][A
  1%|          | 18/1531 [00:07<05:45,  4.38it/s][A
  1%|     

KeyboardInterrupt: 

In [22]:
# Summarise the stored SOA results: one record per file with the mean response
# time over all probe results and the number of timeout errors.
indir = 'data/atlas/soa/'

data_perf = []

for root, dirs, filenames in os.walk(indir):
    for filename in filenames:
#         tld, _ = f.split('.')
        # The file name, extension included, is used as the TLD label.
        tld = filename

        datum = {'tld': tld, 'rt': [], 'timeouts': 0}

        # Joined with root so files in sub-directories are opened correctly.
        with open(os.path.join(root, filename), 'r') as f:
            tld_results = json.load(f)

        for probe in tld_results['result']:
            for result in probe['resultset']:
                if 'result' in result:
                    datum['rt'].append(result['result']['rt'])
                elif 'error' in result and 'timeout' in result['error']:
                    datum['timeouts'] += 1

        # NaN when no probe produced a result, without the RuntimeWarning that
        # np.mean([]) emits.
        datum['rt'] = np.mean(datum['rt']) if datum['rt'] else float('nan')
        data_perf.append(datum)

In [26]:
write_data('data_perf', data_perf)

In [None]:
# df2.sort_values('ntimeouts', ascending=False).head()
# df2.ntimeouts.hist(bins=8, align='right')

In [None]:
# Histogram of the per-TLD counts in tld_timeouts, drawn with a logarithmic
# y-axis and saved as imgs/per_timeouts.pdf.
df2 = pd.DataFrame(list(tld_timeouts.items()), columns=['tld', 'ntimeouts'])
ax = df2.hist(bins=8, align='left', color='grey')

for b in ax.flat:
    b.set_xlabel("Number of timeouts")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.set_yscale('log')
    b.set_facecolor('lightgrey')
    b.get_figure().savefig("imgs/per_timeouts.pdf")

In [None]:
[x for x in tld_timeouts if tld_timeouts[x] > 7]

In [None]:
tld_timeouts['xn--ygbi2ammx']

In [None]:
c = 0
for tld in tld_timeouts:
    if tld_timeouts[tld] > 0:
        c+=1
c / len(tld_timeouts)

In [None]:
df2.loc[df2.ntimeouts > 5].sort_values('ntimeouts', ascending=False)

In [None]:
tld_timeouts['fk']

In [None]:
# Mean response time per TLD, as a frame indexed by TLD.
xdata = {'tld': list(results), 'rt': [np.mean(results[tld]) for tld in results]}
df = pd.DataFrame(xdata, columns = ['tld', 'rt']).set_index('tld')

In [None]:
# Standard deviation of the response times per TLD, as a frame indexed by TLD.
edata = {'tld': list(results), 'rt': [np.std(results[tld]) for tld in results]}
dfe = pd.DataFrame(edata, columns = ['tld', 'rt']).set_index('tld')

In [None]:
# Histogram of df over 0-1400 ms, saved as imgs/per.pdf.
ax = df.hist(bins=160, range=(0,1400))

for b in ax.flat:
    b.set_xlim(0,1400)
    b.set_xlabel("Response time (ms)")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.get_figure().savefig("imgs/per.pdf")

In [None]:
df.loc[df.rt > 600]

In [None]:
caching_ids = [response.json() for response in chaching_ids]

In [None]:
chaching_ids2 = []

for i in range(202,1531,200):
    print(i)
    for payload in payloads_caching[i:i + 200]:
        url = url_dns_measurements_create + '?key=' + atlas_api_key
        chaching_ids2.append(requests.post(url, data = json.dumps(payload), headers = headers))
    
    time.sleep(300)

In [None]:
# tmp_res = []

# Keep the response bodies that are JSON lists.
# The list is created here so the cell runs on a fresh kernel and does not
# accumulate duplicates when it is re-run.
tmp_res = []

for item in chaching_ids2:
    body = item.json()

    if isinstance(body, list):
        tmp_res.append(body)

In [None]:
# Flatten tmp_res (a list of lists) into a single list.
tmp_res2 = []

for batch in tmp_res:
    tmp_res2.extend(batch)

In [None]:
# Request the listing entry for every collected id.
rs = []

for measurement_id in tmp_res2:
    # The '?' that starts the query string was missing, so 'id__in=...' ended
    # up in the URL path instead of acting as a filter.
    rs.append(requests.get(url_dns_measurements_get + '?id__in=' + str(measurement_id)))

In [None]:
chaching_ids2[0].json()

In [None]:
tlds2 = tlds.copy()

for tld in deltlds:
    tlds2.remove(tld)

In [None]:
# measuring
for item in data:
    item['definitions']['query_type'] = "SOA"
    item['definitions']['query_argument'] = item['domain']
    item['definitions']['description'] = item['tld'] + ' measurement'
    
url = url_dns_measurements_create + '?key=' + atlas_api_key
measurement_ids = requests.post(url, data = json.dumps(payload), headers = headers)

In [None]:
# NOTE(review): assumes measurement_ids is the response object of the POST
# request and that its JSON body is iterable over measurement ids — confirm.
for measurement_id in measurement_ids.json():
    # str() because the id cannot be concatenated to the URL unless it is a
    # string; '?' starts the query string.
    r = requests.get(url_dns_measurements_get + '?id__in=' + str(measurement_id))
    # s = requests.get(r.json()['result'])

In [27]:
import base64
import dns.message

x = dns.message.from_wire(base64.b64decode('rqaBgwABAAAAAQABJDc0NzAzMmVmLWNlOGUtNGY0Yy05OWY5LWU0MzMwNzM3MzhkZAJkZQAAAQABwDEABgABAAAVGAAoAWYDbmljwDEDaXRzBWRlbmljwDF4OcFRAAAcIAAAHCAANu6AAAAcIAAAKQ+gAAAAAAAA'))
print(x)

id 44710
opcode QUERY
rcode NXDOMAIN
flags QR RD RA
edns 0
payload 4000
;QUESTION
747032ef-ce8e-4f4c-99f9-e433073738dd.de. IN A
;ANSWER
;AUTHORITY
de. 5400 IN SOA f.nic.de. its.denic.de. 2017050961 7200 7200 3600000 7200
;ADDITIONAL


In [None]:
# help(x)

In [None]:
for i in range(0, len(data), 50):
    print(data[i]['tld'])

In [None]:
definition

In [None]:
probes

In [None]:
def find(lst, key, value):
    '''Return the position of the first dict in lst whose key entry equals value.

    Returns -1 when no element matches. An element that lacks key raises
    KeyError when it is reached.
    '''
    for position, entry in enumerate(lst):
        if entry[key] != value:
            continue
        return position

    return -1

In [None]:
with open('data/tld_type', 'r') as f:
    data_type = json.loads(f.read())

In [None]:
with open('data/tld_age', 'r') as f:
    data_age = json.loads(f.read())

In [None]:
data_rt = [{'tld': tld, 'rt': np.mean(results[tld])} for tld in results]

In [None]:
# Attach the TLD type to every response-time record.
for datum in data_rt:
    ix = find(data_type, 'tld', datum['tld'])
    # find() returns -1 when the TLD is unknown; used as an index that would
    # silently select the last entry of data_type.
    datum['type'] = data_type[ix]['type'] if ix != -1 else None

In [None]:
find(data_type, 'tld', 'aaa')

In [None]:
# Response time and type per TLD, indexed by TLD.
dft = pd.DataFrame(data_rt, columns = ['rt', 'tld', 'type']).set_index('tld')

In [None]:
dft.loc[dft.type == 'country-code'].mean().rt, dft.loc[dft.type == 'generic'].mean().rt

In [None]:
dfa.loc[dfa.age == 'new'].mean().rt, dfa.loc[dfa.age == 'old'].mean().rt

In [None]:
df.mean().rt

In [None]:
nbins = 160

In [None]:
# Response-time histogram of the rows typed 'country-code', saved as
# imgs/per_cctlds.pdf.
cctld_rows = dft.type == 'country-code'
ax = dft.loc[cctld_rows].hist('rt',
                              bins=nbins,
                              range=(0,1400),
                              cumulative=False,
                              align='mid',
                              figsize=(6,4))

for b in ax.flat:
    b.set_xlim(0,1400)
    b.set_xlabel("Response time (ms)")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.get_figure().savefig("imgs/per_cctlds.pdf")

In [None]:
# Response-time histogram of the rows typed 'generic', saved as imgs/per_gtlds.pdf.
gtld_rows = dft.type == 'generic'
ax = dft.loc[gtld_rows].hist('rt', bins=nbins, range=(0,1400), cumulative=False, figsize=(6,4))

for b in ax.flat:
    b.set_xlim(0,1400)
    b.set_xlabel("Response time (ms)")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.get_figure().savefig("imgs/per_gtlds.pdf")

In [None]:
data_age[0]

In [None]:
# Attach the mean response time to every age record.
for datum in data_age:
    ix = find(data_rt, 'tld', datum['tld'].lower())

    if ix == -1:
        # find() returns -1 for an unknown TLD; used as an index that would
        # silently copy the response time of the last entry of data_rt.
        print(datum['tld'], 'NO RESPONSE TIME')
        datum['rt'] = float('nan')
    else:
        datum['rt'] = data_rt[ix]['rt']

In [None]:
# Age and response time per TLD, indexed by TLD.
dfa = pd.DataFrame(data_age, columns = ['age', 'rt', 'tld']).set_index('tld')

In [None]:
# Response-time histogram of the rows with age 'new', saved as imgs/per_new.pdf.
new_rows = dfa.age == 'new'
ax = dfa.loc[new_rows].hist('rt',
                            bins=nbins,
                            range=(0,1400),
                            cumulative=False)

for b in ax.flat:
    b.set_xlim(0,1400)
    b.set_xlabel("Response time (ms)")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.get_figure().savefig("imgs/per_new.pdf")

In [None]:
# Response-time histogram of the rows with age 'old', saved as imgs/per_old.pdf.
old_rows = dfa.age == 'old'
ax = dfa.loc[old_rows].hist('rt',
                            bins=nbins,
                            range=(0,1400),
                            cumulative=False)

for b in ax.flat:
    b.set_xlim(0,1400)
    b.set_xlabel("Response time (ms)")
    b.set_ylabel("Number of TLDs")
    b.set_title('')
    b.get_figure().savefig("imgs/per_old.pdf")

In [None]:
# Collect, per probe id, the response times found in the stored SOA results.
# NOTE(review): this reads data/ripe/soa/ while the download cell writes to
# data/atlas/soa/ — confirm which directory is intended.
indir = 'data/ripe/soa/'
prb_rt = {}

for root, dirs, filenames in os.walk(indir):
    for filename in filenames:
        # The TLD is not needed here; the former `tld, _ = f.split('.')` only
        # added a failure mode for file names without exactly one dot.
        with open(os.path.join(root, filename), 'r') as f:
            tld_results = json.load(f)

        for probe in tld_results['result']:
            prb_id = probe['prb_id']

            for result in probe['resultset']:
                if 'result' in result:
                    prb_rt.setdefault(prb_id, []).append(result['result']['rt'])

In [None]:
prb_data = [{'probe': probe, 'rt': np.mean(prb_rt[probe])} for probe in prb_rt]

In [None]:
# Mean response time per probe, indexed by probe id.
df = pd.DataFrame(prb_data, columns = ['probe', 'rt']).set_index('probe')

In [None]:
df.plot.bar(figsize=(18,9))

In [None]:
# Look up the probes that produced results.
# The former literal '?id__in=1' glued a stray '1' onto the first probe id.
# NOTE(review): only the first page of the response is used by the next cell —
# confirm that all probes fit on one page.
r = requests.get('https://atlas.ripe.net/api/v2/probes/?id__in=' + ','.join(str(probe) for probe in prb_rt))

In [None]:
# Map every returned probe id to its country code.
probe_cc = {v['id']: v['country_code'] for v in r.json()['results']}

In [None]:
# One {'cc', 'rt'} record per probe whose country code is known.
xyz = [
    {'cc': probe_cc[datum['probe']], 'rt': datum['rt']}
    for datum in prb_data
    if datum['probe'] in probe_cc
]

In [None]:
with open('data/cc_rt', 'r') as f:
    xyz = json.loads(f.read())

In [None]:
# Average the response times per country code:
#   xyz2[cc] == [mean response time, number of records]
xyz2 = {}

for x in xyz:
    entry = xyz2.setdefault(x['cc'], [0.0, 0])
    entry[1] += 1
    # Incremental mean. The former `x['rt'] + previous / 2` halved only the
    # previous value because of operator precedence, and would not have been
    # a mean over more than two records even with parentheses.
    entry[0] += (x['rt'] - entry[0]) / entry[1]

In [None]:
xyz = [{'cc': x + ' (' + str(xyz2[x][1]) + ')', 'rt': xyz2[x][0]} for x in xyz2]

In [None]:
# Response time per country label, indexed by that label.
dfcc = pd.DataFrame(xyz, columns = ['cc', 'rt']).set_index('cc')

In [None]:
# dfcc

In [None]:
dfcc.sort_values('rt', ascending=False).plot.bar(figsize=(12,7))

In [None]:
# Count the lines of data/dig/tld_nss per TLD: data[tld] is the number of
# lines whose first field is that TLD.
f = read_list('data/dig/tld_nss')
data = {}

for line in f:
    fields = line.split()

    # Lines that do not have the expected five fields (e.g. blank lines) are skipped.
    if len(fields) != 5:
        continue

    tld, _, _, _, ns = fields
    tld = tld.strip('.')

    # After stripping the dots the root zone is the empty string, so the
    # former comparison against '.' could never exclude it.
    if tld:
        data[tld] = data.get(tld, 0) + 1

In [None]:
# One empty bucket for each of the keys 0 to 15.
ns_data = {count: [] for count in range(0, 16)}

In [None]:
# File every TLD under the bucket given by data[tld].
for tld in results:
    try:
        ns_data[data[tld]].append({'tld': tld, 'rt': np.mean(results[tld])})
    except KeyError:
        # No entry for this TLD in data, or no bucket for its value: report the TLD.
        print(tld)

In [None]:
# Drop every empty bucket; an empty bucket yields a frame without an 'rt'
# column, which the plotting loop cannot handle. The former range(15, 16)
# only ever looked at bucket 15.
for i in [count for count, bucket in ns_data.items() if bucket == []]:
    del ns_data[i]

In [None]:
# bins = []

# for i in ns_data:
#     dfxyz = pd.DataFrame(ns_data[i])
#     bins.append(dfxyz.rt.nunique())
    
# bins = min(bins)

# One response-time histogram per bucket of ns_data, saved as imgs/per_ns_<i>.png.
for i in ns_data:
    dfxyz = pd.DataFrame(ns_data[i])
    ax = dfxyz.hist('rt', bins=32, range=(0,1400))

    for b in ax.flat:
        b.set_xlim(0,1400)
        b.set_xlabel("Response time (ms)")
        b.set_ylabel("Number of TLDs")
        b.set_title('Average response times of TLDs with ' + str(i) + ' name servers')
        b.get_figure().savefig("imgs/per_ns_" + str(i) + ".png")

In [None]:
# Parse the whois files: one record per file, holding the file name as 'tld'
# and the text of every line that starts with 'organisation'.
tld_orgs = []
indir = 'data/whois/'

for root, dirs, filenames in os.walk(indir):
    for fn in filenames:
        # The record is kept at hand rather than looked up again with find()
        # for every matching line, and the list is no longer reset for each
        # directory that os.walk visits.
        record = {'tld': fn, 'organisations': []}
        tld_orgs.append(record)

        with open(os.path.join(root, fn), 'r') as f:
            for line in f:
                if line.startswith('organisation'):
                    # partition() cannot fail the way a two-way unpack of
                    # split() does when the separator is absent or repeated.
                    _, separator, org = line.partition('rganisation: ')

                    if separator:
                        record['organisations'].append(org.strip('\n'))
    
# tld_creation

In [None]:
# Invert tld_orgs: organisation -> list of lower-cased TLD labels.
org_tlds = {}

for item in tld_orgs:
    for org in item['organisations']:
        org_tlds.setdefault(org, []).append(item['tld'].lower())

In [None]:
org_tlds_50 = [i for i in org_tlds if len(org_tlds[i]) >= 50]
org_tlds_100 = [i for i in org_tlds if len(org_tlds[i]) >= 100]

In [None]:
donut_tlds = [i['tld'].lower() for i in tld_orgs if ' Donuts Inc.' in i['organisations']]

In [None]:
# Mean response time of every measured TLD, grouped by the organisations
# listed in org_tlds_50.
org_results = {}

for org in org_tlds_50:
    for tld in results:
        if tld not in org_tlds[org]:
            continue

        org_results.setdefault(org, []).append({'tld': tld, 'rt': np.mean(results[tld])})

In [None]:
len(donut_tlds), len(results_donut)

In [None]:
# Smallest number of distinct response times over all organisations.
# Note that the histograms below use a fixed bins=160 and not this value.
bins = min(pd.DataFrame(org_results[i]).rt.nunique() for i in org_results)

# One histogram per organisation, saved as imgs/per_org_<organisation>.png.
for i in org_results:
    dfxyz = pd.DataFrame(org_results[i])
    ax = dfxyz.hist(bins=160, range=(0,1400), cumulative=False)

    for b in ax.flat:
        b.set_xlim(0,1400)
        b.set_xlabel("Response time (ms)")
        b.set_ylabel("Number of TLDs")
        b.set_title('Average response times of TLDs organised by ' + str(i))
        b.get_figure().savefig("imgs/per_org_" + str(i) + ".png")

In [None]:
len([i['rt'] for i in org_results['Afilias']]), len(xdata['rt'])

In [None]:
dftest = pd.DataFrame(xdata)
dftest.plot.hist(stacked=True, bins=153, range=(0,1400),cumulative=True)

In [None]:
# [i for i in tld_orgs if 'Neustar, Inc.' in i['organisations']]