In [130]:
import csv
from netaddr import *
import urllib, urllib2, json
import os, glob
from collections import defaultdict
import multiprocessing
from bs4 import BeautifulSoup
import wget
from tinydb import TinyDB, Query

# RIPEstat looking-glass endpoint. The string ends in "resource=", so the
# resource to look up is presumably appended by the caller; it is not
# referenced by any of the cells visible here.
BGP_LOOKING_GLASS_URL="https://stat.ripe.net/data/looking-glass/data.json?resource="
# Base URL to which findIRRObjects() appends the prefix being queried.
URL_IRREXPLORER = "http://irrexplorer.nlnog.net/json/prefix/"
# Directory of ipset files -- presumably the inputs for findBLIPs(); not
# referenced by any of the cells visible here (TODO confirm).
IPSETS_PATH = "data/ipsets/subset/"
# Remote location of the AFRINIC extended delegated file; the (currently
# commented-out) download step stores it as data/delegated.txt.
IPRESOURCES_PATH = "ftp://ftp.afrinic.net/stats/afrinic/delegated-afrinic-extended-latest"
# Page intended for fetchBogonPrefixes() (the call is currently commented out).
BOGON_URL = "https://www.cidr-report.org/as2.0/#Bogons"
# Page intended for fetchBogonAS() (the call is currently commented out).
BOGUS_AS_URL = "https://www.cidr-report.org/as2.0/bogus-as-advertisements.html"

#load bogon prefixes
def fetchBogonPrefixes(url):
    """Scrape the "Possible Bogus Routes" table from an HTML report page.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    list of list
        One inner list per ``<tr>`` found inside the first ``<ul>`` whose
        ``<h3>`` heading reads exactly "Possible Bogus Routes".  Each inner
        list holds the text of the row's cells, with "NULL" standing in for a
        cell that has no text.  An empty list is returned when the page cannot
        be downloaded or when no such section exists.
    """
    try:
        res = urllib2.urlopen(url)
    except Exception as e:
        # Best effort: report the failure and return "no data" instead of
        # falling through with an unbound response object.
        print(e)
        return []

    try:
        soup = BeautifulSoup(res, "html.parser")
    finally:
        res.close()

    myul = None
    for ul in soup.findAll("ul"):
        h3 = ul.find('h3')
        if h3 and h3.text == "Possible Bogus Routes":
            myul = ul
            break

    # The page layout may change; without the expected section there is
    # nothing to extract.
    if myul is None:
        return []

    bogons = []
    for row in myul.findAll('tr'):
        myrow = []
        # Visit only the row's direct child tags so that text nodes sitting
        # between cells are not treated as cells themselves.
        for td in row.findAll(True, recursive=False):
            if td.text:
                myrow.append(td.text)
            elif td.find('a'):
                myrow.append(td.find('a').text)
            else:
                myrow.append("NULL")
        bogons.append(myrow)

    return bogons

#load bogus AS advertisements
def fetchBogonAS(url):
    """Scrape every table row of an HTML page into lists of cell text.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    list of list
        One inner list per ``<tr>`` in the document, containing the text of
        each ``<td>`` in that row.  Rows without any ``<td>`` yield an empty
        inner list.  An empty outer list is returned when the page cannot be
        downloaded.
    """
    try:
        res = urllib2.urlopen(url)
    except Exception as e:
        # Best effort: report the failure and return "no data" instead of
        # falling through with an unbound response object.
        print(e)
        return []

    try:
        soup = BeautifulSoup(res, "html.parser")
    finally:
        res.close()

    return [[td.text for td in tr.findAll('td')] for tr in soup.findAll('tr')]
      
#get the number of objects found in the IRR databases
def findIRRObjects(prefix):
    """Return the number of top-level entries the IRR explorer reports.

    The prefix is appended verbatim to URL_IRREXPLORER and the JSON response
    is decoded; the result is ``len()`` of the decoded document.

    Parameters
    ----------
    prefix : str
        Prefix to query, in the form expected by the service's URL path.

    Returns
    -------
    int
        Length of the decoded JSON document.

    Raises
    ------
    Whatever ``urllib2.urlopen`` raises on network errors, and ``ValueError``
    from ``json.loads`` when the body is not valid JSON.
    """
    response = urllib2.urlopen(URL_IRREXPLORER + prefix)
    try:
        data = json.loads(response.read())
    finally:
        # Release the connection even when the body cannot be read or decoded.
        response.close()
    return len(data)

def findBLIPs(filename, prefixes=None):
    """Print every network listed in a file that overlaps a set of prefixes.

    Parameters
    ----------
    filename : str
        Text file with one address or network per line.  Blank lines and
        lines starting with "#" are ignored.
    prefixes : IPSet, optional
        Set to test against.  When omitted, the module-level
        ``afrinicPrefixes`` is used, which must have been defined beforehand.

    Returns
    -------
    None
        Matches are printed as ``(network, filename)``.
    """
    if prefixes is None:
        # Backward-compatible fallback to the notebook-level global.
        prefixes = afrinicPrefixes

    with open(filename) as fp:
        for line in fp:
            entry = line.strip()
            if not entry or entry.startswith("#"):
                continue
            ipn = IPNetwork(entry)
            # Wrap the network in a list so the set is built from one CIDR
            # rather than from the network treated as an iterable.
            if prefixes.intersection(IPSet([ipn])):
                print (ipn, filename)

def loadAFRINICResources(filepath, db, header_lines=4):
    """Load a pipe-delimited delegated-resources file into three db tables.

    Each data row is read as ``registry|cc|type|start|value|date|status[|...]``
    and stored according to its type:

    * ``asn``  -> table 'asn',  document keys: as, cc, reg_date, status
    * ``ipv4`` -> table 'ipv4', document keys: prefix, cc, reg_date, status
    * ``ipv6`` -> table 'ipv6', document keys: prefix, cc, reg_date, status

    For ``ipv4`` rows the value column is used as an address count.  The range
    ``start .. start + count - 1`` is split into CIDR blocks and one document
    is stored per block, so a range that is not a single aligned block is
    recorded in full.  For ``ipv6`` rows the value column is used as the
    prefix length.

    Parameters
    ----------
    filepath : str
        Path of the delegated file.
    db : object
        Database exposing ``table(name)``; the returned tables must provide
        ``insert_multiple(list_of_dicts)``.
    header_lines : int, optional
        Number of leading lines to discard before parsing (default 4).

    Returns
    -------
    None
    """
    asn = db.table('asn')
    ipv4 = db.table('ipv4')
    ipv6 = db.table('ipv6')

    asn_records = []
    ipv4_records = []
    ipv6_records = []

    with open(filepath) as csvfile:
        reader = csv.reader(csvfile, delimiter='|')

        # Discard the leading header lines.
        for _ in range(header_lines):
            next(reader, None)

        for p in reader:
            # Blank or truncated rows do not carry the seven columns read
            # below; skip them instead of failing on an index error.
            if len(p) < 7:
                continue

            cc, rtype, resource, value, reg_date, status = p[1:7]

            if rtype == 'asn':
                asn_records.append({'as': resource, 'cc': cc, 'reg_date': reg_date, 'status': status })
            elif rtype == 'ipv4':
                startip = IPAddress(resource)
                endip = IPAddress(int(startip) + int(value) - 1)
                # Store every CIDR block of the range, not only the first.
                for cidr in IPRange(startip, endip).cidrs():
                    ipv4_records.append({'prefix': str(cidr), 'cc': cc, 'reg_date': reg_date, 'status': status })
            elif rtype == 'ipv6':
                ipv6_records.append({'prefix': resource + "/" + value, 'cc': cc, 'reg_date': reg_date, 'status': status })

    # Hand each table its documents in a single call.
    for table, records in ((asn, asn_records), (ipv4, ipv4_records), (ipv6, ipv6_records)):
        if records:
            table.insert_multiple(records)
 
                
def loadAFRINICPrefixes(filepath):
    """Build an IPSet covering every address range listed in a file.

    Each row of the pipe-delimited file contributes the range that starts at
    column 3 and spans the number of addresses given in column 4.  The whole
    range is added to the set, one CIDR block at a time.

    NOTE(review): rows are not filtered by resource type and no header lines
    are skipped, so the input is assumed to contain address rows only --
    confirm against how the file is produced.

    Parameters
    ----------
    filepath : str
        Path of the pipe-delimited file.

    Returns
    -------
    IPSet
        Union of all ranges found in the file.
    """
    ranges = IPSet()
    with open(filepath) as csvfile:
        for p in csv.reader(csvfile, delimiter='|'):
            # Blank or truncated rows lack the two columns read below.
            if len(p) < 5:
                continue
            startip = IPAddress(p[3])
            endip = IPAddress(int(startip) + int(p[4]) - 1)
            # Add every CIDR block of the range, not only the first.
            for cidr in IPRange(startip, endip).cidrs():
                ranges.add(cidr)
    return ranges


In [131]:
# TinyDB database backed by data/db.json; loadAFRINICResources() writes into
# its 'asn', 'ipv4' and 'ipv6' tables.
db = TinyDB('data/db.json')
# Table handles that are currently disabled:
#resources = db.table('resources')
#bogus_as = db.table('bogus_as')
#bogus_prefixes = db.table('bogus_prefixes')

In [None]:
#print "downloading delegated file"
#urllib.urlcleanup()
#urllib.urlretrieve(IPRESOURCES_PATH, "data/delegated.txt")

# Populate the 'asn', 'ipv4' and 'ipv6' tables from the local delegated file.
#print "loading AFRINIC prefixes"
loadAFRINICResources('data/delegated.txt', db)

# Named tables are reached through db.table(name), not as attributes of db.
db.table('ipv4').all()


#print "fetching bogon prefixes"
#bogonPrefixes = fetchBogonPrefixes(BOGON_URL)

#print "fetching bogus ASs"
#bogonASs = fetchBogonAS(BOGUS_AS_URL)