In [1]:
import pandas as pd
import numpy as np
import copy
import os
import re
from netaddr import *
import ipaddress
import time



### df読み込み

In [25]:
# route information
fr_file = "fullroute_20191030.0000"
dtyp = {"Peer ASN (ASN of the monitor)": "int16"}
rcc06_df = pd.read_csv(fr_file, delimiter = "|", names = ("BGP Protocol", "timestamp", "W/A/B", "Peer IP(address of the monitor)", "Peer ASN (ASN of the monitor)", "Prefix", "ASPath", "Origin Protocol"), dtype=dtyp)
# jp addresses
jp_file = "jp_cidr"
jp_df = pd.read_csv(jp_file, delimiter = "\t", names = ("cc (JP)", "IP block"))
# unallocated jp addresses
jpnic_pooled = "unallocated-jpnic-20191101.txt"
pooled_df = pd.read_csv(jpnic_pooled, header = None, names = ("IP block",))

### /8ごとに経路とjpを比較し，共通部分をとる (保存処理を書いていないので次回以降実装)

In [3]:
# fr_file = "fullroute_20191030.0000"
# dtyp = {"Peer ASN (ASN of the monitor)": "int16"}
# rcc06_df = pd.read_csv(fr_file, delimiter = "|", names = ("BGP Protocol", "timestamp", "W/A/B", "Peer IP(address of the monitor)", "Peer ASN (ASN of the monitor)", "Prefix", "ASPath", "Origin Protocol"), dtype=dtyp)

del rcc06_df["BGP Protocol"], rcc06_df["timestamp"], rcc06_df["W/A/B"], \
    rcc06_df["Peer IP(address of the monitor)"], rcc06_df["Peer ASN (ASN of the monitor)"], \
    rcc06_df["ASPath"], rcc06_df["Origin Protocol"]

start = time.time()

scope_blocks = []
# /8ごとにjp_dfを切り取って回る
for i in range(1, 256):
    # i.***.***.***/?となる部分だけを拾う
    head = str(i) + "{1}\."
    jp_section = jp_df.applymap(lambda x: bool(re.match(head, x)))
    jp_sub_table = jp_df[jp_section]["IP block"].dropna()
    
    rcc_section = rcc06_df.applymap(lambda x: bool(re.match(head, x)))
    rcc_sub_table = rcc06_df[rcc_section]["Prefix"].dropna()
    
    scope_raw = set()
    for rcc_row in rcc_sub_table:
        n1 = IPSet()
        n1.add(IPNetwork(rcc_row))
    
        for jp_row in jp_sub_table:
            n2 = IPSet()
            n2.add(IPNetwork(jp_row))
            # n1とn2の共通部分
            intersection = n1 & n2
            scope_raw.add(str(intersection))
            
    scope = list(scope_raw)
    blocks = [x.replace("IPSet(['", "").replace("'])", "") for i, x in enumerate(scope_raw) if x != "IPSet([])"]
    scope_blocks.append(blocks) 
    print(f"{i}.0.0.0/8: {len(blocks)} blocks are in scope")

elapsed = time.time() - start

print(f"elapsed time: {elapsed/(60*60)} [h]")

1.0.0.0/8: 285 blocks are in scope
2.0.0.0/8: 0 blocks are in scope
3.0.0.0/8: 0 blocks are in scope
4.0.0.0/8: 0 blocks are in scope
5.0.0.0/8: 4 blocks are in scope
6.0.0.0/8: 0 blocks are in scope
7.0.0.0/8: 0 blocks are in scope
8.0.0.0/8: 0 blocks are in scope
9.0.0.0/8: 0 blocks are in scope
10.0.0.0/8: 0 blocks are in scope
11.0.0.0/8: 0 blocks are in scope
12.0.0.0/8: 0 blocks are in scope
13.0.0.0/8: 0 blocks are in scope
14.0.0.0/8: 32 blocks are in scope
15.0.0.0/8: 0 blocks are in scope
16.0.0.0/8: 0 blocks are in scope
17.0.0.0/8: 0 blocks are in scope
18.0.0.0/8: 0 blocks are in scope
19.0.0.0/8: 0 blocks are in scope
20.0.0.0/8: 0 blocks are in scope
21.0.0.0/8: 0 blocks are in scope
22.0.0.0/8: 0 blocks are in scope
23.0.0.0/8: 1 blocks are in scope
24.0.0.0/8: 2 blocks are in scope
25.0.0.0/8: 0 blocks are in scope
26.0.0.0/8: 0 blocks are in scope
27.0.0.0/8: 200 blocks are in scope
28.0.0.0/8: 0 blocks are in scope
29.0.0.0/8: 0 blocks are in scope
30.0.0.0/8: 0 bloc

### 次にその中でもEUへ未割り当てのものをとる; 共通部分をとる

In [150]:
sus_raw = set()
# regular expression of an ip address
ip_regex = "\d+\.\d+\.\d+\.\d+"

for block in scope_blocks:
    n1 = IPSet()
    n1.add(IPNetwork(block))
    for p_block in pooled_df.itertuples():
        n2 = IPSet()
        if ("-" in (p_block[1])): # *** - *** のような範囲での記述のエントリがある
            match = re.findall(ip_regex, p_block[1]) # returns a list of matched parts
            s1 = IPSet()
            s1.add(IPRange(match[0], match[1]))
            # n1とs1の共通部分
            intersection = n1 & s1
        else:
            n2.add(IPNetwork(str(p_block[1])))
            # n1とn2の共通部分
            intersection = n1 & n2
        sus_raw.add(str(intersection))

sus = list(sus_raw)
# 本来は正規表現でcidr部分だけを抽出したい
sus_blocks = [x.replace("IPSet(['", "").replace("'])", "") for i, x in enumerate(sus) if x != "IPSet([])"]
print(f"{len(sus_blocks)} blocks found")

0 blocks found


In [None]:
sus_raw = set()
# regular expression of an ip address
ip_regex = "\d+\.\d+\.\d+\.\d+"

for blocks in scope_blocks:
    for bl in blocks:
        n1 = IPSet()
        n1.add(IPNetwork(bl))
        for p_block in pooled_df.itertuples():
            n2 = IPSet()
            if ("-" in (p_block[1])): # *** - *** のような範囲での記述のエントリがある
                match = re.findall(ip_regex, p_block[1]) # returns a list of matched parts
                s1 = IPSet()
                s1.add(IPRange(match[0], match[1]))
                # n1とs1の共通部分
                intersection = n1 & s1
            else:
                n2.add(IPNetwork(str(p_block[1])))
                # n1とn2の共通部分
                intersection = n1 & n2
            sus_raw.add(str(intersection))

sus = list(sus_raw)
cidr_regex = "\d+\.\d+\.\d+\.\d+/\d+"
sus_blocks = [re.findall(cidr_regex, x) for i, x in enumerate(sus) if x != "IPSet([])"]
print(sus_blocks)

# 試験的にマッチしそうなところだけ拾って実行

In [26]:
# fr_file = "fullroute_20191030.0000"
# dtyp = {"Peer ASN (ASN of the monitor)": "int16"}
# rcc06_df = pd.read_csv(fr_file, delimiter = "|", names = ("BGP Protocol", "timestamp", "W/A/B", "Peer IP(address of the monitor)", "Peer ASN (ASN of the monitor)", "Prefix", "ASPath", "Origin Protocol"), dtype=dtyp)

del rcc06_df["BGP Protocol"], rcc06_df["timestamp"], rcc06_df["W/A/B"], \
    rcc06_df["Peer IP(address of the monitor)"], rcc06_df["Peer ASN (ASN of the monitor)"], \
    rcc06_df["ASPath"], rcc06_df["Origin Protocol"]

start = time.time()

scope_blocks = []
# /8ごとにjp_dfを切り取って回る とりあえず実験に1-5だけ
for i in range(202, 203):
    # i.***.***.***/?となる部分だけを拾う
    head = str(i) + "{1}\."
    jp_section = jp_df.applymap(lambda x: bool(re.match(head, x)))
    jp_sub_table = jp_df[jp_section]["IP block"].dropna()
    
    rcc_section = rcc06_df.applymap(lambda x: bool(re.match(head, x)))
    rcc_sub_table = rcc06_df[rcc_section]["Prefix"].dropna()
    
    scope_raw = set()
    for rcc_row in rcc_sub_table:
        n1 = IPSet()
        n1.add(IPNetwork(rcc_row))
    
        for jp_row in jp_sub_table:
            n2 = IPSet()
            n2.add(IPNetwork(jp_row))
            # n1とn2の共通部分
            intersection = n1 & n2
            scope_raw.add(str(intersection))
            
    scope = list(scope_raw)
    blocks = [x.replace("IPSet(['", "").replace("'])", "") for i, x in enumerate(scope_raw) if x != "IPSet([])"]
    scope_blocks.append(blocks) 
    print(f"{i}.0.0.0/8: {len(blocks)} blocks are in scope")

elapsed = time.time() - start

print(f"elapsed time: {elapsed/(60*60)} [h]")

202.0.0.0/8: 1529 blocks are in scope
elapsed time: 0.28721652739577824 [h]


In [31]:
sus_raw = set()
# regular expression of an ip address
ip_regex = "\d+\.\d+\.\d+\.\d+"

for blocks in scope_blocks:
    for bl in blocks:
        n1 = IPSet()
        n1.add(IPNetwork(bl))
        for p_block in pooled_df.itertuples():
            n2 = IPSet()
            if ("-" in (p_block[1])): # *** - *** のような範囲での記述のエントリがある
                match = re.findall(ip_regex, p_block[1]) # returns a list of matched parts
                s1 = IPSet()
                s1.add(IPRange(match[0], match[1]))
                # n1とs1の共通部分
                intersection = n1 & s1
            else:
                n2.add(IPNetwork(str(p_block[1])))
                # n1とn2の共通部分
                intersection = n1 & n2
            sus_raw.add(str(intersection))

sus = list(sus_raw)
cidr_regex = "\d+\.\d+\.\d+\.\d+/\d+"
sus_blocks = [re.findall(cidr_regex, x) for i, x in enumerate(sus) if x != "IPSet([])"]
print(sus_blocks)

[['202.233.154.0/24']]


In [183]:
# pandasの表示行数の制限を変える
pd.options.display.max_rows = 3000

rcc06_dfのindex 127316 - 210960はIPv6 これ以外にもありそう

In [117]:
rcc06_df[127315:210961][["Prefix"]]

Unnamed: 0,Prefix
127315,32.0.0.0/8
127316,2001::/32
127317,2001::/32
127318,2001::/32
127319,2001:4:112::/48
...,...
210956,240f::/24
210957,240f:100::/24
210958,240f:100::/24
210959,240f:100::/24


## 元のデータフレーム

#### 経路情報

In [181]:
rcc06_df

Unnamed: 0,BGP Protocol,timestamp,W/A/B,Peer IP(address of the monitor),Peer ASN (ASN of the monitor),Prefix,ASPath,Origin Protocol
0,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.169,2497,1.0.0.0/24,2497 13335,IGP
1,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.185,25152,1.0.0.0/24,25152 2914 13335,IGP
2,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.20,4777,1.0.0.0/24,4777 13335,IGP
3,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.169,2497,1.0.4.0/22,2497 4826 38803 56203,IGP
4,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.185,25152,1.0.4.0/22,25152 6939 4826 38803 56203,IGP
...,...,...,...,...,...,...,...,...
2571504,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.185,25152,223.255.253.0/24,25152 2914 4134 58519,IGP
2571505,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.20,4777,223.255.253.0/24,4777 2516 4134 58519,IGP
2571506,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.169,2497,223.255.254.0/24,2497 7473 3758 55415,IGP
2571507,TABLE_DUMP2,10/30/19 00:00:00,B,202.249.2.185,25152,223.255.254.0/24,25152 6939 4657 55415 55415 55415 55415 55415 ...,IGP


#### jpnicに割り振られたもの

In [72]:
jp_df

Unnamed: 0,cc (JP),IP block
0,JP,1.0.16.0/20
1,JP,1.0.64.0/18
2,JP,1.1.64.0/18
3,JP,1.5.0.0/16
4,JP,1.21.0.0/16
...,...,...
2852,JP,223.223.160.0/21
2853,JP,223.223.208.0/21
2854,JP,223.223.224.0/19
2855,JP,223.252.64.0/19


#### jpnicに割り振られたが，EUへは未割り当てのもの

In [147]:
pooled_df

Unnamed: 0,IP block
0,61.122.4.0/22
1,61.122.8.0/21
2,61.122.20.0/22
3,61.122.24.0/21
4,61.122.96.0/22
5,61.122.100.0/22
6,61.122.104.0/21
7,61.122.192.0/20
8,61.125.176.0/22
9,61.125.180.0/22
