In [3]:
import pandas as pd
import json

def xls_to_json(file_path, output_path):
    """Read an Excel file with pandas and write its rows as a JSON array.

    Each spreadsheet row becomes one JSON object keyed by column name.

    Args:
        file_path: Path of the Excel file handed to ``pd.read_excel``.
        output_path: Path of the JSON file to create (overwritten if present).
    """
    # Load the Excel file.
    df = pd.read_excel(file_path)

    # One dict per row.
    records = df.to_dict(orient='records')

    # NaN is the only float that differs from itself. json.dump would emit it
    # as a bare `NaN` token, which strict JSON parsers reject, so store null.
    cleaned = [
        {
            column: (None if isinstance(value, float) and value != value else value)
            for column, value in record.items()
        }
        for record in records
    ]

    # default=str covers cell values json cannot encode natively (for example
    # pandas Timestamp) instead of aborting with TypeError mid-file.
    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(cleaned, json_file, ensure_ascii=False, indent=4, default=str)

# Example usage: convert the raw BGP leak spreadsheet to JSON next to it.
file_path = '/Users/hugo/Projects/NLGraph/NLGraph/BGP/raw_data/bgpleak.xls'
output_path = '/Users/hugo/Projects/NLGraph/NLGraph/BGP/raw_data/bgpleak.json'
xls_to_json(file_path, output_path)

In [8]:
import json
from collections import defaultdict

def process_as_path(as_path):
    """Collapse adjacent duplicate entries of a whitespace-separated AS path.

    The input string is split on whitespace, every run of identical
    neighbouring tokens is reduced to a single token, and the surviving
    tokens are joined with "-".

    Args:
        as_path: AS path as a whitespace-separated string.

    Returns:
        The de-duplicated path with "-" as separator ("" for a blank input).
    """
    hops = as_path.strip().split()

    # Keep a hop only when it differs from the one immediately before it;
    # the first hop has no predecessor and is always kept.
    collapsed = [
        hop
        for position, hop in enumerate(hops)
        if position == 0 or hop != hops[position - 1]
    ]

    return '-'.join(collapsed)

def process_and_separate_json(input_path, output_directory):
    """Normalise each record's ``as_path`` and split the records by path length.

    Reads a JSON array of objects, rewrites every string ``as_path`` through
    ``process_as_path``, groups the objects by the number of entries in the
    rewritten path, and writes one ``as_path_length_<n>.json`` file per group
    into ``output_directory``. A per-length count is printed at the end.

    Objects without an ``as_path`` key are ignored. Objects whose ``as_path``
    is not a string (for example a missing spreadsheet cell) are skipped and
    counted instead of aborting the whole run.

    Args:
        input_path: Path of the JSON file to read.
        output_directory: Directory for the per-length files; created if it
            does not exist.
    """
    import os  # local import: the surrounding cell does not import os

    # Load the JSON file.
    with open(input_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    length_to_data = defaultdict(list)
    skipped = 0

    # Normalise the as_path field of every object and bucket it by length.
    for obj in data:
        if 'as_path' not in obj:
            continue
        raw_path = obj['as_path']
        if not isinstance(raw_path, str):
            skipped += 1
            continue
        obj['as_path'] = process_as_path(raw_path)
        # ''.split('-') yields [''], which would count an empty path as one
        # entry; report it as zero instead.
        as_path_length = len(obj['as_path'].split('-')) if obj['as_path'] else 0
        length_to_data[as_path_length].append(obj)

    # open() does not create missing directories.
    os.makedirs(output_directory, exist_ok=True)

    # Write each length group to its own JSON file.
    for length, objects in length_to_data.items():
        output_path = os.path.join(output_directory, f"as_path_length_{length}.json")
        with open(output_path, 'w', encoding='utf-8') as json_file:
            json.dump(objects, json_file, ensure_ascii=False, indent=4)

    # Print how many objects fell into each length group.
    for length, objects in length_to_data.items():
        print(f"Number of objects with as_path length {length}: {len(objects)}")

    if skipped:
        print(f"Skipped {skipped} objects whose as_path is not a string")

# Example usage.
# NOTE(review): the output directory is spelled "bglleak" while the other
# paths use "bgpleak" — confirm it matches the directory on disk.
input_path = '/Users/hugo/Projects/NLGraph/NLGraph/BGP/raw_data/bgpleak.json'            # replace with the path of your input JSON file
output_directory = '/Users/hugo/Projects/NLGraph/NLGraph/BGP/filtered_data/bglleak_different_length'               # replace with the directory the output should go to
process_and_separate_json(input_path, output_directory)

Number of objects with as_path length 11: 157
Number of objects with as_path length 12: 138
Number of objects with as_path length 9: 130
Number of objects with as_path length 10: 163
Number of objects with as_path length 14: 51
Number of objects with as_path length 16: 14
Number of objects with as_path length 7: 84
Number of objects with as_path length 8: 106
Number of objects with as_path length 13: 90
Number of objects with as_path length 15: 21
Number of objects with as_path length 19: 2
Number of objects with as_path length 6: 54
Number of objects with as_path length 17: 2
Number of objects with as_path length 18: 2


In [None]:
import asyncio
from gql import Client, gql
from gql.transport.aiohttp import AIOHTTPTransport
from ipaddress import ip_network
from tqdm.asyncio import tqdm  # 使用tqdm的asyncio支持版本

# GraphQL query template. "{asn}" is the placeholder for the AS number; every
# other brace is literal GraphQL syntax and is NOT escaped as "{{" / "}}", so
# the string cannot be passed through str.format() as written.
# The query requests asn -> asnDegree -> transit.
query_template = """
{
  asn(asn:"{asn}") {
      asnDegree {
         transit
      }
   }
}
"""

# Create the GraphQL client, using an aiohttp transport against the
# CAIDA AS Rank v2 GraphQL endpoint.
transport = AIOHTTPTransport(url="https://api.asrank.caida.org/v2/graphql")
client = Client(transport=transport, fetch_schema_from_transport=True)

# Send one GraphQL query asynchronously and turn the response into a result line.
async def fetch_validation_state(prefix, asn):
    """Query the GraphQL endpoint for one ASN and format a CSV-style result.

    Args:
        prefix: IP prefix string; only echoed into the result, the query
            template has no placeholder for it.
        asn: AS number substituted for ``{asn}`` in ``query_template``.

    Returns:
        The string ``"<prefix>,<asn>,<state>"``. ``state`` is the lower-cased
        ``validation.state`` value of the response, or ``"notfound"`` when the
        request fails or the response has no such field.
    """
    # The template is full of literal GraphQL braces, which str.format() tries
    # to parse as replacement fields and raises on; substitute the single
    # placeholder textually instead.
    query = gql(query_template.replace("{asn}", str(asn)))
    try:
        response = await client.execute_async(query)
        # gql returns the contents of the response's "data" member directly;
        # also accept a payload that is still wrapped in a "data" key.
        payload = response.get('data', response)
        # NOTE(review): the query template visible in this notebook requests
        # asn.asnDegree.transit, not validation.state, so against that query
        # this lookup cannot succeed and every result is "notfound". Confirm
        # which field is actually wanted.
        state = payload['validation']['state'].lower()
    except Exception:
        # Best effort: any transport or lookup failure is reported as notfound.
        state = 'notfound'
    return f"{prefix},{asn},{state}"

# Tell whether a prefix is IPv4 or IPv6.
def ip_version(prefix):
    """Return the IP version number (4 or 6) of a network prefix.

    Args:
        prefix: Network in a form accepted by ``ipaddress.ip_network``.

    Returns:
        ``4`` for an IPv4 network, ``6`` for an IPv6 network.
    """
    # strict=False: do not reject prefixes that have host bits set.
    network = ip_network(prefix, strict=False)
    return network.version

# Main routine: read the input file, send the requests, save the results.
async def main():
    """Look up every ``prefix-asn`` line of the input file and save the results.

    Each non-blank input line is split on "-" into a prefix and an ASN, sent
    through ``fetch_validation_state`` one at a time, and the returned line is
    collected into an IPv4 or IPv6 list according to ``ip_version(prefix)``.
    Both lists are written to their output files once all lines are processed.

    Raises:
        ValueError: If a line does not split into exactly two parts on "-",
            or if its prefix is not a valid network.
    """
    ipv4_results = []
    ipv6_results = []

    with open('/home/Phoenix/data/prefix2as20240325.data', 'r') as file:
        # Blank lines would make the two-way unpack below raise; drop them.
        entries = [line.strip() for line in file if line.strip()]

    async for entry in tqdm(entries, desc="Processing prefixes"):
        prefix, asn = entry.split('-')
        # Classify before the request so a malformed prefix fails before a
        # network round trip is spent on it.
        bucket = ipv4_results if ip_version(prefix) == 4 else ipv6_results
        bucket.append(await fetch_validation_state(prefix, asn))

    with open('/home/Phoenix/data/cloudflare_20240325_ipv4.data', 'w') as ipv4_file:
        ipv4_file.writelines(result + '\n' for result in ipv4_results)

    with open('/home/Phoenix/data/cloudflare_20240325_ipv6.data', 'w') as ipv6_file:
        ipv6_file.writelines(result + '\n' for result in ipv6_results)

# Run the main coroutine.
if __name__ == "__main__":
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread (plain script): let asyncio.run own one.
        asyncio.run(main())
    else:
        # An event loop is already running (as in a Jupyter kernel), where
        # asyncio.run() raises RuntimeError. Schedule main() on that loop
        # instead; the reference keeps the task from being garbage-collected,
        # and a later cell can `await main_task` to wait for completion.
        main_task = running_loop.create_task(main())