In [1]:
# Mount Google Drive into the Colab runtime so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder; the relative paths used
# in the cells below (e.g. 'evaluations/...') are resolved against it.
%cd drive/MyDrive/Thesis Code

Mounted at /content/drive
/content/drive/MyDrive/Thesis Code


In [2]:
import json
import random
from pathlib import Path

**AS Rank and AS Customer Cone**

In [None]:
# Load the AS Rank evaluation dataset. Later cells iterate it as a mapping of
# ASN -> record and read the 'rank' and 'cone' fields of each record.
as_rank_eval_dataset_path = 'evaluations/asrank/as_rank_dataset.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(as_rank_eval_dataset_path, 'r', encoding='utf-8') as f:
    asn_data = json.load(f)

# Choosing the first 50 elements for now
asn_data = dict(list(asn_data.items())[:50])

In [None]:
def as_cone_query(asn):
    """Return the customer-cone-size question for the given AS number."""
    return f"What is the size of the customer cone of AS{asn}?"


def as_rank_query(asn):
    """Return the AS-rank question for the given AS number."""
    return f"What is the rank of AS{asn}?"

In [None]:
# One question/answer record per ASN, asking for its AS rank.
rank_qas = [
    dict(
        question=as_rank_query(as_number),
        answer=f"The rank of AS{as_number} is: {record['rank']}",
    )
    for as_number, record in asn_data.items()
]

# One question/answer record per ASN, asking for its customer cone size.
cone_qas = [
    dict(
        question=as_cone_query(as_number),
        answer=f"The customer cone size of AS{as_number} is: {record['cone']}",
    )
    for as_number, record in asn_data.items()
]

In [None]:
rank_file = Path("as_rank_qas.json")
cone_file = Path("as_cone_qas.json")

# Serialise each Q&A list as indented JSON and store it as UTF-8 text.
rank_file.write_text(json.dumps(rank_qas, indent=2), encoding="utf-8")
cone_file.write_text(json.dumps(cone_qas, indent=2), encoding="utf-8")

print(f"Generated {len(rank_qas)} rank Q&A pairs → {rank_file}")
print(f"Generated {len(cone_qas)} cone Q&A pairs → {cone_file}")

**Bogons**

In [None]:
# Load the bogons evaluation dataset. Later cells iterate it as a mapping of
# prefix -> label and call .lower() on each label.
bogons_eval_dataset_path = 'evaluations/bogons/bogons.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(bogons_eval_dataset_path, 'r', encoding='utf-8') as f:
    bogons_data = json.load(f)

In [None]:
# Keep only the first 50 entries, preserving their original order.
bogons_data = {prefix: bogons_data[prefix] for prefix in list(bogons_data)[:50]}

In [None]:
def bogons_query(prefix):
    """Return the bogon-membership question for the given prefix."""
    return f"Is the following IP Address a bogon:{prefix}?"

In [None]:
# One question/answer record per prefix; the dataset label is lower-cased
# before being embedded in the answer sentence.
bogons_qas = [
    dict(
        question=bogons_query(ip_prefix),
        answer=f"The prefix {ip_prefix} is {label.lower()}",
    )
    for ip_prefix, label in bogons_data.items()
]

In [None]:
bogons_file = Path("bogons_qas.json")

# Persist the bogon Q&A pairs as indented, UTF-8 encoded JSON.
with bogons_file.open("w", encoding="utf-8") as f:
    json.dump(bogons_qas, f, indent=2)

# The message previously said "rank Q&A pairs" (copied from the AS Rank
# section); report the dataset that was actually written.
print(f"Generated {len(bogons_qas)} bogon Q&A pairs → {bogons_file}")

**AS Organization**

In [3]:
# Load the organization-level dataset. Later cells iterate it as a mapping of
# organization -> record and read 'largest_asn' and 'num_asns' from each record.
# Both files are decoded explicitly as UTF-8 so reading does not depend on the
# platform's locale default (the Q&A files this notebook writes are UTF-8 too).
orgs_eval_dataset_path = 'evaluations/as2org/orgs_dataset.json'
with open(orgs_eval_dataset_path, 'r', encoding='utf-8') as f:
    orgs_data = json.load(f)

# Load the ASN-level dataset. Later cells iterate it as a mapping of
# ASN -> record and read 'Names' and, when present, 'Sibling ASNs'.
as2org_eval_dataset_path = 'evaluations/as2org/as2org.json'
with open(as2org_eval_dataset_path, 'r', encoding='utf-8') as f:
    as2org_data = json.load(f)

# Keep only the first 50 entries of each mapping.
orgs_data = dict(list(orgs_data.items())[:50])
as2org_data = dict(list(as2org_data.items())[:50])

In [4]:
def largest_as2org_query(org):
    """Return the question asking for the largest AS owned by ``org``."""
    return f"What is ASN of the largest AS which {org} owns?"


def num_of_as2org_query(org):
    """Return the question asking how many ASes ``org`` owns."""
    return f"What is the number of ASes which {org} owns?"


def as2org_query(asn):
    """Return the question asking which organization owns the given AS."""
    return f"Which organization owns AS{asn}?"

In [5]:
# Per organization: which of its ASes is the largest.
largest_as2org_qas = [
    dict(
        question=largest_as2org_query(org_name),
        answer=f"The largest AS in {org_name} is: {org_record['largest_asn']}",
    )
    for org_name, org_record in orgs_data.items()
]

# Per organization: how many ASes it owns.
num_of_as2org_qas = [
    dict(
        question=num_of_as2org_query(org_name),
        answer=f"The number of ASes in {org_name} is: {org_record['num_asns']}",
    )
    for org_name, org_record in orgs_data.items()
]

# Per ASN: the owning organization, taken as the first entry of 'Names'.
as2org_qas = [
    dict(
        question=as2org_query(as_number),
        answer=f"AS{as_number} is owned by: {as_record['Names'][0]}",
    )
    for as_number, as_record in as2org_data.items()
]

In [6]:
largest_as2orgs_file = Path("largest_as2org_qas.json")
num_of_as2org_file = Path("num_of_as2org_qas.json")
as2org_file = Path("as2org_qas.json")

# Serialise each Q&A list as indented JSON and store it as UTF-8 text.
largest_as2orgs_file.write_text(json.dumps(largest_as2org_qas, indent=2), encoding="utf-8")
num_of_as2org_file.write_text(json.dumps(num_of_as2org_qas, indent=2), encoding="utf-8")
as2org_file.write_text(json.dumps(as2org_qas, indent=2), encoding="utf-8")

print(f"Generated {len(largest_as2org_qas)} largest AS in organization Q&A pairs → {largest_as2orgs_file}")
print(f"Generated {len(num_of_as2org_qas)} number of ASes in organization pairs → {num_of_as2org_file}")
print(f"Generated {len(as2org_qas)} AS2Org Q&A pairs → {as2org_file}")

Generated 50 largest AS in organization Q&A pairs → largest_as2org_qas.json
Generated 50 number of ASes in organization pairs → num_of_as2org_qas.json
Generated 50 AS2Org Q&A pairs → as2org_qas.json


**AS Class Type Inference**

In [None]:
# Load the AS class-type dataset. Later cells iterate it as a mapping of
# ASN -> class type.
as_type_eval_dataset_path = 'evaluations/as_class_type/as_type_dataset.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(as_type_eval_dataset_path, 'r', encoding='utf-8') as f:
    as_type_data = json.load(f)

In [None]:
# Draw up to 50 random ASN -> type entries for the evaluation set.
# A dedicated generator with a fixed (arbitrary) seed makes the sample
# reproducible across kernel restarts, and capping the size avoids the
# ValueError random.sample raises when fewer than 50 entries are available.
as_type_data = dict(
    random.Random(42).sample(list(as_type_data.items()), min(50, len(as_type_data)))
)

In [None]:
def as_class_type_query(asn):
    """Return the AS class-type question for the given AS number."""
    return f"What is the AS type of AS{asn}?"

In [None]:
# One question/answer record per sampled ASN; the dataset value is embedded
# verbatim as the class type.
as_class_type_qas = [
    dict(
        question=as_class_type_query(as_number),
        answer=f"The class type of AS{as_number} is {class_type}",
    )
    for as_number, class_type in as_type_data.items()
]

In [None]:
as_class_type_qas_file = Path("as_class_type_qas.json")
# Persist the class-type Q&A pairs as indented, UTF-8 encoded JSON.
with as_class_type_qas_file.open("w", encoding="utf-8") as f:
    json.dump(as_class_type_qas, f, indent=2)

# The message previously said "rank Q&A pairs" (copied from the AS Rank
# section); report the dataset that was actually written.
print(f"Generated {len(as_class_type_qas)} AS class type Q&A pairs → {as_class_type_qas_file}")

**AS Tier Inference**

In [None]:
# Load the AS tier dataset. Later cells iterate it as a mapping of ASN -> tier.
as_tiers_eval_dataset_path = 'evaluations/as_tier/tiers.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(as_tiers_eval_dataset_path, 'r', encoding='utf-8') as f:
    as_tiers_data = json.load(f)

In [None]:
# Draw up to 50 random ASN -> tier entries for the evaluation set.
# A dedicated generator with a fixed (arbitrary) seed makes the sample
# reproducible across kernel restarts, and capping the size avoids the
# ValueError random.sample raises when fewer than 50 entries are available.
as_tiers_data = dict(
    random.Random(42).sample(list(as_tiers_data.items()), min(50, len(as_tiers_data)))
)

In [None]:
def as_tier_query(asn):
    """Return the AS-tier question for the given AS number."""
    return f"What is the tier of AS{asn}?"

In [None]:
# One question/answer record per sampled ASN; the dataset value is embedded
# verbatim as the tier.
as_tier_qas = [
    dict(
        question=as_tier_query(as_number),
        answer=f"The tier of AS{as_number} is: {tier_label}",
    )
    for as_number, tier_label in as_tiers_data.items()
]

In [None]:
as_tier_qas_file = Path("as_tier_qas.json")
# Serialise the tier Q&A list as indented JSON and store it as UTF-8 text.
as_tier_qas_file.write_text(json.dumps(as_tier_qas, indent=2), encoding="utf-8")

print(f"Generated {len(as_tier_qas)} AS Tier Q&A pairs → {as_tier_qas_file}")

Generated 50 AS Tier Q&A pairs → as_tier_qas.json


**AS Relationship Type Classification**

In [None]:
# Load the AS relationships dataset. Later cells slice it as a sequence and
# read the 'AS1', 'AS2' and 'Rel' keys of each item.
as_rels_dataset_path = 'evaluations/as_rel/as_relationships_dataset.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(as_rels_dataset_path, 'r', encoding='utf-8') as f:
    as_rels_data = json.load(f)

In [None]:
# Keep only the first 50 relationship records.
as_rels_data = as_rels_data[0:50]

In [None]:
# Build sibling relationship records from the as2org mapping: every ASN is
# paired with each entry of its 'Sibling ASNs' list (if it has one).
siblings_dataset = []
seen = set()

for as_number, org_record in as2org_data.items():
    for sibling in org_record.get("Sibling ASNs", []):
        # Order the two ASNs so (a, b) and (b, a) collapse to the same key.
        low, high = sorted((int(as_number), int(sibling)))
        if (low, high) in seen:
            continue
        seen.add((low, high))
        siblings_dataset.append({"AS1": low, "AS2": high, "Rel": "S"})

print(siblings_dataset)

[{'AS1': 173, 'AS2': 4697, 'Rel': 'S'}, {'AS1': 173, 'AS2': 4710, 'Rel': 'S'}, {'AS1': 173, 'AS2': 55817, 'Rel': 'S'}, {'AS1': 4697, 'AS2': 4710, 'Rel': 'S'}, {'AS1': 4697, 'AS2': 55817, 'Rel': 'S'}, {'AS1': 4710, 'AS2': 55817, 'Rel': 'S'}, {'AS1': 1221, 'AS2': 9514, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 37978, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 132029, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 132292, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 135887, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 133859, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 135313, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 150689, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 141886, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 135052, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 135083, 'Rel': 'S'}, {'AS1': 4632, 'AS2': 9514, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 133931, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 135599, 'Rel': 'S'}, {'AS1': 9514, 'AS2': 149288, 'Rel': 'S'}, {'AS1': 1221, 'AS2': 37978, 'Rel': 'S'}, {'AS1': 1221, 'AS2': 132029, 'Rel': 'S'}, {'AS1': 1221, 'AS2': 132292, 'Rel': 'S'}, {'AS1': 1

In [None]:
# Candidate pool: up to 50 sibling records followed by up to 50 records from
# the relationships dataset.
new_rel_dataset = [*siblings_dataset[:50], *as_rels_data[:50]]

In [None]:
# Draw up to 50 random relationship records from the combined pool.
# A dedicated generator with a fixed (arbitrary) seed makes the sample
# reproducible across kernel restarts, and capping the size avoids the
# ValueError random.sample raises when the pool holds fewer than 50 records.
rels_dataset = random.Random(42).sample(new_rel_dataset, min(50, len(new_rel_dataset)))

In [None]:
def as_rels_query(asn1, asn2):
    """Return the relationship-type question for the given pair of AS numbers."""
    return f"What is the type of relationship between the following ASes: AS{asn1}, AS{asn2}?"

In [None]:
# Maps a relationship code to its descriptive name. Numeric codes are
# registered both as ints and as their string form so either representation
# resolves to the same name.
# NOTE(review): confirm against the source dataset that code 1 denotes siblings.
REL_MAP = {
    'S': 'Siblings',
    **{
        key: label
        for code, label in (
            (1, 'Siblings'),
            (0, 'Peers'),
            (-1, 'Provider to Customer'),
        )
        for key in (code, str(code))
    },
}


def build_as_relationship_qas(rels_dataset):
    """Build question/answer records for AS relationship pairs.

    Parameters
    ----------
    rels_dataset : Iterable[dict]
        Each item must have keys 'AS1', 'AS2', and 'Rel'.

    Returns
    -------
    list[dict]
        One ``{"question", "answer"}`` record per input item, in input order.
        The 'Rel' code is translated through ``REL_MAP``; codes missing from
        the map are reported as 'Unknown'.
    """
    qa_records = []
    for record in rels_dataset:
        first_as = record['AS1']
        second_as = record['AS2']
        question = as_rels_query(first_as, second_as)
        relationship = REL_MAP.get(record['Rel'], 'Unknown')
        answer = (
            f"The relationship between AS{first_as} and "
            f"AS{second_as} is: "
            f"{relationship}"
        )
        qa_records.append({"question": question, "answer": answer})
    return qa_records

In [None]:
# Turn the sampled relationship records into question/answer pairs.
as_rels_qas = build_as_relationship_qas(rels_dataset=rels_dataset)

In [None]:
as_rels_qas_file = Path("as_rels_qas.json")
# Serialise the relationship Q&A list as indented JSON and store it as UTF-8 text.
as_rels_qas_file.write_text(json.dumps(as_rels_qas, indent=2), encoding="utf-8")

print(f"Generated {len(as_rels_qas)} AS Relationships Q&A pairs → {as_rels_qas_file}")

Generated 50 AS Relationships Q&A pairs → as_rels_qas.json


**Valley-Free Validation**

In [None]:
# Load the valley-free paths dataset. Later cells iterate it as a mapping of
# path -> classification label.
vf_eval_dataset_path = 'evaluations/valley_free/paths.json'
# Open through the path variable (the literal used to be repeated here, leaving
# the variable unused) and decode explicitly as UTF-8 so reading does not
# depend on the platform's locale default.
with open(vf_eval_dataset_path, 'r', encoding='utf-8') as f:
    vf_data = json.load(f)

In [None]:
# Draw up to 50 random path -> label entries for the evaluation set.
# A dedicated generator with a fixed (arbitrary) seed makes the sample
# reproducible across kernel restarts, and capping the size avoids the
# ValueError random.sample raises when fewer than 50 entries are available.
vf_data = dict(
    random.Random(42).sample(list(vf_data.items()), min(50, len(vf_data)))
)

In [None]:
def vf_query(path):
    """Return the valley-free validation question for the given AS path."""
    # NOTE(review): the wording is kept verbatim so generated questions stay
    # identical to earlier runs; the grammar ("Is ... is") may deserve a fix.
    return f"Is the following path is valley-free {path}"

In [None]:
# One question/answer record per sampled path.
# The label is accepted both as the string '1' and as the number 1: a numeric
# label loaded from JSON would otherwise never equal '1' and every such path
# would silently be reported as invalid. Any other label means "not valley free".
# NOTE(review): confirm which representation the dataset actually uses.
vf_qas = [
    {
        "question": vf_query(path),
        "answer": f"The path {path} is {'valid and valley free' if classification in ('1', 1) else 'invalid and not a valley free'} path",
    }
    for path, classification in vf_data.items()
]

In [None]:
vf_qas_file = Path("vf_qas.json")
# Write to vf_qas_file. This cell used to open as_tier_qas_file, which
# overwrote the AS tier output with valley-free data and never created
# vf_qas.json, even though the message below reports that path.
with vf_qas_file.open("w", encoding="utf-8") as f:
    json.dump(vf_qas, f, indent=2)

print(f"Generated {len(vf_qas)} valley free validation Q&A pairs → {vf_qas_file}")

Generated 50 AS Tier Q&A pairs → vf_qas.json


**Top ASes with Certain Relationship**

In [None]:
# Load the combined relationship/class-type dataset. Later cells sample it as
# a sequence and read 'x', 'rel', 'asn' and 'top_asns' from each entry.
combined_eval_dataset_path = 'evaluations/combined_as_rel_class_type/asn_types_with_rel.json'
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# locale default (the Q&A files this notebook writes are UTF-8 as well).
with open(combined_eval_dataset_path, 'r', encoding='utf-8') as f:
    combined_data = json.load(f)

In [None]:
# Draw up to 50 random entries for the evaluation set.
# A dedicated generator with a fixed (arbitrary) seed makes the sample
# reproducible across kernel restarts, and capping the size avoids the
# ValueError random.sample raises when fewer than 50 entries are available.
combined_data = random.Random(42).sample(combined_data, min(50, len(combined_data)))

In [None]:
# One question/answer record per entry: the answer is the first `x` items of
# the entry's 'top_asns' list, joined with ", ".
combined_qa_dataset = [
    dict(
        question=f"List the top {item['x']} ASes with {item['rel']} with AS{item['asn']}",
        answer=", ".join(str(top_asn) for top_asn in item['top_asns'][: int(item['x'])]),
    )
    for item in combined_data
]

In [None]:
combined_qas_file = Path("combined_qas.json")
# Persist the "top ASes by relationship" Q&A pairs as indented, UTF-8 encoded JSON.
with combined_qas_file.open("w", encoding="utf-8") as f:
    json.dump(combined_qa_dataset, f, indent=2)

# The message previously said "valley free validation Q&A pairs" (copied from
# the previous section); report the dataset that was actually written.
print(f"Generated {len(combined_qa_dataset)} top ASes by relationship Q&A pairs → {combined_qas_file}")

Generated 50 valley free validation Q&A pairs → combined_qas.json
