In [2]:
import csv
import json
from collections import defaultdict
from itertools import combinations

# Load enriched knowledge data: a JSON object whose keys are knowledge-skill
# URIs and whose values are the per-skill info records.
with open("../output/knowledge_skills_full.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Column order of nodes.csv. Declared explicitly (instead of being read from
# nodes[0]) so the header can still be written when no record is usable.
NODE_FIELDS = [
    "id",
    "title",
    "preferredLabel_es",
    "preferredLabel_en",
    "description",
    "level",
    "parent_uri",
    "fetchedAt",
]

# 1️⃣ + 2️⃣ Single pass over the records: build the node list and the map
# occupation URI → set of knowledge skill URIs.
nodes = []
occ_to_know = defaultdict(set)
for uri, info in data.items():
    # Records carrying an "error" key contribute neither a node nor edges.
    if "error" in info:
        continue
    nodes.append({
        "id": uri,
        "title": info.get("title"),
        "preferredLabel_es": info.get("preferredLabel_es"),
        "preferredLabel_en": info.get("preferredLabel_en"),
        "description": info.get("description"),
        "level": info.get("level"),
        # "parent" may be missing or null; either way parent_uri becomes None.
        "parent_uri": (info.get("parent") or {}).get("uri"),
        "fetchedAt": info.get("fetchedAt"),
    })
    # `or []` also covers an explicit null, mirroring the "parent" guard above.
    for occ in info.get("occupations") or []:
        occ_to_know[occ["uri"]].add(uri)

# 3️⃣ Build weighted edges: (K1, K2) → number of shared occupations
edge_weights = defaultdict(int)
for uri_set in occ_to_know.values():
    # Sorting first makes every pair come out as (smaller, larger), so each
    # undirected edge has one canonical key, and makes the insertion order
    # (hence the row order of edges.csv) independent of set iteration order.
    for pair in combinations(sorted(uri_set), 2):
        edge_weights[pair] += 1

# 4️⃣ Write nodes.csv (header only when there are no nodes)
with open("../output/nodes.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=NODE_FIELDS)
    writer.writeheader()
    writer.writerows(nodes)

# 5️⃣ Write edges.csv, one row per pair of skills sharing an occupation
with open("../output/edges.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["source", "target", "weight"])
    writer.writeheader()
    writer.writerows(
        {"source": source, "target": target, "weight": weight}
        for (source, target), weight in edge_weights.items()
    )

print("✅ nodes.csv and edges.csv created with weights.")


✅ nodes.csv and edges.csv created with weights.
