# 02. Graph Construction (Week 2)

## Week 2: Graph Theory & NetworkX

This notebook processes the raw API data into a multimodal graph that captures the structural mechanics of the game.

### Course Concepts Applied
-   **Nodes & Edges:** Defining what constitutes a node (e.g. item, NPC, boss, location) and an edge (e.g. drop, located-in, shared location, description mention).
-   **Attributes:** Attaching metadata (Scaling, Bell Bearings) to nodes for later analysis.
-   **Graph Construction:** Building the network structure from tabular data.

### Outputs
-   `data/processed/nodes.csv`: Canonical node table.
-   `data/processed/edges.csv`: Edge table of typed relationships between nodes (location, drop, mention, shared location).

In [41]:
import json
import re
from dataclasses import dataclass
from itertools import combinations
from pathlib import Path
from typing import Dict, List, Sequence, Tuple

import pandas as pd

# Locate the project root: the first of the working directory, its parent, or
# its grandparent that contains both a "data" and a "scripts" directory.
# The ancestors are taken with a slice so that a working directory with fewer
# than two ancestors yields a shorter candidate list instead of the IndexError
# that `Path.cwd().parents[1]` raises in that situation.
CANDIDATES = [Path.cwd(), *list(Path.cwd().parents)[:2]]
PROJECT_ROOT = None
for candidate in CANDIDATES:
    if (candidate / "data").exists() and (candidate / "scripts").exists():
        PROJECT_ROOT = candidate.resolve()
        break
if PROJECT_ROOT is None:
    raise RuntimeError("Could not locate project root.")

# Input and output directories; the output directory is created on demand.
RAW_DIR = PROJECT_ROOT / "data" / "raw"
PROCESSED_DIR = PROJECT_ROOT / "data" / "processed"
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

print(f"Project root: {PROJECT_ROOT}")
print(f"Raw dir: {RAW_DIR}")
print(f"Processed dir: {PROCESSED_DIR}")

Project root: C:\social_graphs_project
Raw dir: C:\social_graphs_project\data\raw
Processed dir: C:\social_graphs_project\data\processed


In [42]:
# Raw endpoint dumps consumed by this notebook; each one is read from
# RAW_DIR/<endpoint>.json and must already exist.
# NOTE(review): NODE_CONFIG and DROP_EDGES also name a "creatures" endpoint that
# is not listed here, so it contributes no rows -- confirm this is intended.
ENDPOINTS = [
    "items", "weapons", "npcs", "locations", "bosses", "armors", "talismans", "incantations", "classes"
]
# endpoint name -> parsed JSON payload (a list of row dicts).
RAW_PAYLOADS: Dict[str, List[dict]] = {}
for endpoint in ENDPOINTS:
    path = RAW_DIR.joinpath(f"{endpoint}.json")
    if path.exists():
        RAW_PAYLOADS[endpoint] = json.loads(path.read_text(encoding="utf-8"))
        print(f"Loaded {endpoint}: {len(RAW_PAYLOADS[endpoint])} rows")
    else:
        # Fail fast: every listed endpoint is required.
        raise FileNotFoundError(f"Missing {path}. Run 01_data_collection.ipynb first.")

Loaded items: 462 rows
Loaded weapons: 307 rows
Loaded npcs: 55 rows
Loaded locations: 177 rows
Loaded bosses: 106 rows
Loaded armors: 568 rows
Loaded talismans: 87 rows
Loaded incantations: 98 rows
Loaded classes: 14 rows


In [43]:
# Raw endpoint name -> node_type assigned to every row of that endpoint
# (consumed by build_nodes). Endpoints that were not loaded into RAW_PAYLOADS
# are looked up with a default of [] and therefore produce no nodes.
NODE_CONFIG = {
    "bosses": "boss",
    "npcs": "npc",
    "creatures": "creature",
    "locations": "location",
    "items": "item",
    "armors": "armor",
    "talismans": "talisman",
    "incantations": "incantation",
    "weapons": "weapon",
    "classes": "class"
}
# Endpoint -> edge_type for edges from a row to the location nodes named in its
# "location" field.
LOCATION_EDGES = {
    "bosses": "boss_located_in",
    "npcs": "npc_located_in",
}
# Endpoint -> edge_type for edges from a row to the nodes named in its "drops"
# list.
DROP_EDGES = {
    "bosses": "boss_drops",
    "creatures": "creature_drops",
}
# Node types that a drop name is allowed to resolve to.
DROP_TARGET_TYPES = {"item", "weapon", "armor", "talisman", "incantation"}

# Substrings that set NodeRecord.bell_bearing_source when found in a node's
# name or description.
FATE_TOKENS = {"bell bearing", "bell-bearing"}
# Substrings that set int_scaling / faith_scaling for non-class nodes. They are
# searched in the JSON dump of the whole row, so a hit can come from any field,
# key, or URL in that row.
INT_TOKENS = {"intelligence", "sorcery", "glintstone"}
FAITH_TOKENS = {"faith", "holy", "incantation", "frenzied"}
# Faction label -> keywords. A faction is tagged when any keyword occurs as a
# plain substring of the case-folded text; there is no word-boundary check, so
# short keywords (e.g. "taker", "noble") also match inside longer words.
FACTION_KEYWORDS: Dict[str, Tuple[str, ...]] = {
    "Golden Order": ("golden order", "two fingers", "order fundamentalist", "goldmask"),
    "Raya Lucaria": ("raya lucaria", "academy", "glintstone", "cuckoo knight"),
    "Carian Royalty": ("carian", "rennala", "royal knight", "royal house"),
    "Frenzied Flame": ("frenzied flame", "three fingers", "madness"),
    "Dragon Cult": ("dragon cult", "ancient dragon", "dragon communion"),
    "Haligtree": ("haligtree", "malenia", "miquella"),
    "Volcano Manor": ("volcano manor", "rykard", "serpent hunter", "taker"),
    "Godskin Apostles": ("godskin", "apostle", "noble"),
    "Lord of Blood": ("lord of blood", "mohg", "blood oath"),
    "Crucible Knights": ("crucible knight", "crucible"),
    "Redmane Army": ("redmane", "radahn"),
    "Beast Clergy": ("beast clergyman", "gurranq", "bestial"),
    "Night's Cavalry": ("night's cavalry", "night cavalry"),
    "Servants of Rot": ("scarlet rot", "kindred of rot", "rot god"),
}

def clean_text(value: str | None) -> str:
    if not value:
        return ""
    return re.sub(r"\s+", " ", value).strip()

def explode_locations(raw_value: str | None) -> List[str]:
    if not raw_value:
        return []
    seps = [",", "/", " and ", " & ", ";"]
    parts = [raw_value]
    for sep in seps:
        nxt = []
        for part in parts:
            nxt.extend(part.split(sep))
        parts = nxt
    return [p.strip() for p in parts if p.strip()]

def normalize_name(value: str | None) -> str | None:
    if not value:
        return None
    value = value.strip().casefold()
    return value or None

In [44]:
@dataclass
class NodeRecord:
    """One graph node derived from a single raw API row (see build_nodes)."""

    # Row "id", or "<endpoint>:<name>" when the row has no id.
    node_id: str
    # Category taken from NODE_CONFIG for the row's endpoint (e.g. "boss").
    node_type: str
    # Row "name", falling back to node_id when the name is missing.
    name: str
    # Row "description", or its "effect" field when description is empty.
    description: str | None
    # Heuristic flags: class nodes use a stat threshold, all other nodes use
    # substring matches against the serialised row.
    int_scaling: bool
    faith_scaling: bool
    # True when a FATE_TOKENS substring occurs in the name or description.
    bell_bearing_source: bool
    # True when the row's "role" contains "merchant" or "teacher".
    merchant_flag: bool
    # Faction labels from FACTION_KEYWORDS matched for this row.
    faction_tags: Tuple[str, ...]
    # Name of the endpoint the row was loaded from.
    raw_endpoint: str
    # Remaining row fields (everything except id, name, description, effect).
    extra: dict

@dataclass
class EdgeRecord:
    """One directed relationship between two node ids (see build_edges)."""

    # node_id of the edge's origin and destination.
    source: str
    target: str
    # Fine-grained label, e.g. "boss_located_in" or "description_mentions".
    edge_type: str
    # Coarse label, e.g. "located_in", "drops", "mentions", "share_location".
    relationship: str
    # Optional fields; build_edges leaves both at their defaults.
    weight: float | None = None
    metadata: dict | None = None

def detect_scaling(text: str | None, tokens: set[str]) -> bool:
    if not text:
        return False
    text_lower = text.casefold()
    return any(tok in text_lower for tok in tokens)

def detect_factions(*parts: str | None) -> Tuple[str, ...]:
    """Return the factions whose keywords appear in the given text fragments.

    The fragments are joined with spaces (``None`` or empty fragments count as
    empty text) and case-folded. A faction matches when any of its
    FACTION_KEYWORDS entries is a substring of that text. Results follow the
    order of FACTION_KEYWORDS and contain no duplicates.
    """
    haystack = " ".join(part if part else "" for part in parts).casefold()
    hits = [
        faction
        for faction, keywords in FACTION_KEYWORDS.items()
        if any(keyword in haystack for keyword in keywords)
    ]
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return tuple(dict.fromkeys(hits))

def build_nodes(class_stat_threshold: int = 14) -> List[NodeRecord]:
    """Turn every loaded raw row into a NodeRecord.

    Iterates the endpoints in NODE_CONFIG order and reads their rows from
    RAW_PAYLOADS; endpoints that were not loaded contribute nothing.

    Args:
        class_stat_threshold: Minimum "intelligence" / "faith" stat for a class
            node to be flagged as int_scaling / faith_scaling. Defaults to 14,
            the cutoff previously hard-coded here.

    Returns:
        One NodeRecord per raw row, in endpoint order then row order.

    Raises:
        ValueError: If a class stat is present but not convertible to int.
    """
    nodes: List[NodeRecord] = []
    for endpoint, node_type in NODE_CONFIG.items():
        for row in RAW_PAYLOADS.get(endpoint, []):
            node_id = row.get("id") or f"{endpoint}:{row.get('name')}"
            name = row.get("name") or node_id
            description = row.get("description") or row.get("effect")
            # Serialise the row once; it feeds both the scaling and the
            # faction heuristics. The dump includes every key and value of the
            # row, so a token can match in any field.
            row_json = json.dumps(row, ensure_ascii=False)

            if node_type == "class":
                # Classes carry explicit stats, so compare those instead of
                # searching text. `or {}` / `or 0` tolerate a null stats
                # object or a null stat value instead of raising.
                stats = row.get("stats") or {}
                int_flag = int(stats.get("intelligence") or 0) >= class_stat_threshold
                faith_flag = int(stats.get("faith") or 0) >= class_stat_threshold
            else:
                int_flag = detect_scaling(row_json, INT_TOKENS)
                faith_flag = detect_scaling(row_json, FAITH_TOKENS)

            bell_flag = detect_scaling(name + " " + (description or ""), FATE_TOKENS)
            merchant_flag = detect_scaling(row.get("role"), {"merchant", "teacher"})
            factions = detect_factions(name, description, row_json)
            # Keep every field that is not already a dedicated column.
            extra = {k: v for k, v in row.items() if k not in {"id", "name", "description", "effect"}}
            nodes.append(NodeRecord(
                node_id=node_id,
                node_type=node_type,
                name=name,
                description=description,
                int_scaling=int_flag,
                faith_scaling=faith_flag,
                bell_bearing_source=bell_flag,
                merchant_flag=merchant_flag,
                faction_tags=factions,
                raw_endpoint=endpoint,
                extra=extra,
            ))
    return nodes

In [45]:
def _location_edges(nodes: List[NodeRecord]) -> List[EdgeRecord]:
    """Link rows of the LOCATION_EDGES endpoints to the location nodes they name."""
    loc_index = {normalize_name(n.name): n.node_id for n in nodes if n.node_type == "location"}
    found: List[EdgeRecord] = []
    for endpoint, rel in LOCATION_EDGES.items():
        for row in RAW_PAYLOADS.get(endpoint, []):
            source_id = row.get("id")
            if not source_id:
                continue
            for loc in explode_locations(row.get("location")):
                target_id = loc_index.get(normalize_name(loc))
                if target_id:  # names with no matching location node are skipped
                    found.append(EdgeRecord(source_id, target_id, rel, "located_in"))
    return found


def _drop_edges(nodes: List[NodeRecord]) -> List[EdgeRecord]:
    """Link rows of the DROP_EDGES endpoints to the droppable nodes in their "drops" list."""
    drop_index = {normalize_name(n.name): n.node_id for n in nodes if n.node_type in DROP_TARGET_TYPES}
    found: List[EdgeRecord] = []
    for endpoint, rel in DROP_EDGES.items():
        for row in RAW_PAYLOADS.get(endpoint, []):
            source_id = row.get("id")
            if not source_id:
                continue
            for drop in row.get("drops", []) or []:
                target_id = drop_index.get(normalize_name(drop))
                if target_id:
                    found.append(EdgeRecord(source_id, target_id, rel, "drops"))
    return found


def _mention_edges(nodes: List[NodeRecord]) -> List[EdgeRecord]:
    """Link each node to the first other node whose name occurs in its description."""
    name_map = {normalize_name(n.name): n.node_id for n in nodes}
    # Filter the candidate names once instead of once per node: names shorter
    # than five characters are ignored.
    candidates = [(token, node_id) for token, node_id in name_map.items() if token and len(token) >= 5]
    found: List[EdgeRecord] = []
    for node in nodes:
        desc = (node.description or "").casefold()
        if not desc:
            continue
        # NOTE(review): only the first matching name (in node order) produces
        # an edge, and the match is a plain substring test -- confirm that one
        # mention per node is the intended limit.
        target_id = next(
            (tid for token, tid in candidates if tid != node.node_id and token in desc),
            None,
        )
        if target_id is not None:
            found.append(EdgeRecord(node.node_id, target_id, "description_mentions", "mentions"))
    return found


def _share_location_edges() -> List[EdgeRecord]:
    """Pair up rows of the LOCATION_EDGES endpoints that list the same location text."""
    loc_buckets: Dict[str, List[str]] = {}
    # Use the same endpoints as the located_in pass rather than a second
    # hard-coded list that could drift from LOCATION_EDGES.
    for endpoint in LOCATION_EDGES:
        for row in RAW_PAYLOADS.get(endpoint, []):
            src = row.get("id")
            if not src:
                continue
            for loc in explode_locations(row.get("location")):
                key = normalize_name(loc)
                if key:
                    loc_buckets.setdefault(key, []).append(src)
    found: List[EdgeRecord] = []
    for node_ids in loc_buckets.values():
        # Sorted unique ids give each unordered pair exactly once per location.
        for a, b in combinations(sorted(set(node_ids)), 2):
            found.append(EdgeRecord(a, b, "share_location", "share_location"))
    return found


def build_edges(nodes: List[NodeRecord]) -> List[EdgeRecord]:
    """Derive every edge of the graph from the nodes and the raw payloads.

    Edges are emitted in four consecutive groups: located_in, drops,
    description mentions, and shared-location pairs. Rows without an "id" are
    skipped in the located_in, drops, and shared-location groups.

    Args:
        nodes: Node list produced by build_nodes; used to resolve names to ids
            and as the source of descriptions for mention edges.

    Returns:
        All EdgeRecord objects, grouped in the order described above.
    """
    edges: List[EdgeRecord] = []
    edges.extend(_location_edges(nodes))
    edges.extend(_drop_edges(nodes))
    edges.extend(_mention_edges(nodes))
    edges.extend(_share_location_edges())
    return edges

In [46]:
# Build the nodes first: build_edges needs them to resolve names to node ids.
nodes = build_nodes()
edges = build_edges(nodes)
# Cell output: (number of nodes, number of edges).
len(nodes), len(edges)

(1874, 1237)

In [47]:
# Column order of the two output tables. Passing these to the DataFrame
# constructor keeps the header intact even when a record list is empty; without
# them an empty list yields a column-less frame and a header-less CSV.
NODE_COLUMNS = [
    "node_id", "node_type", "name", "description", "int_scaling", "faith_scaling",
    "bell_bearing_source", "merchant_flag", "faction_tags", "raw_endpoint", "extra_json",
]
EDGE_COLUMNS = ["source", "target", "edge_type", "relationship", "weight", "metadata_json"]

# Flatten each record into CSV-friendly scalars: faction tags are joined with
# "|" and nested data is stored as a JSON string.
node_records = [
    {
        "node_id": n.node_id,
        "node_type": n.node_type,
        "name": n.name,
        "description": n.description,
        "int_scaling": n.int_scaling,
        "faith_scaling": n.faith_scaling,
        "bell_bearing_source": n.bell_bearing_source,
        "merchant_flag": n.merchant_flag,
        "faction_tags": "|".join(n.faction_tags),
        "raw_endpoint": n.raw_endpoint,
        "extra_json": json.dumps(n.extra, ensure_ascii=False)
    }
    for n in nodes
]
edge_records = [
    {
        "source": e.source,
        "target": e.target,
        "edge_type": e.edge_type,
        "relationship": e.relationship,
        "weight": e.weight,
        "metadata_json": json.dumps(e.metadata or {}, ensure_ascii=False)
    }
    for e in edges
]
node_df = pd.DataFrame(node_records, columns=NODE_COLUMNS)
edge_df = pd.DataFrame(edge_records, columns=EDGE_COLUMNS)
node_path = PROCESSED_DIR / "nodes.csv"
edge_path = PROCESSED_DIR / "edges.csv"
node_df.to_csv(node_path, index=False)
edge_df.to_csv(edge_path, index=False)
print(f"[saved] {node_path} ({len(node_df)} rows)")
print(f"[saved] {edge_path} ({len(edge_df)} rows)")
node_df.head()

[saved] C:\social_graphs_project\data\processed\nodes.csv (1874 rows)
[saved] C:\social_graphs_project\data\processed\edges.csv (1237 rows)


Unnamed: 0,node_id,node_type,name,description,int_scaling,faith_scaling,bell_bearing_source,merchant_flag,faction_tags,raw_endpoint,extra_json
0,17f695abef9l0i1ujzrup5y0z7rw6,boss,Abductor Virgins,Deadly mechanical constructs who wield massive...,False,False,False,False,Volcano Manor,bosses,"{""image"": null, ""region"": ""Mount Gelmir"", ""loc..."
1,17f69b4ba0al0i1uk6s98t1nbtxunt,boss,"Alecto, Black Knife Ringleader",Remarkably agile and aggressive warrior. Dange...,False,False,False,False,,bosses,"{""image"": ""https://eldenring.fanapis.com/image..."
2,17f69d0313fl0i1uk8pokynv71bkz8,boss,Alecto Black Knife Ringleader,Remarkably agile and aggressive warrior. Dange...,False,False,False,False,,bosses,"{""image"": ""https://eldenring.fanapis.com/image..."
3,17f69b85929l0i1ukc4l2up7yp9v0i,boss,Alabaster Lord,An Evergaol prisoner wielding a vicious sword ...,False,False,False,False,,bosses,"{""image"": ""https://eldenring.fanapis.com/image..."
4,17f69a3ca37l0i1ukd1lq14ctafbk,boss,"Adan, Thief Of Fire","An evergaol prisoner and former battle mage, A...",False,False,False,False,,bosses,"{""image"": ""https://eldenring.fanapis.com/image..."
