# 01. Data Collection (Week 1)

## Week 1: APIs & Data Acquisition

**Research Question:** "To what extent do game mechanics (specifically stat scaling and item utility) dictate the narrative topology and character fates of the Elden Ring universe?"

### Course Concepts Applied
-   **APIs:** Interacting with the Elden Ring Fan API.
-   **Data Handling:** Pagination, Rate Limiting, and Caching (JSON).
-   **Reproducibility:** Creating a robust data pipeline.

**Endpoints:** `items`, `weapons`, `npcs`, `locations`, `bosses`, `armors`, `talismans`, `incantations`, `classes`.

In [1]:
import datetime as dt
import json
import sys
import time
from pathlib import Path
from typing import Dict, List, Sequence

import requests

# Locate the project root: the first of the working directory and up to two of
# its ancestors that contains both a ``data`` and a ``scripts`` directory.
#
# The ancestors are taken with a slice instead of indexing ``parents[1]``, so a
# shallow working directory (e.g. a drive root) yields a shorter candidate list
# rather than an IndexError, and the explicit RuntimeError below is what the
# user sees when nothing matches.
CANDIDATES = [Path.cwd(), *list(Path.cwd().parents)[:2]]
PROJECT_ROOT = None
for candidate in CANDIDATES:
    # ``is_dir`` (not ``exists``) so a stray file named "data" or "scripts"
    # is not mistaken for the project layout.
    if (candidate / "data").is_dir() and (candidate / "scripts").is_dir():
        PROJECT_ROOT = candidate.resolve()
        break

if PROJECT_ROOT is None:
    raise RuntimeError("Could not locate project root from current working directory.")

# Raw API responses are cached as JSON files under <root>/data/raw.
RAW_DIR = PROJECT_ROOT / "data" / "raw"
RAW_DIR.mkdir(parents=True, exist_ok=True)

print(f"Project root: {PROJECT_ROOT}")
print(f"Raw cache: {RAW_DIR}")

Project root: C:\Users\biagu\Documents\GitHub\social_graphs_project
Raw cache: C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw


In [2]:
# --- Where to fetch from ----------------------------------------------------
BASE_URL = "https://eldenring.fanapis.com/api"

# Collections requested from the API, in fetch order.
ENDPOINTS = [
    "items", "weapons", "npcs", "locations",
    "bosses", "armors", "talismans", "incantations",
    "classes",  # Added classes endpoint
]

# --- How to fetch -----------------------------------------------------------
PAGE_SIZE = 100            # rows requested per page
RATE_LIMIT_SECONDS = 0.25  # pause between consecutive page requests

# flip to True when you want to overwrite existing caches
FORCE_REFRESH = False

In [3]:
# One shared HTTP session for every request in this notebook; each call made
# through it carries the project's User-Agent header.
session = requests.Session()
session.headers["User-Agent"] = "MarwanProposal/1.0"

def fetch_endpoint(endpoint: str) -> List[dict]:
    """Download every record of one API collection, page by page.

    Pages are requested starting at page 0 with ``PAGE_SIZE`` rows each.
    Paging stops as soon as a page has no ``data`` rows, or once the number
    of collected rows reaches the ``total`` reported in the response (when
    the response carries one). The function pauses ``RATE_LIMIT_SECONDS``
    between consecutive page requests.

    Args:
        endpoint: Collection name appended to ``BASE_URL`` (e.g. ``"items"``).

    Returns:
        All rows gathered across the pages, in the order received.

    Raises:
        RuntimeError: If any page request returns a status other than 200.
    """
    url = f"{BASE_URL}/{endpoint}"
    collected: List[dict] = []
    page_number = 0
    while True:
        reply = session.get(
            url,
            params={"limit": PAGE_SIZE, "page": page_number},
            timeout=30,
        )
        if reply.status_code != 200:
            raise RuntimeError(
                f"Request failed for {endpoint} page {page_number}: {reply.status_code}"
            )

        body = reply.json()
        batch = body.get("data", [])
        if not batch:
            # An empty page means we have walked past the last one.
            return collected
        collected.extend(batch)

        expected = body.get("total")
        if expected is not None and len(collected) >= int(expected):
            # Everything the API advertises has been received; skip the
            # extra request that would only return an empty page.
            return collected

        page_number += 1
        time.sleep(RATE_LIMIT_SECONDS)

def write_cache(endpoint: str, rows: Sequence[dict]) -> Path:
    """Persist *rows* as ``<endpoint>.json`` in ``RAW_DIR`` unless already cached.

    An existing cache file is left untouched unless ``FORCE_REFRESH`` is
    true; either way a one-line status message is printed.

    Args:
        endpoint: Collection name; used as the cache file's stem.
        rows: Records to serialise as indented JSON.

    Returns:
        Path of the cache file (whether it was written or kept).
    """
    destination = RAW_DIR / f"{endpoint}.json"
    needs_write = FORCE_REFRESH or not destination.exists()

    if needs_write:
        serialized = json.dumps(rows, indent=2)
        destination.write_text(serialized, encoding="utf-8")
        print(f"[saved] {destination} ({len(rows)} rows)")
    else:
        print(f"[cache] {destination} already exists; skipping (set FORCE_REFRESH=True to overwrite).")

    return destination

In [4]:
# Fetch every configured endpoint, cache its rows, and record provenance
# (endpoint name, row count, cache file path) under one shared UTC timestamp.
provenance = []
# ``datetime.utcnow()`` is deprecated (Python 3.12+). Take a timezone-aware UTC
# "now" and drop the tzinfo so the serialised value keeps the same
# "<ISO timestamp>Z" shape as before.
timestamp = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat() + "Z"
for endpoint in ENDPOINTS:
    print(f"[fetch] {endpoint}")
    # NOTE(review): rows are always downloaded, even when write_cache() keeps
    # an existing file; the recorded row count therefore describes this fetch,
    # not necessarily the contents of the cached file.
    rows = fetch_endpoint(endpoint)
    cache_path = write_cache(endpoint, rows)
    provenance.append({"endpoint": endpoint, "rows": len(rows), "cache_path": str(cache_path)})

# Write the run summary next to the cached endpoint files.
summary_path = RAW_DIR / "provenance.json"
summary_payload = {"fetched_at": timestamp, "endpoints": provenance}
summary_path.write_text(json.dumps(summary_payload, indent=2), encoding="utf-8")
print(f"[done] wrote provenance to {summary_path}")


  timestamp = dt.datetime.utcnow().isoformat() + "Z"


[fetch] items
[cache] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\items.json already exists; skipping (set FORCE_REFRESH=True to overwrite).
[fetch] weapons
[cache] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\weapons.json already exists; skipping (set FORCE_REFRESH=True to overwrite).
[fetch] npcs
[cache] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\npcs.json already exists; skipping (set FORCE_REFRESH=True to overwrite).
[fetch] locations
[cache] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\locations.json already exists; skipping (set FORCE_REFRESH=True to overwrite).
[fetch] bosses
[cache] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\bosses.json already exists; skipping (set FORCE_REFRESH=True to overwrite).
[fetch] armors
[saved] C:\Users\biagu\Documents\GitHub\social_graphs_project\data\raw\armors.json (568 rows)
[fetch] talismans
[saved] C:\Users\biagu\Documents\GitHub\social_graphs_project

In [None]:
import pandas as pd
from IPython.display import display

# Tabulate the provenance records (one dict per endpoint with the keys
# "endpoint", "rows" and "cache_path") and render the table in the notebook.
df = pd.DataFrame(provenance)
display(df)

Unnamed: 0,endpoint,rows,cache_path
0,items,462,C:\Users\biagu\Documents\GitHub\social_graphs_...
1,weapons,307,C:\Users\biagu\Documents\GitHub\social_graphs_...
2,npcs,55,C:\Users\biagu\Documents\GitHub\social_graphs_...
3,locations,177,C:\Users\biagu\Documents\GitHub\social_graphs_...
4,bosses,106,C:\Users\biagu\Documents\GitHub\social_graphs_...
5,armors,568,C:\Users\biagu\Documents\GitHub\social_graphs_...
6,talismans,87,C:\Users\biagu\Documents\GitHub\social_graphs_...
7,incantations,98,C:\Users\biagu\Documents\GitHub\social_graphs_...
8,classes,14,C:\Users\biagu\Documents\GitHub\social_graphs_...


: 