# FCKG Simple Demo Notebook

This notebook intentionally stays minimal:

1. Load core RDF data (`movieontology.ttl` + `data/instances/*.ttl`)
2. Run a few local SPARQL queries
3. Show one API example each for TMDB, OMDb (using IMDb ID), and Wikidata


In [20]:
from pathlib import Path
import os

import pandas as pd
import requests
from rdflib import Graph

# Match enrichment pipeline behavior: load keys from .env into environment.
# python-dotenv is treated as optional: if it is not installed, the keys are
# read from the existing process environment only.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

if load_dotenv is not None:
    try:
        load_dotenv()  # loads .env
    except Exception as exc:
        # Stay best-effort (the notebook can still run with exported keys),
        # but report the problem instead of silently swallowing it.
        print(f"Warning: could not load .env file: {exc}")

# Empty string means "not configured"; the API cells check this before calling out.
TMDB_API_KEY = os.getenv("TMDB_API_KEY", "")
OMDB_API_KEY = os.getenv("OMDB_API_KEY", "")

# Only report presence, never the key values themselves.
print(f"TMDB key configured: {bool(TMDB_API_KEY)}")
print(f"OMDb key configured: {bool(OMDB_API_KEY)}")


def normalize_tmdb_id(value):
    """Normalize TMDB IDs so numeric values never keep a trailing '.0'.

    Args:
        value: Raw ID in any form (str, int, float, None, NaN, ...).

    Returns:
        None when the value is missing (None, NaN, empty/whitespace, the text
        "nan", or nothing left after dropping a trailing ".0"). Otherwise the
        stripped text with one trailing ".0" removed; if what remains consists
        only of decimal digits it is canonicalized through ``int`` (which also
        drops leading zeros).
    """
    if value is None:
        return None

    try:
        if pd.isna(value):
            return None
    except Exception:
        # pd.isna may not yield a single truth value (e.g. list-like input);
        # in that case fall through and treat the value as text.
        pass

    raw = str(value).strip()
    if not raw or raw.lower() == "nan":
        return None

    if raw.endswith(".0"):
        raw = raw[:-2]
    if not raw:
        # Input was just ".0": nothing meaningful remains.
        return None

    # isdecimal() (not isdigit()) so that only characters int() can actually
    # parse qualify; isdigit() also accepts e.g. superscript digits, on which
    # int() raises ValueError.
    return str(int(raw)) if raw.isdecimal() else raw


TMDB key configured: True
OMDb key configured: True


In [21]:
# Build a single in-memory graph: the ontology first, then every canonical
# instance file (sorted so the load order is stable between runs).
onto_path = Path("movieontology.ttl")
instance_paths = sorted(Path("data/instances").glob("*.ttl"))

g = Graph()
for p in (onto_path, *instance_paths):
    g.parse(p, format="turtle")

# Summary of what was loaded
summary_lines = (
    f"Loaded ontology: {onto_path}",
    f"Loaded instance files: {len(instance_paths)}",
    f"Total triples: {len(g):,}",
)
print("\n".join(summary_lines))


Loaded ontology: movieontology.ttl
Loaded instance files: 6
Total triples: 179,924


## Local SPARQL Examples

In [22]:
# Query 1: sample Best Picture nominations (with nominee person when present).
# The nominee name prefers msh:fullName and falls back to rdfs:label.
q_best_picture = """
PREFIX msh: <http://example.org/ontologies/MovieSHACL3#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?filmTitle ?yearFilm ?winner ?personName WHERE {
  ?nom a msh:Nomination ;
       msh:hasCategory ?cat ;
       msh:hasFilm ?film ;
       msh:yearFilm ?yearFilm ;
       msh:winner ?winner .
  ?cat rdfs:label "BEST PICTURE" .
  ?film msh:title ?filmTitle .

  OPTIONAL {
    ?nom msh:hasNominee ?person .
    OPTIONAL { ?person msh:fullName ?personFullName . }
    OPTIONAL { ?person rdfs:label ?personLabel . }
    BIND(COALESCE(?personFullName, ?personLabel) AS ?personName)
  }
}
ORDER BY DESC(?yearFilm) ?filmTitle ?personName
LIMIT 20
"""

# One record per result row; "person" stays None when no nominee name is bound.
rows = [
    {
        "film": str(r.filmTitle),
        "year": int(str(r.yearFilm)),
        "winner": str(r.winner),
        "person": str(r.personName) if getattr(r, "personName", None) else None,
    }
    for r in g.query(q_best_picture)
]

pd.DataFrame(rows)


Unnamed: 0,film,year,winner,person
0,Bugonia,2025,False,Andrew Lowe
1,Bugonia,2025,False,Ed Guiney
2,Bugonia,2025,False,Emma Stone
3,Bugonia,2025,False,Lars Knudsen
4,Bugonia,2025,False,Yorgos Lanthimos
5,F1,2025,False,Brad Pitt
6,F1,2025,False,Chad Oman
7,F1,2025,False,Dede Gardner
8,F1,2025,False,Jeremy Kleiner
9,F1,2025,False,Jerry Bruckheimer


In [29]:
# Query 2: sample films with external IDs (IMDb/TMDB/Wikidata when present).
# The FILTER keeps only films that carry at least one of the three IDs.
q_external_ids = """
PREFIX msh: <http://example.org/ontologies/MovieSHACL3#>

SELECT DISTINCT ?filmTitle ?releaseYear ?imdbVal ?tmdbVal ?wikidataVal WHERE {
  ?film msh:title ?filmTitle .
  OPTIONAL { ?film msh:releaseYear ?releaseYear . }
  OPTIONAL { ?film msh:imdbId ?imdbVal . }
  OPTIONAL { ?film msh:tmdbId ?tmdbVal . }
  OPTIONAL { ?film msh:wikidataId ?wikidataVal . }

  FILTER(BOUND(?imdbVal) || BOUND(?tmdbVal) || BOUND(?wikidataVal))
}
ORDER BY ?filmTitle
LIMIT 30
"""


def _text_or_none(term):
    """Return str(term) for a truthy query term, otherwise None."""
    return str(term) if term else None


id_rows = [
    {
        "film": str(r.filmTitle),
        "year": int(str(r.releaseYear)) if r.releaseYear else None,
        "imdb_id": _text_or_none(r.imdbVal),
        "tmdb_id": normalize_tmdb_id(_text_or_none(r.tmdbVal)),
        "wikidata_id": _text_or_none(r.wikidataVal),
    }
    for r in g.query(q_external_ids)
]

# id_df is reused later as the fallback source for the API sample film.
id_df = pd.DataFrame(id_rows)
id_df.head(15)


Unnamed: 0,film,year,imdb_id,tmdb_id,wikidata_id
0,"$1,000 a Minute",1935,tt0026027,218388,Q161259
1,'38',1986,tt0090554,42016,Q228659
2,'Crocodile' Dundee,1986,tt0090555,9671,Q615254
3,'Round Midnight,1986,tt0090557,14670,Q576371
4,(A) Torzija [(A) Torsion],2003,tt0350476,244987,Q13080972
5,...And Justice for All,1979,tt0078718,17443,Q426517
6,10,1979,tt0078721,9051,Q184591
7,102 Dalmatians,2000,tt0211181,10481,Q165847
8,12,2007,tt0488478,20714,Q175014
9,12 Angry Men,1957,tt0050083,389,Q2345


In [30]:
# Query 3: nomination counts by film year (20 most recent years)
q_nom_by_year = """
PREFIX msh: <http://example.org/ontologies/MovieSHACL3#>

SELECT ?yearFilm (COUNT(?nom) AS ?nomCount) WHERE {
  ?nom a msh:Nomination ;
       msh:yearFilm ?yearFilm .
}
GROUP BY ?yearFilm
ORDER BY DESC(?yearFilm)
LIMIT 20
"""

# Convert each result row's terms to plain ints via their string form.
year_rows = []
for r in g.query(q_nom_by_year):
    film_year = int(str(r.yearFilm))
    nomination_total = int(str(r.nomCount))
    year_rows.append({"year": film_year, "nominations": nomination_total})

pd.DataFrame(year_rows)


Unnamed: 0,year,nominations
0,2025,219
1,2024,238
2,2023,222
3,2022,238
4,2021,228
5,2020,234
6,2019,212
7,2018,230
8,2017,219
9,2016,209


## API Examples (Single Title)

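Look up a fixed demo title in the local graph and use its IDs for the API lookups. If that title has no IMDb and TMDB IDs, fall back to the first row of the external-ID sample table that has both.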

In [39]:
# Prefer a fixed demo title for API examples
preferred_title = "Tár"


def _sparql_string_literal(text):
    """Return `text` as a double-quoted SPARQL string literal.

    Backslashes, double quotes and line breaks are escaped so that an
    arbitrary title cannot terminate the literal or break the query.
    """
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'


def _none_if_missing(value):
    """Map pandas missing markers (None/NaN) to None; pass other values through."""
    return None if pd.isna(value) else value


# Case-insensitive exact match on the title; all external IDs are optional here
# and checked in Python below.
q_preferred = f"""
PREFIX msh: <http://example.org/ontologies/MovieSHACL3#>
SELECT ?filmTitle ?year ?imdb ?tmdb ?wikidata WHERE {{
  ?film a msh:Film ; msh:title ?filmTitle .
  FILTER(LCASE(STR(?filmTitle)) = {_sparql_string_literal(preferred_title.lower())})
  OPTIONAL {{ ?film msh:releaseYear ?year . }}
  OPTIONAL {{ ?film msh:imdbId ?imdb . }}
  OPTIONAL {{ ?film msh:tmdbId ?tmdb . }}
  OPTIONAL {{ ?film msh:wikidataId ?wikidata . }}
}}
LIMIT 5
"""

# Take the first matching row that has both an IMDb and a TMDB ID.
sample = None
for r in g.query(q_preferred):
    imdb_id = str(r.imdb) if getattr(r, "imdb", None) else None
    tmdb_id = normalize_tmdb_id(str(r.tmdb) if getattr(r, "tmdb", None) else None)
    if imdb_id and tmdb_id:
        sample = {
            "film": str(r.filmTitle),
            "year": int(str(r.year)) if getattr(r, "year", None) else None,
            "imdb_id": imdb_id,
            "tmdb_id": tmdb_id,
            "wikidata_id": str(r.wikidata) if getattr(r, "wikidata", None) else None,
        }
        break

# Fallback: first row with both IMDb + TMDB IDs from the sample table
if sample is None:
    for _, r in id_df.iterrows():
        if pd.notna(r.get("imdb_id")) and pd.notna(r.get("tmdb_id")):
            # Values read back from the DataFrame can be NaN or float-typed
            # (e.g. 1935.0); normalize them to the same shapes the primary
            # path produces (int year, None for missing).
            fallback_year = _none_if_missing(r.get("year"))
            sample = {
                "film": r.get("film"),
                "year": int(fallback_year) if fallback_year is not None else None,
                "imdb_id": r.get("imdb_id"),
                "tmdb_id": normalize_tmdb_id(r.get("tmdb_id")),
                "wikidata_id": _none_if_missing(r.get("wikidata_id")),
            }
            break

if sample is None:
    raise RuntimeError("No sample film with external IDs found in graph.")

# Unpack into the names the API cells below read.
sample_film = sample["film"]
sample_year = sample.get("year")
sample_imdb = sample.get("imdb_id")
sample_tmdb = sample.get("tmdb_id")
sample_wikidata = sample.get("wikidata_id")

print("Sample film:", sample_film)
print("Year:", sample_year)
print("IMDb ID:", sample_imdb)
print("TMDB ID:", sample_tmdb)
print("Wikidata ID:", sample_wikidata)


Sample film: Tár
Year: 2022
IMDb ID: tt14444726
TMDB ID: 817758
Wikidata ID: Q108649516


In [40]:
# TMDB example: fetch movie details for the sample film by its TMDB ID.
if not sample_tmdb:
    raise RuntimeError("TMDB call aborted: sample film has no TMDB ID.")
if not TMDB_API_KEY:
    raise RuntimeError(
        "TMDB_API_KEY is missing. Add it to your .env file."
    )

try:
    tmdb_resp = requests.get(
        f"https://api.themoviedb.org/3/movie/{sample_tmdb}",
        params={"api_key": TMDB_API_KEY},
        timeout=20,
    )
except requests.RequestException as exc:
    # The exception text may contain the request URL, whose query string
    # carries the API key. Redact it and suppress the chained exception so the
    # key does not end up in the saved notebook output.
    safe_msg = str(exc).replace(TMDB_API_KEY, "<redacted>")
    raise RuntimeError(f"TMDB request failed: {safe_msg}") from None

if tmdb_resp.status_code != 200:
    # Keep the error short, and redact the key in case the body echoes it.
    snippet = tmdb_resp.text[:300].replace(TMDB_API_KEY, "<redacted>")
    raise RuntimeError(f"TMDB request failed ({tmdb_resp.status_code}): {snippet}")

try:
    d = tmdb_resp.json()
except ValueError as exc:
    # A 200 response with a non-JSON body would otherwise surface as a bare
    # decode error with no indication of which API produced it.
    raise RuntimeError("TMDB returned a response that is not valid JSON.") from exc

# Show only a handful of fields from the response; absent keys become None.
tmdb_out = {
    "title": d.get("title"),
    "release_date": d.get("release_date"),
    "runtime": d.get("runtime"),
    "vote_average": d.get("vote_average"),
    "vote_count": d.get("vote_count"),
    "budget": d.get("budget"),
    "revenue": d.get("revenue"),
}
pd.DataFrame([tmdb_out])


Unnamed: 0,title,release_date,runtime,vote_average,vote_count,budget,revenue
0,TÁR,2022-09-23,158,7.1,1477,35000000,29048571


In [41]:
# OMDb example (queried with IMDb ID)
if not sample_imdb:
    raise RuntimeError("OMDb call aborted: sample film has no IMDb ID.")
if not OMDB_API_KEY:
    raise RuntimeError(
        "OMDB_API_KEY is missing. Add it to your .env file."
    )

try:
    omdb_resp = requests.get(
        "https://www.omdbapi.com/",
        params={"i": sample_imdb, "apikey": OMDB_API_KEY},
        timeout=20,
    )
except requests.RequestException as exc:
    # The exception text may contain the request URL, whose query string
    # carries the API key. Redact it and suppress the chained exception so the
    # key does not end up in the saved notebook output.
    safe_msg = str(exc).replace(OMDB_API_KEY, "<redacted>")
    raise RuntimeError(f"OMDb request failed: {safe_msg}") from None

if omdb_resp.status_code != 200:
    # Keep the error short, and redact the key in case the body echoes it.
    snippet = omdb_resp.text[:300].replace(OMDB_API_KEY, "<redacted>")
    raise RuntimeError(f"OMDb request failed ({omdb_resp.status_code}): {snippet}")

try:
    d = omdb_resp.json()
except ValueError as exc:
    # A 200 response with a non-JSON body would otherwise surface as a bare
    # decode error with no indication of which API produced it.
    raise RuntimeError("OMDb returned a response that is not valid JSON.") from exc

# This cell treats any "Response" value other than "True" as an API-level error.
if d.get("Response") != "True":
    raise RuntimeError(f"OMDb API error: {d.get('Error', 'unknown error')}")

# Show only a handful of fields from the response; absent keys become None.
omdb_out = {
    "Title": d.get("Title"),
    "Year": d.get("Year"),
    "Rated": d.get("Rated"),
    "Runtime": d.get("Runtime"),
    "Genre": d.get("Genre"),
    "Director": d.get("Director"),
    "imdbRating": d.get("imdbRating"),
    "imdbVotes": d.get("imdbVotes"),
}
pd.DataFrame([omdb_out])


Unnamed: 0,Title,Year,Rated,Runtime,Genre,Director,imdbRating,imdbVotes
0,Tár,2022,R,158 min,"Drama, Music",Todd Field,7.4,108148


In [53]:
# Wikidata example (public SPARQL endpoint; uses IMDb ID)
if not sample_imdb:
    raise RuntimeError("Wikidata query aborted: sample film has no IMDb ID.")

# The __IMDB_ID__ placeholder is substituted below; plain replace() is used
# instead of an f-string so the SPARQL braces need no escaping.
wikidata_query_template = """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX bd: <http://www.bigdata.com/rdf#>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?item ?itemLabel ?publicationDate ?countryLabel
       (GROUP_CONCAT(DISTINCT ?genreLabel; separator=", ") AS ?genres)
       (GROUP_CONCAT(DISTINCT ?bechdelOutcomeLabel; separator=", ") AS ?bechdelOutcomes)
WHERE {
  ?item wdt:P345 "__IMDB_ID__" .
  OPTIONAL { ?item wdt:P577 ?publicationDate . }
  OPTIONAL { ?item wdt:P495 ?country . }

  OPTIONAL {
    ?item wdt:P136 ?genre .
    ?genre rdfs:label ?genreLabel .
    FILTER(LANG(?genreLabel) = "en")
  }

  OPTIONAL {
    ?item p:P5021 ?bechdelStmt .
    ?bechdelStmt ps:P5021 wd:Q4165246 .
    OPTIONAL {
      ?bechdelStmt pq:P9259 ?bechdelOutcome .
      ?bechdelOutcome rdfs:label ?bechdelOutcomeLabel .
      FILTER(LANG(?bechdelOutcomeLabel) = "en")
    }
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?item ?itemLabel ?publicationDate ?countryLabel
LIMIT 1
"""
wikidata_query = wikidata_query_template.replace("__IMDB_ID__", str(sample_imdb))

try:
    wd_resp = requests.get(
        "https://query.wikidata.org/sparql",
        params={"query": wikidata_query, "format": "json"},
        headers={"User-Agent": "fckg-simple-notebook/1.0"},
        timeout=30,
    )
except requests.RequestException as exc:
    raise RuntimeError(f"Wikidata request failed: {exc}") from exc

if wd_resp.status_code != 200:
    raise RuntimeError(
        f"Wikidata request failed ({wd_resp.status_code}): {wd_resp.text[:300]}"
    )

# Output column name -> SPARQL result variable. Unbound variables are simply
# absent from a binding, so they come out as None.
wd_columns = {
    "item": "item",
    "label": "itemLabel",
    "publication_date": "publicationDate",
    "country": "countryLabel",
    "genre": "genres",
    "bechdel_outcome": "bechdelOutcomes",
}

bindings = wd_resp.json().get("results", {}).get("bindings", [])
parsed = [
    {column: binding.get(variable, {}).get("value") for column, variable in wd_columns.items()}
    for binding in bindings
]

if not parsed:
    raise RuntimeError(f"Wikidata returned no item for IMDb ID {sample_imdb}.")

pd.DataFrame(parsed)


Unnamed: 0,item,label,publication_date,country,genre,bechdel_outcome
0,http://www.wikidata.org/entity/Q108649516,Tár,2022-09-01T00:00:00Z,United States,"musical film, drama film, psychological film, ...",passes
