In [1]:
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal, RDF, RDFS, FOAF, OWL, XSD

In [2]:
# Shared vocabularies (FOAF ships with rdflib; the others are declared by IRI).
foaf = FOAF
schema = Namespace("http://schema.org/")
dcterms = Namespace("http://purl.org/dc/terms/")
prov = Namespace("http://www.w3.org/ns/prov#")

# Base namespace for resources created in this notebook (example.org placeholder).
ns = Namespace("http://example.org/ns/")

# External datasets whose identifiers are linked to from this graph.
wd = Namespace("http://www.wikidata.org/entity/")
intavia = Namespace("https://bionet.intavia.eu/person/")
colonial = Namespace("https://data.colonialcollections.nl/entity/")

# Empty graph that collects every triple produced below.
g = Graph()

In [3]:
# Register a prefix for every namespace on the graph, in a fixed order.
prefix_bindings = (
    ("ns", ns),
    ("foaf", foaf),
    ("schema", schema),
    ("dcterms", dcterms),
    ("prov", prov),
    ("wd", wd),
    ("intavia", intavia),
    ("colonial", colonial),
)
for prefix, namespace in prefix_bindings:
    g.bind(prefix, namespace)

In [5]:
# Load CSV Data
# The three input tables live in one folder. Keeping that folder in a single
# constant means moving the data to another machine is a one-line change
# instead of three edits to near-identical absolute paths.
DATA_DIR = "/Volumes/Extreme SSD/Python_Projects/NIAA Project/FINAL"

bio_df = pd.read_csv(f"{DATA_DIR}/bio_FINAL.csv")          # one row per person
spouses_df = pd.read_csv(f"{DATA_DIR}/spouses_FINAL.csv")  # one row per spouse entry
events_df = pd.read_csv(f"{DATA_DIR}/event_FINAL.csv")     # one row per event entry


In [6]:
# External identifier lookup, keyed by the person "id" value.
# These entries are placeholders: replace them with your actual data.
# Each entry may carry a "wikidata", "intavia" and/or "colonial" identifier.
external_mapping = {}
external_mapping[0] = dict(wikidata="Q12345", intavia="vb-001", colonial="wm-001")
external_mapping[1] = dict(wikidata="Q67890", intavia="vb-002", colonial="wm-002")

In [7]:
# Process bio_FINAL.csv
# Every row becomes a foaf:Person at ns:person/<id>. Rows whose id appears in
# external_mapping additionally get owl:sameAs links to the external records.

# Which namespace each external_mapping key is resolved against.
external_namespaces = {"wikidata": wd, "intavia": intavia, "colonial": colonial}

for _, row in bio_df.iterrows():
    person_id = row["id"]
    person_uri = ns[f"person/{person_id}"]

    # Add basic person data
    g.add((person_uri, RDF.type, foaf.Person))
    # pandas reads an empty CSV cell as NaN; emitting Literal(NaN) would store a
    # numeric "NaN" as the person's name, so a missing name is simply left out.
    name = row["nama orang"]
    if pd.notna(name):
        g.add((person_uri, foaf.name, Literal(name)))

    # Link to external sources (if mapped). All three sources are handled the
    # same way: namespace + identifier -> URIRef.
    mapping = external_mapping.get(person_id, {})
    for source, namespace in external_namespaces.items():
        if source in mapping:
            g.add((person_uri, OWL.sameAs, namespace[str(mapping[source])]))


In [8]:
# Process spouses_FINAL.csv
# Every row becomes its own foaf:Person (the spouse), linked from the main
# person at ns:person/<id> through schema:spouse.

# CSV column -> predicate for the optional literal properties of a spouse.
spouse_properties = (
    ("spouse_name", foaf.name),
    ("birth_date", ns.birthDate),
    ("death_date", ns.deathDate),
)

for row_label, row in spouses_df.iterrows():
    person_id = row["id"]
    # The row's index label keeps the URI unique when a person has several spouses.
    spouse_uri = ns[f"spouse/{person_id}_{row_label}"]

    # Create spouse entity
    g.add((spouse_uri, RDF.type, foaf.Person))
    for column, predicate in spouse_properties:
        value = row[column]
        # Empty CSV cells arrive as NaN; skip them rather than writing a
        # numeric "NaN" literal as a name or date.
        if pd.notna(value):
            g.add((spouse_uri, predicate, Literal(value)))

    # Link to main person
    person_uri = ns[f"person/{person_id}"]
    g.add((person_uri, schema.spouse, spouse_uri))


In [9]:
# Process event_FINAL.csv
# Every row becomes a prov:Activity that is associated with the person at
# ns:person/<id>.

# CSV column -> predicate for the optional literal properties of an event.
# NOTE(review): prov:startedAtTime is defined for xsd:dateTime values, while
# "werkperiode" is stored here exactly as it appears in the CSV -- confirm
# whether it should be parsed or moved to a different predicate.
event_properties = (
    ("werkgebied en -soort", dcterms.description),
    ("werkperiode", prov.startedAtTime),
    ("organ.", dcterms.coverage),
)

for row_label, row in events_df.iterrows():
    person_id = row["id"]
    # The row's index label keeps the URI unique when a person has several events.
    event_uri = ns[f"event/{person_id}_{row_label}"]

    # Create event entity
    g.add((event_uri, RDF.type, prov.Activity))
    for column, predicate in event_properties:
        value = row[column]
        # Empty CSV cells arrive as NaN; skip them rather than writing a
        # numeric "NaN" literal.
        if pd.notna(value):
            g.add((event_uri, predicate, Literal(value)))

    # Link to main person. PROV-O defines prov:wasAssociatedWith with the
    # activity as subject and the agent as object, so the event points at the
    # person (not the other way round).
    person_uri = ns[f"person/{person_id}"]
    g.add((event_uri, prov.wasAssociatedWith, person_uri))

In [10]:
# Write the populated graph to disk in Turtle syntax.
output_path = "linked_data.ttl"
g.serialize(destination=output_path, format="turtle")

<Graph identifier=N7b1357f00ec34e73a52691fca5410657 (<class 'rdflib.graph.Graph'>)>