In [1]:
# Install rdflib (no version pin) via the %pip magic so the imports below resolve.
%pip install rdflib

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
# File locations used by the pipeline below.
# JSON relation entries (head / type / tail) read by add_sports_locations.
json_file_path = "triplets/merged.json"
# Turtle input parsed by process_data.
ttl_file_path = "csv/data.ttl"
# Turtle written by process_data and then loaded again by add_sports_locations.
output_ttl_file = "og24_pre_data.ttl"
# Final Turtle file written by add_sports_locations.
updated_ttl_file = "og24_data.ttl"

In [18]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, SKOS, XSD
import json

# Define namespaces
# SE: schema terms used as classes and properties (e.g. SE.Athlete, SE.Venue, SE.sport).
SE = Namespace("http://example.org/olympics2024/schema#")
# EX: predicates attached to result records (e.g. EX.Medal_type, EX.Name).
EX = Namespace("http://example.org/vocab/")
# SKOS_NS: SKOS core namespace; bound to the "skos" prefix on the output graph.
SKOS_NS = Namespace("http://www.w3.org/2004/02/skos/core#")


In [21]:
# Initialize graphs
# data_graph is the shared input graph that process_data parses its Turtle
# input into.
data_graph = Graph()
data_graph.bind("se", SE)
data_graph.bind("ex", EX)

# Map disciplines to SKOS concepts from se_skos.ttl
skos_graph = Graph()

try:
    skos_graph.parse("se_skos.ttl", format="turtle")
    print("Successfully loaded SKOS file.")
except Exception as e:
    print(f"Error loading SKOS file: {e}")
    # Re-raise instead of calling exit(1): inside IPython/Jupyter, `exit` is a
    # session-exit request rather than a way to fail the current cell, and the
    # rest of the notebook cannot work without the SKOS concepts anyway.
    raise

# Lookup table: prefLabel (converted with Literal.toPython()) -> concept URI.
# Concepts without a (non-empty) prefLabel are left out.
discipline_to_skos = {}
for concept in skos_graph.subjects(predicate=RDF.type, object=SKOS.Concept):
    label = skos_graph.value(concept, SKOS.prefLabel)
    if label:
        discipline_to_skos[label.toPython()] = concept

def process_data(input_file, output_file):
    """Transform flat result records from a Turtle file into linked RDF resources.

    Parses ``input_file`` into the module-level ``data_graph``. For every
    subject that has an ``EX.Medal_type`` value it mints a typed, labelled
    resource for the medal type, athlete, event and country, resolves the
    discipline to a SKOS concept (reusing one from ``discipline_to_skos`` when
    the label is known, minting a new ``skos:Concept`` otherwise), and links
    the result to each of those resources. The new graph is serialized to
    ``output_file`` as Turtle.

    Args:
        input_file: Source of the input Turtle data, passed to ``Graph.parse``.
        output_file: Destination passed to ``Graph.serialize``.

    Returns:
        None. Load and save errors are printed rather than raised; a load
        error aborts the transformation before anything is written.
    """
    try:
        # Load the input data
        data_graph.parse(input_file, format="turtle")
        print("Successfully loaded input TTL file.")
    except Exception as e:
        print(f"Error loading input TTL file: {e}")
        return

    # Graph to hold the output
    output_graph = Graph()
    output_graph.bind("se", SE)
    output_graph.bind("ex", EX)
    output_graph.bind("skos", SKOS_NS)

    # One row per attribute that becomes its own resource:
    # (predicate on the result, URI path segment, class of the minted resource)
    entity_specs = (
        (EX.Medal_type, "medal", SE.MedalType),
        (EX.Name, "athlete", SE.Athlete),
        (EX.Event, "event", SE.Event),
        (EX.Country, "country", SE.Country),
    )

    for result in data_graph.subjects(predicate=EX.Medal_type):
        print(f"Processing result: {result}")

        # Mint, type and label each entity, then link the result to it.
        for predicate, segment, rdf_class in entity_specs:
            value = data_graph.value(result, predicate)
            if not value:
                continue
            entity_uri = URIRef(f"http://example.org/{segment}/{value.replace(' ', '_')}")
            output_graph.add((entity_uri, RDF.type, rdf_class))
            output_graph.add((entity_uri, RDFS.label, Literal(value, datatype=XSD.string)))
            output_graph.add((result, predicate, entity_uri))

        # Link discipline to SKOS concept
        discipline = data_graph.value(result, EX.Discipline)
        if discipline:
            # `discipline` is an rdflib term, while the keys of
            # discipline_to_skos come from Literal.toPython() (plain str for
            # string labels). rdflib Literals do not compare equal to plain
            # strings, so probing the dict with the Literal itself misses.
            # Look up by the plain text first and keep the original
            # Literal-keyed lookup as a fallback.
            discipline_uri = discipline_to_skos.get(str(discipline))
            if discipline_uri is None:
                discipline_uri = discipline_to_skos.get(discipline)
            if discipline_uri is None:
                # Unknown discipline: mint a local concept for it.
                discipline_uri = URIRef(f"http://example.org/discipline/{discipline.replace(' ', '_')}")
                output_graph.add((discipline_uri, RDF.type, SKOS.Concept))
                output_graph.add((discipline_uri, SKOS.prefLabel, Literal(discipline, datatype=XSD.string)))
            output_graph.add((result, EX.Discipline, discipline_uri))

    # Serialize the output graph
    try:
        output_graph.serialize(destination=output_file, format="turtle")
        print(f"Transformed data saved to {output_file}")
    except Exception as e:
        print(f"Error saving output TTL file: {e}")

# Process the data
process_data(ttl_file_path, output_ttl_file)





Successfully loaded SKOS file.
Successfully loaded input TTL file.
Processing result: http://example.org/results/0
Processing result: http://example.org/results/1
Processing result: http://example.org/results/7
Processing result: http://example.org/results/10
Processing result: http://example.org/results/11
Processing result: http://example.org/results/16
Processing result: http://example.org/results/21
Processing result: http://example.org/results/25
Processing result: http://example.org/results/26
Processing result: http://example.org/results/27
Processing result: http://example.org/results/29
Processing result: http://example.org/results/30
Processing result: http://example.org/results/31
Processing result: http://example.org/results/32
Processing result: http://example.org/results/34
Processing result: http://example.org/results/37
Processing result: http://example.org/results/39
Processing result: http://example.org/results/40
Processing result: http://example.org/results/43
Proce

In [24]:
# Module-level graph that add_sports_locations parses into, extends with the
# JSON relations, and serializes.
g = Graph()
g.bind("se", SE)
g.bind("ex", EX)

def add_sports_locations(json_file, rdf_file, output_file=None):
    """Add sport / location relations from a JSON file to an existing RDF graph.

    Loads ``rdf_file`` (Turtle) into the module-level graph ``g``, reads a list
    of ``{"head", "type", "tail"}`` entries from ``json_file``, converts each
    entry into triples and serializes the combined graph as Turtle.

    Relation handling:
      * ``"sport"``: head is typed ``SE.Venue`` and linked to tail via
        ``SE.sport``; tail is typed ``SE.Sport`` and labelled.
      * ``"location"``: head is linked to tail via ``SE.location``; tail is
        typed ``SE.Location`` and labelled.
      * anything else: a generic ``head <relation> tail`` triple.

    Entries whose head or tail (after ``str.capitalize``) starts with "Para",
    "Wheelchair", "Sitting" or "Blind" are skipped.

    Args:
        json_file: Path of the JSON file holding the relation entries.
        rdf_file: Turtle source to load before adding the new triples.
        output_file: Destination for the serialized graph. Defaults to the
            module-level ``updated_ttl_file`` when omitted.

    Returns:
        None. Load and save errors are printed rather than raised; a load
        error aborts before anything is written.
    """
    destination = updated_ttl_file if output_file is None else output_file

    # Load existing RDF graph
    try:
        g.parse(rdf_file, format="turtle")
        print("Successfully loaded RDF file.")
    except Exception as e:
        print(f"Error loading RDF file: {e}")
        return

    # Load JSON data
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(json_file, "r", encoding="utf-8") as file:
            data = json.load(file)
            print("Successfully loaded JSON file.")
    except Exception as e:
        print(f"Error loading JSON file: {e}")
        return

    # Prefixes (matched after capitalize()) that mark entries to leave out.
    skipped_prefixes = ("Para", "Wheelchair", "Sitting", "Blind")

    # Process JSON and add data to RDF graph
    for entry in data:
        # capitalize() upper-cases the first character and lower-cases the
        # rest, so URIs and labels below use that normalized form.
        head = entry["head"].capitalize()
        relation = entry["type"]
        tail = entry["tail"].capitalize()

        # Skip entries related to paralympics
        if head.startswith(skipped_prefixes) or tail.startswith(skipped_prefixes):
            continue

        # Create URIs for the two ends of the relation
        head_uri = URIRef(f"http://example.org/{head.replace(' ', '_')}")
        tail_uri = URIRef(f"http://example.org/{tail.replace(' ', '_')}")

        # Handle specific relations
        if relation == "sport":  # head is a venue, tail is the sport linked to it
            g.add((head_uri, RDF.type, SE.Venue))
            g.add((head_uri, SE.sport, tail_uri))
            g.add((tail_uri, RDF.type, SE.Sport))
            g.add((tail_uri, RDFS.label, Literal(tail, datatype=XSD.string)))
        elif relation == "location":  # tail is the location of head
            g.add((head_uri, SE.location, tail_uri))
            g.add((tail_uri, RDF.type, SE.Location))
            g.add((tail_uri, RDFS.label, Literal(tail, datatype=XSD.string)))
        else:
            # Add other relationships as generic triples
            relation_uri = URIRef(f"http://example.org/{relation.replace(' ', '_')}")
            g.add((head_uri, relation_uri, tail_uri))

    # Serialize the graph to a new RDF file
    try:
        g.serialize(destination=destination, format="turtle")
        print(f"Updated RDF data saved to {destination}")
    except Exception as e:
        print(f"Error saving updated RDF file: {e}")


# Add sports locations
add_sports_locations(json_file_path, output_ttl_file)

Successfully loaded RDF file.
Successfully loaded JSON file.
Updated RDF data saved to og24_data.ttl
