In [1]:
# Install rdflib into the running kernel's environment (%pip, unlike !pip, targets the kernel).
# NOTE(review): the version is unpinned; consider pinning it (rdflib==<version>) so re-runs are reproducible.
%pip install rdflib

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# Relative paths to data files used by this notebook.
# NOTE(review): neither name is referenced by the cells visible here; the transform cell
# defines its own input_ttl_file with the same "csv/data.ttl" value — confirm whether
# these are still needed before relying on or removing them.
json_file_path = "triplets/merged.json"
ttl_file_path = "csv/data.ttl"

In [2]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, SKOS, XSD

# Namespaces: SE is used for the rdf:type classes written to the output,
# EX for the predicates read from (and written back to) result resources.
SE = Namespace("http://example.org/olympics2024/schema#")
EX = Namespace("http://example.org/vocab/")
SKOS_NS = Namespace("http://www.w3.org/2004/02/skos/core#")

# Module-level graph that process_data() parses its input file into.
data_graph = Graph()
data_graph.bind("se", SE)
data_graph.bind("ex", EX)

# Load the SKOS vocabulary and index its concepts by preferred label, so that
# a discipline name can be resolved to its skos:Concept URI.
skos_graph = Graph()
skos_graph.parse("se_skos.ttl", format="turtle")

discipline_to_skos = {}
for concept in skos_graph.subjects(RDF.type, SKOS.Concept):
    pref_label = skos_graph.value(subject=concept, predicate=SKOS.prefLabel)
    if not pref_label:
        # Concept without a usable skos:prefLabel: nothing to index it under.
        continue
    # Keys are plain Python values (toPython()), not rdflib Literals.
    discipline_to_skos[pref_label.toPython()] = concept

# Helper: mint, type and label one entity node; returns its URI (or None).
def _add_labelled_entity(graph, value, base_uri, rdf_class):
    """Add a typed, labelled resource for ``value`` to ``graph``.

    Args:
        graph: rdflib Graph that receives the triples.
        value: rdflib term (or str) naming the entity; may be None/empty.
        base_uri: URI prefix the entity URI is minted under.
        rdf_class: class URI used as the object of rdf:type.

    Returns:
        The minted URIRef, or None when ``value`` is missing/empty (in which
        case nothing is added to ``graph``).
    """
    if not value:
        return None
    entity_uri = URIRef(f"{base_uri}{value.replace(' ', '_')}")
    graph.add((entity_uri, RDF.type, rdf_class))
    graph.add((entity_uri, RDFS.label, Literal(value, datatype=XSD.string)))
    return entity_uri


# Function to process data and generate RDF triples
def process_data(input_file, output_file):
    """Transforms TTL data into structured RDF objects.

    Parses ``input_file`` (Turtle) into the module-level ``data_graph``, then
    for every subject that has an rdf:type builds typed, labelled resources
    for its medal type, athlete, event, country and discipline, links the
    subject to them, and serializes the result to ``output_file`` as Turtle.

    Args:
        input_file: path of the Turtle file to read.
        output_file: path the transformed Turtle graph is written to.

    Notes:
        * A link is only emitted for attributes the subject actually has.
          Missing attributes are skipped instead of raising NameError or
          reusing the URI left over from a previous subject.
        * ``data_graph`` is shared module state: calling this function again
          adds the new file's triples to those already loaded.
    """
    # Load the input data
    data_graph.parse(input_file, format="turtle")

    # Graph to hold the output
    output_graph = Graph()
    output_graph.bind("se", SE)
    output_graph.bind("ex", EX)
    output_graph.bind("skos", SKOS_NS)

    for result in data_graph.subjects(RDF.type, None):
        # Extract attributes
        medal_type = data_graph.value(result, EX.Medal_type)
        athlete_name = data_graph.value(result, EX.Name)
        discipline = data_graph.value(result, EX.Discipline)
        event = data_graph.value(result, EX.Event)
        country_name = data_graph.value(result, EX.Country)

        # Create the entity nodes; each URI is None when the attribute is absent.
        medal_uri = _add_labelled_entity(
            output_graph, medal_type, "http://example.org/medal/", SE.MedalType)
        athlete_uri = _add_labelled_entity(
            output_graph, athlete_name, "http://example.org/athlete/", SE.Athlete)
        event_uri = _add_labelled_entity(
            output_graph, event, "http://example.org/event/", SE.Event)
        country_uri = _add_labelled_entity(
            output_graph, country_name, "http://example.org/country/", SE.Country)

        # Link discipline to SKOS concept
        discipline_uri = None
        if discipline:
            # The map is keyed by plain str, and rdflib terms never compare
            # equal to a plain str, so normalise before looking up.
            discipline_uri = discipline_to_skos.get(str(discipline))
            if discipline_uri is None:
                # Not in the SKOS vocabulary: mint a local concept for it.
                discipline_uri = URIRef(f"http://example.org/discipline/{discipline.replace(' ', '_')}")
                output_graph.add((discipline_uri, RDF.type, SKOS.Concept))
                output_graph.add((discipline_uri, SKOS.prefLabel, Literal(discipline, datatype=XSD.string)))

        # Add links for results (only for attributes that were present)
        links = (
            (EX.Medal_type, medal_uri),
            (EX.Name, athlete_uri),
            (EX.Discipline, discipline_uri),
            (EX.Event, event_uri),
            (EX.Country, country_uri),
        )
        for predicate, target_uri in links:
            if target_uri is not None:
                output_graph.add((result, predicate, target_uri))

    # Serialize the output graph
    output_graph.serialize(destination=output_file, format="turtle")
    print(f"Transformed data saved to {output_file}")

# Source Turtle file and the destination for the transformed graph.
input_ttl_file = "csv/data.ttl"
output_ttl_file = "og24.ttl"

# Run the transformation; writes output_ttl_file and prints a confirmation.
process_data(input_file=input_ttl_file, output_file=output_ttl_file)





Transformed data saved to og24.ttl
