In [3]:
%pip install rdflib

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
# --- Input files -------------------------------------------------------------
skos_file = "se_skos.ttl"                # SKOS vocabulary, parsed as Turtle
ttl_file_path = "csv/data.ttl"           # raw results, input of process_data
json_file_path = "triplets/merged.json"  # JSON entries read by add_sports_locations

# --- Output files ------------------------------------------------------------
output_ttl_file = "og24_pre_data.ttl"    # written by process_data, re-read by add_sports_locations
updated_ttl_file = "og24_data.ttl"       # final graph written by add_sports_locations

In [5]:
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS, SKOS, XSD
import json

# Namespaces shared by all cells below.
# EX: predicates read from the input data (EX.Medal_type, EX.Name, ...).
EX = Namespace("http://example.org/vocab/")
# SE: classes and properties written to the output graphs (SE.Result, SE.Athlete, ...).
SE = Namespace("http://example.org/olympics2024/schema#")
# Explicit SKOS core namespace; the visible cells use the imported `SKOS` instead.
SKOS_NS = Namespace("http://www.w3.org/2004/02/skos/core#")


In [24]:
# Initialize graphs
# Module-level graph that process_data() parses the input Turtle file into.
data_graph = Graph()
data_graph.bind("se", SE)
data_graph.bind("ex", EX)

# Map disciplines to SKOS concepts from se_skos.ttl
skos_graph = Graph()

try:
    skos_graph.parse(skos_file, format="turtle")
    print("Successfully loaded SKOS file.")
except Exception as e:
    print(f"Error loading SKOS file: {e}")
    # Re-raise rather than calling exit(1): inside a notebook `exit()` asks the
    # kernel to shut down instead of simply failing this cell, and it hides the
    # traceback. Re-raising stops the cell (and a Run All) with the real error.
    raise

# Index: normalized (stripped, lower-cased) skos:prefLabel -> concept IRI.
# NOTE(review): only one prefLabel per concept is indexed; if concepts carry
# labels in several languages, confirm the expected one is picked up.
discipline_to_skos = {}
for s, p, o in skos_graph.triples((None, RDF.type, SKOS.Concept)):
    label = skos_graph.value(s, SKOS.prefLabel)
    if label:
        # Normalize SKOS labels by converting to lowercase and stripping whitespace
        discipline_to_skos[label.toPython().strip().lower()] = s

def _add_labelled_resource(graph, base_iri, name, rdf_class):
    """Mint an IRI for ``name`` under ``base_iri``, add its rdf:type and rdfs:label to ``graph``, and return the IRI."""
    # Spaces are the only characters rewritten when building the local name.
    uri = URIRef(f"{base_iri}{name.replace(' ', '_')}")
    graph.add((uri, RDF.type, rdf_class))
    graph.add((uri, RDFS.label, Literal(name, datatype=XSD.string)))
    return uri


def process_data(input_file, output_file):
    """Transforms TTL data into structured RDF objects with SKOS discipline matching.

    The input Turtle file is parsed into the module-level ``data_graph``. Every
    subject that has an ``EX.Medal_type`` value is treated as one result and is
    re-expressed in a fresh output graph as an ``SE.Result`` linked to a medal
    type, athlete, event and country (each created only when the corresponding
    value is present). When the result's discipline matches a label in the
    module-level ``discipline_to_skos`` index, the result's event is linked to
    that SKOS concept.

    Args:
        input_file: Path of the Turtle file to read.
        output_file: Path the transformed Turtle file is written to.

    Returns:
        None. Load and save failures are reported with ``print`` rather than
        raised; a load failure aborts the transformation.
    """
    try:
        data_graph.parse(input_file, format="turtle")
        print("Successfully loaded input TTL file.")
    except Exception as e:
        print(f"Error loading input TTL file: {e}")
        return

    # Graph to hold the output
    output_graph = Graph()
    output_graph.bind("se", SE)
    output_graph.bind("ex", EX)
    output_graph.bind("skos", SKOS)

    print("Available SKOS disciplines:")
    for label in discipline_to_skos.keys():
        print(f"- {label}")

    for result in data_graph.subjects(predicate=EX.Medal_type):
        print(f"Processing result: {result}")
        output_graph.add((result, RDF.type, SE.Result))

        # Extract attributes
        medal_type = data_graph.value(result, EX.Medal_type)
        athlete_name = data_graph.value(result, EX.Name)
        discipline = data_graph.value(result, EX.Discipline)
        event = data_graph.value(result, EX.Event)
        country_name = data_graph.value(result, EX.Country)

        # Reset per result so a URI minted for a previous result can never be
        # reused by accident when this result lacks the corresponding value.
        athlete_uri = None
        event_uri = None

        # Add medal type
        if medal_type:
            medal_uri = _add_labelled_resource(
                output_graph, "http://example.org/medal/", medal_type, SE.MedalType
            )
            output_graph.add((result, SE.value, medal_uri))  # Link result to medal type

        # Add athlete
        if athlete_name:
            athlete_uri = _add_labelled_resource(
                output_graph, "http://example.org/athlete/", athlete_name, SE.Athlete
            )
            output_graph.add((result, SE.athleteOfResult, athlete_uri))  # Link result to athlete

        # Add event
        if event:
            event_uri = _add_labelled_resource(
                output_graph, "http://example.org/event/", event, SE.Epreuve
            )
            output_graph.add((result, SE.epreuve, event_uri))  # Link result to event

        # Add country
        if country_name:
            country_uri = _add_labelled_resource(
                output_graph, "http://example.org/country/", country_name, SE.Country
            )

            # Link athlete or team to their country
            if athlete_uri is not None:
                output_graph.add((athlete_uri, SE.representsCountry, country_uri))

        # Match discipline to SKOS concept and link to event
        if discipline:
            normalized_discipline = discipline.toPython().strip().lower()
            if normalized_discipline in discipline_to_skos:
                discipline_uri = discipline_to_skos[normalized_discipline]
                if event_uri is not None:
                    # Link the event to the SKOS concept (sport)
                    output_graph.add((event_uri, SE.epreuve, discipline_uri))
                    print(f"Linked event '{event}' to sport '{normalized_discipline}'.")
                else:
                    # Without an event there is nothing to attach the sport to.
                    print(f"Warning: Result '{result}' has no event. Cannot link sport '{normalized_discipline}'.")
            else:
                print(f"Warning: Discipline '{discipline}' not found in SKOS graph. Skipping.")

    # Serialize the output graph
    try:
        output_graph.serialize(destination=output_file, format="turtle")
        print(f"Transformed data saved to {output_file}")
    except Exception as e:
        print(f"Error saving output TTL file: {e}")


# Run the transformation: raw results in, intermediate Turtle file out
process_data(input_file=ttl_file_path, output_file=output_ttl_file)





Successfully loaded SKOS file.
Successfully loaded input TTL file.
Available SKOS disciplines:
- individual sports
- team sports
- water sports
- combat sports
- gymnastics
- cycling sports
- athletics sports
- diving
- wrestling
- taekwondo
- athletics
- swimming
- boxing
- archery
- weightlifting
- skateboarding
- judo
- rhythmic gymnastics
- canoe sprint
- shooting
- sport climbing
- badminton
- surfing
- modern pentathlon
- table tennis
- cycling bmx racing
- cycling mountain bike
- canoe slalom
- artistic gymnastics
- fencing
- cycling road
- trampoline gymnastics
- sailing
- marathon swimming
- equestrian
- breaking
- cycling track
- 3x3 basketball
- basketball
- handball
- hockey
- rowing
- football
- rugby sevens
- tennis
- triathlon
- beach volleyball
- volleyball
- cycling bmx freestyle
- artistic swimming
- water polo
- golf
Processing result: http://example.org/results/0
Linked event 'Men's 3m Springboard' to sport 'diving'.
Processing result: http://example.org/results/1
L

In [25]:
# Module-level graph with the `se` and `ex` prefixes bound.
# NOTE(review): add_sports_locations() creates and uses its own local graph,
# so this module-level graph is not read by it; confirm whether it is still needed.
g = Graph()
g.bind("se", SE)
g.bind("ex", EX)

def add_sports_locations(json_file, rdf_file, skos_file, output_file):
    """Adds venues to the RDF graph only when the corresponding sport exists in the SKOS file.

    Loads the existing Turtle graph, builds a lookup of normalized (stripped,
    lower-cased) ``skos:prefLabel`` -> concept IRI from the SKOS file, then
    walks the JSON entries. For every entry whose ``"type"`` is ``"location"``
    the ``"head"`` is read as the sport name and the ``"tail"`` as the venue
    name; when the sport is known, an ``SE.Venue`` resource is added and linked
    from the sport concept via ``SE.venue``.

    Args:
        json_file: Path of the JSON file; expected to hold a list of objects
            with ``"type"``, ``"head"`` and ``"tail"`` keys.
        rdf_file: Path of the existing Turtle file to extend.
        skos_file: Path of the SKOS vocabulary (Turtle).
        output_file: Path the extended Turtle file is written to.

    Returns:
        None. Load and save failures are reported with ``print`` rather than
        raised; any load failure aborts before the graph is modified.
    """
    # Load the existing RDF graph
    g = Graph()
    # Bind the project prefixes on the graph that is actually serialized.
    g.bind("se", SE)
    g.bind("ex", EX)
    try:
        g.parse(rdf_file, format="turtle")
        print("Successfully loaded existing RDF file.")
    except Exception as e:
        print(f"Error loading RDF file: {e}")
        return

    # Load SKOS graph and build a mapping of sports
    skos_graph = Graph()
    try:
        skos_graph.parse(skos_file, format="turtle")
        print("Successfully loaded SKOS file.")
    except Exception as e:
        print(f"Error loading SKOS file: {e}")
        return

    skos_sports = {
        label.toPython().strip().lower(): s
        for s, p, o in skos_graph.triples((None, RDF.type, SKOS.Concept))
        if (label := skos_graph.value(s, SKOS.prefLabel))
    }

    # Load JSON data
    try:
        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
        # Windows) would mis-decode accented venue or sport names.
        with open(json_file, "r", encoding="utf-8") as file:
            data = json.load(file)
        print("Successfully loaded JSON file.")
    except Exception as e:
        print(f"Error loading JSON file: {e}")
        return

    # Process relevant JSON entries
    for entry in data:
        # Ignore anything that is not a location record instead of crashing
        # on a missing "type" key or a non-object entry.
        if not isinstance(entry, dict) or entry.get("type") != "location":
            continue

        sport_name = entry.get("head")  # Sport name
        venue_name = entry.get("tail")  # Venue name
        if not isinstance(sport_name, str) or not isinstance(venue_name, str):
            print(f"Warning: Skipping malformed location entry: {entry!r}")
            continue

        # Normalize the sport name
        normalized_sport_name = sport_name.strip().lower()

        # Check if the sport exists in the SKOS file
        if normalized_sport_name not in skos_sports:
            print(f"Warning: Sport '{sport_name}' not found in SKOS. Skipping venue '{venue_name}'.")
            continue

        sport_uri = skos_sports[normalized_sport_name]
        venue_uri = URIRef(f"http://example.org/venue/{venue_name.replace(' ', '_')}")

        print(f"Sport '{sport_name}' found in SKOS. Adding venue '{venue_name}'.")

        # Add venue to the graph (only described once, several sports may share it)
        if (venue_uri, RDF.type, SE.Venue) not in g:
            g.add((venue_uri, RDF.type, SE.Venue))
            g.add((venue_uri, RDFS.label, Literal(venue_name, datatype=XSD.string)))

        # Link sport (from SKOS) to venue
        g.add((sport_uri, SE.venue, venue_uri))

    # Serialize the updated graph
    try:
        g.serialize(destination=output_file, format="turtle")
        print(f"Updated RDF data saved to {output_file}")
    except Exception as e:
        print(f"Error saving updated RDF file: {e}")


# Extend the intermediate graph with sport -> venue links and write the final file
add_sports_locations(
    json_file=json_file_path,
    rdf_file=output_ttl_file,
    skos_file=skos_file,
    output_file=updated_ttl_file,
)

Successfully loaded existing RDF file.
Successfully loaded SKOS file.
Successfully loaded JSON file.
Sport 'artistic swimming' found in SKOS. Adding venue 'Aquatics Centre'.
Sport 'water polo' found in SKOS. Adding venue 'Aquatics Centre'.
Sport 'diving' found in SKOS. Adding venue 'Aquatics Centre'.
Sport 'athletics' found in SKOS. Adding venue 'Stade de France'.
Sport 'basketball' found in SKOS. Adding venue 'Pierre Mauroy Stadium'.
Sport 'handball' found in SKOS. Adding venue 'Pierre Mauroy Stadium'.
Sport 'judo' found in SKOS. Adding venue 'Champ-de-Mars Arena'.
Sport 'wrestling' found in SKOS. Adding venue 'Champ-de-Mars Arena'.
Updated RDF data saved to og24_data.ttl
