In [1]:
!pip install rdflib
!pip install pyyaml



In [2]:
import os
import yaml
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD, FOAF, SKOS

In [3]:
# Parameters and URLs
path = str(os.getcwd())
matchUrl = '/content/1043989.yaml'  # Update path to match your setup
savePath = '/content/Innings_Serialization'  # Path to save RDF files

In [4]:
# Define namespaces
CRI = Namespace("https://www.dei.unipd.it/lodb/cri/")

In [5]:
# Load the YAML file in memory
with open(matchUrl, 'r') as file:
    matchData = yaml.safe_load(file)

In [6]:
# Check the loaded match data (debugging step)
print(matchData)

{'meta': {'data_version': 0.91, 'created': datetime.date(2017, 2, 17), 'revision': 1}, 'info': {'balls_per_over': 6, 'dates': [datetime.date(2017, 2, 17)], 'gender': 'female', 'match_type': 'T20', 'outcome': {'by': {'runs': 40}, 'winner': 'Australia'}, 'overs': 20, 'player_of_match': ['EJ Villani'], 'players': {'Australia': ['MM Lanning', 'BL Mooney', 'A Gardner', 'EJ Villani', 'AJ Blackwell', 'AJ Healy', 'JL Jonassen', 'M Strano', 'ML Schutt', 'A Wellington', 'KM Beams'], 'New Zealand': ['SW Bates', 'RH Priest', 'AM Peterson', 'AE Satterthwaite', 'KJ Martin', 'EM Bermingham', 'KT Perkins', 'EC Perry', 'HR Huddleston', 'LMM Tahuhu', 'LM Kasperek']}, 'registry': {'people': {'A Gardner': 'bc969efb', 'A Wellington': '9b3bcca4', 'AE Satterthwaite': '72b7ed1e', 'AJ Barrow': 'a67aa9df', 'AJ Blackwell': 'b9164a9b', 'AJ Healy': '321644de', 'AM Peterson': 'ae2c396f', 'BL Mooney': '52d1dbc8', 'EC Perry': '0de3060f', 'EJ Villani': 'cde96d97', 'EM Bermingham': '180240d4', 'GA Abood': 'a4af5528', '

In [7]:
# Create the graph
g = Graph()
# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD)
g.bind("skos", SKOS)
g.bind("cri", CRI)

In [8]:
# Get the filename without the extension
filename_without_extension = os.path.splitext(os.path.basename(matchUrl))[0]

In [9]:
# Create the node to add to the graph
# Creating an instance of a Match
# The node has the namespace + the match id(filename) as URI
idM = "match_" + filename_without_extension
Match = URIRef(CRI[idM])

In [10]:
# Add triples using store's add() method
g.add((Match, RDF.type, CRI.Match))  # Add triple for Match type

<Graph identifier=Nc38c3d415b0a4c2f9ecf1bc17127b162 (<class 'rdflib.graph.Graph'>)>

In [11]:
# Process innings data
from rdflib import Graph, Literal, RDF, URIRef
from rdflib.namespace import XSD

# Assuming matchData and CRI namespace are already defined
innings_data = matchData.get('innings', [])
for inning_index, inning in enumerate(innings_data, start=1):
    inning_key = list(inning.keys())[0]  # Get key ("1st innings" or "2nd innings")
    inning_info = inning[inning_key]

    # Create Inning and Team URIs
    idInning = f"innings{inning_index}_{filename_without_extension}"
    Inning = URIRef(CRI[idInning])
    team_name = inning_info['team'].replace(' ', '')
    BattingTeam = URIRef(CRI[f"team{team_name}"])

    # Add triples for the inning
    g.add((Inning, RDF.type, CRI.Innings))
    g.add((Inning, CRI.hasBattingTeam, BattingTeam))

    # Add totalBallsDelivery as xsd:integer
    total_deliveries = len(inning_info.get('deliveries', []))
    g.add((Inning, CRI.totalBallsDelivery, Literal(total_deliveries, datatype=XSD.Integer)))

    # Link innings to the match
    match_uri = URIRef(CRI[f"match{filename_without_extension}"])
    if inning_index == 1:
        g.add((match_uri, CRI.hasFirstInnings, Inning))
        g.add((Inning, CRI.isFirstInningsOf, match_uri))
    elif inning_index == 2:
        g.add((match_uri, CRI.hasSecondInnings, Inning))
        g.add((Inning, CRI.isSecondInningsOf, match_uri))

# Print the serialized RDF output to check
print("Graph serialized to Turtle format:")
print(g.serialize(format="turtle"))

# Serialize RDF graph to a Turtle file
output_file = os.path.join(savePath, f"{filename_without_extension}_innings.ttl")
os.makedirs(os.path.dirname(output_file), exist_ok=True)
g.serialize(destination=output_file, format="turtle")

print(f"RDF triples serialized to {output_file}")

Graph serialized to Turtle format:
@prefix cri: <https://www.dei.unipd.it/lodb/cri/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

cri:match_1043989 a cri:Match .

cri:innings1_1043989 a cri:Innings ;
    cri:hasBattingTeam cri:teamAustralia ;
    cri:isFirstInningsOf cri:match1043989 ;
    cri:totalBallsDelivery "122"^^xsd:Integer .

cri:innings2_1043989 a cri:Innings ;
    cri:hasBattingTeam cri:teamNewZealand ;
    cri:isSecondInningsOf cri:match1043989 ;
    cri:totalBallsDelivery "121"^^xsd:Integer .

cri:match1043989 cri:hasFirstInnings cri:innings1_1043989 ;
    cri:hasSecondInnings cri:innings2_1043989 .


RDF triples serialized to /content/Innings_Serialization/1043989_innings.ttl


  g.add((Inning, CRI.totalBallsDelivery, Literal(total_deliveries, datatype=XSD.Integer)))
