In [1]:
!pip install rdflib
!pip install pyyaml



In [2]:
import os
import yaml
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD, FOAF, SKOS

In [3]:
# Notebook configuration: working directory, input match file, output folder
path = os.getcwd()  # current working directory (os.getcwd() already returns a str)
matchUrl = '/content/355988.yaml'  # Update path to match your setup
savePath = '/content/Innings_Serialization'  # Path to save RDF files

In [4]:
# Define namespaces
# CRI is the base IRI under which this notebook builds its resource and term URIs;
# later cells use attribute access (CRI.Match) and item access (CRI[idM]) on it.
CRI = Namespace("https://www.dei.unipd.it/lodb/cri/")

In [5]:
# Load the YAML file in memory.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which can fail or mis-decode non-ASCII text
# (e.g. player or venue names) on systems whose default is not UTF-8.
# safe_load only builds plain Python objects (dicts, lists, scalars, dates).
with open(matchUrl, 'r', encoding='utf-8') as file:
    matchData = yaml.safe_load(file)

In [6]:
# Check the loaded match data (debugging step).
# Prints the entire parsed structure held in matchData, so the output is long.
print(matchData)

{'meta': {'data_version': 0.91, 'created': datetime.date(2014, 5, 31), 'revision': 3}, 'info': {'balls_per_over': 6, 'city': 'Nottingham', 'dates': [datetime.date(2009, 6, 18)], 'gender': 'female', 'match_type': 'T20', 'neutral_venue': 1, 'outcome': {'by': {'runs': 52}, 'winner': 'New Zealand'}, 'overs': 20, 'player_of_match': ['AL Watkins'], 'players': {'India': ['PG Raut', 'Anjum Chopra', 'M Raj', 'H Kaur', 'R Malhotra', 'A Sharma', 'S Naik', 'R Dhar', 'P Roy', 'J Goswami', 'G Sultana'], 'New Zealand': ['LR Doolan', 'SW Bates', 'AL Watkins', 'AE Satterthwaite', 'NJ Browne', 'SJ McGlashan', 'SFM Devine', 'RH Priest', 'KL Pulford', 'SEA Ruck', 'SJ Tsukigawa']}, 'registry': {'people': {'Anjum Chopra': '57a7b064', 'A Sharma': 'a3ead8da', 'AE Satterthwaite': '72b7ed1e', 'AL Watkins': 'd395e33c', 'Asad Rauf': '861606b7', 'BC Broad': 'd6a7dd38', 'G Sultana': 'ff227198', 'H Kaur': '53cd8da6', 'IJ Gould': 'a9dfdf6c', 'J Goswami': '84f1ae7f', 'KL Pulford': '25654c89', 'LR Doolan': '4ab6d138', 

In [7]:
# Start from an empty RDF graph
g = Graph()
# Register a short prefix for every namespace in use, so the serialized
# output shows compact names instead of full IRIs
for ns_prefix, ns_object in (("foaf", FOAF), ("xsd", XSD), ("skos", SKOS), ("cri", CRI)):
    g.bind(ns_prefix, ns_object)

In [8]:
# Derive the match id from the input path:
# first drop the directory part, then strip the file extension
match_file_name = os.path.basename(matchUrl)
filename_without_extension = os.path.splitext(match_file_name)[0]

In [9]:
# Build the resource that represents this match.
# Its local name is "match_" followed by the match id (the file name without
# extension), and its URI is that local name appended to the CRI namespace.
idM = f"match_{filename_without_extension}"
Match = URIRef(CRI[idM])

In [10]:
# Add triples using store's add() method
# This states that the Match node is an instance of the CRI.Match class.
# The call is the cell's final expression, so the notebook echoes the value
# returned by g.add() (the Graph repr shown in the cell output).
g.add((Match, RDF.type, CRI.Match))  # Add triple for Match type

<Graph identifier=Nf1d8706e57fd4a1fa5ef447424fbb4ba (<class 'rdflib.graph.Graph'>)>

In [11]:
# Process innings data: create one CRI.Innings node per entry of the match's
# "innings" list, attach its batting team and delivery count, and link the
# first two innings to the Match node.
# `or []` also covers a file whose "innings" key is present but empty (parsed
# as None), which would otherwise make enumerate() raise a TypeError.
innings_data = matchData.get('innings') or []
for inning_index, inning in enumerate(innings_data, start=1):
    # Each entry is expected to be a single-key mapping such as
    # {"1st innings": {...}}; take that key and its payload in one step.
    inning_key, inning_info = next(iter(inning.items()))

    # Create Inning and Team instances.
    # The innings URI combines the match id with the 1-based position of the
    # innings; the team URI is the team name with spaces turned into underscores.
    idInning = f"innings_{filename_without_extension}_{inning_index}"
    Inning = URIRef(CRI[idInning])
    BattingTeam = URIRef(CRI[f"Team_{inning_info['team'].replace(' ', '_')}"])

    # Number of delivery records listed for this innings (0 when the key is
    # missing or empty).
    deliveries = inning_info.get('deliveries') or []

    # Add triples for the inning
    g.add((Inning, RDF.type, CRI.Innings))
    g.add((Inning, CRI.hasBattingTeam, BattingTeam))
    g.add((Inning, CRI.totalBallsDelivery, Literal(len(deliveries), datatype=XSD.integer)))

    # Link innings to the match (both directions)
    if inning_index == 1:
        g.add((Match, CRI.hasFirstInnings, Inning))
        g.add((Inning, CRI.isFirstInningsOf, Match))
    elif inning_index == 2:
        g.add((Match, CRI.hasSecondInnings, Inning))
        g.add((Inning, CRI.isSecondInningsOf, Match))
    else:
        # Only first/second link properties are used here. Report any further
        # innings instead of silently leaving them disconnected from the match.
        print(f"Warning: innings #{inning_index} ({inning_key!r}) was added but not linked to {Match}")

# Serialize RDF graph to a Turtle file
output_file = os.path.join(savePath, f"{filename_without_extension}_innings.ttl")
# Create the output folder on first run; exist_ok keeps re-runs from failing
os.makedirs(os.path.dirname(output_file), exist_ok=True)
g.serialize(destination=output_file, format="turtle")

print(f"RDF triples serialized to {output_file}")

RDF triples serialized to /content/Innings_Serialization/355988_innings.ttl
