# Populating the cricket ontology

In [None]:
!pip install rdflib
!pip install pyyaml



In [None]:
# required libraries
import pandas as pd
import yaml
import os
from pathlib import Path
# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace
# rdflib knows about some namespaces, like FOAF
from rdflib.namespace import FOAF, XSD, SKOS
# CHECK DATE
import datetime

In [None]:
# Input / output locations, all relative to the current working directory
path = os.getcwd()

# YAML file describing the match to convert
matchUrl = f"{path}/data/matches/noResult.yaml"

# Folder receiving the serialized RDF; the trailing slash is kept because the
# output file name is appended to it by plain string concatenation
savePath = f"{path}/data/rdf/cricketDB/"

In [None]:
# Construct the cricket ontology namespace (CRI), which is not predefined in RDFlib.
# Every node and property created in this notebook is addressed relative to this base URI.
CRI = Namespace("https://www.dei.unipd.it/lodb/cri/")

## Outcome

**TODO:** find a way to process a whole batch of match files at once, e.g. by iterating over all the YAML files in the matches folder.

In [None]:
# Load the YAML match file in memory.
# The encoding is stated explicitly so that parsing does not depend on the
# default locale encoding of the machine running the notebook.
with open(matchUrl, 'r', encoding='utf-8') as file:
    matchData = yaml.safe_load(file)

# Display the outcome section of the loaded match
matchData['info']['outcome']

{'result': 'no result'}

In [None]:
# Empty RDF graph that the following cells fill with triples
g = Graph()

# Register a short prefix for each namespace so the serialized output is readable
for prefix, namespace in (
    ("foaf", FOAF),
    ("xsd", XSD),
    ("skos", SKOS),
    ("cri", CRI),
):
    g.bind(prefix, namespace)

In [None]:
# Derive the match identifier from the input path:
# strip the directories first, then the file extension
match_file_name = os.path.basename(matchUrl)
filename_without_extension = os.path.splitext(match_file_name)[0]
filename_without_extension

'noResult'

In [None]:
%%time
#measure execution time

# Create the node to add to the Graph

# Creating an instance of a Match
# the node has the namespace + the match id(filename) as URI
idM = "match"+filename_without_extension
Match = URIRef(CRI[idM])
# Add triples using store's add() method.
g.add((Match, RDF.type, CRI.Match))

# Creating an instance of an Outcome
# the node has the namespace + the outcome id(filename) as URI
idO = "outcome"+filename_without_extension
Outcome = URIRef(CRI[idO])
# Add triples using store's add() method.
g.add((Outcome, RDF.type, CRI.Outcome))

# Creating 2 instances of a Team
# the nodes have the namespace + the team name as URI
idT0 = "team"+(matchData['info']['teams'][0]).replace(" ","")
idT1 = "team"+(matchData['info']['teams'][1]).replace(" ","")
Team0 = URIRef(CRI[idT0])
Team1 = URIRef(CRI[idT1])
# Add triples using store's add() method.
g.add((Team0, RDF.type, CRI.Team))
g.add((Team1, RDF.type, CRI.Team))

# Checking if match has been won or not
if "by" in matchData['info']['outcome']:
    # Creating an instance of a Win
    # the node has the namespace + the win id(filename) as URI
    idW = "win"+filename_without_extension
    Win = URIRef(CRI[idW])
    # Add triples stating that is a win
    g.add((Win, RDF.type, CRI.Win))
    g.add((Outcome, CRI['outcomeDescription'], Literal("win", datatype=XSD.string)))
    # Adding triple that states which Team is the winner
    idWinner = "team"+(matchData['info']['outcome']['winner']).replace(" ","")
    Winner = URIRef(CRI[idWinner])
    g.add((Win, CRI['wonBy'], Winner))

    if "runs" in matchData['info']['outcome']['by']:
      # Creating an instance of a WinByRuns
      # the node has the namespace + the win id(filename) as URI
      idRuns = "winByRuns"+filename_without_extension
      WinByRuns = URIRef(CRI[idRuns])
      # Add triples using store's add() method.
      g.add((WinByRuns, RDF.type, CRI.WinByRuns))
      g.add((WinByRuns, CRI['runs'], Literal(matchData['info']['outcome']['by']['runs'], datatype=XSD.integer)))
      g.add((Match, CRI['hasOutcome'], WinByRuns))
    elif "wickets" in matchData['info']['outcome']['by']:
      # Creating an instance of a WinByRuns
      # the node has the namespace + the win id(filename) as URI
      idWickets = "winByWickets"+filename_without_extension
      WinByWickets = URIRef(CRI[idWickets])
      # Add triples using store's add() method.
      g.add((WinByWickets, RDF.type, CRI.WinByWickets))
      g.add((WinByWickets, CRI['wickets'], Literal(matchData['info']['outcome']['by']['wickets'], datatype=XSD.integer)))
      g.add((Match, CRI['hasOutcome'], WinByWickets))

elif "result" in matchData['info']['outcome']:
  if matchData['info']['outcome']['result'] == "tie":
    # Creating an instance of a Tie
    # the node has the namespace + the Tie id(filename) as URI
    idTie = "tie"+filename_without_extension
    Tie = URIRef(CRI[idTie])
    g.add((Tie, RDF.type, CRI.Tie))
    g.add((Match, CRI['hasOutcome'], Tie))
    g.add((Outcome, CRI['outcomeDescription'], Literal(matchData['info']['outcome']['result'], datatype=XSD.string)))
    if "eliminator" in matchData['info']['outcome']:
      idEliminator = "team"+(matchData['info']['outcome']['eliminator']).replace(" ","")
      Eliminator = URIRef(CRI[idEliminator])
      g.add((Tie, CRI['hasTieBreakWinner'], Eliminator))
    elif "bowl_out" in matchData['info']['outcome']:
      idBowlOut = "team"+(matchData['info']['outcome']['bowl_out']).replace(" ","")
      BowlOut = URIRef(CRI[idBowlOut])
      g.add((Tie, CRI['hasTieBreakWinner'], BowlOut))
  elif matchData['info']['outcome']['result'] == "no result":
    g.add((Match, CRI['hasOutcome'], Outcome))
    g.add((Outcome, CRI['outcomeDescription'], Literal(matchData['info']['outcome']['result'], datatype=XSD.string)))

CPU times: user 273 µs, sys: 0 ns, total: 273 µs
Wall time: 279 µs


In [None]:
%%time
# print all the data in the Turtle format
print("--- saving serialization ---")
# may have to change to match
with open(savePath + 'newoutcomesNoResult-noString.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))
    #.decode("utf-8")


--- saving serialization ---
CPU times: user 1.37 ms, sys: 837 µs, total: 2.21 ms
Wall time: 4.19 ms
