In [75]:
# required libraries
import pandas as pd
import os
from pathlib import Path
# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace
# rdflib knows about some namespaces, like FOAF
from rdflib.namespace import FOAF, XSD
# CHECK DATE 
import datetime
import csv
from io import StringIO

In [87]:
# Namespace IRIs: the project's own cricket vocabulary and the OWL vocabulary.
CRICKET_IRI = "http://www.example.com/cricket#"
OWL_IRI = "http://www.w3.org/2002/07/owl#"

# Namespace objects build full IRIs from local names (e.g. cricket_ns["Team"]).
cricket_ns = Namespace(CRICKET_IRI)
owl_ns = Namespace(OWL_IRI)

In [88]:
# Initialize RDF graph
graph = Graph()


def _read_csv_rows(path, encoding=None):
    """Read a CSV file and return its data rows as a list of dicts.

    The file is opened in a ``with`` block so the handle is always closed,
    and the rows are materialised in a list so that later cells can iterate
    over them more than once (a bare ``csv.DictReader`` is exhausted after a
    single pass).

    Args:
        path: Path of the CSV file to read.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        A list with one ``dict`` per data row, keyed by the header names.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(path, mode='r', encoding=encoding, newline='') as csv_file:
        return list(csv.DictReader(csv_file))


# Load data from CSV files
deliveries_data = _read_csv_rows('deliveries.csv')
# 'utf-8-sig' drops a leading byte-order mark so the first header name is clean.
matches_data = _read_csv_rows('matches.csv', encoding='utf-8-sig')
points_table_data = _read_csv_rows('points_table.csv')

# Peek at the column names WITHOUT consuming a row: the first row must still be
# present when the points table is turned into triples.
first_row = points_table_data[0] if points_table_data else {}
print(first_row.keys())

dict_keys(['team_ranking', 'team', 'matches_played', 'matches_won', 'matches_lost', 'Tie', 'no_result', 'team_points', 'team_net_run_rate'])


In [89]:
# Python handles for the ontology terms in the cricket namespace.
# Each assignment only builds an IRI from a local name; nothing is added to
# the graph here.

# --- Ontology classes ---
Delivery = cricket_ns["Delivery"]
DotBall = cricket_ns["DotBall"]
Runs = cricket_ns["Runs"]
Wicket = cricket_ns["Wicket"]
Bowled = cricket_ns["Bowled"]
Caught = cricket_ns["Caught"]
LBW = cricket_ns["LBW"]
RunOut = cricket_ns["RunOut"]
Stumped = cricket_ns["Stumped"]
Inning = cricket_ns["Inning"]
Match = cricket_ns["Match"]
Final = cricket_ns["Final"]
GroupMatch = cricket_ns["GroupMatch"]
KnockOutMatch = cricket_ns["KnockOutMatch"]
Person = cricket_ns["Person"]
Player = cricket_ns["Player"]
Batter = cricket_ns["Batter"]
Bowler = cricket_ns["Bowler"]
Umpire = cricket_ns["Umpire"]
PointsTable = cricket_ns["PointsTable"]
Score = cricket_ns["Score"]
Team = cricket_ns["Team"]
Venue = cricket_ns["Venue"]
Stadium = cricket_ns["Stadium"]

# --- Ontology object properties ---
bowledBy = cricket_ns["bowledBy"]
deliveryBelongsToMatch = cricket_ns["deliveryBelongsToMatch"]
facedBy = cricket_ns["facedBy"]
hasOfficiated = cricket_ns["hasOfficiated"]
hasTeam = cricket_ns["hasTeam"]
hasWinner = cricket_ns["hasWinner"]
inningPlayedBy = cricket_ns["inningPlayedBy"]
played = cricket_ns["played"]
playedAt = cricket_ns["playedAt"]

# --- Ontology data properties ---
batter = cricket_ns["batter"]
bowler = cricket_ns["bowler"]
bowler_name = cricket_ns["bowler_name"]
city_name = cricket_ns["city_name"]
delivery_inning = cricket_ns["delivery_inning"]
delivery_number = cricket_ns["delivery_number"]
batting_team = cricket_ns["batting_team"]
bowling_team = cricket_ns["bowling_team"]
first_inning_batting_team = cricket_ns["first_inning_batting_team"]
first_inning_bowling_team = cricket_ns["first_inning_bowling_team"]
first_inning_score = cricket_ns["first_inning_score"]
first_umpire = cricket_ns["first_umpire"]
inning_number = cricket_ns["inning_number"]
match = cricket_ns["match"]
match_city = cricket_ns["match_city"]
match_date = cricket_ns["match_date"]
match_number = cricket_ns["match_number"]
match_type = cricket_ns["match_type"]
match_umpire1 = cricket_ns["match_umpire1"]
match_umpire2 = cricket_ns["match_umpire2"]
match_venue = cricket_ns["match_venue"]
match_winner = cricket_ns["match_winner"]
matches_lost = cricket_ns["matches_lost"]
matches_played = cricket_ns["matches_played"]
matches_won = cricket_ns["matches_won"]
no_result = cricket_ns["no_result"]
non_striker_name = cricket_ns["non_striker_name"]
player_dismissed = cricket_ns["player_dismissed"]
player_of_the_match = cricket_ns["player_of_the_match"]
runs_scored = cricket_ns["runs_scored"]
second_inning_score = cricket_ns["second_inning_score"]
second_umpire = cricket_ns["second_umpire"]
stadium_name = cricket_ns["stadium_name"]
striker_name = cricket_ns["striker_name"]
team1 = cricket_ns["team1"]
team2 = cricket_ns["team2"]
team_name = cricket_ns["team_name"]
team_net_run_rate = cricket_ns["team_net_run_rate"]
team_points = cricket_ns["team_points"]
team_ranking = cricket_ns["team_ranking"]
third_umpire = cricket_ns["third_umpire"]
wicket_taken = cricket_ns["wicket_taken"]
wicket_type_taken = cricket_ns["wicket_type_taken"]
winner_runs = cricket_ns["winner_runs"]
winner_wickets = cricket_ns["winner_wickets"]

In [90]:
# Process deliveries data: one pass over the delivery rows, adding typing and
# participant triples for each delivery to the graph.
for row in deliveries_data:
    # NOTE(review): the delivery IRI is built from the 'ball' column alone. If
    # that value repeats across matches or innings, those rows collapse onto a
    # single node - confirm the column is unique or add a match/inning key.
    delivery_uri = cricket_ns[row['ball']]
    graph.add((delivery_uri, RDF.type, Delivery))
    # TODO: link each delivery to its match via deliveryBelongsToMatch once the
    # matching key column in deliveries.csv is confirmed.
    graph.add((delivery_uri, bowledBy, cricket_ns[row['bowler_name']]))
    graph.add((delivery_uri, facedBy, cricket_ns[row['striker_name']]))
    graph.add((delivery_uri, bowling_team, cricket_ns[row['bowling_team']]))
    graph.add((delivery_uri, batting_team, cricket_ns[row['batting_team']]))

    # Only record a dismissal when the cell is filled in; an empty value would
    # otherwise yield a triple pointing at the bare namespace IRI.
    dismissed_player = row['player_dismissed']
    if dismissed_player:
        graph.add((delivery_uri, player_dismissed, cricket_ns[dismissed_player]))

    # Classify the delivery by the runs scored off the bat.
    if row['runs_off_bat'] == '0':
        graph.add((delivery_uri, RDF.type, DotBall))
    else:
        runs_uri = cricket_ns[f"runs_{row['runs_off_bat']}"]
        graph.add((delivery_uri, RDF.type, Runs))
        graph.add((delivery_uri, RDF.type, runs_uri))

    # A non-empty wicket type marks the delivery as a wicket of that kind.
    if row['wicket_type']:
        wicket_uri = cricket_ns[row['wicket_type']]
        graph.add((delivery_uri, RDF.type, Wicket))
        graph.add((delivery_uri, RDF.type, wicket_uri))

In [91]:
# Process matches data

def _add_match_resource(subject, predicate, value):
    """Add ``(subject, predicate, cricket_ns[value])`` unless ``value`` is empty.

    An empty CSV cell would otherwise produce a triple whose object is the
    bare namespace IRI, which carries no information.
    """
    if value:
        graph.add((subject, predicate, cricket_ns[value]))


def _add_match_literal(subject, predicate, value, datatype):
    """Add a typed literal triple unless ``value`` is empty.

    An empty string is not a valid lexical form for the numeric and date
    datatypes used below, so such cells are skipped instead of emitted.
    """
    if value:
        graph.add((subject, predicate, Literal(value, datatype=datatype)))


for row in matches_data:
    # Add triples for matches
    match_uri = cricket_ns[row['match']]
    graph.add((match_uri, RDF.type, Match))
    _add_match_resource(match_uri, match_number, row['match_number'])
    _add_match_resource(match_uri, hasTeam, row['team1'])
    _add_match_resource(match_uri, hasTeam, row['team2'])
    # The XSD datatype name is case-sensitive: it is "dateTime", not "datetime".
    # NOTE(review): if the column holds date-only values, XSD.date may be the
    # better fit - confirm against matches.csv.
    _add_match_literal(match_uri, match_date, row['match_date'], XSD.dateTime)
    _add_match_resource(match_uri, match_venue, row['match_venue'])
    _add_match_resource(match_uri, match_city, row['match_city'])
    _add_match_resource(match_uri, first_inning_score, row['first_inning_score'])
    _add_match_resource(match_uri, second_inning_score, row['second_inning_score'])
    _add_match_resource(match_uri, match_umpire1, row['match_umpire1'])
    _add_match_resource(match_uri, match_umpire2, row['match_umpire2'])
    _add_match_resource(match_uri, player_of_the_match, row['player_of_the_match'])
    _add_match_resource(match_uri, hasWinner, row['match_winner'])
    _add_match_literal(match_uri, winner_runs, row['winner_runs'], XSD.int)
    _add_match_literal(match_uri, winner_wickets, row['winner_wickets'], XSD.int)
    # The match type value is used as an additional class for the match.
    _add_match_resource(match_uri, RDF.type, row['match_type'])

In [92]:
# Process points table data
# Each entry maps a predicate to the CSV column it is read from and the XSD
# datatype used for the resulting literal.
team_stat_columns = (
    (matches_played, 'matches_played', XSD.int),
    (matches_won, 'matches_won', XSD.int),
    (matches_lost, 'matches_lost', XSD.int),
    (no_result, 'no_result', XSD.int),
    (team_points, 'team_points', XSD.int),
    (team_net_run_rate, 'team_net_run_rate', XSD.double),
    (team_ranking, 'team_ranking', XSD.int),
)

for standing in points_table_data:
    # Declare the team, then attach one typed literal per statistic.
    team_uri = cricket_ns[standing['team']]
    graph.add((team_uri, RDF.type, Team))
    for stat_predicate, column, xsd_type in team_stat_columns:
        stat_value = Literal(standing[column], datatype=xsd_type)
        graph.add((team_uri, stat_predicate, stat_value))

In [93]:
# Write the populated graph to disk in Turtle syntax.
output_path = 'output.ttl'
output_format = 'turtle'
graph.serialize(destination=output_path, format=output_format)

<Graph identifier=N96c3ef49440a43a5a8097b0d816f78a3 (<class 'rdflib.graph.Graph'>)>