In [158]:
import pandas as pd
from rdflib import Graph, Literal, RDF, URIRef, Namespace
from rdflib.namespace import FOAF, XSD
import numpy as np

# Parameters and URLs
# All three input CSVs sit in the same project folder, so the folder is named
# once here; point `data_dir` somewhere else to run against another copy.
data_dir = 'C:/Users/Muhammad Ali/OneDrive/Desktop/DB_2/database2_project/'
matches_url = data_dir + 'matches.csv'
deliveries_url = data_dir + 'deliveries.csv'
points_table_url = data_dir + 'points_table.csv'


In [159]:
# Folder that receives the generated .ttl files
save_path = 'C:/Users/Muhammad Ali/OneDrive/Desktop/DB_2/'

# Namespace for the cricket ontology terms (not one of RDFlib's built-in namespaces)
CNS = Namespace("http://cwnamespace.org/cricketOntology#")

# Read the three source CSVs into DataFrames, in the same order as before
matches, deliveries, points_table = (
    pd.read_csv(csv_url)
    for csv_url in (matches_url, deliveries_url, points_table_url)
)


In [160]:
# Graph that collects the Player triples
g = Graph()

# Bind each namespace to a short prefix for more readable output
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("cricket", CNS)):
    g.bind(prefix, namespace)

In [161]:

# Players
# Every distinct name seen in the striker or bowler column becomes a Player resource.
striker_players = deliveries['striker_name'].unique()
bowler_players = deliveries['bowler_name'].unique()

# Sorted, de-duplicated union of the two name arrays
players = np.union1d(striker_players, bowler_players)

for player_name in players:
    # Resource IRI is "Player_" + the name with spaces turned into underscores
    Player = URIRef(CNS['Player_' + player_name.replace(' ', '_')])
    player_triples = (
        (Player, RDF.type, CNS.Player),
        (Player, CNS['name'], Literal(player_name, datatype=XSD.string)),
    )
    for triple in player_triples:
        g.add(triple)


In [162]:
# Save players to TTL file
# Turtle documents are UTF-8. Without an explicit encoding, open() uses the
# platform default, which can raise on or mis-encode non-ASCII characters
# (e.g. accented player names).
with open(save_path + 'players.ttl', 'w', encoding='utf-8') as file:
    file.write(g.serialize(format='turtle'))

# Separate graph that collects the Match triples
g_matches = Graph()

# Same prefix bindings as the players graph
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("cricket", CNS)):
    g_matches.bind(prefix, namespace)

In [163]:

# Matches
# One Match resource per row of matches.csv, identified by its match_number.
#
# (predicate local name, CSV column, XSD datatype) for every literal property.
match_fields = [
    ('match_name', 'match', XSD.string),
    ('venue', 'match_venue', XSD.string),
    ('city', 'match_city', XSD.string),
    ('player_of_the_match', 'player_of_the_match', XSD.string),
    ('umpire1', 'match_umpire1', XSD.string),
    ('umpire2', 'match_umpire2', XSD.string),
    ('first_inning_score', 'first_inning_score', XSD.int),
    ('second_inning_score', 'second_inning_score', XSD.int),
    ('match_winner', 'match_winner', XSD.string),
    ('winner_runs', 'winner_runs', XSD.int),
    ('match_type', 'match_type', XSD.string),
    ('winner_wickets', 'winner_wickets', XSD.int),
    ('team1', 'team1', XSD.string),
    ('team2', 'team2', XSD.string),
    ('date', 'match_date', XSD.string),
]

for index, row in matches.iterrows():
    Match = URIRef(CNS['Match_' + str(row['match_number'])])
    g_matches.add((Match, RDF.type, CNS.Match))

    for prop_name, column_name, xsd_type in match_fields:
        cell_value = row[column_name]

        # An empty CSV cell is read as NaN; emitting it would create a bogus
        # "nan" literal, so the property is simply left out for this match.
        if pd.isna(cell_value):
            continue

        # pandas stores an integer column that contains gaps as float, so a
        # whole number arrives as e.g. 45.0, and "45.0" is not a valid xsd:int
        # lexical form. Non-integral or non-numeric values pass through unchanged.
        if xsd_type == XSD.int and isinstance(cell_value, float) and cell_value.is_integer():
            cell_value = int(cell_value)

        g_matches.add((Match, CNS[prop_name], Literal(cell_value, datatype=xsd_type)))
  

In [164]:
# Save matches to TTL file
# Turtle documents are UTF-8. Without an explicit encoding, open() uses the
# platform default, which can raise on or mis-encode non-ASCII characters.
with open(save_path + 'matches.ttl', 'w', encoding='utf-8') as file:
    file.write(g_matches.serialize(format='turtle'))

# Separate graph that collects the Deliveries triples
g_deliveries = Graph()

# Same prefix bindings as the other graphs
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("cricket", CNS)):
    g_deliveries.bind(prefix, namespace)

In [165]:
# Deliveries
# One Deliveries resource per row of deliveries.csv, identified by the
# DataFrame index, linked to its match, batsman and bowler resources.
#
# (predicate local name, CSV column, XSD datatype) for every literal property.
delivery_fields = [
    ('inning', 'innings', XSD.int),
    ('runs', 'runs_off_bat', XSD.int),
    ('player_dismissed', 'player_dismissed', XSD.string),
    ('wicket_type', 'wicket_type', XSD.string),
]

for index, row in deliveries.iterrows():
    Deliveries = URIRef(CNS['Deliveries' + str(index)])
    g_deliveries.add((Deliveries, RDF.type, CNS.Deliveries))

    # Player IRIs follow the "Player_" + name-with-underscores pattern
    batsman = URIRef(CNS['Player_' + row['striker_name'].replace(' ', '_')])
    bowler = URIRef(CNS['Player_' + row['bowler_name'].replace(' ', '_')])
    # NOTE(review): Match IRIs are built from `match_id` here, while the matches
    # loop builds them from `match_number` -- confirm both columns hold the same
    # identifiers, otherwise these links point at Match resources that are
    # never described.
    match = URIRef(CNS['Match_' + str(row['match_id'])])

    g_deliveries.add((Deliveries, CNS['match'], match))
    g_deliveries.add((Deliveries, CNS['batsman'], batsman))
    g_deliveries.add((Deliveries, CNS['bowler'], bowler))

    for prop_name, column_name, xsd_type in delivery_fields:
        cell_value = row[column_name]

        # An empty CSV cell is read as NaN; skip it instead of writing a "nan"
        # literal (e.g. no dismissal on this ball means no
        # player_dismissed / wicket_type triple).
        if pd.isna(cell_value):
            continue

        # A whole number stored as float (45.0) would serialize as "45.0",
        # which is not a valid xsd:int lexical form; anything else passes
        # through unchanged.
        if xsd_type == XSD.int and isinstance(cell_value, float) and cell_value.is_integer():
            cell_value = int(cell_value)

        g_deliveries.add((Deliveries, CNS[prop_name], Literal(cell_value, datatype=xsd_type)))


In [166]:
# Save Deliveries to TTL file
# Turtle documents are UTF-8. Without an explicit encoding, open() uses the
# platform default, which can raise on or mis-encode non-ASCII characters.
with open(save_path + 'deliveries.ttl', 'w', encoding='utf-8') as file:
    file.write(g_deliveries.serialize(format='turtle'))

# Separate graph that collects the Team / points-table triples
g_points_table = Graph()

# Same prefix bindings as the other graphs
for prefix, namespace in (("foaf", FOAF), ("xsd", XSD), ("cricket", CNS)):
    g_points_table.bind(prefix, namespace)

In [167]:

# Points Table
# One Team resource per row of points_table.csv.
#
# Columns emitted as xsd:int; for these the predicate local name is the same
# as the CSV column name.
team_int_columns = [
    'team_points',
    'matches_played',
    'matches_won',
    'matches_lost',
    'team_ranking',
    'no_result',
]

for index, row in points_table.iterrows():
    # Team IRI is "Team_" + the team name with spaces turned into underscores
    Team = URIRef(CNS['Team_' + row['team'].replace(' ', '_')])

    g_points_table.add((Team, RDF.type, CNS.Team))
    g_points_table.add((Team, CNS['team_name'], Literal(row['team'], datatype=XSD.string)))

    for column_name in team_int_columns:
        g_points_table.add((Team, CNS[column_name], Literal(row[column_name], datatype=XSD.int)))

    # Net run rate is the only property emitted as xsd:double
    g_points_table.add((Team, CNS['team_net_run_rate'], Literal(row['team_net_run_rate'], datatype=XSD.double)))


# Save points table to TTL file
# Turtle documents are UTF-8. Without an explicit encoding, open() uses the
# platform default, which can raise on or mis-encode non-ASCII characters.
with open(save_path + 'points_table.ttl', 'w', encoding='utf-8') as file:
    file.write(g_points_table.serialize(format='turtle'))