<h2>Data Cleaning</h2>

In [1]:
import pandas as pd

circuits_df = pd.read_csv('./circuits.csv')
constructor_results_df = pd.read_csv('./constructor_results.csv')
constructor_standings_df = pd.read_csv('./constructor_standings.csv')
constructors_df = pd.read_csv('./constructors.csv')
driver_standings_df = pd.read_csv('./driver_standings.csv')
drivers_df = pd.read_csv('./drivers.csv')
lap_times_df = pd.read_csv('./lap_times.csv')
pit_stops_df = pd.read_csv('./pit_stops.csv')
qualifying_df = pd.read_csv('./qualifying.csv')
races_df = pd.read_csv('./races.csv')
results_df = pd.read_csv('./results.csv')
seasons_df = pd.read_csv('./seasons.csv')
sprint_results_df = pd.read_csv('./sprint_results.csv')
status_df = pd.read_csv('./status.csv')

dataframes = {'circuits_df': circuits_df, 'constructor_results_df': constructor_results_df, 
              'constructor_standings_df': constructor_standings_df, 'constructors_df': constructors_df, 
              'driver_standings_df': driver_standings_df, 'drivers_df': drivers_df, 
              'lap_times_df': lap_times_df, 'pit_stops_df': pit_stops_df, 
              'qualifying_df': qualifying_df, 'races_df': races_df, 
              'results_df': results_df, 'seasons_df': seasons_df, 
              'sprint_results_df': sprint_results_df, 'status_df': status_df}

columns_with_N = {}

for name, df in dataframes.items():
    df.replace('\\N', None, inplace=True)
    columns = df.columns[df.isna().any()].tolist()
    if columns:
        columns_with_N[name] = columns

for name, columns in columns_with_N.items():
    print(f"Yes, '\\N' exists in the following column(s) in {name}: {', '.join(columns)}")


Yes, '\N' exists in the following column(s) in circuits_df: alt
Yes, '\N' exists in the following column(s) in constructor_results_df: status
Yes, '\N' exists in the following column(s) in drivers_df: number, code
Yes, '\N' exists in the following column(s) in qualifying_df: q1, q2, q3
Yes, '\N' exists in the following column(s) in races_df: time, fp1_date, fp1_time, fp2_date, fp2_time, fp3_date, fp3_time, quali_date, quali_time, sprint_date, sprint_time
Yes, '\N' exists in the following column(s) in results_df: number, position, time, milliseconds, fastestLap, rank, fastestLapTime, fastestLapSpeed
Yes, '\N' exists in the following column(s) in sprint_results_df: position, time, milliseconds, fastestLap, fastestLapTime


<h2>Convert CSV to RDF</h2>

In [2]:
from rdflib import Graph, URIRef, Literal, Namespace, RDF, XSD
from rdflib.namespace import OWL, RDFS, FOAF
from rdflib.term import BNode
F1 = Namespace('http://example.com/f1/')
XSD = Namespace('http://www.w3.org/2001/XMLSchema#')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
dbo = Namespace('http://dbpedia.org/ontology/')

g = Graph()

<p style='text-align: center;'><b>1- drivers_df</b></p>

In [3]:
for index, row in drivers_df.iterrows():
    driver_uri = URIRef(F1['driver/' + str(row['driverId'])])
    g.add((driver_uri, RDF.type, FOAF.Person))  
    g.add((driver_uri, FOAF.nick, Literal(row['driverRef'])))  
    if pd.notna(row['number']): #if number is not available, it is not included in the rdf file.  
        g.add((driver_uri, F1.number, Literal(row['number'], datatype=XSD.integer)))
    if pd.notna(row['code']):
        g.add((driver_uri, F1.code, Literal(row['code'])))
    g.add((driver_uri, FOAF.givenName, Literal(row['forename']))) 
    g.add((driver_uri, FOAF.familyName, Literal(row['surname'])))  
    g.add((driver_uri, FOAF.birthday, Literal(row['dob'], datatype=XSD.date)))  
    g.add((driver_uri, F1.nationality, Literal(row['nationality'])))
    g.add((driver_uri, FOAF.homepage, URIRef(row['url'])))  

<h4>Converting duration to a number in seconds</h4>

In [4]:
def convert_to_seconds(time_str):
    parts = time_str.split(':')
    if len(parts) == 3:
        return int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
    elif len(parts) == 2:
        return int(parts[0]) * 60 + float(parts[1])
    else:
        return float(parts[0])


<p style='text-align: center;'><b>2- qualifying_df</b></p>

In [5]:
for index, row in qualifying_df.iterrows():
    qualifying_uri = URIRef(F1['qualifying/' + str(row['qualifyId'])])
    g.add((qualifying_uri, RDF.type, F1.Qualifying))
    g.add((qualifying_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((qualifying_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    g.add((qualifying_uri, F1.constructorId, URIRef(F1['constructor/' + str(row['constructorId'])])))
    g.add((qualifying_uri, F1.number, Literal(row['number'], datatype=XSD.integer)))
    g.add((qualifying_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    if pd.notna(row['q1']):
        g.add((qualifying_uri, F1.q1, Literal(convert_to_seconds(row['q1']), datatype=XSD.integer)))
    if pd.notna(row['q2']):
        g.add((qualifying_uri, F1.q2, Literal(convert_to_seconds(row['q2']), datatype=XSD.integer)))
    if pd.notna(row['q3']):
        g.add((qualifying_uri, F1.q3, Literal(convert_to_seconds(row['q3']), datatype=XSD.integer)))

<p style='text-align: center;'><b>3- constructors_df</b></p>

In [6]:
for index, row in constructors_df.iterrows():
    constructor_uri = URIRef(F1['constructor/' + str(row['constructorId'])])
    g.add((constructor_uri, RDF.type, F1.Constructor))
    g.add((constructor_uri, F1.constructorRef, Literal(row['constructorRef'])))
    g.add((constructor_uri, F1.name, Literal(row['name'])))
    g.add((constructor_uri, F1.nationality, Literal(row['nationality'])))
    g.add((constructor_uri, F1.url, URIRef(row['url'])))

<p style='text-align: center;'><b>4- circuits_df</b></p>

In [7]:
for index, row in circuits_df.iterrows():
    circuit_uri = URIRef(F1['circuit/' + str(row['circuitId'])])
    g.add((circuit_uri, RDF.type, F1.Circuit))
    g.add((circuit_uri, F1.circuitRef, Literal(row['circuitRef'])))
    g.add((circuit_uri, F1.name, Literal(row['name'])))
    g.add((circuit_uri, F1.location, Literal(row['location'])))
    g.add((circuit_uri, F1.country, Literal(row['country'])))
    g.add((circuit_uri, F1.latlng, Literal(f"{row['lat']},{row['lng']}", datatype=XSD.string)))
    if pd.notna(row['alt']):
        g.add((circuit_uri, F1.alt, Literal(row['alt'], datatype=XSD.integer)))
    g.add((circuit_uri, F1.url, URIRef(row['url'])))

<p style='text-align: center;'><b>5- constructor_results_df</b></p>

In [8]:
for index, row in constructor_results_df.iterrows():
    constructor_results_uri = URIRef(F1['constructor_results/' + str(row['constructorResultsId'])])
    g.add((constructor_results_uri, RDF.type, F1.ConstructorResults))
    g.add((constructor_results_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((constructor_results_uri, F1.constructorId, URIRef(F1['constructor/' + str(row['constructorId'])])))
    g.add((constructor_results_uri, F1.points, Literal(row['points'], datatype=XSD.float)))
    if pd.notna(row['status']):
        g.add((constructor_results_uri, F1.status, Literal(row['status'])))

<p style='text-align: center;'><b>6- constructor_standings_df</b></p>

In [9]:
for index, row in constructor_standings_df.iterrows():
    constructor_standings_uri = URIRef(F1['constructor_standings/' + str(row['constructorStandingsId'])])
    g.add((constructor_standings_uri, RDF.type, F1.ConstructorStandings))
    g.add((constructor_standings_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((constructor_standings_uri, F1.constructorId, URIRef(F1['constructor/' + str(row['constructorId'])])))
    g.add((constructor_standings_uri, F1.points, Literal(row['points'], datatype=XSD.float)))
    g.add((constructor_standings_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    g.add((constructor_standings_uri, F1.positionText, Literal(row['positionText'])))
    g.add((constructor_standings_uri, F1.wins, Literal(row['wins'], datatype=XSD.integer)))

<p style='text-align: center;'><b>7- driver_standings_df</b></p>

In [10]:
for index, row in driver_standings_df.iterrows():
    driver_standings_uri = URIRef(F1['driver_standings/' + str(row['driverStandingsId'])])
    g.add((driver_standings_uri, RDF.type, F1.DriverStandings))
    g.add((driver_standings_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((driver_standings_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    g.add((driver_standings_uri, F1.points, Literal(row['points'], datatype=XSD.float)))
    g.add((driver_standings_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    g.add((driver_standings_uri, F1.positionText, Literal(row['positionText'])))
    g.add((driver_standings_uri, F1.wins, Literal(row['wins'], datatype=XSD.integer)))

<p style='text-align: center;'><b>8- lap_times_df</b></p>

In [11]:
for index, row in lap_times_df.iterrows():
    lap_times_uri = URIRef(F1['lap_times/' + str(row['lap'])])
    g.add((lap_times_uri, RDF.type, F1.LapTimes))
    g.add((lap_times_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((lap_times_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    g.add((lap_times_uri, F1.lap, Literal(row['lap'], datatype=XSD.integer)))
    g.add((lap_times_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    g.add((lap_times_uri, F1.time, Literal(convert_to_seconds(row['time']), datatype=XSD.integer)))
    g.add((lap_times_uri, F1.milliseconds, Literal(row['milliseconds'], datatype=XSD.integer)))

In [12]:
def convert_to_seconds_1(time_str):
    parts = time_str.split(':')
    if len(parts) == 3:
        return round(int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2]), 3)
    elif len(parts) == 2:
        return round(int(parts[0]) * 60 + float(parts[1]), 3)
    else:
        return round(float(parts[0]), 3)

<p style='text-align: center;'><b>9- pit_stops_df</b></p>

In [13]:
for index, row in pit_stops_df.iterrows():
    pit_stops_uri = URIRef(F1['pit_stops/' + str(row['stop'])])
    g.add((pit_stops_uri, RDF.type, F1.PitStops))
    if pd.notna(row['raceId']):
        g.add((pit_stops_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    if pd.notna(row['driverId']):
        g.add((pit_stops_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    if pd.notna(row['stop']):
        g.add((pit_stops_uri, F1.stop, Literal(row['stop'], datatype=XSD.integer)))
    if pd.notna(row['lap']):
        g.add((pit_stops_uri, F1.lap, Literal(row['lap'], datatype=XSD.integer)))
    if pd.notna(row['time']):
        g.add((pit_stops_uri, F1.time, Literal(convert_to_seconds_1(row['time']), datatype=XSD.float)))
    if pd.notna(row['duration']):
        g.add((pit_stops_uri, F1.duration, Literal(convert_to_seconds_1(row['duration']), datatype=XSD.float)))
    if pd.notna(row['milliseconds']):
        g.add((pit_stops_uri, F1.milliseconds, Literal(row['milliseconds'], datatype=XSD.integer)))

<p style='text-align: center;'><b>10- races_df</b></p>

In [14]:
for index, row in races_df.iterrows():
    race_uri = URIRef(F1['race/' + str(row['raceId'])])
    g.add((race_uri, RDF.type, F1.Race))
    g.add((race_uri, F1.year, Literal(row['year'], datatype=XSD.integer)))
    g.add((race_uri, F1.round, Literal(row['round'], datatype=XSD.integer)))
    g.add((race_uri, F1.circuitId, URIRef(F1['circuit/' + str(row['circuitId'])])))
    g.add((race_uri, F1.name, Literal(row['name'])))
    g.add((race_uri, F1.date, Literal(row['date'], datatype=XSD.date)))
    if pd.notna(row['time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['time']), datatype=XSD.float)))
    g.add((race_uri, F1.url, URIRef(row['url'])))
    if pd.notna(row['fp1_date']):
        g.add((race_uri, F1.time, Literal(row['fp1_date'], datatype=XSD.date)))
    if pd.notna(row['fp1_time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['fp1_time']), datatype=XSD.float)))
    if pd.notna(row['fp2_date']):
        g.add((race_uri, F1.time, Literal(row['fp2_date'], datatype=XSD.date)))
    if pd.notna(row['fp2_time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['fp2_time']), datatype=XSD.float)))
    if pd.notna(row['fp3_date']):
        g.add((race_uri, F1.time, Literal(row['fp3_date'], datatype=XSD.date)))
    if pd.notna(row['fp3_time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['fp3_time']), datatype=XSD.float)))
    if pd.notna(row['quali_date']):
        g.add((race_uri, F1.time, Literal(row['quali_date'], datatype=XSD.date)))
    if pd.notna(row['quali_time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['quali_time']), datatype=XSD.float)))
    if pd.notna(row['sprint_date']):
        g.add((race_uri, F1.time, Literal(row['sprint_date'], datatype=XSD.date)))
    if pd.notna(row['sprint_time']):
        g.add((race_uri, F1.time, Literal(convert_to_seconds_1(row['sprint_time']), datatype=XSD.float)))

<p style='text-align: center;'><b>11- results_df</b></p>

In [15]:
for index, row in results_df.iterrows():
    result_uri = URIRef(F1['result/' + str(row['resultId'])])
    g.add((result_uri, RDF.type, F1.Result))
    g.add((result_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((result_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    g.add((result_uri, F1.constructorId, URIRef(F1['constructor/' + str(row['constructorId'])])))
    if pd.notna(row['number']):
        g.add((result_uri, F1.number, Literal(row['number'], datatype=XSD.integer)))
    g.add((result_uri, F1.grid, Literal(row['grid'], datatype=XSD.integer)))
    if pd.notna(row['position']):
        g.add((result_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    g.add((result_uri, F1.positionText, Literal(row['positionText'])))
    g.add((result_uri, F1.positionOrder, Literal(row['positionOrder'], datatype=XSD.integer)))
    g.add((result_uri, F1.points, Literal(row['points'], datatype=XSD.float)))
    g.add((result_uri, F1.laps, Literal(row['laps'], datatype=XSD.integer)))
    if pd.notna(row['time']):
        g.add((result_uri, F1.time, Literal(convert_to_seconds_1(row['time']), datatype=XSD.float)))
    if pd.notna(row['milliseconds']):
        g.add((result_uri, F1.milliseconds, Literal(row['milliseconds'], datatype=XSD.integer)))
    if pd.notna(row['fastestLap']):
        g.add((result_uri, F1.fastestLap, Literal(row['fastestLap'], datatype=XSD.integer)))
    if pd.notna(row['rank']):
        g.add((result_uri, F1.rank, Literal(row['rank'], datatype=XSD.integer)))
    if pd.notna(row['fastestLapTime']):
        g.add((result_uri, F1.fastestLapTime, Literal(convert_to_seconds_1(row['fastestLapTime']), datatype=XSD.float)))
    if pd.notna(row['fastestLapSpeed']):
        g.add((result_uri, F1.fastestLapSpeed, Literal(row['fastestLapSpeed'], datatype=XSD.float)))
    g.add((result_uri, F1.statusId, URIRef(F1['status/' + str(row['statusId'])])))

<p style='text-align: center;'><b>12- seasons_df</b></p>

In [16]:
for index, row in seasons_df.iterrows():
    season_uri = URIRef(F1['season/' + str(row['year'])])
    g.add((season_uri, RDF.type, F1.Season))
    g.add((season_uri, F1.year, Literal(row['year'], datatype=XSD.integer)))
    g.add((season_uri, F1.url, URIRef(row['url'])))

<p style='text-align: center;'><b>13- sprint_results_df</b></p>

In [17]:
for index, row in sprint_results_df.iterrows():
    sprint_result_uri = URIRef(F1['sprint_result/' + str(row['resultId'])])
    g.add((sprint_result_uri, RDF.type, F1.SprintResult))
    g.add((sprint_result_uri, F1.raceId, URIRef(F1['race/' + str(row['raceId'])])))
    g.add((sprint_result_uri, F1.driverId, URIRef(F1['driver/' + str(row['driverId'])])))
    g.add((sprint_result_uri, F1.constructorId, URIRef(F1['constructor/' + str(row['constructorId'])])))
    g.add((sprint_result_uri, F1.number, Literal(row['number'], datatype=XSD.integer)))
    g.add((sprint_result_uri, F1.grid, Literal(row['grid'], datatype=XSD.integer)))
    if pd.notna(row['position']):
        g.add((sprint_result_uri, F1.position, Literal(row['position'], datatype=XSD.integer)))
    g.add((sprint_result_uri, F1.positionText, Literal(row['positionText'])))
    g.add((sprint_result_uri, F1.positionOrder, Literal(row['positionOrder'], datatype=XSD.integer)))
    g.add((sprint_result_uri, F1.points, Literal(row['points'], datatype=XSD.float)))
    g.add((sprint_result_uri, F1.laps, Literal(row['laps'], datatype=XSD.integer)))
    if pd.notna(row['time']):
        g.add((sprint_result_uri, F1.time, Literal(convert_to_seconds_1(row['time']), datatype=XSD.float)))
    if pd.notna(row['milliseconds']):
        g.add((sprint_result_uri, F1.milliseconds, Literal(row['milliseconds'], datatype=XSD.integer)))
    if pd.notna(row['fastestLap']):
        g.add((sprint_result_uri, F1.fastestLap, Literal(row['fastestLap'], datatype=XSD.integer)))
    if pd.notna(row['fastestLapTime']):
        g.add((sprint_result_uri, F1.fastestLapTime, Literal(convert_to_seconds_1(row['fastestLapTime']), datatype=XSD.float)))
    g.add((sprint_result_uri, F1.statusId, URIRef(F1['status/' + str(row['statusId'])])))

<p style='text-align: center;'><b>14- status_df</b></p>

In [18]:
for index, row in status_df.iterrows():
    status_uri = URIRef(F1['status/' + str(row['statusId'])])
    g.add((status_uri, RDF.type, F1.Status))
    g.add((status_uri, F1.status, Literal(row['status'])))


In [19]:
g.serialize('f1_data.rdf', format='turtle')

<Graph identifier=N342a474deba04e9eb48003bd460373b6 (<class 'rdflib.graph.Graph'>)>

#  Classes and Properties

In [20]:
# Class Definitions
RACE = F1['Race']
DRIVER = F1['Driver']
TEAM = F1['Team']
CIRCUIT = F1['Circuit']
RACE_PARTICIPATION = F1['RaceParticipation']
RACE_SEASON = F1['RaceSeason']
CONSTRUCTOR = F1['Constructor'] 
QualifiedForQ2 = F1["QualifiedForQ2"]
QualifiedForQ3 = F1["QualifiedForQ3"]
ChampionDriver = F1["ChampionDriver"]

# Declare classes as RDF classes
for cls in [RACE, DRIVER, TEAM, CIRCUIT, RACE_PARTICIPATION, RACE_SEASON, CONSTRUCTOR, QualifiedForQ2, 
            QualifiedForQ3, ChampionDriver]:
    g.add((cls, RDF.type, OWL.Class))

# Property Definitions
HAS_DRIVER = F1['hasDriver']
HAS_TEAM = F1['hasTeam']
HAS_CIRCUIT = F1['hasCircuit']
HAS_POSITION = F1['hasPosition']
HAS_POINTS = F1['hasPoints']
DRIVER_PARTICIPATES_IN_RACE = F1['driverParticipatesInRace']
RACE_INCLUDES_DRIVER = F1['raceIncludesDriver']
TEAMMATE_OF = F1['teammateOf']
ANCESTOR_TEAM_OF = F1['ancestorTeamOf']
Q1_TIME = F1["q1"]
Q2_TIME = F1["q2"]
POSITION = F1["position"]

for prop in [HAS_DRIVER, HAS_TEAM, HAS_CIRCUIT, HAS_POSITION, HAS_POINTS, 
             DRIVER_PARTICIPATES_IN_RACE, RACE_INCLUDES_DRIVER, TEAMMATE_OF, ANCESTOR_TEAM_OF, Q1_TIME,
             Q2_TIME, POSITION]:
    g.add((prop, RDF.type, RDF.Property))

## RDFS Axioms

In [21]:
# Every driver is a person
g.add((DRIVER, RDFS.subClassOf, FOAF.Person))

# Every team is a constructor
g.add((TEAM, RDFS.subClassOf, F1.Constructor))

# Every race is an event
g.add((RACE, RDFS.subClassOf, dbo.Event))
g.add((RACE, RDFS.subClassOf, dbo.Event))
g.add((RACE_PARTICIPATION, RDFS.subClassOf, dbo.Event))
g.add((RACE_SEASON, RDF.type, OWL.Class))

#a driver qualified for Q2 and Q3 has a Q1 and Q2 time respectively
q2_qualification_restriction = BNode()
g.add((q2_qualification_restriction, OWL.onProperty, Q1_TIME))
g.add((q2_qualification_restriction, OWL.someValuesFrom, rdflib.XSD.integer))
g.add((QualifiedForQ2, OWL.equivalentClass, q2_qualification_restriction))

q3_qualification_restriction = BNode()
g.add((q3_qualification_restriction, OWL.onProperty, Q2_TIME))
g.add((q3_qualification_restriction, OWL.someValuesFrom, rdflib.XSD.integer))
g.add((QualifiedForQ3, OWL.equivalentClass, q3_qualification_restriction))


# Every race participation involves a driver, a team, and a circuit
g.add((RACE_PARTICIPATION, RDFS.domain, RACE))
g.add((HAS_DRIVER, RDFS.domain, RACE_PARTICIPATION))
g.add((HAS_DRIVER, RDFS.range, DRIVER))
g.add((HAS_TEAM, RDFS.domain, RACE_PARTICIPATION))
g.add((HAS_TEAM, RDFS.range, TEAM))
g.add((HAS_CIRCUIT, RDFS.domain, RACE_PARTICIPATION))
g.add((HAS_CIRCUIT, RDFS.range, CIRCUIT))

# Every race participation has a position and points
g.add((HAS_POSITION, RDFS.domain, RACE_PARTICIPATION))
g.add((HAS_POSITION, RDFS.range, XSD.integer))
g.add((HAS_POINTS, RDFS.domain, RACE_PARTICIPATION))
g.add((HAS_POINTS, RDFS.range, XSD.float))

NameError: name 'rdflib' is not defined

## OWL Axioms 

In [None]:
# Assuming F1.Constructor is defined and equivalent to F1.Team
g.add((F1.Constructor, OWL.equivalentClass, F1.Team))


# date of birtH of each drive is unique
g.add((FOAF.birthday, RDF.type, OWL.FunctionalProperty)
g.add(FOAF.homepage, RDF.type, OWL.FunctionalProperty)

# inverse Properties
g.add((DRIVER_PARTICIPATES_IN_RACE, OWL.inverseOf, RACE_INCLUDES_DRIVER))
g.add((RACE_INCLUDES_DRIVER, OWL.inverseOf, DRIVER_PARTICIPATES_IN_RACE))

# Symmetric and Transitive Properties
g.add((TEAMMATE_OF, RDF.type, OWL.SymmetricProperty))
#  historical evolution of F1 teams due to various reasons such as rebranding, ownership changes, or mergers. 
# Jordan Grand Prix -> Midland F1 Racing -> Spyker F1 -> Force India -> Racing Point/ Aston Martin      
g.add((ANCESTOR_TEAM_OF, RDF.type, OWL.TransitiveProperty)) 

# OWL Restrictions
# restriction: RaceParticipation must have exactly one Driver, Team, and Circuit
race_participation_restriction = BNode()
g.add((race_participation_restriction, OWL.onProperty, HAS_DRIVER))
g.add((race_participation_restriction, OWL.cardinality, Literal(1, datatype=XSD.integer)))
g.add((RACE_PARTICIPATION, OWL.equivalentClass, race_participation_restriction))

team_restriction = BNode()
g.add((team_restriction, OWL.onProperty, HAS_TEAM))
g.add((team_restriction, OWL.cardinality, Literal(1, datatype=XSD.integer)))
g.add((RACE_PARTICIPATION, OWL.equivalentClass, team_restriction))

circuit_restriction = BNode()
g.add((circuit_restriction, OWL.onProperty, HAS_CIRCUIT))
g.add((circuit_restriction, OWL.cardinality, Literal(1, datatype=XSD.integer)))
g.add((RACE_PARTICIPATION, OWL.equivalentClass, circuit_restriction))

# Cardinality Constraints
# Constraint: A driver participates in at least one race
driver_constraint = BNode()
g.add((driver_constraint, OWL.onProperty, DRIVER_PARTICIPATES_IN_RACE))
g.add((driver_constraint, OWL.minCardinality, Literal(1, datatype=XSD.integer)))
g.add((DRIVER, OWL.equivalentClass, driver_constraint))
      
      
# Team must have at least two drivers (typical in F1) - using OWL.minCardinality
team_driver_restriction = BNode()
g.add((team_driver_restriction, OWL.onProperty, F1.hasDriver))
g.add((team_driver_restriction, OWL.minCardinality, Literal(2, datatype=XSD.integer)))
g.add((F1.Team, OWL.equivalentClass, team_driver_restriction))      
      
# First-place finish restriction
first_place_restriction = BNode()
g.add((first_place_restriction, OWL.onProperty, POSITION))
g.add((first_place_restriction, OWL.hasValue, Literal(1, datatype=rdflib.XSD.integer)))


# ChampionDriver class definition
# A ChampionDriver is someone who has a first-place finish and is qualified for Q3
champion_restriction = BNode()
g.add((champion_restriction, OWL.intersectionOf, rdflib.Collection([first_place_restriction, QualifiedForQ3])))
g.add((ChampionDriver, OWL.equivalentClass, champion_restriction))

In [None]:
# from rdflib import OWL

# #Every driver is distinct from every other driver:
# g.add((F1.Driver, OWL.distinctMembers, Literal(True, datatype=XSD.boolean)))

# #Every team is distinct from every other team:
# g.add((F1.Team, OWL.distinctMembers, Literal(True, datatype=XSD.boolean)))

# #Every race is distinct from every other race:
# g.add((F1.Race, OWL.distinctMembers, Literal(True, datatype=XSD.boolean)))

# #A driver can only have one nationality:
# g.add((F1.driverId, RDF.type, OWL.FunctionalProperty))

# # A race can only have one circuit:
# g.add((F1.raceId, RDF.type, OWL.FunctionalProperty))


In [None]:
g.serialize('f1_data.rdf', format='turtle')