In [4]:
import pandas as pd

# Load the drivers table from the CSV file sitting next to the notebook.
drivers_csv = 'drivers.csv'
drivers_df = pd.read_csv(drivers_csv)

# Show the first five rows as a quick sanity check of the columns.
drivers_df.head(5)

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


In [24]:
# Load the qualifying results from the CSV file sitting next to the notebook.
qualifying_csv = 'qualifying.csv'
qualifying_df = pd.read_csv(qualifying_csv)

# Show the first five rows as a quick sanity check of the columns.
qualifying_df.head(5)

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
0,1,18,1,1,22,1,1:26.572,1:25.187,1:26.714
1,2,18,9,2,4,2,1:26.103,1:25.315,1:26.869
2,3,18,5,1,23,3,1:25.664,1:25.452,1:27.079
3,4,18,13,6,2,4,1:25.994,1:25.691,1:27.178
4,5,18,2,2,3,5,1:25.960,1:25.518,1:27.236


<h2>Data Cleaning</h2>

In [25]:
# The CSV files mark a missing value with the two-character string '\N'
# (backslash + N); turn every such cell into None.
#
# The {old: new} dict form is used on purpose: with a scalar `to_replace`,
# some pandas versions treat `value=None` as "no value given" and fall back
# to pad-filling from the previous row, which silently copies data between
# rows. The dict form always means "replace with None".
#
# The frames are rebound instead of mutated with inplace=True so the cell
# can be re-run safely and gives the same result each time.
drivers_df = drivers_df.replace({'\\N': None})
qualifying_df = qualifying_df.replace({'\\N': None})

<h2>Convert to RDF</h2>

In [28]:
from rdflib import Graph, URIRef, Literal, Namespace, RDF, XSD

# Graph that the following cells add triples to, serialize and query.
g = Graph()

# Namespace under which every resource and property of this notebook is minted.
F1 = Namespace('http://example.com/f1/')

# Driver columns exported as plain literals; each property is named after
# its column (e.g. column 'surname' -> F1:surname).
DRIVER_LITERAL_COLUMNS = [
    'driverRef',
    'number',
    'code',
    'forename',
    'surname',
    'dob',
    'nationality',
    'url',
]

for _, driver_row in drivers_df.iterrows():
    # F1[...] already returns a URIRef, so no extra URIRef(...) wrapper is needed.
    driver_uri = F1['driver/' + str(driver_row['driverId'])]
    g.add((driver_uri, RDF.type, F1.Driver))

    for column in DRIVER_LITERAL_COLUMNS:
        value = driver_row[column]
        # Cells that were missing in the CSV (None/NaN after cleaning) must
        # not be exported: passing them to Literal() would assert a bogus
        # value for the driver instead of simply leaving the property out.
        if pd.isna(value):
            continue
        g.add((driver_uri, F1[column], Literal(value)))

In [29]:
# Qualifying columns exported as literals, paired with the datatype to attach
# (None = let rdflib pick the literal form from the Python value).
# qualifyId and the q1/q2/q3 lap times are included because the query cell
# selects them; without these triples that query can never match.
# The lap times are kept as the original 'm:ss.mmm' strings.
# TODO: convert them to xsd:duration if numeric comparison is needed.
QUALIFYING_LITERAL_COLUMNS = [
    ('qualifyId', XSD.integer),
    ('raceId', XSD.integer),
    ('number', None),
    ('position', XSD.integer),
    ('q1', None),
    ('q2', None),
    ('q3', None),
]

for _, quali_row in qualifying_df.iterrows():
    # F1[...] already returns a URIRef, so no extra URIRef(...) wrapper is needed.
    qualifying_uri = F1['qualifying/' + str(quali_row['qualifyId'])]
    g.add((qualifying_uri, RDF.type, F1.Qualifying))

    # Links to other resources are stored as URIs, not literals.
    g.add((qualifying_uri, F1.driverId, F1['driver/' + str(quali_row['driverId'])]))
    g.add((qualifying_uri, F1.constructorId, F1['constructor/' + str(quali_row['constructorId'])]))

    for column, datatype in QUALIFYING_LITERAL_COLUMNS:
        value = quali_row[column]
        # Skip cells that were missing in the CSV (None/NaN after cleaning),
        # e.g. q2/q3 for drivers who did not take part in those sessions,
        # rather than asserting a bogus literal.
        if pd.isna(value):
            continue
        g.add((qualifying_uri, F1[column], Literal(value, datatype=datatype)))

# Serialize RDF Data (Turtle syntax) to disk.
g.serialize('f1_data.rdf', format='turtle')


<Graph identifier=N93d89e5fe04b4b3691267fbc5eb50fea (<class 'rdflib.graph.Graph'>)>

<h2>Save RDF Data:</h2>

In [30]:
# Write the current contents of the graph to disk in Turtle syntax.
g.serialize(destination='f1_data.rdf', format='turtle')

<Graph identifier=N93d89e5fe04b4b3691267fbc5eb50fea (<class 'rdflib.graph.Graph'>)>

In [31]:
# Attach every qualifying record to its driver resource through F1:driver,
# the property the query cell uses to join the two.
for _, quali_row in qualifying_df.iterrows():
    record_node = F1[f"qualifying/{quali_row['qualifyId']}"]
    driver_node = F1[f"driver/{quali_row['driverId']}"]
    g.add((record_node, F1.driver, driver_node))

# Serialize the updated RDF data
g.serialize(destination='f1_data.rdf', format='turtle')

<Graph identifier=N93d89e5fe04b4b3691267fbc5eb50fea (<class 'rdflib.graph.Graph'>)>

<h2>Query RDF Data:</h2>

In [37]:
# Join every qualifying record to its driver and pull the properties of both.
#
# Properties that are not present on every resource are wrapped in OPTIONAL:
# driver number/code are missing for many drivers in the source data, and the
# qualifyId / q1 / q2 / q3 triples only exist if the conversion cell exported
# them. As mandatory patterns, a single absent triple would drop the whole
# row - and with none of them in the graph the query would return nothing.
# The F1:driver link is required and is added by the cell that connects
# qualifying records to drivers, so that cell must have been run first.
qres = g.query(
    """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX F1: <http://example.com/f1/>

    SELECT ?driver ?driverRef ?number ?code ?forename ?surname ?dob ?nationality ?url
       ?qualifyId ?raceId ?constructorId ?position
       ?q1 ?q2 ?q3
    WHERE {
        ?driver rdf:type F1:Driver .
        ?driver F1:driverRef ?driverRef .
        ?driver F1:forename ?forename .
        ?driver F1:surname ?surname .
        ?driver F1:dob ?dob .
        ?driver F1:nationality ?nationality .
        ?driver F1:url ?url .
        ?qualifying rdf:type F1:Qualifying .
        ?qualifying F1:driver ?driver .
        ?qualifying F1:raceId ?raceId .
        ?qualifying F1:constructorId ?constructorId .
        ?qualifying F1:position ?position .
        OPTIONAL { ?driver F1:number ?number . }
        OPTIONAL { ?driver F1:code ?code . }
        OPTIONAL { ?qualifying F1:qualifyId ?qualifyId . }
        OPTIONAL { ?qualifying F1:q1 ?q1 . }
        OPTIONAL { ?qualifying F1:q2 ?q2 . }
        OPTIONAL { ?qualifying F1:q3 ?q3 . }
    }
    """
)

# One result row per (qualifying record, driver) pair; only the driver's
# biographical fields are printed here.
for row in qres:
    print(f"Forename: {row.forename}, Surname: {row.surname}, DOB: {row.dob}, Nationality: {row.nationality}, URL: {row.url} ")