# F1 Dataset

This notebook contains the code to download and process the F1 dataset from the [OpenF1 API](https://openf1.org/).

This includes the following datasets:
- Meetings
- Drivers 
- Laps 

In [91]:
import json
import time
from pathlib import Path
from urllib.request import urlopen

import great_tables as gt
import pandas as pd

## Meetings

F1 meeting information

In [103]:
# Download every meeting record from the OpenF1 API and load it into a DataFrame.
# The context manager closes the HTTP connection as soon as the body has been read,
# instead of leaving the socket open until garbage collection.
with urlopen('https://api.openf1.org/v1/meetings') as response:
    meetings = pd.DataFrame(json.loads(response.read().decode('utf-8')))
# Render the first 10 rows as a formatted table.
gt.GT(meetings.head(10))

meeting_key,circuit_key,circuit_short_name,meeting_code,location,country_key,country_code,country_name,meeting_name,meeting_official_name,gmt_offset,date_start,year
1140,63,Sakhir,BRN,Sakhir,36,BRN,Bahrain,Pre-Season Testing,FORMULA 1 ARAMCO PRE-SEASON TESTING 2023,03:00:00,2023-02-23T07:00:00+00:00,2023
1141,63,Sakhir,BRN,Sakhir,36,BRN,Bahrain,Bahrain Grand Prix,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2023,03:00:00,2023-03-03T11:30:00+00:00,2023
1142,149,Jeddah,KSA,Jeddah,153,KSA,Saudi Arabia,Saudi Arabian Grand Prix,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2023,03:00:00,2023-03-17T13:30:00+00:00,2023
1143,10,Melbourne,AUS,Melbourne,5,AUS,Australia,Australian Grand Prix,FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2023,11:00:00,2023-03-31T01:30:00+00:00,2023
1207,144,Baku,AZE,Baku,30,AZE,Azerbaijan,Azerbaijan Grand Prix,FORMULA 1 AZERBAIJAN GRAND PRIX 2023,04:00:00,2023-04-28T09:30:00+00:00,2023
1208,151,Miami,USA,Miami,19,USA,United States,Miami Grand Prix,FORMULA 1 CRYPTO.COM MIAMI GRAND PRIX 2023,-04:00:00,2023-05-05T18:00:00+00:00,2023
1210,22,Monte Carlo,MON,Monaco,114,MON,Monaco,Monaco Grand Prix,FORMULA 1 GRAND PRIX DE MONACO 2023,02:00:00,2023-05-26T11:30:00+00:00,2023
1211,15,Catalunya,ESP,Barcelona,1,ESP,Spain,Spanish Grand Prix,FORMULA 1 AWS GRAN PREMIO DE ESPAÑA 2023,02:00:00,2023-06-02T11:30:00+00:00,2023
1212,23,Montreal,CAN,Montréal,46,CAN,Canada,Canadian Grand Prix,FORMULA 1 PIRELLI GRAND PRIX DU CANADA 2023,-04:00:00,2023-06-16T17:30:00+00:00,2023
1213,19,Spielberg,AUT,Spielberg,17,AUT,Austria,Austrian Grand Prix,FORMULA 1 ROLEX GROSSER PREIS VON ÖSTERREICH 2023,02:00:00,2023-06-30T11:30:00+00:00,2023


In [105]:
# Unique meeting keys for each season, selected with a single .loc lookup
# (row mask on "year", column "meeting_key").
meetings_2023 = meetings.loc[meetings["year"] == 2023, "meeting_key"].unique()
meetings_2024 = meetings.loc[meetings["year"] == 2024, "meeting_key"].unique()
meetings_2025 = meetings.loc[meetings["year"] == 2025, "meeting_key"].unique()

# Unique meeting keys across all seasons; this is what the laps download iterates over.
all_meeting_keys = meetings["meeting_key"]
meeting_list = all_meeting_keys.unique()
meeting_list

array([1140, 1141, 1142, 1143, 1207, 1208, 1210, 1211, 1212, 1213, 1214,
       1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225,
       1226, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237,
       1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248,
       1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259,
       1260, 1261, 1262, 1263, 1264, 1277, 1265, 1266, 1267, 1268, 1269])

In [88]:
# Distinct meeting names found in the meetings table.
meeting_names = meetings["meeting_name"].unique()
print(meeting_names)

['Pre-Season Testing' 'Bahrain Grand Prix' 'Saudi Arabian Grand Prix'
 'Australian Grand Prix' 'Azerbaijan Grand Prix' 'Miami Grand Prix'
 'Monaco Grand Prix' 'Spanish Grand Prix' 'Canadian Grand Prix'
 'Austrian Grand Prix' 'British Grand Prix' 'Hungarian Grand Prix'
 'Belgian Grand Prix' 'Dutch Grand Prix' 'Italian Grand Prix'
 'Singapore Grand Prix' 'Japanese Grand Prix' 'Qatar Grand Prix'
 'United States Grand Prix' 'Mexico City Grand Prix'
 'São Paulo Grand Prix' 'Las Vegas Grand Prix' 'Abu Dhabi Grand Prix'
 'Chinese Grand Prix' 'Emilia Romagna Grand Prix']


In [40]:
# Distinct country names found in the meetings table.
country_names = meetings["country_name"].unique()
print(country_names)

['Bahrain' 'Saudi Arabia' 'Australia' 'Azerbaijan' 'United States'
 'Monaco' 'Spain' 'Canada' 'Austria' 'Great Britain' 'Hungary' 'Belgium'
 'Netherlands' 'Italy' 'Singapore' 'Japan' 'Qatar' 'Mexico' 'Brazil'
 'United Arab Emirates' 'China' 'United Kingdom']


In [113]:
# Save the meetings table as CSV (without the DataFrame index).
# Create the target directory first: to_csv raises OSError when the parent
# directory does not exist, e.g. on a fresh clone.
meetings_csv = Path("../data/meetings.csv")
meetings_csv.parent.mkdir(parents=True, exist_ok=True)
meetings.to_csv(meetings_csv, index=False)

## Drivers Data

In [50]:
# Download every driver record from the OpenF1 API and load it into a DataFrame.
# The context manager closes the HTTP connection as soon as the body has been read,
# instead of leaving the socket open until garbage collection.
with urlopen('https://api.openf1.org/v1/drivers') as response:
    drivers = pd.DataFrame(json.loads(response.read().decode('utf-8')))
# Render the first 10 rows as a formatted table.
gt.GT(drivers.head(10))

meeting_key,session_key,driver_number,broadcast_name,full_name,name_acronym,team_name,team_colour,first_name,last_name,headshot_url,country_code
1140,7763,1,M VERSTAPPEN,Max VERSTAPPEN,VER,Red Bull Racing,3671C6,Max,Verstappen,https://www.formula1.com/content/dam/fom-website/drivers/M/MAXVER01_Max_Verstappen/maxver01.png.transform/1col/image.png,NED
1140,7763,2,L SARGEANT,Logan SARGEANT,SAR,Williams,37BEDD,Logan,Sargeant,https://www.formula1.com/content/dam/fom-website/drivers/L/LOGSAR01_Logan_Sargeant/logsar01.png.transform/1col/image.png,USA
1140,7763,4,L NORRIS,Lando NORRIS,NOR,McLaren,F58020,Lando,Norris,https://www.formula1.com/content/dam/fom-website/drivers/L/LANNOR01_Lando_Norris/lannor01.png.transform/1col/image.png,GBR
1140,7763,10,P GASLY,Pierre GASLY,GAS,Alpine,2293D1,Pierre,Gasly,https://www.formula1.com/content/dam/fom-website/drivers/P/PIEGAS01_Pierre_Gasly/piegas01.png.transform/1col/image.png,FRA
1140,7763,11,S PEREZ,Sergio PEREZ,PER,Red Bull Racing,3671C6,Sergio,Perez,https://www.formula1.com/content/dam/fom-website/drivers/S/SERPER01_Sergio_Perez/serper01.png.transform/1col/image.png,MEX
1140,7763,14,F ALONSO,Fernando ALONSO,ALO,Aston Martin,358C75,Fernando,Alonso,https://www.formula1.com/content/dam/fom-website/drivers/F/FERALO01_Fernando_Alonso/feralo01.png.transform/1col/image.png,ESP
1140,7763,16,C LECLERC,Charles LECLERC,LEC,Ferrari,F91536,Charles,Leclerc,https://www.formula1.com/content/dam/fom-website/drivers/C/CHALEC01_Charles_Leclerc/chalec01.png.transform/1col/image.png,MON
1140,7763,20,K MAGNUSSEN,Kevin MAGNUSSEN,MAG,Haas F1 Team,B6BABD,Kevin,Magnussen,https://www.formula1.com/content/dam/fom-website/drivers/K/KEVMAG01_Kevin_Magnussen/kevmag01.png.transform/1col/image.png,DEN
1140,7763,21,N DE VRIES,Nyck DE VRIES,DEV,AlphaTauri,5E8FAA,Nyck,De Vries,https://www.formula1.com/content/dam/fom-website/drivers/N/NYCDEV01_Nyck_De%20Vries/nycdev01.png.transform/1col/image.png,NED
1140,7763,22,Y TSUNODA,Yuki TSUNODA,TSU,AlphaTauri,5E8FAA,Yuki,Tsunoda,https://www.formula1.com/content/dam/fom-website/drivers/Y/YUKTSU01_Yuki_Tsunoda/yuktsu01.png.transform/1col/image.png,JPN


In [51]:
# Number of rows in the drivers table.
drivers.shape[0]

6478

In [112]:
# Save the drivers table as CSV (without the DataFrame index).
# Create the target directory first: to_csv raises OSError when the parent
# directory does not exist, e.g. on a fresh clone.
drivers_csv = Path("../data/drivers.csv")
drivers_csv.parent.mkdir(parents=True, exist_ok=True)
drivers.to_csv(drivers_csv, index=False)

## Laps Data

Pulling laps data per meeting key (one API request per meeting)

In [74]:
# Download the laps of a single meeting (meeting_key=1219) as a sample of the laps endpoint.
# The context manager closes the HTTP connection as soon as the body has been read,
# instead of leaving the socket open until garbage collection.
with urlopen('https://api.openf1.org/v1/laps?meeting_key=1219') as response:
    laps = pd.DataFrame(json.loads(response.read().decode('utf-8')))
# Show how many lap rows came back, then preview the first rows as a formatted table.
print(len(laps))
gt.GT(laps.head())

2732


meeting_key,session_key,driver_number,lap_number,date_start,duration_sector_1,duration_sector_2,duration_sector_3,i1_speed,i2_speed,is_pit_out_lap,lap_duration,segments_sector_1,segments_sector_2,segments_sector_3,st_speed
1219,9158,18,1,,,51.693,31.884,297.0,198.0,True,,"[2064, 2064, 2064, 2049, 2051, 2051, 2051, 2051]","[2051, 2051, 2049, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2051, 2051, 2051, 2051, 2051]",
1219,9159,2,1,,,58.946,33.461,304.0,264.0,True,,"[2064, 2064, 2064, 2049, 2049, 2049, 2049, 2051]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2051, 2051, 2051]",
1219,9160,77,1,,,54.468,33.023,233.0,240.0,True,,"[2064, 2064, 2064, 2051, 2051, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2051, 2049, 2049, 2051]","[2051, 2049, 2051, 2051, 2051, 2051, 2051]",
1219,9161,2,1,,,53.111,30.919,300.0,273.0,True,,"[2064, 2064, 2064, 2051, 2051, 2049, 2049, 2051]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2051]","[2049, 2049, 2051, 2051, 2051, 2051, 2051, 2051]",
1219,9161,20,1,,,50.156,37.076,229.0,254.0,True,,"[2064, 2064, 2064, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049]","[2051, 2049, 2049, 2049, 2049, 2051, 2051]",


In [106]:
# Download the laps for every meeting in meeting_list and combine them into lap_df.
#
# The per-meeting frames are collected in a list and concatenated once at the end.
# Calling pd.concat inside the loop re-copies all rows gathered so far on every
# iteration, which is quadratic in the total number of rows.
lap_frames = []
for i in meeting_list:
    print(i)  # progress: the meeting key currently being fetched
    url = f"https://api.openf1.org/v1/laps?meeting_key={i}"
    # Close each HTTP connection once its body has been read, rather than
    # leaving one open socket per meeting.
    with urlopen(url) as response:
        laps_meeting = pd.DataFrame(json.loads(response.read().decode('utf-8')))
    lap_frames.append(laps_meeting)
    time.sleep(2)  # pause between requests to avoid hammering the API

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when meeting_list has no entries.
lap_df = pd.concat(lap_frames, ignore_index=True) if lap_frames else pd.DataFrame()

1140
1141
1142
1143
1207
1208
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1277
1265
1266
1267
1268
1269


In [109]:
# Report the total number of lap rows collected across all meetings.
print(lap_df.shape[0])

# Preview the first 10 rows as a formatted table.
gt.GT(lap_df.iloc[:10])




180672


meeting_key,session_key,driver_number,lap_number,date_start,duration_sector_1,duration_sector_2,duration_sector_3,i1_speed,i2_speed,is_pit_out_lap,lap_duration,segments_sector_1,segments_sector_2,segments_sector_3,st_speed
1140,7763,14,1,,,53.322,27.97,215.0,200.0,True,,"[2064, 2064, 2064, 2049, 2049, 2049, 2051, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049, 2051, 2049]","[2049, 2049, 2049, 2049, 2051, 2051, 2051]",155.0
1140,7764,34,1,,,51.057,32.123,149.0,239.0,True,,"[2064, 2064, 2064, 2049, 2049, 2049, 2049, 2049, 2049, 2049]","[2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049, 2049, 2051]","[2049, 2049, 2049, 2049, 2049, 2051]",164.0
1140,9222,63,1,,,47.609,25.153,222.0,231.0,True,,,,,185.0
1140,9222,24,1,2023-02-23T07:00:05.955000+00:00,,51.359,29.109,190.0,219.0,True,,,,,149.0
1140,9222,23,1,2023-02-23T07:00:08.471000+00:00,,54.064,30.61,225.0,214.0,True,,,,,84.0
1140,9222,55,1,2023-02-23T07:00:08.830000+00:00,,46.139,29.943,209.0,127.0,True,,,,,189.0
1140,9222,22,1,2023-02-23T07:00:11.815000+00:00,,49.704,31.868,201.0,220.0,True,,,,,46.0
1140,9222,1,1,2023-02-23T07:00:34.221000+00:00,,49.344,32.346,200.0,221.0,True,,,,,194.0
1140,9222,27,1,2023-02-23T07:00:50.877000+00:00,,46.172,27.63,203.0,242.0,True,,,,,198.0
1140,9222,10,1,2023-02-23T07:01:04.533000+00:00,,50.013,31.888,206.0,219.0,True,,,,,176.0


In [111]:
# Save the combined laps table as CSV (without the DataFrame index).
# Create the target directory first: to_csv raises OSError when the parent
# directory does not exist, e.g. on a fresh clone.
laps_csv = Path("../data/laps.csv")
laps_csv.parent.mkdir(parents=True, exist_ok=True)
lap_df.to_csv(laps_csv, index=False)