In [2]:
from urllib.request import urlopen
import json
import pandas as pd
import os

In [3]:
if(not os.path.exists("data")):
  os.makedirs("data")
response = urlopen("https://api.openf1.org/v1/drivers")
data = json.loads(response.read().decode("utf-8"))
# save to csv
df = pd.DataFrame(data)
df.to_csv("data/drivers.csv", index=False)

In [4]:
df = pd.read_csv("data/drivers.csv")
drivers = df["broadcast_name"].unique()
for driver in drivers:
  print(df[df["broadcast_name"] == driver]["driver_number"].unique(), driver)

[1] M VERSTAPPEN
[2] L SARGEANT
[4] L NORRIS
[10] P GASLY
[11] S PEREZ
[14] F ALONSO
[16] C LECLERC
[18] L STROLL
[20] K MAGNUSSEN
[21] N DE VRIES
[22] Y TSUNODA
[23] A ALBON
[24] G ZHOU
[27] N HULKENBERG
[31] E OCON
[44] L HAMILTON
[55] C SAINZ
[63] G RUSSELL
[77] V BOTTAS
[81] O PIASTRI
[3] D RICCIARDO
[ 1 97 61] P ARON
[ 2 38] D BEGANOVIC
[ 3 45] Z O'SULLIVAN
[4] L FORNAROLI
[10 45 43] F COLAPINTO
[11] M BOYA
[14] S MONTOYA
[16 46] L BROWNING
[18] R VILLAGOMEZ
[20] O GRAY
[22] I COHEN
[23] J MARTI
[24] C MANSELL
[27] T BARNARD
[31] W SHIN
[5] G BORTOLETO
[6] O GOETHE
[7] K FREDERICK
[8] G SAUCY
[9] N TSOLOV
[12] J EDGAR
[15] G MINI
[17] C COLLET
[19] T SMITH
[21] M ESTERSON
[25] H BARTER
[26] N BEDRIN
[28] A GARCIA
[29] S FLOERSCH
[30] R FARIA
[39 97] R SHWARTZMAN
[40 30] L LAWSON
[34] F DRUGOVICH
[41 37  6] I HADJAR
[42 72] F VESTI
[50 38 87] O BEARMAN
[61  7] J DOOHAN
[98] T POURCHAIRE
[29 89] P O'WARD
[36] J DENNIS
[40 37] A IWASA
[12] A ANTONELLI
[28 62 50] R HIRAKAWA
[39] A LEC

# Fetch Data from All OpenF1 API Endpoints

This notebook fetches data from all available OpenF1 API endpoints and saves them to CSV files.

In [5]:
import time

# Define all OpenF1 API endpoints
endpoints = {
    'car_data': 'https://api.openf1.org/v1/car_data',
    'drivers': 'https://api.openf1.org/v1/drivers',
    'intervals': 'https://api.openf1.org/v1/intervals',
    'laps': 'https://api.openf1.org/v1/laps',
    'location': 'https://api.openf1.org/v1/location',
    'meetings': 'https://api.openf1.org/v1/meetings',
    'overtakes': 'https://api.openf1.org/v1/overtakes',
    'pit': 'https://api.openf1.org/v1/pit',
    'position': 'https://api.openf1.org/v1/position',
    'race_control': 'https://api.openf1.org/v1/race_control',
    'sessions': 'https://api.openf1.org/v1/sessions',
    'session_result': 'https://api.openf1.org/v1/session_result',
    'starting_grid': 'https://api.openf1.org/v1/starting_grid',
    'stints': 'https://api.openf1.org/v1/stints',
    'team_radio': 'https://api.openf1.org/v1/team_radio',
    'weather': 'https://api.openf1.org/v1/weather'
}

def fetch_and_save_endpoint(endpoint_name, url, params=None):
    """Fetch data from an endpoint and save to CSV"""
    try:
        # Build URL with parameters if provided
        if params:
            url = f"{url}?{'&'.join([f'{k}={v}' for k, v in params.items()])}"
        
        print(f"Fetching {endpoint_name}...")
        response = urlopen(url)
        data = json.loads(response.read().decode("utf-8"))
        
        if data and len(data) > 0:
            df = pd.DataFrame(data)
            filename = f"data/{endpoint_name}.csv"
            df.to_csv(filename, index=False)
            print(f"✓ Saved {len(data)} records to {filename}")
            return len(data)
        else:
            print(f"⚠ No data available for {endpoint_name}")
            return 0
            
    except Exception as e:
        print(f"✗ Error fetching {endpoint_name}: {str(e)}")
        return 0

print("Starting to fetch data from all OpenF1 API endpoints...\n")

Starting to fetch data from all OpenF1 API endpoints...



In [6]:
# Fetch data from all endpoints
# Using recent session data to get meaningful results
# Let's use the latest available data with some basic filters

total_records = 0

# 1. Meetings - Get recent meetings
records = fetch_and_save_endpoint('meetings', endpoints['meetings'], {'year': 2024})
total_records += records
time.sleep(0.5)

# 2. Sessions - Get sessions from 2024
records = fetch_and_save_endpoint('sessions', endpoints['sessions'], {'year': 2024})
total_records += records
time.sleep(0.5)

# 3. Drivers - Get all drivers
records = fetch_and_save_endpoint('drivers', endpoints['drivers'])
total_records += records
time.sleep(0.5)

# 4. Laps - Get lap data (limited sample)
records = fetch_and_save_endpoint('laps', endpoints['laps'])
total_records += records
time.sleep(0.5)

# 5. Car data - Get car data (limited sample)
records = fetch_and_save_endpoint('car_data', endpoints['car_data'])
total_records += records
time.sleep(0.5)

# 6. Position - Get position data
records = fetch_and_save_endpoint('position', endpoints['position'])
total_records += records
time.sleep(0.5)

# 7. Pit stops
records = fetch_and_save_endpoint('pit', endpoints['pit'])
total_records += records
time.sleep(0.5)

# 8. Stints
records = fetch_and_save_endpoint('stints', endpoints['stints'])
total_records += records
time.sleep(0.5)

# 9. Weather
records = fetch_and_save_endpoint('weather', endpoints['weather'])
total_records += records
time.sleep(0.5)

# 10. Team radio
records = fetch_and_save_endpoint('team_radio', endpoints['team_radio'])
total_records += records
time.sleep(0.5)

# 11. Race control
records = fetch_and_save_endpoint('race_control', endpoints['race_control'])
total_records += records
time.sleep(0.5)

# 12. Location
records = fetch_and_save_endpoint('location', endpoints['location'])
total_records += records
time.sleep(0.5)

# 13. Intervals (race only)
records = fetch_and_save_endpoint('intervals', endpoints['intervals'])
total_records += records
time.sleep(0.5)

# 14. Starting grid
records = fetch_and_save_endpoint('starting_grid', endpoints['starting_grid'])
total_records += records
time.sleep(0.5)

# 15. Session result
records = fetch_and_save_endpoint('session_result', endpoints['session_result'])
total_records += records
time.sleep(0.5)

# 16. Overtakes
records = fetch_and_save_endpoint('overtakes', endpoints['overtakes'])
total_records += records

print(f"\n{'='*60}")
print(f"Total records fetched: {total_records}")
print(f"All data saved to the 'data' directory!")
print(f"{'='*60}")

Fetching meetings...
✓ Saved 25 records to data/meetings.csv
Fetching sessions...
✓ Saved 123 records to data/sessions.csv
Fetching drivers...
✓ Saved 7125 records to data/drivers.csv
Fetching laps...
✗ Error fetching laps: HTTP Error 422: unknown
Fetching car_data...
✗ Error fetching car_data: HTTP Error 422: unknown
Fetching position...
✗ Error fetching position: HTTP Error 422: unknown
Fetching pit...
✓ Saved 24213 records to data/pit.csv
Fetching stints...
✓ Saved 28021 records to data/stints.csv
Fetching weather...
✗ Error fetching weather: HTTP Error 422: unknown
Fetching team_radio...
✓ Saved 16420 records to data/team_radio.csv
Fetching race_control...
✓ Saved 15764 records to data/race_control.csv
Fetching location...
✗ Error fetching location: HTTP Error 429: Too Many Requests
Fetching intervals...
✗ Error fetching intervals: HTTP Error 422: unknown
Fetching starting_grid...
✓ Saved 1734 records to data/starting_grid.csv
Fetching session_result...
✓ Saved 7063 records to data

## Summary of Available Endpoints

Let's check what data files we now have:

In [7]:
# List all CSV files in the data directory
import glob

csv_files = glob.glob("data/*.csv")
print(f"Total CSV files created: {len(csv_files)}\n")

# Display file sizes and record counts
print(f"{'Endpoint':<25} {'Records':<10} {'File Size':<15}")
print("="*50)

for file in sorted(csv_files):
    filename = file.split('/')[-1].replace('.csv', '')
    try:
        df = pd.read_csv(file)
        file_size = os.path.getsize(file) / 1024  # KB
        print(f"{filename:<25} {len(df):<10} {file_size:>10.2f} KB")
    except Exception as e:
        print(f"{filename:<25} Error reading file")

Total CSV files created: 14

Endpoint                  Records    File Size      
drivers                   7125          1464.85 KB
intervals                 26261         1497.96 KB
laps                      1156           283.54 KB
meetings                  25               3.93 KB
overtakes                 16918          849.20 KB
pit                       24213         1299.59 KB
position                  666             31.64 KB
race_control              15764         1564.86 KB
session_result            7063           436.35 KB
sessions                  123             16.12 KB
starting_grid             1734            37.67 KB
stints                    28021          933.96 KB
team_radio                16420         2929.08 KB
weather                   153             11.28 KB
