In [10]:
import pickle
import json
import os
import re
import pandas as pd

from urllib.request import urlopen
from joblib import Parallel, delayed

# 1. Utils
---

In [12]:
def save_parquet(path, file):
    """Persist tabular data to disk in Parquet format.

    Parameters
    ----------
    path : str
        Destination, handed unchanged to ``DataFrame.to_parquet``.
    file : object
        Anything the ``pd.DataFrame`` constructor accepts. Despite the
        name this is the in-memory data to store, not a file handle.
    """
    # Build the frame and write it in a single expression.
    pd.DataFrame(file).to_parquet(path)

# 2. Extract
---

## Telemetry

In [13]:
# Fetch every record from the OpenF1 `drivers` endpoint (no query filters).
# Later cells read the "session_key", "driver_number" and "meeting_key"
# fields of each entry.
# The `with` block closes the HTTP response as soon as the body is read.
with urlopen("https://api.openf1.org/v1/drivers") as response:
    drivers = json.loads(response.read().decode("utf-8"))

In [None]:
def extract_car_data(path, driver):
    """Download one driver's car telemetry for one session and store it.

    Parameters
    ----------
    path : str
        Directory that receives the output file.
    driver : dict
        Entry of the ``drivers`` list; only ``"session_key"`` and
        ``"driver_number"`` are read.

    Returns
    -------
    None
        Download or write failures are reported on stdout instead of being
        raised, so one bad request does not abort the remaining downloads.
    """
    session_key = driver["session_key"]
    driver_number = driver["driver_number"]
    url = f"https://api.openf1.org/v1/car_data?driver_number={driver_number}&session_key={session_key}"

    try:
        # Context manager guarantees the HTTP response is closed.
        with urlopen(url) as response:
            car_data = json.loads(response.read().decode("utf-8"))

        # NOTE(review): the content is Parquet even though the suffix says
        # ".pickle". The name is kept because readers outside this notebook
        # may rely on it -- consider renaming to ".parquet".
        save_parquet(
            f"{path}/session_key={session_key}&driver_number={driver_number}.pickle",
            car_data,
        )
    except Exception as error:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate; the error is printed so the failure can be diagnosed.
        print(session_key, driver_number, repr(error))

In [15]:
path_telemetrys = "./data/telemetrys"
# Distinct session keys present in the driver list.
session_key_drivers = {driver["session_key"] for driver in drivers}

# Make sure the target directory exists; os.listdir raises on a missing path.
os.makedirs(path_telemetrys, exist_ok=True)

# Download only when nothing has been stored yet, so re-running the notebook
# does not repeat the requests.
if not os.listdir(path_telemetrys):
    for session_key in session_key_drivers:
        # Select this session's entries from the full `drivers` list.
        filter_session_key = [driver for driver in drivers if driver["session_key"] == session_key]
        # Fetch the drivers of one session with up to 6 parallel jobs.
        Parallel(n_jobs=6)(delayed(extract_car_data)(path_telemetrys, driver) for driver in filter_session_key)

## Drivers

In [16]:
# Parallel lists: list_sessions[i] and list_drivers[i] describe the same
# telemetry file found on disk.
list_drivers = []
list_sessions = []

for file in os.listdir(path_telemetrys):
    match = re.search(r"session_key=(\d+)&driver_number=(\d+)", file)
    if match is None:
        # Ignore entries that do not follow the telemetry naming scheme
        # (e.g. hidden or placeholder files) instead of crashing on None.
        continue

    session_key = int(match.group(1))
    driver_number = int(match.group(2))

    list_sessions.append(session_key)
    list_drivers.append(driver_number)

In [None]:
path_drivers = "./data/drivers"

# Build the (session_key, driver_number) pairs once; a set gives constant-time
# membership tests instead of re-scanning a fresh zip for every driver.
downloaded_pairs = set(zip(list_sessions, list_drivers))
save_drivers = [drive for drive in drivers if (drive["session_key"], drive["driver_number"]) in downloaded_pairs]

# Make sure the target directory exists; os.listdir raises on a missing path.
os.makedirs(path_drivers, exist_ok=True)

# Write only when the directory is still empty.
if not os.listdir(path_drivers):
    save_parquet(f"{path_drivers}/drivers.parquet", save_drivers)

## Laps

In [18]:
path_laps = "./data/laps"

# Make sure the target directory exists; os.listdir raises on a missing path.
os.makedirs(path_laps, exist_ok=True)

# Download only when nothing has been stored yet.
if not os.listdir(path_laps):
    # One request and one output file per (session, driver) pair.
    for session_key, driver_number in zip(list_sessions, list_drivers):
        try:
            # Context manager guarantees the HTTP response is closed.
            with urlopen(f"https://api.openf1.org/v1/laps?session_key={session_key}&driver_number={driver_number}") as response:
                data = json.loads(response.read().decode("utf-8"))

            save_parquet(f"{path_laps}/session_key={session_key}&driver_number={driver_number}.parquet", data)
        except Exception as error:
            # Report and continue with the next pair; `Exception` lets
            # KeyboardInterrupt/SystemExit propagate so the loop can be stopped.
            print(session_key, driver_number, repr(error))

## Meetings

In [None]:
# Fetch all meetings; the `with` block closes the HTTP response after reading.
with urlopen('https://api.openf1.org/v1/meetings') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only the meetings referenced by the drivers selected in `save_drivers`.
list_meetings = [driver["meeting_key"] for driver in save_drivers]
df = df[df["meeting_key"].isin(list_meetings)]

df.to_parquet("./data/meetings/meetings.parquet")

## Pits

In [None]:
# Fetch all pit records; the `with` block closes the HTTP response after reading.
with urlopen('https://api.openf1.org/v1/pits') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

df.to_parquet("./data/pits/pits.parquet")

## Position

In [None]:
# Fetch all position records; the `with` block closes the HTTP response after reading.
with urlopen('https://api.openf1.org/v1/position') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

df.to_parquet("./data/positions/positions.parquet")

## Sessions

In [None]:
# Fetch all sessions; the `with` block closes the HTTP response after reading.
with urlopen('https://api.openf1.org/v1/sessions') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

df.to_parquet("./data/sessions/sessions.parquet")

## Stints

In [None]:
# Fetch all stints; the `with` block closes the HTTP response after reading.
with urlopen('https://api.openf1.org/v1/stints') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

df.to_parquet("./data/stints/stints.parquet")

## Race control

In [None]:
# Fetch all race-control records; the `with` block closes the HTTP response
# after reading.
with urlopen('https://api.openf1.org/v1/race_control') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

# Persist the filtered frame (not the raw `data`), so the session filter
# above actually takes effect.
df.to_parquet("./data/race_controls/race_controls.parquet")

## Weather

In [None]:
# Fetch all weather records; the `with` block closes the HTTP response after
# reading.
with urlopen('https://api.openf1.org/v1/weather') as response:
    data = json.loads(response.read().decode('utf-8'))

df = pd.DataFrame(data)
# Keep only rows whose session_key appears in `list_sessions`.
df = df[df["session_key"].isin(list_sessions)]

# Persist the filtered frame (not the raw `data`), so the session filter
# above actually takes effect.
df.to_parquet("./data/weather_conditions/weather_conditions.parquet")