In [0]:
%pip install fastf1 matplotlib pandas

In [0]:
%restart_python

In [0]:
import os
import warnings

import fastf1
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
# Silence every warning for the rest of the notebook. Note that this also hides
# deprecation notices emitted by the libraries used below.
warnings.filterwarnings("ignore")

# Enable cache
# FastF1 expects the cache directory to exist before enable_cache() is called,
# so create it first; exist_ok=True keeps re-runs of this cell idempotent.
CACHE_DIR = "/Workspace/Users/niranjan.482000@gmail.com/F1-Race-Analytics/cache"
os.makedirs(CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(CACHE_DIR)

# COMMAND ----------

# Load a session
# Season 2023, Bahrain event; the "R" identifier selects the race.
session = fastf1.get_session(year=2023, gp="Bahrain", identifier="R")
# Must run before the cells below read session.drivers / session.laps.
session.load()

# COMMAND ----------

# Extract telemetry data for all drivers
# Builds one pandas DataFrame out of per-lap car telemetry chunks. Every chunk
# is tagged with the driver identifier (as yielded by session.drivers) and the
# lap number it came from.
all_tel_dfs = []
# (driver, lap number, error) for every lap whose telemetry could not be read.
skipped_laps = []

for drv in session.drivers:
    # pick_drivers() is the replacement for the deprecated pick_driver().
    drv_laps = session.laps.pick_drivers(drv)
    if len(drv_laps) == 0:
        continue

    # Get telemetry for each lap
    for _, lap in drv_laps.iterlaps():
        try:
            tel = lap.get_car_data().add_distance()  # pandas DataFrame
            tel["Driver"] = drv
            tel["LapNumber"] = lap["LapNumber"]
        except Exception as exc:
            # Best effort: a lap with unusable telemetry is skipped, but it is
            # recorded instead of being silently dropped. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the cell.
            skipped_laps.append((drv, lap.get("LapNumber"), repr(exc)))
            continue
        all_tel_dfs.append(tel)

if skipped_laps:
    print(f"Skipped {len(skipped_laps)} lap(s) without usable telemetry")

# pd.concat() raises an opaque "No objects to concatenate" on an empty list;
# fail with an explicit message of the same exception type instead.
if not all_tel_dfs:
    raise ValueError("No telemetry could be extracted for any driver/lap in this session")

telemetry_df = pd.concat(all_tel_dfs, ignore_index=True)

print("Telemetry shape:", telemetry_df.shape)
display(telemetry_df.head())

# COMMAND ----------

# Convert Pandas -> Spark
# `spark` is not defined in this notebook; presumably it is the session object
# injected by the notebook runtime.
# NOTE(review): the telemetry frame may carry timedelta columns, which can map
# to Spark interval types -- confirm the Delta write below accepts them.
spark_df = spark.createDataFrame(telemetry_df)

# Inspect schema
spark_df.printSchema()
# Preview the first five rows.
spark_df.show(n=5)

# COMMAND ----------

# Save as Bronze Delta Table
catalog = "f1_catalog"
schema = "bronze"
table_name = "telemetry_data"

# Three-part name: <catalog>.<schema>.<table>
target_table = f"{catalog}.{schema}.{table_name}"

# Replace the table contents on every run; overwriteSchema lets the stored
# schema be replaced along with the data.
delta_writer = spark_df.write.format("delta").mode("overwrite")
delta_writer = delta_writer.option("overwriteSchema", "true")
delta_writer.saveAsTable(target_table)

print(f"Saved to {target_table}")
