In [0]:
%pip install fastf1 matplotlib pandas

In [0]:
%restart_python

In [0]:
# Race control messages ingestion -> f1_catalog.bronze.race_control_messages
import os

import fastf1, pandas as pd
from pyspark.sql import SparkSession

# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

# Enable FastF1's on-disk cache so repeated runs do not re-download session data.
# FastF1 expects the cache directory to already exist, so create it first
# (no-op when it is already there). Caching is only an optimisation: any failure
# is reported and the run continues without a cache.
CACHE_DIR = "/Workspace/Users/niranjan.482000@gmail.com/F1-Race-Analytics/cache"
try:
    os.makedirs(CACHE_DIR, exist_ok=True)
    fastf1.Cache.enable_cache(CACHE_DIR)
except Exception as e:
    print("Cache warning:", e)

# Session selector: edit these three values to ingest a different session.
year, gp, sess_type = 2023, "Bahrain", "R"

# Resolve the session object, then load its data with race control
# messages explicitly requested.
session = fastf1.get_session(year, gp, sess_type)
session.load(messages=True)

# Work on a private copy of the race control messages so the edits below do
# not touch the frame owned by the session object.
messages = session.race_control_messages.copy()

print("Raw columns:", list(messages.columns))

# Columns kept for the bronze table (session_key is appended further down).
keep_cols = ["Date", "Category", "Message", "Status", "Flag", "Scope"]

# FastF1's API reference lists the message timestamp column as "Time", not
# "Date". Without this fallback the loop below would create "Date" as an
# all-null column and every message timestamp would be lost. The dtype check
# keeps a non-datetime "Time" column (e.g. a timedelta) from being copied over.
if (
    "Date" not in messages.columns
    and "Time" in messages.columns
    and pd.api.types.is_datetime64_any_dtype(messages["Time"])
):
    messages["Date"] = messages["Time"]

# Any other expected column that is absent is added as all-null so the output
# layout stays the same regardless of what the source returned.
for col in keep_cols:
    if col not in messages.columns:
        messages[col] = None

# Coerce to datetime; values that cannot be parsed become NaT instead of raising.
messages['Date'] = pd.to_datetime(messages['Date'], errors='coerce')

# Surface the "no timestamps at all" case instead of silently writing nulls.
if len(messages) and messages['Date'].isna().all():
    print("Warning: no usable timestamps found; 'Date' is null for every message")

# Session identifier: "<EventName>_<session name>_<YYYYMMDD of session date>".
session_key = f"{session.event['EventName']}_{session.name}_{pd.to_datetime(session.date).strftime('%Y%m%d')}"
messages['session_key'] = session_key

# Keep only the selected columns plus the session key, in a fixed order.
messages = messages[keep_cols + ["session_key"]]

TARGET_TABLE = "f1_catalog.bronze.race_control_messages"

# Explicit schema (DDL string) instead of inference: inference cannot type a
# column that is entirely null (which happens for any column added as None
# upstream) and cannot handle an empty frame at all.
BRONZE_SCHEMA = (
    "`Date` timestamp, `Category` string, `Message` string, `Status` string, "
    "`Flag` string, `Scope` string, `session_key` string"
)

# Normalise the text columns on a copy: missing values (NaN/None) become None
# and everything else becomes str, so each column converts cleanly to a Spark
# string column whether or not Arrow is used for the conversion.
bronze_pdf = messages.copy()
for text_col in [c for c in bronze_pdf.columns if c != "Date"]:
    bronze_pdf[text_col] = bronze_pdf[text_col].map(
        lambda v: None if pd.isna(v) else str(v)
    )

sdf = spark.createDataFrame(bronze_pdf, schema=BRONZE_SCHEMA)

# Idempotency guard: a plain append would duplicate this session's rows every
# time the notebook is re-run. If the table already holds rows for this
# session_key, skip the write. To reload a session on purpose, delete its rows
# from the table first.
session_keys = bronze_pdf["session_key"].dropna().unique().tolist()
already_loaded = False
if spark.catalog.tableExists(TARGET_TABLE):
    existing = spark.table(TARGET_TABLE)
    already_loaded = (
        existing.filter(existing["session_key"].isin(session_keys)).limit(1).count() > 0
    )

if already_loaded:
    print(f"Skipped write: {session_keys} already present in {TARGET_TABLE}")
else:
    # write to bronze delta table (creates the table on first run)
    sdf.write.format("delta").mode("append").saveAsTable(TARGET_TABLE)
    print("✅ Wrote race control messages to f1_catalog.bronze.race_control_messages")

# Show the ten most recent messages in the table as a quick sanity check.
display(spark.table(TARGET_TABLE).orderBy("Date", ascending=False).limit(10))
