In [0]:
%pip install fastf1 matplotlib pandas

In [0]:
%restart_python

In [0]:
# Robust session metadata ingestion -> writes to f1_catalog.bronze.session_metadata
import os

import fastf1, pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, TimestampType, DoubleType

spark = SparkSession.builder.getOrCreate()

# Optional on-disk FastF1 cache. Per the FastF1 docs, enable_cache expects the
# directory to exist already, so create it first. Any failure here (no write
# permission, different workspace layout) only disables caching and must not
# abort the notebook, hence the broad best-effort handler.
# NOTE(review): the path is user-specific; consider moving it to a config cell.
CACHE_DIR = "/Workspace/Users/niranjan.482000@gmail.com/F1-Race-Analytics/cache"
try:
    os.makedirs(CACHE_DIR, exist_ok=True)
    fastf1.Cache.enable_cache(CACHE_DIR)
except Exception as e:
    print("Cache enable warning:", e)

# load session (change year/gp/session as needed)
year = 2023
gp = "Bahrain"
sess_type = "R"
sess = fastf1.get_session(year, gp, sess_type)
sess.load()

# Snapshot the event record as a plain dict so the available keys can be
# inspected. If the conversion fails, fall back to an empty dict (downstream
# lookups then use their defaults) but report why instead of hiding it.
try:
    ev = dict(sess.event)
except Exception as e:
    print("Event metadata warning (using fallbacks):", e)
    ev = {}
print("event keys:", list(ev.keys()))

# helper for safe extraction
def safe_get(d, *keys, default=None):
    """Return the first usable value found in ``d`` under any of ``keys``.

    Keys are tried in the order given. A value counts as missing when it is
    ``None`` or compares unequal to itself (float NaN, pandas NaT), which is
    how absent fields show up once a pandas record has been turned into a
    dict. Falsy-but-real values such as ``0`` or ``""`` are returned as-is.

    Args:
        d: Mapping to read from. ``None`` is treated as an empty mapping.
        *keys: Candidate keys, in priority order.
        default: Value returned when no key yields a usable value.

    Returns:
        The first non-missing value, or ``default``.
    """
    if d is None:
        return default
    for key in keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        try:
            # NaN-like scalars are the only common values where x != x holds.
            is_nan = bool(value != value)
        except (TypeError, ValueError):
            # Value without a scalar truth value for the comparison
            # (e.g. an array): it is a real value, so keep it.
            is_nan = False
        if not is_nan:
            return value
    return default

# build metadata with fallbacks
def _weather_mean(column):
    """Return the session-wide mean of one weather column, or None if unavailable.

    None is returned when the session exposes no weather data, when the
    column is absent, or when the mean is undefined (empty / all-NaN column),
    so the stored value is NULL rather than NaN.
    """
    try:
        weather = getattr(sess, 'weather_data', None)
    except Exception:
        # getattr's default only covers AttributeError; presumably FastF1 can
        # raise its own error type when weather data was not loaded (confirm
        # against the FastF1 docs). Treat that as "no weather data".
        return None
    if weather is None or column not in weather.columns:
        return None
    mean_value = weather[column].mean()
    return None if pd.isna(mean_value) else float(mean_value)


# Resolve once the values that are needed in more than one place below.
_session_date = pd.to_datetime(sess.date)
_round_raw = safe_get(ev, 'RoundNumber', 'Round', default=None)
# Guard against NaN as well as None: int(NaN) raises ValueError.
_round_number = int(_round_raw) if _round_raw is not None and not pd.isna(_round_raw) else None

metadata = {
    # Composite identifier: <event name>_<session name>_<YYYYMMDD>
    "session_key": f"{safe_get(ev,'EventName','Event','eventName', default=gp)}_{sess.name}_{_session_date.strftime('%Y%m%d')}",
    "year": int(safe_get(ev,'EventYear','Year', default=year)),
    "round": _round_number,
    "event_name": safe_get(ev,'EventName','Event', default=gp),
    "country": safe_get(ev,'Country','CountryName', default=None),
    "location": safe_get(ev,'Location', default=None),
    "session_name": sess.name,
    "session_type": getattr(sess, 'session_type', None),
    "date": _session_date,
    "weather_temp_air": _weather_mean('AirTemp'),
    "weather_temp_track": _weather_mean('TrackTemp'),
    "weather_humidity": _weather_mean('Humidity'),
    "weather_pressure": _weather_mean('Pressure'),
}

# Single-row pandas frame holding the metadata record built above.
pdf = pd.DataFrame(data=[metadata])

# Explicit Spark schema, declared as (column name, Spark type, nullable)
# triples so the table layout can be read at a glance. Only session_key is
# declared non-nullable.
schema = StructType([
    StructField(column_name, spark_type, is_nullable)
    for column_name, spark_type, is_nullable in [
        ("session_key", StringType(), False),
        ("year", IntegerType(), True),
        ("round", IntegerType(), True),
        ("event_name", StringType(), True),
        ("country", StringType(), True),
        ("location", StringType(), True),
        ("session_name", StringType(), True),
        ("session_type", StringType(), True),
        ("date", TimestampType(), True),
        ("weather_temp_air", DoubleType(), True),
        ("weather_temp_track", DoubleType(), True),
        ("weather_humidity", DoubleType(), True),
        ("weather_pressure", DoubleType(), True),
    ]
])

# Convert to a Spark DataFrame and append it to the bronze Delta table.
# NOTE(review): append mode adds a new row on every run of this cell, so
# re-running for the same session leaves duplicate session_key rows.
sdf = spark.createDataFrame(pdf, schema=schema)
delta_writer = sdf.write.format("delta").mode("append")
delta_writer.saveAsTable("f1_catalog.bronze.session_metadata")

print("Wrote session metadata to f1_catalog.bronze.session_metadata")

# Show the five most recent sessions currently stored in the table.
latest_sessions = (
    spark.table("f1_catalog.bronze.session_metadata")
    .orderBy("date", ascending=False)
    .limit(5)
)
display(latest_sessions)
