In [0]:
%pip install fastf1 matplotlib pandas

In [0]:
%restart_python

In [0]:
# Databricks notebook
# Day 3 – Expanding Bronze Layer

# COMMAND ----------

# MAGIC %md
# # Day 3 – Expanding Bronze Layer
# In this notebook, we will:
# - Ingest Session Info
# - Ingest Weather Data
# - Ingest Telemetry Data
#
# All raw data is written to the `f1_catalog.bronze` schema.

# COMMAND ----------

import os

import fastf1
import pandas as pd
from pyspark.sql import SparkSession

# Reuse the notebook's Spark session (or create one when run elsewhere).
spark = SparkSession.builder.getOrCreate()

# FastF1 cache location. Defaults to the original workspace folder; set the
# FASTF1_CACHE_DIR environment variable to point it somewhere else (another
# user's workspace, a mounted volume, ...) without editing the notebook.
FASTF1_CACHE_DIR = os.environ.get(
    "FASTF1_CACHE_DIR",
    "/Workspace/Users/niranjan.482000@gmail.com/F1-Race-Analytics/cache",
)

# enable_cache() refuses a directory that does not exist yet, so create it
# first; this keeps "Run All" working on a fresh cluster or workspace.
os.makedirs(FASTF1_CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(FASTF1_CACHE_DIR)

# Session selection: the race ("R") of the 2023 Monza event.
year = 2023
gp = "Monza"
session = fastf1.get_session(year, gp, "R")

# Fetch the session data (laps, telemetry, weather) used by the cells below.
session.load()

# COMMAND ----------

# MAGIC %md
# ## 1. Session Info

# COMMAND ----------

# Build a single-row record describing the loaded session and store it as
# the bronze table `f1_catalog.bronze.session_info`.

# Work out the session duration. A "Length" entry is still honoured if the
# API ever provides one, but FastF1's session_info does not document such a
# key, so the old zero-length fallback made EndDate always equal StartDate.
# Fall back to the scheduled StartDate/EndDate entries instead; only their
# difference is used, so their time zone does not matter.
# NOTE(review): confirm the StartDate/EndDate keys against the installed
# FastF1 version.
session_meta = session.session_info
session_length = session_meta.get("Length")
if session_length is None:
    scheduled_start = pd.to_datetime(session_meta.get("StartDate"), errors="coerce")
    scheduled_end = pd.to_datetime(session_meta.get("EndDate"), errors="coerce")
    if pd.isna(scheduled_start) or pd.isna(scheduled_end):
        # Nothing usable in the metadata: keep the previous behaviour.
        session_length = pd.Timedelta(0)
    else:
        session_length = scheduled_end - scheduled_start

session_info = {
    "Year": year,
    "Round": session.event["RoundNumber"],
    "EventName": session.event["EventName"],
    # NOTE(review): this stores the event's official title under "Circuit",
    # not a circuit name - confirm that this is intended.
    "Circuit": session.event["OfficialEventName"],
    "SessionName": session.name,
    "StartDate": session.date,
    "EndDate": session.date + session_length,
}

session_df = pd.DataFrame([session_info])
spark_df = spark.createDataFrame(session_df)

# The schema must exist before the first table is written into it.
spark.sql("CREATE SCHEMA IF NOT EXISTS f1_catalog.bronze")
spark_df.write.mode("overwrite").format("delta").saveAsTable("f1_catalog.bronze.session_info")

display(spark.table("f1_catalog.bronze.session_info"))

# COMMAND ----------

# MAGIC %md
# ## 2. Weather Data

# COMMAND ----------

# Ingest the session's weather samples into `f1_catalog.bronze.weather_data`.
weather_df = session.weather_data  # Pandas DataFrame

# reset_index() keeps the existing table layout: the pandas index is
# materialised as a regular column.
weather_bronze_df = weather_df.reset_index()

# pandas timedelta columns (e.g. FastF1's session-relative `Time`) map to
# Spark's day-time interval type, which Delta's schema check does not accept.
# Store every timedelta column as float seconds under the same column name.
# NOTE(review): confirm no downstream reader expects an interval type here.
timedelta_cols = weather_bronze_df.select_dtypes(include="timedelta").columns
weather_bronze_df = weather_bronze_df.assign(
    **{col: weather_bronze_df[col].dt.total_seconds() for col in timedelta_cols}
)

weather_sdf = spark.createDataFrame(weather_bronze_df)

# overwriteSchema lets a rerun replace a table written with older column types.
(
    weather_sdf.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .format("delta")
    .saveAsTable("f1_catalog.bronze.weather_data")
)

display(spark.table("f1_catalog.bronze.weather_data").limit(5))

# COMMAND ----------

# MAGIC %md
# ## 3. Telemetry Data (one driver to avoid huge size)
# Example: car telemetry for Verstappen's laps (Speed, Throttle, Brake, RPM).

# COMMAND ----------

# Ingest car telemetry for a single driver into
# `f1_catalog.bronze.telemetry_ver`.

# pick_drivers() is the supported replacement for the deprecated
# pick_driver(); it accepts a single driver identifier as well as a list.
laps = session.laps.pick_drivers("VER")  # telemetry only for Verstappen
tel = laps.get_car_data().add_distance()  # adds Distance column

# Hand Spark a plain pandas DataFrame rather than FastF1's Telemetry subclass.
# reset_index() keeps the existing table layout: the pandas index is
# materialised as a regular column.
tel_df = pd.DataFrame(tel.reset_index())

# pandas timedelta columns map to Spark's day-time interval type, which
# Delta's schema check does not accept. Store every timedelta column as
# float seconds under the same column name.
# NOTE(review): confirm no downstream reader expects an interval type here.
timedelta_cols = tel_df.select_dtypes(include="timedelta").columns
tel_df = tel_df.assign(
    **{col: tel_df[col].dt.total_seconds() for col in timedelta_cols}
)

tel_sdf = spark.createDataFrame(tel_df)

# overwriteSchema lets a rerun replace a table written with older column types.
(
    tel_sdf.write
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .format("delta")
    .saveAsTable("f1_catalog.bronze.telemetry_ver")
)

display(spark.table("f1_catalog.bronze.telemetry_ver").limit(5))

# COMMAND ----------

# MAGIC %md
# # Bronze Layer Summary
# - f1_catalog.bronze.lap_times (from Day 2)
# - f1_catalog.bronze.session_info
# - f1_catalog.bronze.weather_data
# - f1_catalog.bronze.telemetry_ver
