In [0]:
%pip install fastf1 matplotlib pandas

In [0]:
%restart_python

In [0]:
# Circuit info ingestion - safe fully flattened
import json
import os

import fastf1
import pandas as pd
from pyspark.sql import SparkSession

# Reuse the active Spark session when one exists, otherwise create one.
spark = SparkSession.builder.getOrCreate()

# On-disk FastF1 cache directory (workspace-specific absolute path).
CACHE_PATH = "/Workspace/Users/niranjan.482000@gmail.com/F1-Race-Analytics/cache"
try:
    # FastF1 documents that the cache directory must already exist, so create
    # it first; otherwise a fresh workspace would silently run uncached.
    os.makedirs(CACHE_PATH, exist_ok=True)
    fastf1.Cache.enable_cache(CACHE_PATH)
except Exception as e:
    # Best effort: caching is an optimisation, so report the problem and continue.
    print("Cache enable warning:", e)

# Session selection: season, Grand Prix name and session identifier.
year = 2023
gp = "Bahrain"
sess_type = "R"
session = fastf1.get_session(year, gp, sess_type)
session.load()

# safe circuit info
# circuit_info stays None when the lookup fails; later code must handle that.
try:
    circuit_info = session.get_circuit_info()
except Exception as e:
    print("No circuit_info:", e)
    circuit_info = None

def safe_val(obj, *attrs):
    """Return the first attribute of ``obj`` named in ``attrs`` that is not None.

    Args:
        obj: Any object, or None.
        *attrs: Attribute names to try, in priority order.

    Returns:
        The value of the first listed attribute that exists and is not None,
        or None when ``obj`` is None or no listed attribute qualifies.
    """
    # Compare against None explicitly: a truthiness test would wrongly discard
    # valid objects that are falsy (e.g. define __bool__/__len__) and would
    # raise for pandas objects, whose truth value is ambiguous.
    if obj is None:
        return None
    for a in attrs:
        v = getattr(obj, a, None)
        if v is not None:
            return v
    return None

# candidate collections
def safe_list(obj, *names):
    """Return the first non-None attribute of ``obj`` in ``names`` as a list.

    Args:
        obj: Any object, or None.
        *names: Attribute names to try, in priority order.

    Returns:
        A list built from the first attribute that exists and is not None:
        * ``pandas.DataFrame`` -> one ``dict`` per row (column label -> value),
        * ``pandas.Series``    -> its values as a list,
        * ``list``             -> a shallow copy,
        * anything else        -> a single-element list wrapping the value.
        An empty list when ``obj`` is None or no attribute qualifies.
    """
    # Explicit None check: truthiness would reject valid-but-falsy objects.
    if obj is None:
        return []
    for n in names:
        val = getattr(obj, n, None)
        if val is None:
            continue
        if isinstance(val, pd.DataFrame):
            # Iterating a DataFrame yields its column labels, not its rows,
            # so expand it into row dictionaries instead of calling list().
            return val.to_dict("records")
        if isinstance(val, pd.Series):
            return val.tolist()
        if isinstance(val, list):
            return list(val)
        return [val]
    return []

# Target Delta table and the flattened record layout written to it.
BRONZE_TABLE = "f1_catalog.bronze.circuit_info"
ENTRY_FIELDS = ("number", "name", "angle", "distance", "location")
RECORD_COLUMNS = ["record_type", "idx", *ENTRY_FIELDS, "raw"]


def _nullable_str(value):
    """Return ``str(value)``, or None when the value is missing (None/NA/NaT/NaN)."""
    if value is None or value is pd.NA or value is pd.NaT:
        return None
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return None
    return str(value)


records = []
if circuit_info is not None:
    candidates = {
        "corners": safe_list(circuit_info, "corners", "cornerList"),
        "marshal_sectors": safe_list(circuit_info, "marshalSectors", "marshal_sectors"),
        "marshal_lights": safe_list(circuit_info, "marshalLights", "marshal_lights"),
        "track_markers": safe_list(circuit_info, "track_markers", "markers"),
    }

    for rec_type, coll in candidates.items():
        for idx, entry in enumerate(coll):
            if isinstance(entry, dict):
                entry_dict = dict(entry)
            else:
                # convert non-dict entries to string
                entry_dict = {"raw": str(entry)}

            # Match keys case-insensitively: FastF1's circuit tables are
            # documented with capitalised columns ("Number", "Angle", ...),
            # which a lower-case-only lookup would miss.
            by_lower = {str(key).lower(): value for key, value in entry_dict.items()}

            record = {"record_type": rec_type, "idx": idx}
            for field in ENTRY_FIELDS:
                record[field] = by_lower.get(field)
            # default=str keeps serialisation from failing on values that are
            # not JSON-native (timestamps, numpy scalars, ...).
            record["raw"] = by_lower.get("raw") or json.dumps(entry_dict, default=str)
            records.append(record)

# ensure at least one row
if not records:
    records = [dict.fromkeys(RECORD_COLUMNS)]

# dtype=object stops pandas from upcasting ints to floats when a column mixes
# numbers and None (which would later stringify 1 as "1.0").
circuit_df = pd.DataFrame(records, columns=RECORD_COLUMNS, dtype=object)

# add metadata
event = session.event
event_name = event.EventName if hasattr(event, "EventName") else gp
session_key = f"{event_name}_{session.name}_{pd.to_datetime(session.date).strftime('%Y%m%d')}"

# Prefer values from circuit_info; fall back to the event record's
# Location/Country fields (if present) when circuit_info does not provide them.
circuit_location = safe_val(circuit_info, "location", "city")
if circuit_location is None:
    circuit_location = getattr(event, "Location", None)
circuit_country = safe_val(circuit_info, "country")
if circuit_country is None:
    circuit_country = getattr(event, "Country", None)

circuit_df["circuit_name"] = safe_val(circuit_info, "name", "circuit_name")
circuit_df["circuit_location"] = circuit_location
circuit_df["circuit_country"] = circuit_country
circuit_df["event_name"] = event_name
circuit_df["year"] = year
circuit_df["round"] = getattr(event, "RoundNumber", None)
circuit_df["session_name"] = session.name
circuit_df["session_key"] = session_key

# create Spark DF safely
# Every column is written as a string, but missing values stay real NULLs
# instead of the literal text "None"/"nan". The explicit all-string schema is
# required because Spark cannot infer a type for a column that is entirely NULL.
schema_ddl = ", ".join(f"`{col}` string" for col in circuit_df.columns)
string_rows = [
    tuple(_nullable_str(value) for value in row)
    for row in circuit_df.itertuples(index=False, name=None)
]
sdf = spark.createDataFrame(string_rows, schema=schema_ddl)
# NOTE(review): append mode means re-running this cell for the same session
# adds duplicate rows — confirm whether that is intended for the bronze layer.
sdf.write.format("delta").mode("append").saveAsTable(BRONZE_TABLE)

print(f"✅ Wrote {len(circuit_df)} rows")

# Filter with a column expression rather than interpolating the key into SQL
# text, so quotes in an event name cannot break the predicate.
bronze_df = spark.table(BRONZE_TABLE)
display(bronze_df.where(bronze_df["session_key"] == session_key).limit(20))
