In [5]:
import fastf1
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

# ==============================================================================
# PROGETTO: F1 High-Dimensional Functional Data Analysis (FDA) & D-STEM
# VERSIONE: 6.0 (Final - Hybrid Curvature & Integer Precision)
# ==============================================================================

# --- 0. CONFIGURAZIONE ---
YEAR = 2024      
RACE = 'Bahrain' 
SESSION = 'R'    
SPATIAL_RESOLUTION = 10 # [Metri]
SELECTED_DRIVERS = ['VER', 'PER', 'SAI', 'LEC', 'RUS', 'NOR', 'HAM', 'PIA', 'ALO', 'STR']

fastf1.Cache.enable_cache('f1_cache')

print(f"--- FASE 1: ESTRAZIONE DATI ({RACE} {YEAR}) ---")
session = fastf1.get_session(YEAR, RACE, SESSION)
session.load()

# --- 1. DATA CLEANING ---
laps = session.laps.pick_drivers(SELECTED_DRIVERS).pick_quicklaps()
laps = laps[laps['TrackStatus'] == '1'] # Solo Green Flag
laps = laps[laps['PitInTime'].isnull() & laps['PitOutTime'].isnull()] 

print(f"Giri validi selezionati: {len(laps)}")

# --- 2. DEFINIZIONE DOMINIO SPAZIALE ---
print("Calcolo lunghezza circuito...")
fastest_lap = session.laps.pick_fastest()
telemetry_fastest = fastest_lap.get_telemetry()
circuit_length = telemetry_fastest['Distance'].max()

if pd.isna(circuit_length) or circuit_length < 2000:
    circuit_length = 5412.0 

spatial_grid = np.arange(0, int(circuit_length), SPATIAL_RESOLUTION)
print(f"Griglia Spaziale: {len(spatial_grid)} punti (step {SPATIAL_RESOLUTION}m)")


# --- 3. FEATURE ENGINEERING (FISICA) ---

def calculate_curvature(x, y):
    """Calcola la curvatura continua k(s)"""
    dx = np.gradient(x)
    dy = np.gradient(y)
    ddx = np.gradient(dx)
    ddy = np.gradient(dy)
    numerator = dx * ddy - dy * ddx
    denominator = (dx**2 + dy**2)**1.5
    with np.errstate(divide='ignore', invalid='ignore'):
        k = numerator / denominator
        k[np.isnan(k)] = 0
    return np.abs(k)

def classify_corner_type(k):
    """
    Classifica la curvatura in categorie discrete per analisi qualitativa.
    Threshold basati su fisica F1:
    - < 0.002: Rettilineo (R > 500m)
    - < 0.01:  Curva Veloce (R 100-500m)
    - < 0.03:  Curva Media (R 33-100m)
    - < 0.08:  Curva Lenta (R 12-33m)
    - > 0.08:  Tornante/Hairpin (R < 12m)
    """
    if k < 0.002:
        return 'Straight'
    elif k < 0.01:
        return 'HighSpeed'
    elif k < 0.03:
        return 'Medium'
    elif k < 0.08:
        return 'Slow'
    else:
        return 'Hairpin'

def estimate_fuel_load(total_laps, current_lap):
    fuel_per_lap = 110.0 / total_laps
    return 110.0 - (fuel_per_lap * current_lap)

total_race_laps = session.total_laps
if pd.isna(total_race_laps): total_race_laps = 57 

# --- 4. CORE LOOP ---
data_list = []
print("--- FASE 2: ELABORAZIONE FUNZIONALE (Attendere...) ---")

for i, lap in laps.iterrows():
    try:
        tel = lap.get_telemetry()
        if len(tel) < 50: continue 
        
        # A. Interpolazione
        f_speed = interp1d(tel['Distance'], tel['Speed'], kind='linear', fill_value="extrapolate")
        f_x = interp1d(tel['Distance'], tel['X'], kind='linear', fill_value="extrapolate")
        f_y = interp1d(tel['Distance'], tel['Y'], kind='linear', fill_value="extrapolate")
        
        drs_values = tel['DRS'].astype(float)
        f_drs = interp1d(tel['Distance'], drs_values, kind='nearest', fill_value="extrapolate")
        
        # B. Proiezione
        s_speed = f_speed(spatial_grid)
        s_x = f_x(spatial_grid)
        s_y = f_y(spatial_grid)
        s_drs_binary = np.where(f_drs(spatial_grid) >= 10, 1, 0)
        
        # C. Regressori
        s_curvature = calculate_curvature(s_x, s_y)
        fuel = estimate_fuel_load(total_race_laps, lap['LapNumber'])
        
        # D. DataFrame
        lap_df = pd.DataFrame({
            'Driver': lap['Driver'],
            'Team': lap['Team'],
            'LapNumber': lap['LapNumber'],
            'Space_Distance': spatial_grid,
            'Speed': s_speed,
            'Curvature': s_curvature,
            'DRS': s_drs_binary,
            'Fuel_Load': fuel,
            'TyreLife': lap['TyreLife'],
            'Compound': lap['Compound'], 
            'X_Coord': s_x,
            'Y_Coord': s_y
        })
        data_list.append(lap_df)
            
    except Exception as e:
        continue 

df_final = pd.concat(data_list, ignore_index=True)

# --- 5. POST-PROCESSING & FORMATTAZIONE ---
print("--- FASE 3: FORMATTAZIONE FINALE ---")

# A. CLASSIFICAZIONE CURVE (Ibrida: Continua + Discreta)
# 1. Creiamo la colonna categorica temporanea
df_final['Corner_Category'] = df_final['Curvature'].apply(classify_corner_type)
# 2. Generiamo le One-Hot Encoding (Type_Straight, Type_Slow...)
df_final = pd.get_dummies(df_final, columns=['Corner_Category'], prefix='Type')
# 3. Assicuriamo che siano int (0/1)
corner_cols = [c for c in df_final.columns if 'Type_' in c and 'Tyre' not in c]
for c in corner_cols:
    df_final[c] = df_final[c].astype(int)

# B. ENCODING GOMME (Universale)
df_final = pd.get_dummies(df_final, columns=['Compound'], prefix='Tyre')
dry_compounds = ['Tyre_SOFT', 'Tyre_MEDIUM', 'Tyre_HARD']

for col in dry_compounds:
    if col not in df_final.columns:
        print(f" -> '{col}' mancante. Creata (0).")
        df_final[col] = 0
    else:
        df_final[col] = df_final[col].astype(int)

# Pulizia colonne indesiderate (Intermedie/Wet)
cols_to_drop = [c for c in df_final.columns if 'Tyre_' in c and c not in dry_compounds]
if cols_to_drop:
    df_final.drop(columns=cols_to_drop, inplace=True)

# C. ARROTONDAMENTI E TIPI (Precisione Scientifica)
print(" -> Applicazione precisione numerica...")

# Interi
df_final['LapNumber'] = df_final['LapNumber'].astype(int)
df_final['TyreLife'] = df_final['TyreLife'].fillna(0).astype(int) # Fix per eventuali NaN

# Float
df_final['Speed'] = df_final['Speed'].round(1)        # Es: 305.4
df_final['Fuel_Load'] = df_final['Fuel_Load'].round(3) # Es: 98.450
df_final['Curvature'] = df_final['Curvature'].round(6) # Es: 0.000021

# --- 6. SALVATAGGIO ---
filename = f"f1_functional_data_{RACE}_{YEAR}_final_v6.csv"
df_final.to_csv(filename, index=False)

print("\n" + "="*60)
print(f"COMPLETATO. File generato: {filename}")
print("-" * 20)
print(f"Dimensioni Dataset: {df_final.shape}")
print("Anteprima Variabili Formattate:")
print(df_final[['LapNumber', 'TyreLife', 'Speed', 'Curvature', 'Type_Straight']].head(3))
print("="*60)

--- FASE 1: ESTRAZIONE DATI (Bahrain 2024) ---


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']


Giri validi selezionati: 509
Calcolo lunghezza circuito...
Griglia Spaziale: 537 punti (step 10m)
--- FASE 2: ELABORAZIONE FUNZIONALE (Attendere...) ---
--- FASE 3: FORMATTAZIONE FINALE ---
 -> 'Tyre_MEDIUM' mancante. Creata (0).
 -> Applicazione precisione numerica...

COMPLETATO. File generato: f1_functional_data_Bahrain_2024_final_v6.csv
--------------------
Dimensioni Dataset: (273333, 19)
Anteprima Variabili Formattate:
   LapNumber  TyreLife  Speed  Curvature  Type_Straight
0          2         5  277.9   0.000020              1
1          2         5  279.0   0.000010              1
2          2         5  280.1   0.000021              1
