In [4]:
# Full Jupyter Notebook-style Code for Predicting F1 Qualifying Position with Track Awareness

# Required installations (run in a notebook cell if not installed)
# !pip install fastf1 pandas scikit-learn ipywidgets

import fastf1
import pandas as pd
from fastf1 import get_session
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import ipywidgets as widgets
from IPython.display import display
import os

# Point FastF1 at an on-disk cache folder. The folder is created up front
# (a no-op when it already exists) so enable_cache() receives an existing path.
_cache_dir = 'cache'
os.makedirs(_cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(_cache_dir)

# ---- TRACK METADATA ----
# Round number -> circuit facts for the 2021 season, which is the season the
# data-collection loop below requests from FastF1. The keys are passed straight
# to get_session() as the round number, so they must follow the official 2021
# calendar: the Red Bull Ring hosted two consecutive rounds (8 and 9), which
# makes Silverstone round 10 and gives the season 22 rounds in total.
track_metadata = {
    1: {"circuit": "Bahrain", "length_km": 5.412, "altitude_m": 10, "corners": 15},
    2: {"circuit": "Imola", "length_km": 4.909, "altitude_m": 12, "corners": 19},
    3: {"circuit": "Portimao", "length_km": 4.653, "altitude_m": 96, "corners": 15},
    4: {"circuit": "Barcelona", "length_km": 4.675, "altitude_m": 109, "corners": 16},
    5: {"circuit": "Monaco", "length_km": 3.337, "altitude_m": 0, "corners": 19},
    6: {"circuit": "Baku", "length_km": 6.003, "altitude_m": 2, "corners": 20},
    7: {"circuit": "Paul Ricard", "length_km": 5.842, "altitude_m": 408, "corners": 15},
    8: {"circuit": "Red Bull Ring", "length_km": 4.318, "altitude_m": 677, "corners": 10},
    9: {"circuit": "Red Bull Ring", "length_km": 4.318, "altitude_m": 677, "corners": 10},
    10: {"circuit": "Silverstone", "length_km": 5.891, "altitude_m": 153, "corners": 18},
    11: {"circuit": "Hungaroring", "length_km": 4.381, "altitude_m": 264, "corners": 14},
    12: {"circuit": "Spa", "length_km": 7.004, "altitude_m": 401, "corners": 20},
    13: {"circuit": "Zandvoort", "length_km": 4.259, "altitude_m": 1, "corners": 14},
    14: {"circuit": "Monza", "length_km": 5.793, "altitude_m": 162, "corners": 11},
    15: {"circuit": "Sochi", "length_km": 5.848, "altitude_m": 2, "corners": 18},
    16: {"circuit": "Istanbul", "length_km": 5.338, "altitude_m": 130, "corners": 14},
    17: {"circuit": "Austin", "length_km": 5.513, "altitude_m": 194, "corners": 20},
    18: {"circuit": "Mexico City", "length_km": 4.304, "altitude_m": 2285, "corners": 17},
    19: {"circuit": "Interlagos", "length_km": 4.309, "altitude_m": 786, "corners": 15},
    20: {"circuit": "Losail", "length_km": 5.380, "altitude_m": 8, "corners": 16},
    21: {"circuit": "Jeddah", "length_km": 6.174, "altitude_m": 2, "corners": 27},
    22: {"circuit": "Yas Marina", "length_km": 5.281, "altitude_m": 1, "corners": 16},
}

# ---- FUNCTIONS ----
def get_session_fastest_laps(year, gp, session_name):
    """Return each driver's fastest quick lap for one session.

    Args:
        year: Season passed to ``get_session``.
        gp: Event identifier passed to ``get_session`` (callers in this file
            pass the round number).
        session_name: Session identifier such as ``'FP1'``; it is also used as
            the prefix of the output time column.

    Returns:
        DataFrame with one row per driver and the columns ``Driver`` and
        ``<session_name>_Time``, the latter being the minimum ``LapTime`` among
        the laps kept by ``pick_quicklaps()``.
    """
    session = get_session(year, gp, session_name)
    # Only lap timing is used below, so skip car telemetry and weather data:
    # they are the most expensive part of loading a session.
    session.load(telemetry=False, weather=False)
    quick_laps = session.laps.pick_quicklaps()
    fastest_laps = quick_laps.groupby('Driver')['LapTime'].min().reset_index()
    return fastest_laps.rename(columns={'LapTime': f'{session_name}_Time'})

def get_qualifying_positions(year, gp):
    """Return the qualifying classification for one event.

    Args:
        year: Season passed to ``get_session``.
        gp: Event identifier passed to ``get_session`` (callers in this file
            pass the round number).

    Returns:
        A new DataFrame with the columns ``Driver`` (taken from the results'
        ``Abbreviation`` column) and ``Qualifying_Position`` (taken from
        ``Position``).
    """
    session = get_session(year, gp, 'Q')
    # The classification does not need car telemetry or weather data.
    session.load(telemetry=False, weather=False)
    results = session.results[['Abbreviation', 'Position']]
    # rename() without inplace returns a fresh frame; renaming a column slice
    # in place triggers pandas' chained-assignment warnings.
    return results.rename(
        columns={'Abbreviation': 'Driver', 'Position': 'Qualifying_Position'}
    )

def normalize_session_times(df, session_cols):
    """Add a z-scored ``<col>_norm`` column for every column in *session_cols*.

    Each column is centred on its own mean and divided by its own sample
    standard deviation, so the values express how far a driver is from the
    field within that table rather than an absolute lap time.

    Args:
        df: DataFrame holding the columns named in *session_cols*. It is
            modified in place.
        session_cols: Names of the numeric columns to normalise.

    Returns:
        The same *df* object, with the ``*_norm`` columns added.
    """
    for col in session_cols:
        centered = df[col] - df[col].mean()
        spread = df[col].std()
        # std() is NaN for fewer than two rows and 0 when every value is the
        # same; dividing by either would turn the whole column into NaN.
        # Every value equals the mean in those cases, so a unit divisor
        # yields the correct z-score of 0.
        if pd.isna(spread) or spread == 0:
            spread = 1.0
        df[f'{col}_norm'] = centered / spread
    return df

def collect_race_data(year, gp, meta):
    """Build the per-driver feature table for one race weekend.

    Fastest practice laps (FP1-FP3) and the qualifying classification are
    joined on ``Driver``; drivers missing any of those values are dropped.
    Practice times are converted to seconds and z-scored, and the circuit
    facts from *meta* are attached to every row.

    Args:
        year: Season forwarded to the session loaders.
        gp: Event identifier forwarded to the session loaders; stored in the
            ``Round`` column.
        meta: Mapping with the keys ``length_km``, ``altitude_m``, ``corners``
            and ``circuit``.

    Returns:
        DataFrame with one row per driver.
    """
    practice_names = ('FP1', 'FP2', 'FP3')
    time_cols = [f'{name}_Time' for name in practice_names]

    # Load practice sessions first and the qualifying classification last.
    tables = [get_session_fastest_laps(year, gp, name) for name in practice_names]
    tables.append(get_qualifying_positions(year, gp))

    data = tables[0]
    for table in tables[1:]:
        data = data.merge(table, on='Driver', how='outer')
    # Keep only drivers that have a value in every column.
    data.dropna(inplace=True)

    # Lap times arrive as timedeltas; the model works on float seconds.
    for col in time_cols:
        data[col] = data[col].dt.total_seconds()

    data = normalize_session_times(data, time_cols)

    constant_columns = (
        ('track_length_km', meta['length_km']),
        ('altitude_m', meta['altitude_m']),
        ('corners', meta['corners']),
        ('Circuit', meta['circuit']),
        ('Round', gp),
        ('Year', year),
    )
    for column, value in constant_columns:
        data[column] = value
    return data

# ---- DATA COLLECTION ----
race_data = []
# Iterate over the metadata table itself so the rounds requested from FastF1
# always match the rounds that have circuit facts.
for rnd, meta in track_metadata.items():
    try:
        data = collect_race_data(2021, rnd, meta)
        race_data.append(data)
    except Exception as e:
        # Best effort: a weekend that cannot be loaded (for example one that
        # is missing a practice session) is reported and left out.
        print(f"Round {rnd} skipped due to error: {e}")

# pd.concat() on an empty list fails with an unhelpful "No objects to
# concatenate"; say what actually went wrong instead.
if not race_data:
    raise RuntimeError(
        "No race data could be collected; every round was skipped "
        "(see the messages above)."
    )

df = pd.concat(race_data, ignore_index=True)

# ---- ENCODING ----
# Integer ids for the two categorical columns. The fitted encoders are kept
# at module level because the prediction function reuses them.
le_driver = LabelEncoder()
le_circuit = LabelEncoder()
df['Driver_Encoded'] = le_driver.fit_transform(df['Driver'])
df['Circuit_Encoded'] = le_circuit.fit_transform(df['Circuit'])

# ---- FEATURES ----
features = [
    'FP1_Time_norm',
    'FP2_Time_norm',
    'FP3_Time_norm',
    'track_length_km',
    'altitude_m',
    'corners',
    'Circuit_Encoded',
    'Driver_Encoded',
]
X = df[features]
y = df['Qualifying_Position'].astype(int)

# ---- MODEL TRAINING ----
# Hold out 20% of the rows, stratified by qualifying position, for the report.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)
print(classification_report(y_test, model.predict(X_test)))

# ---- MENU TO SELECT NEXT GP ----
def select_next_track(metadata):
    """Show a dropdown listing every round in *metadata* and return the widget.

    Args:
        metadata: Mapping of round number to a dict that has a ``circuit`` key.

    Returns:
        The displayed ``widgets.Dropdown``; each option's value is the round
        number.
    """
    choices = []
    for round_no, track in metadata.items():
        choices.append((f"Round {round_no}: {track['circuit']}", round_no))
    picker = widgets.Dropdown(options=choices, description='Next GP:')
    display(picker)
    return picker

# Display the selector now; the chosen round is read later from dropdown.value.
dropdown = select_next_track(track_metadata)

# ---- PREDICTION FUNCTION ----
def predict_next_qualifying_order(year, gp):
    """Print the model's predicted qualifying position for every driver.

    The feature table comes from ``collect_race_data``, which also loads the
    qualifying session, so this only works for events whose practice and
    qualifying data are all available.

    Drivers the driver encoder has never seen are skipped with a notice
    instead of aborting the whole prediction. A circuit the circuit encoder
    has never seen, or an empty feature table, is reported and nothing is
    predicted.

    Args:
        year: Season of the event.
        gp: Round number; must be a key of ``track_metadata``.

    Returns:
        None. The result, or an error message, is printed.
    """
    try:
        meta = track_metadata[gp]
        data = collect_race_data(year, gp, meta)

        # LabelEncoder.transform() raises on labels it was not fitted on, so
        # check both categorical inputs before encoding.
        if meta['circuit'] not in le_circuit.classes_:
            print(f"Error in prediction: circuit '{meta['circuit']}' "
                  "was not part of the training data")
            return

        known = data['Driver'].isin(le_driver.classes_)
        if not known.all():
            skipped = ', '.join(sorted(data.loc[~known, 'Driver']))
            print(f"Skipping drivers not seen during training: {skipped}")
            data = data[known].copy()

        if data.empty:
            print("Error in prediction: no drivers with complete session data")
            return

        data['Driver_Encoded'] = le_driver.transform(data['Driver'])
        data['Circuit_Encoded'] = le_circuit.transform([meta['circuit']] * len(data))

        X_next = data[features]
        data['Predicted_Position'] = model.predict(X_next)

        data_sorted = data.sort_values(by='Predicted_Position')
        print("\nPredicted Qualifying Order:")
        print(data_sorted[['Driver', 'Predicted_Position']].to_string(index=False))

    except Exception as e:
        # Notebook-level boundary: report the failure instead of a traceback.
        print(f"Error in prediction: {e}")

# ---- AFTER DROPDOWN SELECTION ----
# Run this in a cell after selecting a round:
# predict_next_qualifying_order(2021, dropdown.value)


core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


KeyboardInterrupt: 