In [None]:
pip install fastf1

In [None]:
pip install git+https://github.com/theOehrly/Fast-F1.git


In [None]:
import os
import fastf1

# Create the cache folder up front (no error if it already exists) and then
# point FastF1 at it.
cache_dir = './cache'
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [None]:
import fastf1

# Use the local ./cache directory as FastF1's cache.
fastf1.Cache.enable_cache(cache_dir='./cache')


In [None]:
# British GP qualifying: look up the session, load its data and print the
# results table.
session = fastf1.get_session(year=2024, gp='Silverstone', identifier='Q')
session.load()
print(session.results)

In [None]:
from fastf1 import plotting
import pandas as pd

# `os` and `fastf1` are imported by the earlier setup cell.
os.makedirs('./cache', exist_ok=True)
fastf1.Cache.enable_cache('./cache')

# Load session
session = fastf1.get_session(2024, 'Silverstone', 'R')  # Race session
session.load()

# Get results
results = session.results

# Output column -> column of `results` it is copied from.
column_sources = {
    'driver': 'FullName',        # Use full driver name
    'team': 'TeamName',
    'grid_pos': 'GridPosition',
    'finish_pos': 'Position',
    'points': 'Points',
    'status': 'Status',
}

# Build the frame column by column instead of looping over rows with
# iterrows(); going through to_numpy() drops the index of `results`, so the
# new frame gets a plain 0..n-1 index like the row-by-row version did.
df = pd.DataFrame({
    target: results[source].to_numpy()
    for target, source in column_sources.items()
})

# Binary target: 1 for the row whose finishing position is 1, else 0.
# Computed once here (the original derived it in the loop and then again).
df['winner'] = (df['finish_pos'] == 1).astype(int)
print(df.head())


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Encode categorical columns as integer codes. The fitted encoders stay
# available as le_driver / le_team so the same mapping can be applied to new
# data later on.
le_driver = LabelEncoder()
df['driver_encoded'] = le_driver.fit_transform(df['driver'])

le_team = LabelEncoder()
df['team_encoded'] = le_team.fit_transform(df['team'])

X = df[['grid_pos', 'team_encoded', 'driver_encoded']]
y = df['winner']

# Fixed seeds make the split and the forest reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# NOTE(review): df is built from a single race, so presumably only one row has
# winner == 1. If that row lands in the test split the model is trained on one
# class only and every downstream prediction is degenerate - make that visible.
if y_train.nunique() < 2:
    print("Warning: the training split contains a single class; "
          "the model cannot learn to identify winners.")

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out rows.
acc = clf.score(X_test, y_test)
print(f"Accuracy: {acc:.2f}")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predictions for the held-out rows.
y_pred = clf.predict(X_test)

# Per-class metrics first, then the confusion matrix.
report_text = classification_report(y_test, y_pred)
print(report_text)
confusion = confusion_matrix(y_test, y_pred)
print(confusion)


In [None]:
import matplotlib.pyplot as plt

importances = clf.feature_importances_
features = X.columns

# Horizontal bar chart with one bar per input feature, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots()
ax.barh(features, importances)
ax.set_xlabel("Feature Importance")
ax.set_title("What the model thinks is important")
plt.show()


In [None]:
# prompt: # Assume `df_predict` contains latest driver grid data

# Example grid to score (replace with data from your actual source). It starts
# with the raw columns grid_pos / driver / team; the encoded columns the model
# expects are added below.
grid_rows = [
    (1, 'Max Verstappen', 'Red Bull Racing'),
    (2, 'Lando Norris', 'McLaren'),
    (3, 'Lewis Hamilton', 'Mercedes'),
    (4, 'Carlos Sainz', 'Ferrari'),
    (5, 'Sergio Perez', 'Red Bull Racing'),
]
df_predict = pd.DataFrame(grid_rows, columns=['grid_pos', 'driver', 'team'])

# Encode with the *same* LabelEncoders that were fitted on the training data
# (df). Every driver and team listed here must have been present in that data;
# unseen values need separate handling (e.g. adding them to the training data).
df_predict = df_predict.assign(
    driver_encoded=le_driver.transform(df_predict['driver']),
    team_encoded=le_team.transform(df_predict['team']),
)

# Feature matrix in the column order used for training.
feature_columns = ['grid_pos', 'team_encoded', 'driver_encoded']
X_predict = df_predict[feature_columns]

# Hard class predictions (1 = predicted winner), stored next to the inputs.
predictions = clf.predict(X_predict)
df_predict['predicted_winner'] = predictions

print("\nPredictions on new data:")
print(df_predict)

# Keep only the rows the model labelled as winner.
is_predicted_winner = df_predict['predicted_winner'] == 1
predicted_winners_df = df_predict[is_predicted_winner]
print("\nPredicted winners:")
predicted_winners_df


In [None]:
# Assume df_predict contains latest driver grid data, and it's properly encoded
X_predict = df_predict[['grid_pos', 'team_encoded', 'driver_encoded']]

# Predict probability of being the winner (class 1).
# predict_proba returns one column per label in clf.classes_, in that order, so
# look up the column that belongs to label 1. (Column 0 is the probability of
# *not* winning, which is what the previous `[:, 0]` stored.)
class_labels = list(clf.classes_)
if 1 in class_labels:
    pred_probs = clf.predict_proba(X_predict)[:, class_labels.index(1)]
else:
    # The classifier was fitted on a split without any winner row, so it has
    # no column for class 1 at all.
    print("Warning: the model was trained without any winner example; "
          "win probabilities are reported as 0.")
    pred_probs = [0.0] * len(X_predict)

# Add to DataFrame
df_predict['win_probability'] = pred_probs

# Sort (most likely winner first) and display
df_predict = df_predict.sort_values(by='win_probability', ascending=False)
print(df_predict[['driver', 'team', 'grid_pos', 'win_probability']])


In [None]:
import joblib

# Write the fitted classifier to f1_winner_model.pkl in the working directory.
joblib.dump(value=clf, filename='f1_winner_model.pkl')


In [None]:
# Read the classifier back from the file written by the save cell.
# NOTE(review): joblib files are pickle-based - only load files you trust.
clf = joblib.load(filename='f1_winner_model.pkl')

In [None]:
# Row with the highest win probability.
top_label = df_predict['win_probability'].idxmax()
predicted_winner = df_predict.loc[top_label]

summary_lines = [
    "🏁 Predicted Winner:",
    f"Driver: {predicted_winner['driver']}",
    f"Team: {predicted_winner['team']}",
    f"Grid Position: {predicted_winner['grid_pos']}",
    f"Win Probability: {predicted_winner['win_probability']:.4f}",
]
print("\n".join(summary_lines))


In [None]:
# First three rows of df_predict in its current order (the probability cell
# sorts it by win_probability, highest first).
print("🥇 Top 3 Predicted Finishers:")
report_columns = ['driver', 'team', 'grid_pos', 'win_probability']
print(df_predict.loc[:, report_columns].iloc[:3])


In [None]:
# prompt: Save this to my projects

import os
import fastf1
from fastf1 import plotting
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import joblib

# Install fastf1 if not already installed
try:
    import fastf1
except ImportError:
    !pip install fastf1
    !pip install git+https://github.com/theOehrly/Fast-F1.git
    import fastf1


os.makedirs('./cache', exist_ok=True)  # Create the cache folder if it doesn't exist
fastf1.Cache.enable_cache('./cache')  # Now FastF1 can use it

# Load session for Qualifying (for example)
# session = fastf1.get_session(2024, 'Silverstone', 'Q')  # British GP Qualifying
# session.load()
# print(session.results)

# Load session for Race
session = fastf1.get_session(2024, 'Silverstone', 'R')  # Race session
session.load()

# Get results
results = session.results

# Output column -> column of `results` it is copied from.
column_sources = {
    'driver': 'FullName',
    'team': 'TeamName',
    'grid_pos': 'GridPosition',
    'finish_pos': 'Position',
    'points': 'Points',
    'status': 'Status',
}

# Build the frame column by column instead of looping over rows with
# iterrows(); going through to_numpy() drops the index of `results`, so the
# new frame gets a plain 0..n-1 index like the row-by-row version did.
df = pd.DataFrame({
    target: results[source].to_numpy()
    for target, source in column_sources.items()
})

# Binary target: 1 for the row whose finishing position is 1, else 0.
# Computed once here (the original derived it in the loop and then again).
df['winner'] = (df['finish_pos'] == 1).astype(int)
# print(df.head()) # Uncomment to print head

# Turn the driver and team names into integer codes; the fitted encoders are
# kept so the same mapping can be reused on new data.
le_driver = LabelEncoder().fit(df['driver'])
df['driver_encoded'] = le_driver.transform(df['driver'])

le_team = LabelEncoder().fit(df['team'])
df['team_encoded'] = le_team.transform(df['team'])

# Features and binary target.
feature_columns = ['grid_pos', 'team_encoded', 'driver_encoded']
X = df[feature_columns]
y = df['winner']

# 70/30 split; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Seeded random forest fitted on the training rows.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out rows.
acc = clf.score(X_test, y_test)
print(f"Accuracy: {acc:.2f}")

y_pred = clf.predict(X_test)

print("Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)
print("Confusion Matrix:")
confusion = confusion_matrix(y_test, y_pred)
print(confusion)

importances = clf.feature_importances_
features = X.columns

# Horizontal bar chart (8x4 inches) with one bar per input feature, drawn
# through the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 4))
ax.barh(features, importances)
ax.set_xlabel("Feature Importance")
ax.set_title("What the model thinks is important")
plt.show()

# --- Prediction on New Data (Example) ---
# Hypothetical grid to score (replace with data from your actual source). It
# starts with the raw columns grid_pos / driver / team; the encoded columns the
# model expects are added below.
grid_rows = [
    (1, 'Max Verstappen', 'Red Bull Racing'),
    (2, 'Lando Norris', 'McLaren'),
    (3, 'Lewis Hamilton', 'Mercedes'),
    (4, 'Carlos Sainz', 'Ferrari'),
    (5, 'Sergio Perez', 'Red Bull Racing'),
]
df_predict = pd.DataFrame(grid_rows, columns=['grid_pos', 'driver', 'team'])

# Encode with the *same* LabelEncoders that were fitted on the training data
# (df). Every driver and team listed here must have been present in that data;
# unseen values need separate handling (e.g. adding them to the training data).
df_predict['driver_encoded'] = le_driver.transform(df_predict['driver'])
df_predict['team_encoded'] = le_team.transform(df_predict['team'])

# Feature matrix in the column order used for training.
predict_columns = ['grid_pos', 'team_encoded', 'driver_encoded']
X_predict = df_predict[predict_columns]

# Hard class predictions (1 = predicted winner), stored next to the inputs.
predictions = clf.predict(X_predict)
df_predict['predicted_winner'] = predictions

print("\nPredictions on new data:")
print(df_predict)

# Independent copy holding only the rows the model labelled as winner.
is_predicted_winner = df_predict['predicted_winner'] == 1
predicted_winners_df = df_predict[is_predicted_winner].copy()
print("\nPredicted winners:")
print(predicted_winners_df)

# Predict probability of being the winner (class 1).
# predict_proba returns one column per label in clf.classes_, in that order.
# Look the winner column up instead of hard-coding index 1: if the training
# split happened to contain no winner row, classes_ holds a single label and
# `[:, 1]` would raise IndexError.
class_labels = list(clf.classes_)
if 1 in class_labels:
    pred_probs = clf.predict_proba(X_predict)[:, class_labels.index(1)]
else:
    print("\nWarning: the model was trained without any winner example; "
          "win probabilities are reported as 0.")
    pred_probs = [0.0] * len(X_predict)

# Add to DataFrame
df_predict['win_probability'] = pred_probs

# Sort (most likely winner first) and display
df_predict = df_predict.sort_values(by='win_probability', ascending=False)
print("\nRace Winner Probabilities:")
print(df_predict[['driver', 'team', 'grid_pos', 'win_probability']])

# Persist the fitted classifier to disk.
model_path = 'f1_winner_model.pkl'
joblib.dump(value=clf, filename=model_path)
print("\nModel saved as f1_winner_model.pkl")

# Load the model (optional, just for demonstration)
# clf_loaded = joblib.load('f1_winner_model.pkl')
# print("\nModel loaded successfully.")

# Report the most likely winner and the top 3 rows by win probability.
# df_predict is already sorted by win_probability (highest first), so the
# first row is the predicted winner.
if df_predict.empty:
    print("\nNo predictions to display.")
else:
    predicted_winner = df_predict.iloc[0]
    winner_lines = [
        "\n🏁 Predicted Winner:",
        f"Driver: {predicted_winner['driver']}",
        f"Team: {predicted_winner['team']}",
        f"Grid Position: {predicted_winner['grid_pos']}",
        f"Win Probability: {predicted_winner['win_probability']:.4f}",
    ]
    print("\n".join(winner_lines))

    print("\n🥇 Top 3 Predicted Finishers (by Win Probability):")
    report_columns = ['driver', 'team', 'grid_pos', 'win_probability']
    print(df_predict.loc[:, report_columns].iloc[:3])
