# 🏎️ F1 Winner Prediction – Google Colab Notebook

Tento notebook obsahuje všechny kroky potřebné k vytvoření predikčního modelu pro určení vítěze závodu Formule 1 na základě reálných historických dat.

In [None]:
# 📦 Import knihoven
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

In [None]:
# 📥 Load the dataset
# The first key of the mapping returned by files.upload() is handed to
# pandas as the path of the CSV file to read.
from google.colab import files
uploaded = files.upload()
uploaded_names = list(uploaded.keys())
first_uploaded_name = uploaded_names[0]
df = pd.read_csv(first_uploaded_name)
df.head()

In [None]:
# 🧼 Data cleaning and preprocessing
# Keep only the columns used by the model. The explicit copy gives an
# independent frame, so the column assignments below do not write into a
# selection of the uploaded data (the usual SettingWithCopyWarning trigger).
df = df[["Season", "Circuit", "Grid Position", "Constructor", "Driver", "Final Position"]].copy()

# Drop incomplete rows BEFORE any conversion: casting a column that contains
# NaN to int raises, and after label encoding a missing category could no
# longer be detected by dropna().
df = df.dropna()

# One encoder per categorical column; they are kept as separate objects so
# each can be saved and reused to encode new inputs the same way.
encoder_circuit = LabelEncoder()
encoder_constructor = LabelEncoder()
encoder_driver = LabelEncoder()
df["Circuit"] = encoder_circuit.fit_transform(df["Circuit"])
df["Constructor"] = encoder_constructor.fit_transform(df["Constructor"])
df["Driver"] = encoder_driver.fit_transform(df["Driver"])

# Binary target: 1 when the row's final position is first place, else 0.
df["Final Position"] = df["Final Position"].astype(int)
df["Winner"] = (df["Final Position"] == 1).astype(int)
df.head()

In [None]:
# 🧠 Model training
# Features are the (already encoded) race attributes; the target is the
# binary "Winner" flag.
X = df[["Season", "Circuit", "Grid Position", "Constructor", "Driver"]]
y = df["Winner"]

# "Winner" is 1 only for rows whose final position is 1, so positives are
# presumably a small minority. Stratifying keeps the winner ratio the same in
# the train and test parts. Stratification needs at least two rows per class,
# so fall back to a plain random split when that does not hold.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=stratify_labels,
)

# Fixed random_state keeps the forest reproducible between runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# 💾 Save the model and the encoders
import os

# The notebook never creates the target directories, and writing a file into
# a missing directory fails, so create them first. exist_ok=True makes
# re-running this cell harmless.
for output_dir in ("models", "encoders"):
    os.makedirs(output_dir, exist_ok=True)

# The encoders are stored next to the model because new inputs must be
# encoded with the same fitted mappings that were used for training.
joblib.dump(model, "models/f1_winner_model.pkl")
joblib.dump(encoder_circuit, "encoders/enc_circuit.pkl")
joblib.dump(encoder_constructor, "encoders/enc_constructor.pkl")
joblib.dump(encoder_driver, "encoders/enc_driver.pkl")

In [None]:
# 📊 Model evaluation
# Predict on the held-out test split, then print the overall accuracy
# followed by the confusion matrix and the per-class report.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Přesnost: {acc * 100:.2f}%")

report_sections = (
    ("\nConfusion Matrix:", confusion_matrix),
    ("\nClassification Report:", classification_report),
)
for section_heading, metric_fn in report_sections:
    print(section_heading)
    print(metric_fn(y_test, y_pred))

In [None]:
# 📊 Confusion matrix visualisation
# Draw the test-set confusion matrix as an annotated heatmap; the same two
# class labels are used on both axes (rows = actual, columns = predicted).
cm = confusion_matrix(y_test, y_pred)
class_labels = ['Nevyhrál', 'Vyhrál']

plt.figure(figsize=(6,4))
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
heatmap_ax.set_xlabel('Predikováno')
heatmap_ax.set_ylabel('Skutečnost')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# 📈 Model accuracy per season
# For every season with enough rows, fit a throwaway classifier on a 70/30
# split of that season alone and record its test accuracy, then plot the
# accuracies over the seasons.
from sklearn.base import clone

# Seasons with fewer rows than this are skipped.
min_rows_per_season = 20
feature_columns = ["Season", "Circuit", "Grid Position", "Constructor", "Driver"]

years = sorted(df['Season'].unique())
accuracies = []
for year in years:
    subset = df[df['Season'] == year]
    if len(subset) < min_rows_per_season:
        continue
    X_year = subset[feature_columns]
    y_year = subset["Winner"]
    X_train_y, X_test_y, y_train_y, y_test_y = train_test_split(X_year, y_year, test_size=0.3, random_state=42)
    # Fit an unfitted copy with the same hyper-parameters instead of the
    # global `model`; refitting `model` here would silently replace the
    # classifier trained on the full dataset with a last-season-only one.
    season_model = clone(model)
    season_model.fit(X_train_y, y_train_y)
    y_pred_y = season_model.predict(X_test_y)
    acc_y = accuracy_score(y_test_y, y_pred_y)
    accuracies.append((year, acc_y))

if accuracies:
    years_plot, acc_plot = zip(*accuracies)
    plt.figure(figsize=(10,5))
    plt.plot(years_plot, [a*100 for a in acc_plot], marker='o')
    plt.title('Přesnost modelu podle sezóny')
    plt.xlabel('Sezóna')
    plt.ylabel('Přesnost (%)')
    plt.grid(True)
    plt.show()
else:
    # zip(*[]) cannot be unpacked into two names, so report the situation
    # instead of failing when every season was skipped.
    print("Žádná sezóna nemá dostatek dat pro vyhodnocení.")