In [None]:
!pip install fastf1

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
import fastf1
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [None]:
# fastf1 is already imported in the imports cell; only report which version
# the runtime actually loaded.
print(fastf1.__version__)

3.5.3


In [None]:
import os

# Directory used by FastF1 to persist downloaded session data between runs.
# Defaults to the Colab location used so far; set the FASTF1_CACHE environment
# variable to relocate it when running outside Colab.
cache_dir = os.environ.get('FASTF1_CACHE', '/content/cache')
os.makedirs(cache_dir, exist_ok=True)  # create it up front, before enabling the cache
fastf1.Cache.enable_cache(cache_dir)


In [None]:
# Reuse the directory prepared in the cache-setup cell instead of the
# cwd-relative 'cache' path, which only points at the same folder when the
# notebook happens to be run from /content.
fastf1.Cache.enable_cache(cache_dir)

def fetch_f1_data(year, round_number):
    """Fetch the qualifying results of one event via FastF1.

    Args:
        year: Season to query, passed straight to ``fastf1.get_session``.
        round_number: Round within that season, passed straight to
            ``fastf1.get_session``.

    Returns:
        A new DataFrame with the columns ``DriverNumber``, ``Driver``,
        ``TeamName``, ``Q1_sec``, ``Q2_sec`` and ``Q3_sec`` (segment times in
        seconds, NaN where no time is recorded), or ``None`` when the session
        cannot be loaded, lacks the expected columns, or has no result rows.
    """
    segment_columns = ['Q1', 'Q2', 'Q3']
    wanted_columns = ['DriverNumber', 'FullName', 'TeamName'] + segment_columns

    # Filled in once the session is loaded so the error path can report the
    # available columns without touching a possibly undefined session object.
    available_columns = None
    try:
        quali = fastf1.get_session(year, round_number, 'Q')
        quali.load()
        available_columns = quali.results.columns.tolist()

        # A session that has not been run/published yet loads "successfully"
        # but carries no rows; report that instead of returning an empty frame.
        if quali.results.empty:
            print(f"No qualifying results available for year {year}, round {round_number}")
            return None

        print(f"Fetched data for year {year}, round {round_number}")
        print("DataFrame columns available:", available_columns)

        # Work on a copy so the session's own results table is never modified.
        results = quali.results[wanted_columns].copy()

        # Timedelta -> float seconds; missing times (NaT) become NaN.
        for col in segment_columns:
            results[col] = pd.to_timedelta(results[col]).dt.total_seconds()

        results = results.rename(columns={
            'FullName': 'Driver',
            'Q1': 'Q1_sec',
            'Q2': 'Q2_sec',
            'Q3': 'Q3_sec',
        })

        print("\nQualifying Results Structure:")
        print(results.head())

        return results
    except Exception as e:
        # Deliberately best-effort: one unavailable session must not abort the
        # whole data collection, so the failure is reported and None returned.
        print(f"Error fetching data: {e}")
        if available_columns is not None:
            print("DataFrame columns available:", available_columns)
        return None

In [None]:
# Single-session smoke test of the fetch helper (2023, round 5 -- the log
# output below identifies it as the Miami Grand Prix qualifying).
dt = fetch_f1_data(year=2023, round_number=5)

core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Miami Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
req

Fetched data for year 2023, round 5
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
   DriverNumber           Driver         TeamName  Q1_sec  Q2_sec  Q3_sec
11           11     Sergio Perez  Red Bull Racing  87.713  87.328  86.841
14           14  Fernando Alonso     Aston Martin  88.179  87.097  87.202
55           55     Carlos Sainz          Ferrari  87.686  87.148  87.349
20           20  Kevin Magnussen     Haas F1 Team  87.809  87.673  87.767
10           10     Pierre Gasly           Alpine  88.061  87.612  87.786


In [None]:
def fetch_recent_data(current_year=2025, rounds=range(1, 5)):
    """Collect qualifying results for the training set using FastF1.

    Fetches every round in ``rounds`` of ``current_year`` plus the 2024 round 4
    session (labelled as the 2024 Japanese GP by this notebook), tagging each
    frame with ``Year`` and ``Round`` columns.

    Args:
        current_year: Season whose rounds are fetched. Defaults to 2025.
        rounds: Iterable of round numbers of ``current_year`` to fetch.
            Defaults to rounds 1-4.

    Returns:
        List of DataFrames, one per session that actually produced rows.
        Sessions that failed to load (``None``) or came back empty are skipped,
        so an empty list means no usable data at all.
    """
    all_data = []

    def _collect(year, round_num):
        # Fetch one session and keep it only if it contains result rows.
        df = fetch_f1_data(year, round_num)
        if df is None or df.empty:
            return
        # assign() returns a new frame, leaving the fetched one unmodified.
        all_data.append(df.assign(Year=year, Round=round_num))

    for round_num in rounds:
        print(f"Fetching data for {current_year} round {round_num}...")
        _collect(current_year, round_num)

    print("Fetching 2024 Japanese GP data...")
    _collect(2024, 4)

    return all_data

In [None]:
def compute_performance_factors(df, model):
    """Derive team and driver Q3 multipliers relative to a model baseline.

    The baseline is ``model``'s prediction for the mean ``Q1_sec``/``Q2_sec``
    of ``df``. Each factor is the group's mean ``Q3_sec`` (rows without a Q3
    time are ignored) divided by that baseline.

    Returns:
        ``(team_factors, driver_factors)``: two dicts keyed by ``TeamName``
        and ``Driver`` respectively.
    """
    # One-row feature frame holding the average Q1/Q2 times
    mean_features = pd.DataFrame({
        'Q1_sec': [df['Q1_sec'].mean()],
        'Q2_sec': [df['Q2_sec'].mean()],
    })
    baseline = model.predict(mean_features)[0]

    # Only rows that actually have a Q3 time contribute to the factors
    with_q3 = df[df['Q3_sec'].notna()]

    def _relative_mean(group_key):
        # Mean Q3 per group, expressed as a ratio of the baseline
        return with_q3.groupby(group_key)['Q3_sec'].mean().div(baseline).to_dict()

    return _relative_mean('TeamName'), _relative_mean('Driver')


In [None]:
def predict_japanese_gp(model, latest_data, driver_teams=None, seed=None):
    """Predict Q3 times for Japanese GP 2025 using computed multipliers.

    Each driver's time is ``base_time * team_factor * driver_factor`` plus a
    uniform random perturbation in [-0.1, 0.1) seconds. ``base_time`` is the
    model's prediction for the mean Q1/Q2 of ``latest_data``; the factors come
    from ``compute_performance_factors`` and default to 1.0 for any driver or
    team that has no entry there.

    Args:
        model: Fitted estimator whose ``predict`` accepts a frame with the
            columns ``Q1_sec`` and ``Q2_sec``.
        latest_data: DataFrame with ``Q1_sec``, ``Q2_sec``, ``Q3_sec``,
            ``Driver`` and ``TeamName`` columns.
        driver_teams: Optional ``{driver name: team name}`` mapping of the grid
            to predict. Names must match the ``Driver``/``TeamName`` values in
            ``latest_data`` for a factor to apply. When omitted, the built-in
            line-up below is used.
            NOTE(review): that built-in line-up differs from the 2025 entries
            visible in the fetched data (e.g. Lewis Hamilton is listed under
            Mercedes, 'RB' vs 'Racing Bulls'); pass an explicit mapping for an
            up-to-date grid.
        seed: Optional seed for the noise term. ``None`` (default) keeps using
            NumPy's global random state, as before.

    Returns:
        DataFrame with the columns ``Driver``, ``Team`` and ``Predicted_Q3``,
        sorted by ascending predicted time. The same table is printed.
    """
    # Get dynamic multipliers
    team_factors, driver_factors = compute_performance_factors(latest_data, model)

    if driver_teams is None:
        driver_teams = {
            'Max Verstappen': 'Red Bull Racing',
            'Sergio Perez': 'Red Bull Racing',
            'Charles Leclerc': 'Ferrari',
            'Carlos Sainz': 'Ferrari',
            'Lewis Hamilton': 'Mercedes',
            'George Russell': 'Mercedes',
            'Lando Norris': 'McLaren',
            'Oscar Piastri': 'McLaren',
            'Fernando Alonso': 'Aston Martin',
            'Lance Stroll': 'Aston Martin',
            'Daniel Ricciardo': 'RB',
            'Yuki Tsunoda': 'RB',
            'Alexander Albon': 'Williams',
            'Logan Sargeant': 'Williams',
            'Valtteri Bottas': 'Kick Sauber',
            'Zhou Guanyu': 'Kick Sauber',
            'Kevin Magnussen': 'Haas F1 Team',
            'Nico Hulkenberg': 'Haas F1 Team',
            'Pierre Gasly': 'Alpine',
            'Esteban Ocon': 'Alpine'
        }

    results_df = pd.DataFrame(list(driver_teams.items()), columns=['Driver', 'Team'])

    # Base Q3 time prediction from the average Q1/Q2 of the supplied data
    avg_q1 = latest_data['Q1_sec'].mean()
    avg_q2 = latest_data['Q2_sec'].mean()
    base_time = model.predict(pd.DataFrame([[avg_q1, avg_q2]], columns=['Q1_sec', 'Q2_sec']))[0]

    # Seeded generator when reproducibility is requested, global state otherwise
    rng = np.random.default_rng(seed) if seed is not None else np.random
    noise = rng.uniform(-0.1, 0.1, size=len(results_df))

    results_df['Predicted_Q3'] = [
        base_time * team_factors.get(team, 1.0) * driver_factors.get(driver, 1.0) + jitter
        for driver, team, jitter in zip(results_df['Driver'], results_df['Team'], noise)
    ]
    results_df = results_df.sort_values('Predicted_Q3').reset_index(drop=True)

    print("\n🇯🇵 Japanese GP 2025 Qualifying Predictions:")
    print("=" * 100)
    print(f"{'Position':<10}{'Driver':<20}{'Team':<25}{'Predicted Q3':<15}")
    print("-" * 100)

    for position, row in enumerate(results_df.itertuples(index=False), start=1):
        print(f"{position:<10}{row.Driver:<20}{row.Team:<25}{row.Predicted_Q3:.3f}s")

    return results_df


In [None]:
print("Initializing enhanced F1 prediction model...")
all_data = fetch_recent_data()

# Sessions without published results can come back as None or as empty
# frames; only frames that contain rows are usable for training.
usable_frames = [frame for frame in all_data if frame is not None and not frame.empty]

valid_data = None
if usable_frames:
    combined_df = pd.concat(usable_frames, ignore_index=True)

    # Train only on drivers who set a time in all three segments. Keeping rows
    # without Q3 and median-imputing the target would invent labels (and the
    # median would be taken across different circuits with very different lap
    # times), so incomplete rows are dropped instead of imputed.
    valid_data = (
        combined_df
        .dropna(subset=['Q1_sec', 'Q2_sec', 'Q3_sec'])
        .reset_index(drop=True)
    )

if valid_data is not None and not valid_data.empty:
    X = valid_data[['Q1_sec', 'Q2_sec']]
    y = valid_data['Q3_sec']

    # No imputation is needed after the dropna above; the *_clean names are
    # kept so code that refers to them keeps working.
    X_clean = X
    y_clean = y

    model = LinearRegression()
    model.fit(X_clean, y_clean)

    predict_japanese_gp(model, valid_data)

    # NOTE: these metrics are computed on the training rows themselves, so
    # they describe goodness of fit, not out-of-sample accuracy.
    y_pred = model.predict(X_clean)
    mae = mean_absolute_error(y_clean, y_pred)
    r2 = r2_score(y_clean, y_pred)

    print("\nModel Performance Metrics:")
    print(f'Mean Absolute Error: {mae:.2f} seconds')
    print(f'R^2 Score: {r2:.2f}')
else:
    print("Failed to fetch F1 data")

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...


Initializing enhanced F1 prediction model...
Fetching data for 2025 round 1...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.re

Fetched data for year 2025, round 1
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
   DriverNumber          Driver         TeamName  Q1_sec  Q2_sec  Q3_sec
4             4    Lando Norris          McLaren  75.912  75.415  75.096
81           81   Oscar Piastri          McLaren  76.062  75.468  75.180
1             1  Max Verstappen  Red Bull Racing  76.018  75.565  75.481
63           63  George Russell         Mercedes  75.971  75.798  75.546
22           22    Yuki Tsunoda     Racing Bulls  76.225  76.009  75.670
Fetching data for 2025 round 2...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for track_status_data. Loading data...
_api  

Fetched data for year 2025, round 2
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
   DriverNumber          Driver         TeamName  Q1_sec  Q2_sec  Q3_sec
81           81   Oscar Piastri          McLaren  91.591  91.200  90.641
63           63  George Russell         Mercedes  91.295  91.307  90.723
4             4    Lando Norris          McLaren  90.983  90.787  90.793
1             1  Max Verstappen  Red Bull Racing  91.424  91.142  90.817
44           44  Lewis Hamilton          Ferrari  91.690  91.501  90.927
Fetching data for 2025 round 3...


DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
DEBUG:fastf1.fastf1.core:Traceback for failure in session info data
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/_api.py", line 1687, in session_info
    raise SessionNotAvailableError(
fastf1._api.SessionNotAvailableError: No data for this session! If this session only finished recently, please try again in a few mi

Fetched data for year 2025, round 3
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
Empty DataFrame
Columns: [DriverNumber, Driver, TeamName, Q1_sec, Q2_sec, Q3_sec]
Index: []
Fetching data for 2025 round 4...


DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
DEBUG:fastf1.fastf1.core:Traceback for failure in session info data
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/_api.py", line 1687, in session_info
    raise SessionNotAvailableError(
fastf1._api.SessionNotAvailableError: No data for this session! If this session only finished recently, please try again in a few mi

Fetched data for year 2025, round 4
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
Empty DataFrame
Columns: [DriverNumber, Driver, TeamName, Q1_sec, Q2_sec, Q3_sec]
Index: []
Fetching 2024 Japanese GP data...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for track_status_data. Loading data...
_api  

Fetched data for year 2024, round 4
DataFrame columns available: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points']

Qualifying Results Structure:
   DriverNumber           Driver         TeamName  Q1_sec  Q2_sec  Q3_sec
1             1   Max Verstappen  Red Bull Racing  88.866  88.740  88.197
11           11     Sergio Perez  Red Bull Racing  89.303  88.752  88.263
4             4     Lando Norris          McLaren  89.536  88.940  88.489
55           55     Carlos Sainz          Ferrari  89.513  89.099  88.682
14           14  Fernando Alonso     Aston Martin  89.254  89.082  88.686

🇯🇵 Japanese GP 2025 Qualifying Predictions:
Position  Driver              Team                     Predicted Q3   
------------------------------------------------------------------------------------