En este Jupyter Notebook construiremos un modelo que nos permita predecir quién ganará una carrera y en qué posición quedará cada piloto.

In [6]:
import pandas as pd
import numpy as np
import requests

In [7]:
season = 2023

# Driver standings endpoint of the Ergast-compatible Jolpica API (served over HTTPS)
url = f'https://api.jolpi.ca/ergast/f1/{season}/driverstandings/'

# Bound the wait so a stalled connection cannot hang the notebook
response = requests.get(url, timeout=10)
print(response.status_code)

# Stop here on a 4xx/5xx answer instead of failing later while decoding the body
response.raise_for_status()

content = response.json()

200


In [11]:
# content['MRData']['StandingsTable']['StandingsLists'][0]['DriverStandings']

---

Vamos a obtener los datos para crear el modelo predictivo.

In [1]:
import fastf1

Cargamos los resultados de una sesión (carrera). No necesitamos la telemetría, pero sí cargamos los datos meteorológicos (`weather=True`).

In [33]:
# Race session ('R') of round 13 of the 2023 season
session = fastf1.get_session(year=2023, gp=13, identifier='R')

# Telemetry is skipped; weather data is still requested
session.load(weather=True, telemetry=False)

core           INFO 	Loading data for Dutch Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '10', '11', '55', '44', '4', '23', '81', '31', '18', '27', '40', '77', '22', '20', '63', '24', '16', '2']


In [77]:
def get_weather_condition(compounds):
    """
    Classify a race as wet, mixed or dry from its tyre-compound usage.

    Parameters
    -----------
    - compounds (pd.Series): Tyre usage indexed by compound name (e.g. 'WET',
    'INTERMEDIATE', ...). The values are rescaled by their sum whenever that
    sum is not exactly 1, so raw counts are accepted as well as proportions.

    Returns
    --------
    - (str): One of
    - "Wet" when 'WET' plus 'INTERMEDIATE' account for more than 90% of usage.
    - "Mixed" when that share is above 10% and at most 90%.
    - "Dry" when that share is 10% or less.
    """
    # Cut-offs applied to the combined wet + intermediate share
    wet_cutoff = 0.9
    mixed_cutoff = 0.1

    # Rescale only when the input does not already sum to one
    usage_sum = compounds.sum()
    shares = compounds if usage_sum == 1 else compounds / usage_sum

    # Compounds absent from the Series contribute nothing
    rain_share = sum(shares.get(name, 0) for name in ('WET', 'INTERMEDIATE'))

    if rain_share > wet_cutoff:
        return "Wet"
    if rain_share > mixed_cutoff:
        return "Mixed"
    return "Dry"


In [97]:
def get_race_data(session, load=False, margin=10):
    """
    Build a per-driver results table for a race session.

    Parameters
    -----------
    - session: FastF1 session object; its `results` and `laps` attributes are
    read, and `load()` is called when `load` is True.
    - load (bool): When True, call `session.load(telemetry=False, weather=True)`
    before reading the results. Defaults to False.
    - margin (float): Penalty in seconds added to the slowest 'Finished' time
    for lapped drivers; drivers who did not finish get ten times this value.
    Defaults to 10.

    Returns
    --------
    - (pd.DataFrame): Copy of the selected result columns where 'Time' is
    expressed in seconds (0 for the first row, i.e. the winner), plus a
    'Weather' column holding the value returned by `get_weather_condition`.
    """
    if load:
        session.load(telemetry=False, weather=True)

    columns = ['DriverId', 'TeamId', 'Position', 'ClassifiedPosition',
               'GridPosition', 'Time', 'Status', 'Points']

    # Work on an explicit copy so the edits below never touch session.results
    # and do not trigger SettingWithCopyWarning
    results = session.results.loc[:, columns].copy()

    # The winner (first row) is the reference: its gap is set to zero
    if len(results) > 0:
        results.iloc[0, results.columns.get_loc('Time')] = pd.Timedelta(0)

    results['Time'] = results['Time'].dt.total_seconds()

    # Share of laps driven on each tyre compound
    compounds = session.laps['Compound'].value_counts(normalize=True)

    results['Weather'] = get_weather_condition(compounds)

    status = results['Status'].astype(str)
    finished = status == 'Finished'
    # Matches '+1 Lap', '+2 Laps', ... rather than only the exact '+1 Lap'
    lapped = status.str.match(r'^\+\d+ Laps?$')
    # 'Retired' plus any other non-finishing status (e.g. 'Accident') left without a time
    not_finished = ~finished & ~lapped & ((status == 'Retired') | results['Time'].isna())

    # Slowest time among the drivers that finished on the lead lap
    max_finished_time = results.loc[finished, 'Time'].max()

    # Fill in the missing times: small penalty when lapped, larger when not finishing
    results.loc[lapped, 'Time'] = max_finished_time + margin
    results.loc[not_finished, 'Time'] = max_finished_time + 10 * margin

    return results


Cogemos los resultados. Tenemos las siguientes columnas:

['DriverId', 'TeamId', 'Position', 'ClassifiedPosition', 'GridPosition', 'Time', 'Status', 'Points']

Adicionalmente añadimos la columna `Weather` (calculada en `get_race_data`) y:

['Season', 'Round', 'circuit_id']

In [98]:
# Display the processed results for the session loaded above (load defaults to False, so nothing is reloaded)
get_race_data(session)

Unnamed: 0,DriverId,TeamId,Position,ClassifiedPosition,GridPosition,Time,Status,Points,Weather
1,max_verstappen,red_bull,1.0,1,1.0,0.0,Finished,25.0,Mixed
14,alonso,aston_martin,2.0,2,5.0,3.744,Finished,19.0,Mixed
10,gasly,alpine,3.0,3,12.0,7.058,Finished,15.0,Mixed
11,perez,red_bull,4.0,4,7.0,10.068,Finished,12.0,Mixed
55,sainz,ferrari,5.0,5,6.0,12.541,Finished,10.0,Mixed
44,hamilton,mercedes,6.0,6,13.0,13.209,Finished,8.0,Mixed
4,norris,mclaren,7.0,7,2.0,13.232,Finished,6.0,Mixed
23,albon,williams,8.0,8,4.0,15.155,Finished,4.0,Mixed
81,piastri,mclaren,9.0,9,8.0,16.58,Finished,2.0,Mixed
31,ocon,alpine,10.0,10,16.0,18.346,Finished,1.0,Mixed
