In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, QuantileTransformer, OrdinalEncoder, FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.decomposition import PCA
from category_encoders import TargetEncoder
from pandas.api.types import CategoricalDtype

In [31]:
# Load the 2024 lap-level dataset (the `_Q` file; its preview shows SessionType 'Q').
raw_qualys_24 = pd.read_csv(filepath_or_buffer='../datasets/f1_2024_Q.csv')

In [32]:
# Preview the first five rows of the raw frame.
raw_qualys_24.head(n=5)

Unnamed: 0,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,...,EventName,SessionType,EventDate,Time_weather,AirTemp,TrackTemp,Humidity,AvgSpeed,MaxThrottle,AvgBrake
0,SAI,55,,1.0,1.0,0 days 00:13:35.553000,,,47.556,25.067,...,Bahrain Grand Prix,Q,2024-03-02,795.661,18.1,22.0,48.0,,,
1,LEC,16,,1.0,1.0,0 days 00:13:41.044000,,,45.475,27.582,...,Bahrain Grand Prix,Q,2024-03-02,795.661,18.1,22.0,48.0,,,
2,OCO,31,,1.0,1.0,0 days 00:13:46.438000,,,48.58,25.377,...,Bahrain Grand Prix,Q,2024-03-02,855.673,18.1,21.9,48.0,,,
3,GAS,10,,1.0,1.0,0 days 00:13:48.410000,,,46.947,31.307,...,Bahrain Grand Prix,Q,2024-03-02,855.673,18.1,21.9,48.0,,,
4,SAI,55,91.208,2.0,1.0,,,29.76,38.878,22.57,...,Bahrain Grand Prix,Q,2024-03-02,915.667,18.1,21.8,48.0,,,


In [33]:
# Build one consolidated row per (Driver, EventName) pair from the lap-level data.

# Ordered categorical used for the starting position; categories are listed
# from 20 down to 1, so 1 is the highest category in the ordering.
position_start_order = CategoricalDtype(categories=list(range(20, 0, -1)), ordered=True)

result = (
    raw_qualys_24
    .groupby(['Driver', 'EventName'], observed=True)
    .agg(
        Event_date=pd.NamedAgg(column='EventDate', aggfunc='first'),
        Team=pd.NamedAgg(column='Team', aggfunc='first'),
        Position_start=pd.NamedAgg(column='Position', aggfunc='first'),
        Position_end=pd.NamedAgg(column='Position', aggfunc='last'),
        QualyAvg_LapTime=pd.NamedAgg(column='LapTime', aggfunc='mean'),
        QualyFastest_LapTime=pd.NamedAgg(column='LapTime', aggfunc='min'),
        QualyAvgSpeedI1=pd.NamedAgg(column='SpeedI1', aggfunc='mean'),
        QualyAvgSpeedI2=pd.NamedAgg(column='SpeedI2', aggfunc='mean'),
        Tyre_Compound_start=pd.NamedAgg(column='Compound', aggfunc='first'),
        FreshTyre_start=pd.NamedAgg(column='FreshTyre', aggfunc='first'),
        Avg_TrackTemp=pd.NamedAgg(column='TrackTemp', aggfunc='mean'),
        Avg_AirTemp=pd.NamedAgg(column='AirTemp', aggfunc='mean'),
        Avg_Humidity=pd.NamedAgg(column='Humidity', aggfunc='mean'),
    )
    .reset_index()
    .assign(
        # Cast the 'first' aggregated position to the ordered categorical above;
        # values outside its categories become NaN.
        Position_start=lambda df: df['Position_start'].astype(position_start_order),
        # Winner flag: 1 when the 'last' aggregated position equals 1, otherwise 0.
        Winner=lambda df: pd.Series(
            np.where(df['Position_end'] == 1, 1, 0), index=df.index
        ).astype('category'),
    )
    # Position_end is only needed to derive Winner.
    .drop(columns=['Position_end'])
)

# NOTE(review): in the displayed output Position_start is empty and Winner is 0
# for every row shown -- confirm that 'Position' is actually populated for
# these laps before relying on either column.
result.head()

Unnamed: 0,Driver,EventName,Event_date,Team,Position_start,QualyAvg_LapTime,QualyFastest_LapTime,QualyAvgSpeedI1,QualyAvgSpeedI2,Tyre_Compound_start,FreshTyre_start,Avg_TrackTemp,Avg_AirTemp,Avg_Humidity,Winner
0,ALB,Abu Dhabi Grand Prix,2024-12-08,Williams,,95.8845,83.821,265.666667,281.5,SOFT,True,30.083333,25.916667,64.5,0
1,ALB,Australian Grand Prix,2024-03-24,Williams,,91.0515,77.13,259.0,288.466667,SOFT,True,34.793333,18.78,53.266667,0
2,ALB,Austrian Grand Prix,2024-06-30,Williams,,81.284,65.736,294.888889,205.333333,SOFT,True,46.766667,31.4,38.111111,0
3,ALB,Azerbaijan Grand Prix,2024-09-15,Williams,,117.25225,102.84,173.736842,204.894737,SOFT,True,36.836842,27.042105,36.421053,0
4,ALB,Bahrain Grand Prix,2024-03-02,Williams,,104.6405,90.221,201.666667,238.416667,SOFT,True,21.516667,18.083333,46.666667,0


In [34]:
# Independent copy holding only the event/driver keys and the four Qualy* aggregates.
result_qualy = result.loc[
    :,
    [
        'EventName',
        'Driver',
        'QualyAvg_LapTime',
        'QualyFastest_LapTime',
        'QualyAvgSpeedI1',
        'QualyAvgSpeedI2',
    ],
].copy()

In [35]:
# Export the selected columns to CSV; index=False leaves the row index out of the file.
result_qualy.to_csv(path_or_buf='../datasets/result_qualy.csv', index=False)