In [176]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import seaborn as sns
import mplfinance as mpf
import yfinance as yf
from datetime import datetime
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, mean_squared_error
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV
import joblib
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_squared_error, r2_score

In [177]:
df = pd.read_csv('formula/qualifying.csv', na_values='\\N')
df

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
0,1,18,1,1,22,1,1:26.572,1:25.187,1:26.714
1,2,18,9,2,4,2,1:26.103,1:25.315,1:26.869
2,3,18,5,1,23,3,1:25.664,1:25.452,1:27.079
3,4,18,13,6,2,4,1:25.994,1:25.691,1:27.178
4,5,18,2,2,3,5,1:25.960,1:25.518,1:27.236
...,...,...,...,...,...,...,...,...,...
10249,10307,1132,822,15,77,16,1:32.431,,
10250,10308,1132,825,210,20,17,1:32.905,,
10251,10309,1132,839,214,31,18,1:34.557,,
10252,10310,1132,815,9,11,19,1:38.348,,


In [178]:
df.isnull().sum()

qualifyId           0
raceId              0
driverId            0
constructorId       0
number              0
position            0
q1                154
q2               4585
q3               6740
dtype: int64

In [179]:
def convertir_a_segundos(tiempo):
    try:
        minutos, resto = tiempo.split(':')
        segundos, milisegundos = resto.split('.')
        total_segundos = int(minutos) * 60 + int(segundos) + int(milisegundos) / 1000
        return total_segundos
    except:
        return np.nan

for col in ['q1', 'q2', 'q3']:
    df[col] = df[col].apply(convertir_a_segundos)

print(df[['q1', 'q2', 'q3']].head())

       q1      q2      q3
0  86.572  85.187  86.714
1  86.103  85.315  86.869
2  85.664  85.452  87.079
3  85.994  85.691  87.178
4  85.960  85.518  87.236


In [180]:
imputer = KNNImputer(n_neighbors=7, weights='uniform')
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

In [181]:
df.isnull().sum()

qualifyId        0
raceId           0
driverId         0
constructorId    0
number           0
position         0
q1               0
q2               0
q3               0
dtype: int64

In [182]:
df.dropna(inplace=True)
df

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
0,1.0,18.0,1.0,1.0,22.0,1.0,86.572,85.187000,86.714000
1,2.0,18.0,9.0,2.0,4.0,2.0,86.103,85.315000,86.869000
2,3.0,18.0,5.0,1.0,23.0,3.0,85.664,85.452000,87.079000
3,4.0,18.0,13.0,6.0,2.0,4.0,85.994,85.691000,87.178000
4,5.0,18.0,2.0,2.0,3.0,5.0,85.960,85.518000,87.236000
...,...,...,...,...,...,...,...,...,...
10249,10307.0,1132.0,822.0,15.0,77.0,16.0,92.431,76.385143,75.174714
10250,10308.0,1132.0,825.0,210.0,20.0,17.0,92.905,74.762000,72.311429
10251,10309.0,1132.0,839.0,214.0,31.0,18.0,94.557,74.779571,72.311429
10252,10310.0,1132.0,815.0,9.0,11.0,19.0,98.348,83.938714,77.134571


In [183]:
df.isnull().sum()

qualifyId        0
raceId           0
driverId         0
constructorId    0
number           0
position         0
q1               0
q2               0
q3               0
dtype: int64

In [184]:
df = df.drop(columns='qualifyId')
df

Unnamed: 0,raceId,driverId,constructorId,number,position,q1,q2,q3
0,18.0,1.0,1.0,22.0,1.0,86.572,85.187000,86.714000
1,18.0,9.0,2.0,4.0,2.0,86.103,85.315000,86.869000
2,18.0,5.0,1.0,23.0,3.0,85.664,85.452000,87.079000
3,18.0,13.0,6.0,2.0,4.0,85.994,85.691000,87.178000
4,18.0,2.0,2.0,3.0,5.0,85.960,85.518000,87.236000
...,...,...,...,...,...,...,...,...
10249,1132.0,822.0,15.0,77.0,16.0,92.431,76.385143,75.174714
10250,1132.0,825.0,210.0,20.0,17.0,92.905,74.762000,72.311429
10251,1132.0,839.0,214.0,31.0,18.0,94.557,74.779571,72.311429
10252,1132.0,815.0,9.0,11.0,19.0,98.348,83.938714,77.134571


In [185]:
df = df.drop(columns='raceId')
df

Unnamed: 0,driverId,constructorId,number,position,q1,q2,q3
0,1.0,1.0,22.0,1.0,86.572,85.187000,86.714000
1,9.0,2.0,4.0,2.0,86.103,85.315000,86.869000
2,5.0,1.0,23.0,3.0,85.664,85.452000,87.079000
3,13.0,6.0,2.0,4.0,85.994,85.691000,87.178000
4,2.0,2.0,3.0,5.0,85.960,85.518000,87.236000
...,...,...,...,...,...,...,...
10249,822.0,15.0,77.0,16.0,92.431,76.385143,75.174714
10250,825.0,210.0,20.0,17.0,92.905,74.762000,72.311429
10251,839.0,214.0,31.0,18.0,94.557,74.779571,72.311429
10252,815.0,9.0,11.0,19.0,98.348,83.938714,77.134571


In [186]:
df = df.drop(columns='number')
df

Unnamed: 0,driverId,constructorId,position,q1,q2,q3
0,1.0,1.0,1.0,86.572,85.187000,86.714000
1,9.0,2.0,2.0,86.103,85.315000,86.869000
2,5.0,1.0,3.0,85.664,85.452000,87.079000
3,13.0,6.0,4.0,85.994,85.691000,87.178000
4,2.0,2.0,5.0,85.960,85.518000,87.236000
...,...,...,...,...,...,...
10249,822.0,15.0,16.0,92.431,76.385143,75.174714
10250,825.0,210.0,17.0,92.905,74.762000,72.311429
10251,839.0,214.0,18.0,94.557,74.779571,72.311429
10252,815.0,9.0,19.0,98.348,83.938714,77.134571


In [187]:
valores_unicos_contados = df.nunique()
valores_unicos_contados

driverId          170
constructorId      47
position           28
q1               9106
q2               7144
q3               6510
dtype: int64

In [188]:
X = df.drop(columns=['position']) 
y = df['position']

In [189]:
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1)).ravel()

In [190]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

In [191]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'epsilon': [0.1, 0.2, 0.5]
}

In [192]:
svr = SVR(kernel='rbf')

In [193]:
grid_search = GridSearchCV(
    estimator=svr,
    param_grid=param_grid,
    scoring=make_scorer(mean_squared_error, greater_is_better=False),
    cv=5,  
    verbose=1,
    n_jobs=-1  
)

In [194]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [195]:
best_params = grid_search.best_params_

In [196]:
best_model = grid_search.best_estimator_

In [197]:
joblib.dump({'model': best_model, 'scaler_X': scaler_X, 'scaler_y': scaler_y}, 'svr_model.pkl')

['svr_model.pkl']

In [198]:
y_pred_scaled = best_model.predict(X_test)

In [199]:
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1))

Arrays unidmensional

In [200]:
mse = mean_squared_error(y_test_original, y_pred)
r2 = r2_score(y_test_original, y_pred)
print(f"Error cuadrático medio (MSE): {mse}")
print(f"Coeficiente de determinación (R^2): {r2}")

Error cuadrático medio (MSE): 21.813780264509234
Coeficiente de determinación (R^2): 0.46111656536754353


Arrays bidimensional

In [201]:
mse = mean_squared_error(y_test_original.ravel(), y_pred.ravel())
r2 = r2_score(y_test_original.ravel(), y_pred.ravel())
print(f"Error cuadrático medio (MSE): {mse}")
print(f"Coeficiente de determinación (R^2): {r2}")

Error cuadrático medio (MSE): 21.813780264509234
Coeficiente de determinación (R^2): 0.46111656536754353


In [202]:
data = joblib.load('svr_model.pkl')

#### Prediccion

In [203]:
loaded_model = data['model']
scaler_X = data['scaler_X']
scaler_y = data['scaler_y']

In [208]:
nuevos_datos = [
    [1, 2, '1:23.456', '1:22.789', '1:19.987'],
    [5, 1, '1:26.456', '1:22.789', '1:20.987']
]

nuevos_datos_convertidos = [
    [dato[0], dato[1]] + [convertir_a_segundos(t) for t in dato[2:]] for dato in nuevos_datos
]


In [209]:
nuevos_datos_scaled = scaler_X.transform(nuevos_datos_convertidos)



In [210]:
predicciones_scaled = loaded_model.predict(nuevos_datos_scaled)

predicciones = scaler_y.inverse_transform(predicciones_scaled.reshape(-1, 1))

print("Predicciones:")
for i, pred in enumerate(predicciones):
    print(f"{nuevos_datos[i]} Posición predicha = {pred[0]:.2f}")


Predicciones:
[1, 2, '1:23.456', '1:22.789', '1:19.987'] Posición predicha = 6.68
[5, 1, '1:26.456', '1:22.789', '1:20.987'] Posición predicha = 8.25
