# DATA CLEANING

In [3]:
import pandas as pd

# Load the three raw CSV exports: stints, weather samples and session metadata.
stints, weather, sessions = (
    pd.read_csv(path) for path in ("stints.csv", "weather.csv", "sessions.csv")
)

# Parse the timestamp columns as timezone-aware UTC datetimes so they can be
# subtracted from each other later on.
weather = weather.assign(date=pd.to_datetime(weather['date'], utc=True))
sessions = sessions.assign(date_start=pd.to_datetime(sessions['date_start'], utc=True))

# 1. Attach the session metadata columns to every stint (left join, so stints
#    without a matching session are kept with missing metadata).
# 2. Keep only stints that have both a starting and an ending lap.
# 3. Estimate when each stint began: session start + lap_start * 90 seconds.
#    NOTE(review): 90 s is a fixed per-lap estimate applied to every circuit —
#    confirm it is accurate enough for the weather lookup.
stints = (
    stints
    .merge(
        sessions[['session_key', 'date_start', 'location', 'circuit_short_name', 'year']],
        on='session_key',
        how='left',
    )
    .dropna(subset=['lap_start', 'lap_end'])
    .assign(
        approx_start_time=lambda frame: frame['date_start']
        + pd.to_timedelta(frame['lap_start'] * 90, unit='s')
    )
)


# Find the weather sample closest in time to a given instant
def find_closest_weather(start_time, session_key, weather_df=None):
    """Return the weather reading nearest in time to ``start_time`` for a session.

    Parameters
    ----------
    start_time : pandas.Timestamp
        Reference instant; it is subtracted from the ``date`` column, so it
        must be comparable with it (same timezone awareness).
    session_key :
        Value matched against the ``session_key`` column of the weather table.
    weather_df : pandas.DataFrame, optional
        Weather table to search. When omitted, the module-level ``weather``
        frame is used, which keeps existing two-argument calls working.

    Returns
    -------
    pandas.Series
        Always indexed by ``track_temperature``, ``air_temperature``,
        ``humidity``, ``rainfall`` and ``wind_speed``. All values are NaN when
        the session has no weather rows or no time difference can be computed
        (for example when ``start_time`` is NaT).
    """
    columns = ['track_temperature', 'air_temperature', 'humidity', 'rainfall', 'wind_speed']
    source = weather if weather_df is None else weather_df

    # The "no data" row carries the same labels as a real reading; otherwise a
    # row-wise DataFrame.apply would create extra columns named 0..4.
    missing = pd.Series(float('nan'), index=columns)

    subset = source[source['session_key'] == session_key]
    if subset.empty:
        return missing

    gaps = (subset['date'] - start_time).abs()
    if gaps.isna().all():
        # Nothing to compare against (NaT start time or NaT weather dates).
        return missing

    # argmin skips NaT entries and returns the position of the smallest gap
    # (the first one when several samples are equally close).
    return subset[columns].iloc[gaps.argmin()]

# Look up the nearest weather sample for every stint, one row at a time.
weather_features = stints.apply(lambda row: find_closest_weather(row['approx_start_time'], row['session_key']), axis=1)

# Combine the stints with their weather columns BY POSITION.
# pd.concat(axis=1) aligns on index labels, and `weather_features` inherits
# the index of `stints`, which has gaps once rows have been dropped. Resetting
# only one side would pair stints with the wrong weather rows and pad the
# result with NaN, so both indexes are reset (apply preserves row order).
dataset = pd.concat(
    [stints.reset_index(drop=True), weather_features.reset_index(drop=True)],
    axis=1,
)

# Final dataset: the target (compound), the five weather readings and the
# circuit/year context. Rows with any missing value are discarded.
model_columns = [
    'compound',
    'track_temperature',
    'air_temperature',
    'humidity',
    'rainfall',
    'wind_speed',
    'circuit_short_name',
    'year',
]
final_dataset = dataset.loc[:, model_columns].dropna(axis=0, how='any')

# Persist the result without the index column and confirm on stdout.
final_dataset.to_csv("stint_weather_dataset.csv", index=False)
print("✅ Dataset listo y guardado como 'stint_weather_dataset.csv'")



✅ Dataset listo y guardado como 'stint_weather_dataset.csv'


# PREDICTION

In [4]:
# Read the stint/weather dataset back from "stint_weather_dataset.csv" and
# display its first rows as the cell output.
df = pd.read_csv("stint_weather_dataset.csv")
df.head()

Unnamed: 0,compound,track_temperature,air_temperature,humidity,rainfall,wind_speed,circuit_short_name,year
0,MEDIUM,32.0,27.0,49.0,0.0,1.8,Yas Marina Circuit,2024.0
1,MEDIUM,32.0,27.0,49.0,0.0,1.8,Yas Marina Circuit,2024.0
2,MEDIUM,32.0,27.0,49.0,0.0,1.8,Yas Marina Circuit,2024.0
3,MEDIUM,32.0,27.0,49.0,0.0,1.8,Yas Marina Circuit,2024.0
4,MEDIUM,32.0,27.0,49.0,0.0,1.8,Yas Marina Circuit,2024.0


In [5]:
##### Prepare the data for the model
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# One integer encoder per categorical column; both are kept so that the codes
# can be mapped to and from their original labels later.
le_compound, le_circuit = LabelEncoder(), LabelEncoder()

for encoder, source_col, target_col in (
    (le_compound, 'compound', 'compound_encoded'),
    (le_circuit, 'circuit_short_name', 'circuit_encoded'),
):
    df[target_col] = encoder.fit_transform(df[source_col])

# Model inputs (weather readings, encoded circuit, year) and the target
# (encoded tyre compound).
feature_columns = [
    'track_temperature',
    'air_temperature',
    'humidity',
    'rainfall',
    'wind_speed',
    'circuit_encoded',
    'year',
]
X = df[feature_columns]
y = df['compound_encoded']

# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): the dataset preview shows several identical rows; a purely
# random split can put copies of the same row in both train and test — confirm
# whether a grouped split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train the model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Fit a 100-tree random forest with a fixed seed.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the encoded compound for the held-out rows.
y_pred = model.predict(X_test)

# LabelEncoder assigns the codes 0..n_classes-1 in the order of `classes_`.
# Passing them explicitly keeps both metrics aligned with `target_names` even
# when a compound is absent from the test split; without `labels`,
# classification_report raises ValueError in that situation.
compound_labels = list(range(len(le_compound.classes_)))

# Evaluation
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=compound_labels))

print("\nClassification Report:")
print(classification_report(
    y_test,
    y_pred,
    labels=compound_labels,
    target_names=le_compound.classes_,
    zero_division=0,  # classes with no samples report 0 instead of warning
))


In [None]:
import matplotlib.pyplot as plt

# Importance score of each input column, as reported by the fitted model.
importances = model.feature_importances_
features = X.columns

# Horizontal bar chart with one bar per feature.
fig, ax = plt.subplots(figsize=(8, 5))
ax.barh(features, importances)
ax.set_title("Importancia de variables en la predicción de neumáticos")
ax.set_xlabel("Importancia")
fig.tight_layout()
plt.show()


In [None]:
# Single hand-written scenario, with columns in the same order as the
# training features. The circuit name goes through the fitted circuit encoder.
yas_marina_code = le_circuit.transform(['Yas Marina Circuit'])[0]
ejemplo = pd.DataFrame(
    {
        'track_temperature': [36.5],
        'air_temperature': [27.0],
        'humidity': [42.0],
        'rainfall': [0.0],
        'wind_speed': [1.5],
        'circuit_encoded': [yas_marina_code],
        'year': [2024],
    }
)

# Predict the encoded class, then map it back to the compound label.
pred_encoded = model.predict(ejemplo)[0]
pred_clase = le_compound.inverse_transform([pred_encoded])[0]
print("Neumático recomendado:", pred_clase)


In [None]:
from sklearn.metrics import accuracy_score

# Score the test-set predictions against the true labels and print the
# result formatted as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy general: {accuracy:.2%}")