# IMPLEMENTACIÓN USANDO VSR

## Librerías

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVR

## Carga y limpieza de datos

In [5]:
# 1. Cargar datos
df = pd.read_csv('student_habits_performance.csv')

# 2. Eliminar columna innecesaria
df = df.drop(columns=['student_id'])

# 3. Separar variables
X = df.drop(columns=['exam_score'])  
y = df['exam_score']

# 4. Detectar columnas categóricas y numéricas
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

# 5. Codificar variables categóricas
ohe = OneHotEncoder(drop='first', sparse_output=False)
X_encoded = pd.DataFrame(
    ohe.fit_transform(X[categorical_cols]),
    columns=ohe.get_feature_names_out(categorical_cols)
)

# 6. Escalar variables numéricas
scaler = StandardScaler()
X_scaled = pd.DataFrame(
    scaler.fit_transform(X[numerical_cols]),
    columns=numerical_cols
)

# 7. Unir datos procesados
X_prepared = pd.concat([X_scaled.reset_index(drop=True), X_encoded.reset_index(drop=True)], axis=1)

## Modelo

In [7]:
# 8. Crear modelo
model = SVR(kernel='rbf')

## K-Fold

In [9]:
# 9. Validación cruzada
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X_prepared, y, cv=kfold, scoring='neg_mean_squared_error')

## Resultados

In [11]:
# 10. Mostrar resultados
rmse_scores = np.sqrt(-scores)
print(f'RMSE promedio con 5-Fold CV: {rmse_scores.mean():.4f}')
print(f'Todas las puntuaciones RMSE: {rmse_scores}')


RMSE promedio con 5-Fold CV: 7.9362
Todas las puntuaciones RMSE: [7.36548486 7.99804506 8.15736985 7.84565909 8.31459633]
