In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score,confusion_matrix
from scipy.stats import expon, reciprocal

# Load the preprocessed feature matrix stored under key 'X'.
data = np.load('datos_preprocesados.npz')
X = data['X']

# Read the dataset (skipping the first 322 rows of the file) and pull out the
# regression target column 'sy_pm'.
df = pd.read_csv('06_dataset.csv', skiprows=322)

# Missing target values are replaced with the most frequent value of the column.
most_frequent_target = df['sy_pm'].mode()[0]
df['sy_pm'] = df['sy_pm'].fillna(most_frequent_target)
y = df['sy_pm'].values

# Train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Randomized hyper-parameter search for the SVR regressor:
# C is sampled log-uniformly, gamma from an exponential distribution,
# and the kernel is chosen between RBF and polynomial.
param_dist = {
    'C': reciprocal(1e-3, 1e3),
    'gamma': expon(scale=1e-3),
    'kernel': ['rbf', 'poly'],
}
rand = RandomizedSearchCV(
    estimator=SVR(),
    param_distributions=param_dist,
    n_iter=20,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
rand.fit(X_train, y_train)
print("Best params:", rand.best_params_)

# Evaluate the search's predictions on the held-out test set and persist them.
y_pred = rand.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nMSE:", mse)
print("R2:", r2)
np.save('y_pred_model2.npy', y_pred)

# Bin the continuous target so the regression can be inspected with a
# confusion matrix.
n_bins = 4

# The quantile edges are derived from the true test values ONLY. Binning
# y_pred with its own quantiles (as was done before) compares ranks instead of
# values: both arrays get forced into equally populated bins, so a prediction
# can land in the "same" bin as its target while lying in a different value
# range, and the two arrays may even end up with a different number of bins
# when duplicate edges are dropped.
y_test_bin, bin_edges = pd.qcut(
    y_test, q=n_bins, labels=False, duplicates='drop', retbins=True
)

# Open the outermost edges so predictions below/above the observed target
# range fall into the first/last bin instead of becoming NaN.
bin_edges = np.asarray(bin_edges, dtype=float)
bin_edges[0], bin_edges[-1] = -np.inf, np.inf
y_pred_bin = pd.cut(y_pred, bins=bin_edges, labels=False)

# Confusion matrix over the shared bins. Passing the labels explicitly keeps
# the matrix square even if no prediction falls into one of the bins.
bin_labels = np.arange(len(bin_edges) - 1)
cm = confusion_matrix(y_test_bin, y_pred_bin, labels=bin_labels)
print("\nMatriz de confusion (4 bins):\n", cm)

Best params: {'C': np.float64(793.2047656808546), 'gamma': np.float64(0.0006287891005408561), 'kernel': 'rbf'}
MSE: 31786.432449629392
R2: 0.8425277687370254
Confusion Matrix (with 4 bins):
 [[302  48   9  10]
 [ 63 258  40   7]
 [  4  62 289  13]
 [  0   0  30 339]]
