In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from joblib import dump

# Load the preprocessed feature matrix and the encoded target from CSV
X = pd.read_csv('X_preprocessed.csv')
y = pd.read_csv('y_encoded.csv')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# SVC model, tuned by a grid search over kernel and C with 5-fold cross-validation
svc = SVC(probability=True)
svc_params = {
    'kernel': ['linear', 'rbf'],
    'C': [1, 10],
}
svc_grid = GridSearchCV(estimator=svc, param_grid=svc_params, cv=5)
# y_train is a DataFrame (read via read_csv); flatten it to a 1-D label array
svc_grid.fit(X_train, y_train.values.ravel())
print(f"Mejores parámetros para SVC: {svc_grid.best_params_}")

# Evaluate the tuned model on the held-out test set
y_pred_svc = svc_grid.predict(X_test)
print("Informe de clasificación para SVC:")
svc_report = classification_report(y_test, y_pred_svc)
print(svc_report)
print("Matriz de confusión para SVC:")
svc_confusion = confusion_matrix(y_test, y_pred_svc)
print(svc_confusion)

# Curva de aprendizaje
def plot_learning_curve(estimator, X, y, title, *, cv=5):
    """Plot mean training and cross-validation scores versus training-set size.

    The scores are computed with ``sklearn.model_selection.learning_curve``
    and averaged over the cross-validation folds before plotting.

    Args:
        estimator: Estimator object forwarded to ``learning_curve``.
        X: Feature data forwarded to ``learning_curve``.
        y: Target values forwarded to ``learning_curve``.
        title: Title shown on the figure.
        cv: Cross-validation setting forwarded to ``learning_curve``.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Imported locally because the file's top-level imports do not include it.
    from sklearn.model_selection import learning_curve

    train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, cv=cv)

    # Each score array has one row per training size and one column per fold;
    # average across folds (axis=1) to get a single curve per score type.
    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.plot(train_sizes, train_scores_mean, 'o-', label='Puntaje de entrenamiento')
    plt.plot(train_sizes, test_scores_mean, 'o-', label='Puntaje de validación cruzada')
    plt.title(title)
    plt.xlabel('Ejemplos de entrenamiento')
    plt.ylabel('Puntaje')
    plt.legend(loc='best')
    plt.grid()
    plt.show()

# Draw the learning curve for the best estimator found by the grid search
best_svc = svc_grid.best_estimator_
train_labels = y_train.values.ravel()
plot_learning_curve(
    best_svc,
    X_train,
    train_labels,
    'Curva de aprendizaje para SVC',
)

# Save the fitted grid-search object to disk
dump(svc_grid, 'svc_model.joblib')
