In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score

In [3]:
from preprocessing import dividir_dataset
from preprocessing import preparar_dataset
from preprocessing import normalizar_datos
from preprocessing import aplicar_one_hot_encoding

In [4]:
from funcionesAuxiliares import graficar_auc_roc
from funcionesAuxiliares import traer_df

In [5]:
# Fetch the frame with traer_df, pass it through the project's preparation
# helper and finally through the one-hot encoding helper.
df = aplicar_one_hot_encoding(preparar_dataset(traer_df()))

In [6]:
# Split the prepared frame into X and y (presumably features and target),
# then hold out 25% of the rows with a fixed seed so the partition is repeatable.
X, y = dividir_dataset(df)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)
# Normalise both partitions with the project helper.
X_train, X_test = normalizar_datos(X_train, X_test)

In [7]:
def definir_mejores_hiperparametros(X, y, random_state=0):
    """Grid-search the SVC kernel and C using 5-fold stratified cross-validation.

    Every (kernel, C) pair is scored by the mean ROC AUC across the folds.
    Each fold's train/validation pair is passed through ``normalizar_datos``
    before fitting (presumably so the scaling is learnt per fold -- confirm
    in ``preprocessing``).

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally with ``.iloc``.
    y : pandas.Series
        Target aligned row-by-row with ``X``. Column 1 of ``predict_proba``
        is used as the score, so a binary target is expected.
    random_state : int or None, default 0
        Seed forwarded to ``SVC``. With ``probability=True`` the classifier
        shuffles data internally to calibrate probabilities, so an unseeded
        run yields slightly different AUCs each time.

    Returns
    -------
    pandas.DataFrame
        The row(s) of the results table (columns ``C``, ``kernel``,
        ``rocauc``) with the highest mean ROC AUC; several rows on ties.
    """
    kernels = ['poly', 'rbf', 'linear']
    valores_C = [0.1, 1, 10, 100]

    # The splitter does not shuffle, so a single instance yields the same
    # folds for every combination; no need to rebuild it inside the loops.
    kf = StratifiedKFold(n_splits=5)
    y_array = np.array(y)

    metrics = []
    for kernel in kernels:
        for C in valores_C:
            # One progress line per combination instead of one per fold.
            print(f"kernel={kernel}, C={C}")
            metricas_de_la_combinacion = []
            for train_index, test_index in kf.split(X, y_array):
                train_fold_actual = X.iloc[train_index].copy()
                test_fold_actual = X.iloc[test_index].copy()
                train_fold_actual, test_fold_actual = normalizar_datos(train_fold_actual, test_fold_actual)
                svm = SVC(kernel=kernel, C=C, probability=True, random_state=random_state)
                svm.fit(train_fold_actual, y.iloc[train_index])
                y_pred = svm.predict_proba(test_fold_actual)[:, 1]
                metricas_de_la_combinacion.append(roc_auc_score(y.iloc[test_index], y_pred))

            promedio = np.mean(metricas_de_la_combinacion)
            print(f"Promedio de la combinación: {promedio}")
            metrics.append((C, kernel, promedio))

    df_metrics = pd.DataFrame(metrics, columns=['C', 'kernel', 'rocauc'])
    # Series.max() skips NaN, whereas the builtin max() can return NaN and
    # make the equality filter below select nothing.
    mejor_combinacion = df_metrics[df_metrics["rocauc"] == df_metrics["rocauc"].max()]
    return mejor_combinacion

In [None]:
# Tune on the training rows only. Searching over the full (X, y) would let the
# rows later used as X_test influence the chosen hyperparameters and bias the
# final evaluation. The call below repeats the arguments of the earlier
# train_test_split, so it reproduces the same training partition, but
# un-normalised, because the search normalises each fold itself.
particion_busqueda = train_test_split(X, y, test_size=0.25, random_state=0)
X_busqueda, y_busqueda = particion_busqueda[0], particion_busqueda[2]
mejor_combinacion = definir_mejores_hiperparametros(X_busqueda, y_busqueda)
print(mejor_combinacion)

In [None]:
# Take the first winning row; its columns are ordered (C, kernel, rocauc).
parametros_svm = mejor_combinacion.values[0]
mejor_C, mejor_kernel = parametros_svm[0], parametros_svm[1]

In [None]:
def definir_mejor_gamma(X, y, mejor_kernel, mejor_C, random_state=0):
    """Pick the SVC ``gamma`` with the best mean ROC AUC for a fixed kernel and C.

    Candidate values are ``range(1, 100, 10)``; each one is scored with
    5-fold stratified cross-validation. Each fold's train/validation pair is
    passed through ``normalizar_datos`` before fitting.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally with ``.iloc``.
    y : pandas.Series
        Target aligned row-by-row with ``X``. Column 1 of ``predict_proba``
        is used as the score, so a binary target is expected.
    mejor_kernel : str
        Kernel name forwarded to ``SVC``.
    mejor_C : float
        Regularisation strength forwarded to ``SVC``.
    random_state : int or None, default 0
        Seed forwarded to ``SVC``. With ``probability=True`` the classifier
        shuffles data internally to calibrate probabilities, so an unseeded
        run yields slightly different AUCs each time.

    Returns
    -------
    pandas.DataFrame
        The row(s) of the results table (columns ``gamma``, ``rocauc``) with
        the highest mean ROC AUC; several rows on ties.

    Notes
    -----
    scikit-learn documents ``gamma`` as the coefficient for the 'rbf', 'poly'
    and 'sigmoid' kernels. With ``mejor_kernel='linear'`` it has no effect, so
    every candidate is expected to score the same and all rows tie.
    """
    # The splitter does not shuffle, so a single instance yields the same
    # folds for every gamma; no need to rebuild it inside the loop.
    kf = StratifiedKFold(n_splits=5)
    y_array = np.array(y)

    metrics = []
    for gamma in range(1, 100, 10):
        # One progress line per candidate instead of one per fold.
        print(f"gamma={gamma}")
        metricas_de_la_combinacion = []
        for train_index, test_index in kf.split(X, y_array):
            train_fold_actual = X.iloc[train_index].copy()
            test_fold_actual = X.iloc[test_index].copy()
            train_fold_actual, test_fold_actual = normalizar_datos(train_fold_actual, test_fold_actual)
            svm = SVC(kernel=mejor_kernel, C=mejor_C, gamma=gamma, probability=True, random_state=random_state)
            svm.fit(train_fold_actual, y.iloc[train_index])
            y_pred = svm.predict_proba(test_fold_actual)[:, 1]
            metricas_de_la_combinacion.append(roc_auc_score(y.iloc[test_index], y_pred))

        promedio = np.mean(metricas_de_la_combinacion)
        print(f"Promedio de la combinación: {promedio}")
        metrics.append((gamma, promedio))

    df_metrics = pd.DataFrame(metrics, columns=['gamma', 'rocauc'])
    # Series.max() skips NaN, whereas the builtin max() can return NaN and
    # make the equality filter below select nothing.
    mejor_combinacion = df_metrics[df_metrics["rocauc"] == df_metrics["rocauc"].max()]
    return mejor_combinacion

In [None]:
# Tune gamma on the training rows only. Searching over the full (X, y) would
# let the rows later used as X_test influence the chosen value and bias the
# final evaluation. The call below repeats the arguments of the earlier
# train_test_split, so it reproduces the same training partition, but
# un-normalised, because the search normalises each fold itself.
particion_gamma = train_test_split(X, y, test_size=0.25, random_state=0)
X_busqueda_gamma, y_busqueda_gamma = particion_gamma[0], particion_gamma[2]
mejor_combinacion = definir_mejor_gamma(X_busqueda_gamma, y_busqueda_gamma, mejor_kernel, mejor_C)
print(mejor_combinacion)

In [None]:
# First column of the first winning row of the gamma search is the gamma value.
mejor_gamma = mejor_combinacion.loc[:].values[0][0]
print(mejor_gamma)
# random_state fixes the internal shuffling used for probability calibration,
# so the reported numbers are repeatable across runs.
svm = SVC(kernel = mejor_kernel, C = mejor_C, gamma = mejor_gamma, probability=True, random_state=0)
svm.fit(X_train, y_train)
# classification_report compares class labels; feeding it probabilities raises
# "Classification metrics can't handle a mix of binary and continuous targets".
# Hard labels go to the report, positive-class probabilities to the ROC curve.
y_pred_clases = svm.predict(X_test)
y_pred = svm.predict_proba(X_test)[:, 1]
print(classification_report(y_test, y_pred_clases, target_names=['No Tiene Alto Valor Adquisitivo', 'Tiene Alto Valor Adquisitivo']))
graficar_auc_roc(y_test,y_pred)