## SVM

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from itertools import combinations
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
import time
import warnings

# Cargar datos
df_datos = pd.read_csv("datos/credit_risk_dataset.csv")
print('Técnica de las  Máquinas de Vectores Soporte')
# Definir columnas importantes
columnas_importantes = ['person_age', 'person_income', 'loan_status', 'cb_person_cred_hist_length', 'loan_grade']

# Preprocesamiento de datos
imputer = SimpleImputer(strategy='mean')
df_datos[columnas_importantes] = imputer.fit_transform(df_datos[columnas_importantes])

ordinal_encoder = OrdinalEncoder()
df_datos['loan_grade'] = ordinal_encoder.fit_transform(df_datos[['loan_grade']])

# Lista de todas las columnas
todas_columnas = ['person_age', 'person_income', 'cb_person_cred_hist_length', 'loan_grade']

# Función para entrenar y evaluar modelo
def train_evaluate_model(X_train, X_test, y_train, y_test, c_value, kernel='linear'):
    start_time_training = time.time()
    clf = SVC(kernel=kernel, C=c_value)
    clf.fit(X_train, y_train)
    end_time_training = time.time()
    
    start_time_testing = time.time()
    y_pred = clf.predict(X_test)
    end_time_testing = time.time()
    
    mc = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    training_time = end_time_training - start_time_training
    testing_time = end_time_testing - start_time_testing
    
    return mc, accuracy, precision, recall, f1, training_time, testing_time

# Configuraciones
configurations = [
   # {'kernel': 'linear', 'c_value': 50},
    #{'kernel': 'linear', 'c_value': 200},
    {'kernel': 'rbf', 'c_value': 50},
    {'kernel': 'rbf', 'c_value': 200}
]

# DataFrame para almacenar resultados
resultados = []

# Conjuntos de entrenamiento y prueba únicos
X_train, X_test, y_train, y_test = train_test_split(df_datos[todas_columnas], df_datos['loan_status'], test_size=0.2, random_state=100)

# Entrenar y evaluar para cada configuración y combinación de columnas
for config in configurations:
    for r in range(1, len(todas_columnas) + 1):
        for cols in combinations(todas_columnas, r):
            mc, accuracy, precision, recall, f1, training_time, testing_time = train_evaluate_model(
                X_train[list(cols)], X_test[list(cols)], y_train, y_test, config['c_value'], kernel=config['kernel']
            )
            resultados.append([cols, mc, accuracy, precision, recall, f1, training_time, testing_time])

# Crear DataFrame final
df_resultados = pd.DataFrame(resultados, columns=['Columnas', 'Matriz de Confusión', 'Exactitud', 'Precisión', 'Sensibilidad', 'F1 Score', 'Tiempo de Entrenamiento', 'Tiempo de Prueba'])

# Ordenar DataFrame por Exactitud
df_resultados = df_resultados.sort_values(by='Exactitud', ascending=False)

# Mostrar DataFrame final
print(df_resultados)


Técnica de las  Máquinas de Vectores Soporte
                                             Columnas  \
3                                       (loan_grade,)   
18                                      (loan_grade,)   
24           (cb_person_cred_hist_length, loan_grade)   
13  (person_income, cb_person_cred_hist_length, lo...   
10  (person_age, person_income, cb_person_cred_his...   
4                         (person_age, person_income)   
7         (person_income, cb_person_cred_hist_length)   
8                         (person_income, loan_grade)   
11            (person_age, person_income, loan_grade)   
1                                    (person_income,)   
29  (person_age, person_income, cb_person_cred_his...   
16                                   (person_income,)   
14  (person_age, person_income, cb_person_cred_his...   
26            (person_age, person_income, loan_grade)   
25  (person_age, person_income, cb_person_cred_his...   
28  (person_income, cb_person_cred_hist_len

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from itertools import combinations
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
import time
import warnings

# Cargar datos
print("Cargando datos...")
df_datos = pd.read_csv("datos/credit_risk_dataset.csv")
print("Datos cargados correctamente.")
print('SVM..')
# Definir columnas importantes
columnas_importantes = ['person_age', 'person_income', 'loan_status', 'cb_person_cred_hist_length', 'loan_grade']

# Preprocesamiento de datos
print("Preprocesamiento de datos...")
imputer = SimpleImputer(strategy='mean')
df_datos[columnas_importantes] = imputer.fit_transform(df_datos[columnas_importantes])

ordinal_encoder = OrdinalEncoder()
df_datos['loan_grade'] = ordinal_encoder.fit_transform(df_datos[['loan_grade']]).reshape(-1, 1)

# Lista de todas las columnas
todas_columnas = ['person_age', 'person_income', 'cb_person_cred_hist_length', 'loan_grade']

# Función para entrenar y evaluar modelo
def train_evaluate_model(X_train, X_test, y_train, y_test, c_value, kernel='linear'):
    start_time_training = time.time()
    clf = SVC(kernel=kernel, C=c_value)
    clf.fit(X_train, y_train)
    end_time_training = time.time()
    
    start_time_testing = time.time()
    y_pred = clf.predict(X_test)
    end_time_testing = time.time()
    
    mc = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    training_time = end_time_training - start_time_training
    testing_time = end_time_testing - start_time_testing
    
    return mc, accuracy, precision, recall, f1, training_time, testing_time

# Configuraciones
configurations = [
    {'kernel': 'linear', 'c_value': 50},
    {'kernel': 'linear', 'c_value': 200}
]

# DataFrame para almacenar resultados
resultados = []

# Conjuntos de entrenamiento y prueba únicos
print("Dividiendo datos en conjuntos de entrenamiento y prueba...")
X_train, X_test, y_train, y_test = train_test_split(df_datos[todas_columnas], df_datos['loan_status'], test_size=0.2, random_state=100)

# Entrenar y evaluar para cada configuración y combinación de columnas
print("Entrenando y evaluando modelo...")
for config in configurations:
    for r in range(1, len(todas_columnas) + 1):
        for cols in combinations(todas_columnas, r):
            print(f"Evaluando combinación de columnas: {cols}")
            mc, accuracy, precision, recall, f1, training_time, testing_time = train_evaluate_model(
                X_train[list(cols)], X_test[list(cols)], y_train, y_test, config['c_value'], kernel=config['kernel']
            )
            resultados.append([cols, mc, accuracy, precision, recall, f1, training_time, testing_time])

# Crear DataFrame final
print("Creando DataFrame final...")
df_resultados = pd.DataFrame(resultados, columns=['Columnas', 'Matriz de Confusión', 'Exactitud', 'Precisión', 'Sensibilidad', 'F1 Score', 'Tiempo de Entrenamiento', 'Tiempo de Prueba'])

# Ordenar DataFrame por Exactitud
df_resultados = df_resultados.sort_values(by='Exactitud', ascending=False)

# Mostrar DataFrame final
print(df_resultados)
