In [182]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers


# Load the semicolon-delimited CSV into a DataFrame and print its column
# labels so the available fields can be checked before modelling.
csv_path = "data.csv"
data = pd.read_csv(csv_path, sep=";")

print(data.columns)

Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default payment next month'],
      dtype='object')


In [183]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Columns excluded from the feature matrix.
omit = ["SEX", "EDUCATION", 'AGE', 'MARRIAGE']

# Target: the 'default payment next month' column.
y = data['default payment next month']

# Features: every remaining column once the target and the omitted columns
# have been dropped.
X = data.drop(columns=['default payment next month', *omit])

def deleteFromList(list, omition_list):
    """Return a new list with the items of ``list`` that are not in ``omition_list``.

    The relative order of the kept items is preserved, duplicates are kept,
    and neither argument is modified.
    """
    kept = []
    for entry in list:
        if entry in omition_list:
            continue
        kept.append(entry)
    return kept




# Split the data into training and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Re-attach the labels to the training features so rows can be filtered by class.
target_col = 'default payment next month'
train_data = pd.concat([X_train, y_train], axis=1)

# Separate the two classes.
default = train_data[train_data[target_col] == 1]
non_default = train_data[train_data[target_col] == 0]

# Undersample: draw as many class-0 rows as there are class-1 rows.
non_default_undersampled = non_default.sample(n=len(default), random_state=42)

# Recombine the classes into one balanced frame.
undersampled_data = pd.concat([non_default_undersampled, default])

# Split the balanced frame back into features and labels.
X_train_undersampled = undersampled_data.drop(columns=target_col)
y_train_undersampled = undersampled_data[target_col]

# The balanced sample is built but not used for training as written;
# uncomment the two assignments below to train on it instead.
#X_train = X_train_undersampled
#y_train = y_train_undersampled

# Numeric and categorical columns, minus any column listed in `omit`.
num_attribs = deleteFromList(['LIMIT_BAL', 'AGE', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6'], omit)
cat_attribs = deleteFromList(['SEX', 'EDUCATION', 'MARRIAGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6'], omit)

# Preprocessing pipelines.
# Numeric columns are standardized.
num_pipeline = Pipeline([
        ('std_scaler', StandardScaler()),
    ])

# Categorical columns are one-hot encoded. handle_unknown='ignore' makes a
# category value that was not seen during fit encode as all zeros instead of
# raising an error when the fitted encoder is applied to other data (e.g. a
# value that only occurs in the test split).
cat_pipeline = Pipeline([
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ])

# Combine both pipelines, each applied to its own set of columns.
preprocessor = ColumnTransformer([
        ("num", num_pipeline, num_attribs),
        ("cat", cat_pipeline, cat_attribs),
    ])

# Fit on the training split only, then reuse the fitted transform on the test split.
X_train_prepared = preprocessor.fit_transform(X_train)
X_test_prepared = preprocessor.transform(X_test)


In [184]:
# %%
# Build the model: three ReLU hidden layers (128, 64, 32 units), each followed
# by a Dropout layer with a 20% rate, and a single sigmoid output unit.
model = keras.Sequential()

# First hidden layer; its input width is the number of prepared feature columns.
model.add(layers.Dense(128, activation='relu', input_shape=(X_train_prepared.shape[1],)))
model.add(layers.Dropout(0.2))

# Second and third hidden layers, each with its own Dropout.
for units in (64, 32):
    model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dropout(0.2))

# Output layer.
model.add(layers.Dense(1, activation='sigmoid'))

# Compile the model.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [185]:
# The model is trained on dense arrays. ColumnTransformer only returns a SciPy
# sparse matrix when the combined output is sparse enough (see its
# `sparse_threshold` parameter); otherwise it already returns a dense ndarray,
# which has no `.toarray()`. Densify conditionally so this cell works either way.
if hasattr(X_train_prepared, "toarray"):
    X_train_dense = X_train_prepared.toarray()
else:
    X_train_dense = X_train_prepared

# Hold out 20% of the training rows for validation (fixed seed).
# Despite its name, X_train_full is the 80% portion actually used for fitting.
# NOTE(review): the preprocessor was fitted on all of X_train, so the
# validation rows contributed to the scaling/encoding statistics.
X_train_full, X_val, y_train_full, y_val = train_test_split(
    X_train_dense, y_train, test_size=0.2, random_state=42)

# Train for 15 epochs with mini-batches of 64, reporting validation metrics
# after each epoch.
history = model.fit(
    X_train_full, y_train_full,
    epochs=15,
    batch_size=64,
    validation_data=(X_val, y_val)
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [186]:
# Evaluate on the held-out test set. The prepared features are a SciPy sparse
# matrix only when ColumnTransformer judged the output sparse enough, so call
# `.toarray()` only when it exists instead of assuming a sparse input.
X_test_dense = X_test_prepared.toarray() if hasattr(X_test_prepared, "toarray") else X_test_prepared
model.evaluate(X_test_dense, y_test, verbose=2)

188/188 - 1s - loss: 0.4333 - accuracy: 0.8203 - 629ms/epoch - 3ms/step


[0.4333001673221588, 0.8203333616256714]