In [1]:
# Importación de bibliotecas necesarias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# 1. Read the loan dataset (path is relative to the working directory).
data = pd.read_csv("loan_data.csv")

# 2. Random 80/20 train/test split; the fixed random_state keeps the split
#    reproducible between runs.
# NOTE(review): the split is not stratified on "loan_status". If the classes
# are imbalanced, consider stratify=data["loan_status"] (this would change the
# reported accuracies) — confirm against the class distribution.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=123)

# 3. Scaling and encoding
# Columns passed through StandardScaler.
numerical_features = [
    "person_age", "person_income", "person_emp_exp", "loan_amnt", "loan_int_rate",
    "loan_percent_income", "cb_person_cred_hist_length", "credit_score",
]
# Columns passed through OneHotEncoder.
categorical_features = [
    "person_gender", "person_education", "person_home_ownership", "loan_intent",
    "previous_loan_defaults_on_file",
]

# handle_unknown="ignore": a category that appears only in the test split is
# encoded as an all-zero row instead of making predict() raise. When every
# test category was seen during fit, the encoding is identical to the default.
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown="ignore"), categorical_features)
])

# 4. Build the neural networks
# Each tuple is passed to MLPClassifier as hidden_layer_sizes
# (one entry per hidden layer).
topologies = [
    (50,), (100,), (50, 50), (100, 50), (100, 100)
]

# Separate features from the "loan_status" target once; these frames do not
# change between iterations, so there is no need to rebuild them in the loop.
X_train = train_data.drop(columns="loan_status")
y_train = train_data["loan_status"]
X_test = test_data.drop(columns="loan_status")
y_test = test_data["loan_status"]

# One {"Topology", "Accuracy"} record per topology, in iteration order.
results = []

# NOTE(review): topologies are compared on the test split itself, so the best
# accuracy below is an optimistic estimate — consider a validation split or
# cross-validation for model selection.
for topology in topologies:
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', MLPClassifier(hidden_layer_sizes=topology, random_state=123, max_iter=500))
    ])

    # Training
    model.fit(X_train, y_train)

    # Prediction and scoring on the held-out split
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    results.append({"Topology": topology, "Accuracy": accuracy})

# 5. Results table
results_df = pd.DataFrame(results)
print(results_df)

# 6. Additional hyperparameter
# Vary MLPClassifier's alpha while keeping the (100, 50) topology, seed and
# max_iter fixed, and report the test accuracy for each value.
alpha_variations = [0.0001, 0.01]

# Separate features from the "loan_status" target once; these frames do not
# change between iterations, so there is no need to rebuild them in the loop.
X_train = train_data.drop(columns="loan_status")
y_train = train_data["loan_status"]
X_test = test_data.drop(columns="loan_status")
y_test = test_data["loan_status"]

for alpha in alpha_variations:
    model = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', MLPClassifier(hidden_layer_sizes=(100, 50), random_state=123, max_iter=500, alpha=alpha))
    ])

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    print(f"Alpha: {alpha}, Accuracy: {accuracy}")

     Topology  Accuracy
0       (50,)  0.923000
1      (100,)  0.919222
2    (50, 50)  0.906667
3   (100, 50)  0.903111
4  (100, 100)  0.896667
Alpha: 0.0001, Accuracy: 0.9031111111111111
Alpha: 0.01, Accuracy: 0.9116666666666666


In [2]:

from sklearn.tree import DecisionTreeClassifier

# 1 and 2. Read the data and split it (same file, test_size and random_state
# as the earlier cell).
data = pd.read_csv("loan_data.csv")
train_data, test_data = train_test_split(data, test_size=0.2, random_state=123)

# 3. Scaling and encoding
# Reuse the `preprocessor` defined in the earlier cell.

# Separate features from the "loan_status" target once for all fits below.
X_train = train_data.drop(columns="loan_status")
y_train = train_data["loan_status"]
X_test = test_data.drop(columns="loan_status")
y_test = test_data["loan_status"]


def evaluate_tree_depths(criterion, depths=range(1, 11)):
    """Fit one decision-tree pipeline per max_depth and score it on the test split.

    Parameters
    ----------
    criterion : str
        Passed to DecisionTreeClassifier as ``criterion`` (e.g. "gini", "entropy").
    depths : iterable of int, default range(1, 11)
        Values of ``max_depth`` to try, in order.

    Returns
    -------
    list of dict
        One ``{"Max Depth": depth, "Accuracy": accuracy}`` record per depth,
        in the order of ``depths``.
    """
    records = []
    for depth in depths:
        model = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', DecisionTreeClassifier(criterion=criterion, splitter="best", max_depth=depth, random_state=123))
        ])
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        records.append({"Max Depth": depth, "Accuracy": accuracy_score(y_test, predictions)})
    return records


# 4. Decision trees with max_depth from 1 to 10, criterion="gini"
depth_results = evaluate_tree_depths("gini")
depth_results_df = pd.DataFrame(depth_results)
print(depth_results_df)

# Repeat the same sweep with criterion="entropy"
entropy_depth_results = evaluate_tree_depths("entropy")
entropy_depth_results_df = pd.DataFrame(entropy_depth_results)
print('\ncriterion="entropy"')
print(entropy_depth_results_df)


   Max Depth  Accuracy
0          1  0.781778
1          2  0.853556
2          3  0.902556
3          4  0.916444
4          5  0.919111
5          6  0.919222
6          7  0.919556
7          8  0.923778
8          9  0.923222
9         10  0.924444
