# MODELO DEEP LEARNING

>1: Importación de bibliotecas

In [1]:
# Importar bibliotecas necesarias
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

>2: Cargar y barajar el conjunto de datos

In [2]:
# Cargar el conjunto de datos
df = pd.read_csv('dataset_procesado.csv')

# Barajar el conjunto de datos
df = shuffle(df, random_state=42)

>3: Preprocesamiento de datos

In [3]:
# Dividir el conjunto de datos en características (X) y etiquetas (y)
X = df.drop('income', axis=1)
y = df['income']

# Codificar las etiquetas ('<=50K' y '>50K') a valores numéricos
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Dividir el conjunto de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

>4: Crear el modelo de red neuronal

In [4]:
# Crear el modelo de red neuronal
model = Sequential()
model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

>5: Compilar el modelo

In [5]:
# Compilar el modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

>6: Entrenar el modelo

In [6]:
# Entrenar el modelo
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x293b26e3340>

>7: Evaluar el modelo en el conjunto de prueba

In [7]:
# Evaluar el modelo en el conjunto de prueba
y_pred_probs = model.predict(X_test_scaled)
y_pred = (y_pred_probs > 0.5).astype(int)  # Redondear las probabilidades a 0 o 1

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy on test set: {accuracy}')

Accuracy on test set: 0.8523355869698832


>8: Guardar y cargar el modelo

In [11]:
# Guardar el modelo
model.save('my_model.h5')

# AJUSTE DE HIPERPARAMETROS con OPTUNA

In [28]:
import optuna

def objective(trial):
    # Parámetros a optimizar
    units_layer1 = trial.suggest_int('units_layer1', 32, 128)
    units_layer2 = trial.suggest_int('units_layer2', 16, 64)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2)

    # Crear el modelo de red neuronal con parámetros sugeridos por Optuna
    model = Sequential()
    model.add(Dense(units_layer1, input_dim=X_train_scaled.shape[1], activation='relu'))
    model.add(Dense(units_layer2, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compilar el modelo con tasa de aprendizaje sugerida
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Entrenar el modelo
    model.fit(X_train_scaled, y_train, epochs=5, batch_size=32, validation_split=0.2, verbose=0)

    # Evaluar el modelo
    y_pred_probs = model.predict(X_test_scaled)
    y_pred = (y_pred_probs > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

# Crear un estudio de Optuna y realizar la optimización
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Obtener los mejores hiperparámetros
best_params = study.best_params
best_accuracy = study.best_value

print(f'Best Hyperparameters: {best_params}')
print(f'Best Accuracy: {best_accuracy}')

  from .autonotebook import tqdm as notebook_tqdm
[I 2024-01-23 21:50:34,900] A new study created in memory with name: no-name-08a896ef-8fb6-47a3-a6ba-8f45921cee8d




[I 2024-01-23 21:50:46,932] Trial 0 finished with value: 0.8589428395820529 and parameters: {'units_layer1': 69, 'units_layer2': 22, 'learning_rate': 0.008235440331758503}. Best is trial 0 with value: 0.8589428395820529.




[I 2024-01-23 21:51:04,259] Trial 1 finished with value: 0.858020897357099 and parameters: {'units_layer1': 41, 'units_layer2': 21, 'learning_rate': 0.004767352622445229}. Best is trial 0 with value: 0.8589428395820529.




[I 2024-01-23 21:51:22,273] Trial 2 finished with value: 0.8561770129071912 and parameters: {'units_layer1': 113, 'units_layer2': 18, 'learning_rate': 0.0050007723430312324}. Best is trial 0 with value: 0.8589428395820529.




[I 2024-01-23 21:51:44,553] Trial 3 finished with value: 0.8592501536570375 and parameters: {'units_layer1': 73, 'units_layer2': 52, 'learning_rate': 0.0043909237174682625}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:51:59,799] Trial 4 finished with value: 0.854179471419791 and parameters: {'units_layer1': 52, 'units_layer2': 20, 'learning_rate': 0.004354756066867156}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:52:14,099] Trial 5 finished with value: 0.8551014136447449 and parameters: {'units_layer1': 69, 'units_layer2': 39, 'learning_rate': 0.0047579379152161115}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:52:30,713] Trial 6 finished with value: 0.8564843269821758 and parameters: {'units_layer1': 100, 'units_layer2': 63, 'learning_rate': 0.007728618415955917}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:52:44,932] Trial 7 finished with value: 0.8560233558696988 and parameters: {'units_layer1': 72, 'units_layer2': 59, 'learning_rate': 0.0049956342895404035}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:52:58,410] Trial 8 finished with value: 0.8578672403196066 and parameters: {'units_layer1': 43, 'units_layer2': 37, 'learning_rate': 0.000892961653289581}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:53:11,961] Trial 9 finished with value: 0.8518746158574063 and parameters: {'units_layer1': 47, 'units_layer2': 33, 'learning_rate': 0.002950207360613199}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:53:26,948] Trial 10 finished with value: 0.8537185003073141 and parameters: {'units_layer1': 128, 'units_layer2': 51, 'learning_rate': 0.009794948987964899}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:53:40,386] Trial 11 finished with value: 0.8523355869698832 and parameters: {'units_layer1': 88, 'units_layer2': 48, 'learning_rate': 0.007734505953440602}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:54:00,793] Trial 12 finished with value: 0.8547940995697603 and parameters: {'units_layer1': 63, 'units_layer2': 49, 'learning_rate': 0.007242303217530884}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:54:19,005] Trial 13 finished with value: 0.8554087277197295 and parameters: {'units_layer1': 85, 'units_layer2': 30, 'learning_rate': 0.002381230790769652}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:54:32,362] Trial 14 finished with value: 0.8531038721573448 and parameters: {'units_layer1': 60, 'units_layer2': 56, 'learning_rate': 0.009662879939273747}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:54:45,849] Trial 15 finished with value: 0.8549477566072526 and parameters: {'units_layer1': 95, 'units_layer2': 26, 'learning_rate': 0.006692420794328468}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:54:59,211] Trial 16 finished with value: 0.8574062692071297 and parameters: {'units_layer1': 77, 'units_layer2': 44, 'learning_rate': 0.0062694401167889165}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:55:12,550] Trial 17 finished with value: 0.8523355869698832 and parameters: {'units_layer1': 58, 'units_layer2': 43, 'learning_rate': 0.0032224092184015324}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:55:28,359] Trial 18 finished with value: 0.8349723417332514 and parameters: {'units_layer1': 34, 'units_layer2': 26, 'learning_rate': 4.377555550453931e-05}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:55:45,782] Trial 19 finished with value: 0.8475722188076213 and parameters: {'units_layer1': 98, 'units_layer2': 53, 'learning_rate': 0.00836263626519489}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:56:05,130] Trial 20 finished with value: 0.8534111862323295 and parameters: {'units_layer1': 79, 'units_layer2': 16, 'learning_rate': 0.005966902923242659}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:56:20,611] Trial 21 finished with value: 0.8557160417947142 and parameters: {'units_layer1': 34, 'units_layer2': 23, 'learning_rate': 0.0038637163253356695}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:56:34,254] Trial 22 finished with value: 0.851259987707437 and parameters: {'units_layer1': 68, 'units_layer2': 31, 'learning_rate': 0.00584639486924637}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:56:47,390] Trial 23 finished with value: 0.8566379840196681 and parameters: {'units_layer1': 52, 'units_layer2': 35, 'learning_rate': 0.001879016056759701}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:57:02,828] Trial 24 finished with value: 0.8549477566072526 and parameters: {'units_layer1': 43, 'units_layer2': 22, 'learning_rate': 0.0036141699225569848}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:57:16,459] Trial 25 finished with value: 0.8557160417947142 and parameters: {'units_layer1': 87, 'units_layer2': 27, 'learning_rate': 0.008863412959368957}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:57:32,679] Trial 26 finished with value: 0.8529502151198525 and parameters: {'units_layer1': 105, 'units_layer2': 42, 'learning_rate': 0.005575431783872839}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:57:52,382] Trial 27 finished with value: 0.8540258143822987 and parameters: {'units_layer1': 75, 'units_layer2': 16, 'learning_rate': 0.004171449004579922}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:58:07,801] Trial 28 finished with value: 0.8526429010448678 and parameters: {'units_layer1': 63, 'units_layer2': 46, 'learning_rate': 0.0020545205352142516}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:58:21,958] Trial 29 finished with value: 0.8560233558696988 and parameters: {'units_layer1': 118, 'units_layer2': 20, 'learning_rate': 0.0052287847983563465}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:58:35,775] Trial 30 finished with value: 0.8578672403196066 and parameters: {'units_layer1': 55, 'units_layer2': 64, 'learning_rate': 0.006636118644619143}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:58:48,226] Trial 31 finished with value: 0.8564843269821758 and parameters: {'units_layer1': 42, 'units_layer2': 36, 'learning_rate': 0.00030742304778206154}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:59:01,176] Trial 32 finished with value: 0.8552550706822373 and parameters: {'units_layer1': 38, 'units_layer2': 38, 'learning_rate': 0.0012699191005745947}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:59:14,923] Trial 33 finished with value: 0.8563306699446834 and parameters: {'units_layer1': 48, 'units_layer2': 23, 'learning_rate': 0.001023325361590199}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:59:28,204] Trial 34 finished with value: 0.8543331284572834 and parameters: {'units_layer1': 45, 'units_layer2': 19, 'learning_rate': 0.004577352682241331}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:59:41,635] Trial 35 finished with value: 0.8544867854947756 and parameters: {'units_layer1': 68, 'units_layer2': 29, 'learning_rate': 0.002556323303569209}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 21:59:54,671] Trial 36 finished with value: 0.8552550706822373 and parameters: {'units_layer1': 52, 'units_layer2': 40, 'learning_rate': 0.005116808479102531}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:00:09,826] Trial 37 finished with value: 0.8574062692071297 and parameters: {'units_layer1': 83, 'units_layer2': 58, 'learning_rate': 0.0033483681869825427}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:00:22,940] Trial 38 finished with value: 0.8520282728948986 and parameters: {'units_layer1': 39, 'units_layer2': 61, 'learning_rate': 0.004164571218028024}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:00:37,236] Trial 39 finished with value: 0.8520282728948986 and parameters: {'units_layer1': 92, 'units_layer2': 54, 'learning_rate': 0.007804434719471566}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:00:50,573] Trial 40 finished with value: 0.8547940995697603 and parameters: {'units_layer1': 72, 'units_layer2': 33, 'learning_rate': 0.009237093023249005}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:01:14,219] Trial 41 finished with value: 0.8569452980946527 and parameters: {'units_layer1': 56, 'units_layer2': 61, 'learning_rate': 0.00647281708482203}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:01:28,777] Trial 42 finished with value: 0.8563306699446834 and parameters: {'units_layer1': 54, 'units_layer2': 50, 'learning_rate': 0.00701082010199358}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:01:43,580] Trial 43 finished with value: 0.854640442532268 and parameters: {'units_layer1': 64, 'units_layer2': 46, 'learning_rate': 0.008407838932635782}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:01:58,168] Trial 44 finished with value: 0.8564843269821758 and parameters: {'units_layer1': 48, 'units_layer2': 63, 'learning_rate': 0.007205780659685626}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:02:11,499] Trial 45 finished with value: 0.8537185003073141 and parameters: {'units_layer1': 63, 'units_layer2': 57, 'learning_rate': 0.007788753312975962}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:02:25,122] Trial 46 finished with value: 0.8537185003073141 and parameters: {'units_layer1': 58, 'units_layer2': 55, 'learning_rate': 0.004836317249033037}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:02:39,021] Trial 47 finished with value: 0.8532575291948371 and parameters: {'units_layer1': 39, 'units_layer2': 53, 'learning_rate': 0.0067871360326895655}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:02:52,447] Trial 48 finished with value: 0.8544867854947756 and parameters: {'units_layer1': 32, 'units_layer2': 60, 'learning_rate': 0.008459850981752118}. Best is trial 3 with value: 0.8592501536570375.




[I 2024-01-23 22:03:07,826] Trial 49 finished with value: 0.852181929932391 and parameters: {'units_layer1': 50, 'units_layer2': 64, 'learning_rate': 0.00996459857254542}. Best is trial 3 with value: 0.8592501536570375.


Best Hyperparameters: {'units_layer1': 73, 'units_layer2': 52, 'learning_rate': 0.0043909237174682625}
Best Accuracy: 0.8592501536570375
