In [None]:
#!pip install tensorflow
#!pip install --upgrade keras

In [None]:
# We use Keras 3, which can run on top of TensorFlow, PyTorch or JAX.
# The backend is selected through the KERAS_BACKEND environment variable.
# Keras reads that variable when the package is first imported, so it has to
# be set BEFORE `import keras`; setting it afterwards has no effect.
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import keras

In [None]:
import tensorflow as tf
import pandas as pd
import keras
from keras import layers
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder



In [None]:
# Read the data set into a DataFrame from the CSV served at the URL below.
URL = "https://drive.google.com/uc?export=download&id=15WrwH8HrIP5PJXIcSI-cYw5iY0u3BUof"
adult_income = pd.read_csv(filepath_or_buffer=URL)

In [None]:
# Split the rows into a training set and a test set: 30% is held out for
# testing, and the fixed random_state makes the split repeatable.
adult_train, adult_test = train_test_split(
    adult_income,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Split the training frame into features (X) and target (y).
# Rows whose 'income' is missing are removed from the whole frame first, so
# X_train and y_train always contain exactly the same rows. Dropping NaNs from
# y alone would leave X_train with extra, unlabeled rows.
adult_train_labeled = adult_train.dropna(subset=['income'])
X_train = adult_train_labeled.drop('income', axis=1)
y_train = adult_train_labeled['income']
# Encode the target as booleans: True where income equals ' >50K'
# (note the leading space in the label literal), False otherwise.
y_train_coded = y_train == ' >50K'
y_train_coded.value_counts()

False    17265
True      5527
Name: income, dtype: int64

In [None]:
# Partition the feature columns by dtype: numeric columns on one side,
# every other column (treated as categorical) on the other.
X_train_categorical = X_train.select_dtypes(exclude='number')
X_train_numerical = X_train.select_dtypes(include='number')



In [None]:
# One-hot encode the categorical columns and concatenate them with the
# numeric columns into a single dense feature matrix.
import numpy as np

# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output`, and 1.4
# removed the old name. Try the current keyword first and fall back to the
# legacy one so the cell runs on both old and new versions.
try:
    oEncoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    oEncoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
# The encoder is fitted on the training data only. OneHotEncoder ignores any
# target passed to fit, so none is given. Categories unseen at fit time are
# ignored at transform time (handle_unknown='ignore').
oEncoder.fit(X_train_categorical)
X_train_encode_cat = oEncoder.transform(X_train_categorical)
X_train_encode = np.concatenate((X_train_numerical, X_train_encode_cat), axis=1)
X_train_encode.shape

(22792, 108)

In [None]:
# Repeat the same preprocessing on the test split, reusing the encoder that
# was fitted on the training data.
# Rows with a missing 'income' are dropped from the whole frame first so that
# X_test and y_test keep exactly the same rows.
adult_test_labeled = adult_test.dropna(subset=['income'])
X_test = adult_test_labeled.drop('income', axis=1)
y_test = adult_test_labeled['income']
# Boolean target: True where income equals ' >50K', False otherwise.
y_test_coded = y_test == ' >50K'
X_test_numerical = X_test.select_dtypes(include='number')
X_test_categorical = X_test.select_dtypes(exclude='number')

X_test_encode_cat = oEncoder.transform(X_test_categorical)
X_test_encode = np.concatenate((X_test_numerical, X_test_encode_cat), axis=1)

# Kept as the last expression so the notebook actually displays the shape.
X_test_encode.shape

In [None]:
# Scale all features with StandardScaler. with_mean=False disables centering.
# The scaler is fitted on the training matrix only, and the same fitted
# scaler is then applied to both the training and the test matrices.
scaler = StandardScaler(with_mean=False)
scaler.fit(X_train_encode)
X_scaled = scaler.transform(X_train_encode)
X_scaled_test = scaler.transform(X_test_encode)

In [None]:
# Dense neural network defined by subclassing keras.Model.
# (The same architecture could also be built with the functional API.)

class NeuralNetwork(keras.Model):
    """Fully connected binary classifier.

    Three hidden Dense layers (256, 128 and 64 units, ReLU) followed by a
    single sigmoid unit.

    Args:
        input_shape: Number of input features. Kept so existing callers keep
            working; it is no longer forwarded to the first layer, because
            Keras creates each layer's weights the first time the model is
            called on data.
        **kwargs: Forwarded unchanged to ``keras.Model``.
    """

    def __init__(self, input_shape, **kwargs):
        super().__init__(**kwargs)
        # Create the layers. `input_shape=` is deliberately not passed to the
        # first Dense layer: Keras 3 emits a UserWarning for that argument.
        self.layer1 = layers.Dense(256, activation='relu')
        self.layer2 = layers.Dense(128, activation='relu')
        self.layer3 = layers.Dense(64, activation='relu')
        self.layer4 = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass: chain the four layers and return the sigmoid output."""
        x = self.layer1(inputs)
        x = self.layer2(x)
        x = self.layer3(x)
        out = self.layer4(x)
        return out

# Instantiate the network for the number of columns in the scaled training
# matrix, then compile it: this sets the optimizer, the loss function and
# the metrics to track.
n_features = X_scaled.shape[1]
model = NeuralNetwork(n_features)
model.compile(
    optimizer='Adam',
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the model.
# NOTE: ideally there would be a separate validation set, with the test set
# used only to check the final result. For simplicity this notebook uses
# just two sets, so the test set doubles as validation data here.
model.fit(
    x=X_scaled,
    y=y_train_coded,
    epochs=50,
    validation_data=(X_scaled_test, y_test_coded),
)

Epoch 1/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.8162 - loss: 0.3898 - val_accuracy: 0.8527 - val_loss: 0.3286
Epoch 2/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8472 - loss: 0.3269 - val_accuracy: 0.8512 - val_loss: 0.3230
Epoch 3/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8529 - loss: 0.3088 - val_accuracy: 0.8478 - val_loss: 0.3257
Epoch 4/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8572 - loss: 0.3051 - val_accuracy: 0.8475 - val_loss: 0.3262
Epoch 5/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8555 - loss: 0.3085 - val_accuracy: 0.8532 - val_loss: 0.3217
Epoch 6/50
[1m713/713[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8578 - loss: 0.3008 - val_accuracy: 0.8535 - val_loss: 0.3298
Epoch 7/50
[1m713/713

<keras.src.callbacks.history.History at 0x3833a41c0>

In [None]:
# Run inference on a single test example.
# IMPORTANT: the model expects a two-dimensional tensor, so the example has
# to be passed with two dimensions. Two equivalent options are shown:
#   1) reshape the single row into shape (1, n_features)
#   2) select it with a slice, which keeps the leading dimension
first_as_row = X_scaled_test[0].reshape(1, -1)
first_as_slice = X_scaled_test[0:1]
print(model(first_as_row))
print(model(first_as_slice))

# This is a deliberately simple approach. There are more efficient ways to
# process and transform the input data and to run inference.

tf.Tensor([[0.00049766]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.00049766]], shape=(1, 1), dtype=float32)
