# Taller 6: Redes Neuronales

## Importar librerías

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Punto 1. Clasificación con redes neuronales - Datos iris

### Datos, Selección y División

Descarga Datos Iris

In [None]:
iris = load_iris()
iris.target_names

Selección de espécimen

In [None]:
# One-hot encode the three iris classes, then keep only the first column
# (class index 0) as an (n_samples, 1) binary target.
y = tf.keras.utils.to_categorical(iris.target, num_classes=3)
y_s = y[:, :1]
y_s

División de datos (entrenamiento, validación, prueba)

In [None]:
# Two-stage hold-out split: 20% of the data is set aside for testing, then 20% of
# the remainder for validation. Both stages share the same size and seed.
split_options = dict(test_size=0.2, random_state=42)
X_train_full, X_test, y_train_full, y_test = train_test_split(iris.data, y_s, **split_options)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, **split_options)

X_train.shape

In [None]:
# Show the first rows before and after scaling for a quick visual check.
print(X_train[0:3,])

# The scaler is fitted on the training split only and then reused, unchanged,
# for the validation and test splits.
std_scl = StandardScaler()
X_train = std_scl.fit_transform(X_train)
print(X_train[0:3,])
X_valid, X_test = (std_scl.transform(split) for split in (X_valid, X_test))

### Modelo Base

Creación del modelo base (capa de entrada, capa oculta con 8 neuronas y una capa de salida con una neurona para la única categoría)

In [None]:
# Seed TensorFlow and reset Keras' global state before building the model.
tf.random.set_seed(42)
tf.keras.backend.clear_session()

# Baseline network: 4 input features -> 8 ReLU units -> 1 sigmoid output.
base = tf.keras.Sequential([
    tf.keras.Input(shape=(4,)),
    tf.keras.layers.Dense(8, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

In [None]:
base.summary()

In [None]:
base.compile(loss="binary_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

Entrenamiento del modelo base

In [None]:
history_base = base.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid))

Gráfica del historial de pérdida de entrenamiento y validación

In [None]:
# Training vs. validation loss per epoch for the baseline model.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history_base.history[curve], label=curve)
ax.set_xlabel('Epoch')
ax.set_ylabel('Error')
ax.legend()

### Métricas

In [None]:
y_test

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score, roc_auc_score, confusion_matrix, classification_report
import seaborn as sns
import numpy as np

# Raw outputs of the model's single sigmoid unit for every test sample.
y_pred_base = base.predict(X_test)
# No argmax here: the model has one output column, so class labels are obtained
# by thresholding these scores rather than by picking the largest column.
y_pred_base

In [None]:
y_test

In [None]:
print(y_test[0][0])
print(y_pred_base[0][0])

In [None]:
def to_binary_predictions(array, threshold=0.5):
    """Turn scores into hard 0/1 labels.

    Args:
        array: Array-like of scores (NumPy array, list, nested list, ...).
        threshold: Cut-off; scores greater than or equal to it map to 1.

    Returns:
        Integer NumPy array with the same shape as ``array``.
    """
    # np.asarray lets plain Python sequences be thresholded too; comparing a bare
    # list against a float would raise TypeError.
    return (np.asarray(array) >= threshold).astype(int)

# Keep the raw sigmoid scores and store the hard 0/1 labels in a separate variable.
# ROC AUC ranks samples by score, so it must receive the scores; feeding it
# thresholded labels reduces the curve to a single operating point.
y_test_labels = np.asarray(y_test).astype(int).ravel()
y_score_base = np.asarray(y_pred_base).ravel()
y_pred_base_classes = to_binary_predictions(y_score_base)

print(y_pred_base_classes)

# Test-set metrics
accuracy = accuracy_score(y_test_labels, y_pred_base_classes)
precision = precision_score(y_test_labels, y_pred_base_classes)
recall = recall_score(y_test_labels, y_pred_base_classes)
f1 = f1_score(y_test_labels, y_pred_base_classes)
f2 = fbeta_score(y_test_labels, y_pred_base_classes, beta=2)
roc_auc = roc_auc_score(y_test_labels, y_score_base)

# These figures come from the held-out test split, so the banner says so.
print("--- MÉTRICAS DEL MODELO (Prueba) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

# Confusion matrix; labels are fixed so the matrix is always 2x2 and matches the
# tick labels even if one class is never predicted.
conf_matrix = confusion_matrix(y_test_labels, y_pred_base_classes, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels = ['No Setosa', 'Setosa'], yticklabels= ['No Setosa', 'Setosa'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Test')
plt.show()


In [None]:



# Evaluation on the validation split.
# Scores and hard labels are kept apart: ROC AUC needs the sigmoid scores, while
# the remaining metrics need thresholded 0/1 labels.
y_valid_labels = np.asarray(y_valid).astype(int).ravel()
y_score_base_validation = np.asarray(base.predict(X_valid)).ravel()
y_pred_base_validation = to_binary_predictions(y_score_base_validation)


accuracy = accuracy_score(y_valid_labels, y_pred_base_validation)
precision = precision_score(y_valid_labels, y_pred_base_validation)
recall = recall_score(y_valid_labels, y_pred_base_validation)
f1 = f1_score(y_valid_labels, y_pred_base_validation)
f2 = fbeta_score(y_valid_labels, y_pred_base_validation, beta=2)
roc_auc = roc_auc_score(y_valid_labels, y_score_base_validation)

print("--- MÉTRICAS DEL MODELO (Validación) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")


# Confusion matrix; labels are fixed so the matrix is always 2x2 and matches the
# tick labels even if one class is never predicted.
conf_matrix = confusion_matrix(y_valid_labels, y_pred_base_validation, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels =  ['No Setosa', 'Setosa'], yticklabels= ['No Setosa', 'Setosa'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Validacion')
plt.show()



### Nuevos modelos 

**modelo 3 capas y (10,15,20) neuronas**

In [None]:
# Seed TensorFlow and reset Keras' global state before building the model.
tf.random.set_seed(42)
tf.keras.backend.clear_session()

# 4 input features -> hidden ReLU layers of 10, 15 and 20 units -> 1 sigmoid output.
m1 = tf.keras.Sequential(
    [tf.keras.Input(shape=(4,))]
    + [tf.keras.layers.Dense(units, activation="relu") for units in (10, 15, 20)]
    + [tf.keras.layers.Dense(1, activation="sigmoid")]
)

In [None]:
m1.summary()

In [None]:
m1.compile(loss="binary_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [None]:
history_m1 = m1.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid))

In [None]:
y_pred_m1 = m1.predict(X_test)
y_pred_m1

In [None]:
# to_binary_predictions is defined once, in the base-model evaluation cell; it is
# reused here instead of being redefined.
# Keep the raw sigmoid scores and store the hard 0/1 labels in a separate variable:
# ROC AUC must be computed on scores, not on thresholded labels.
y_test_labels = np.asarray(y_test).astype(int).ravel()
y_score_m1 = np.asarray(y_pred_m1).ravel()
y_pred_m1_classes = to_binary_predictions(y_score_m1)

print(y_pred_m1_classes)

# Test-set metrics
accuracy = accuracy_score(y_test_labels, y_pred_m1_classes)
precision = precision_score(y_test_labels, y_pred_m1_classes)
recall = recall_score(y_test_labels, y_pred_m1_classes)
f1 = f1_score(y_test_labels, y_pred_m1_classes)
f2 = fbeta_score(y_test_labels, y_pred_m1_classes, beta=2)
roc_auc = roc_auc_score(y_test_labels, y_score_m1)

# These figures come from the held-out test split, so the banner says so.
print("--- MÉTRICAS DEL MODELO (Prueba) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

# Confusion matrix; labels are fixed so the matrix is always 2x2 and matches the
# tick labels even if one class is never predicted.
conf_matrix = confusion_matrix(y_test_labels, y_pred_m1_classes, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels = ['No Setosa', 'Setosa'], yticklabels= ['No Setosa', 'Setosa'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Test')
plt.show()

**modelo 5 capas y (5,7,21,30,5) neuronas**

In [None]:
# Seed TensorFlow and reset Keras' global state before building the model.
tf.random.set_seed(42)
tf.keras.backend.clear_session()

# 4 input features -> hidden ReLU layers of 5, 7, 21, 30 and 5 units -> 1 sigmoid output.
m2 = tf.keras.Sequential(
    [tf.keras.Input(shape=(4,))]
    + [tf.keras.layers.Dense(units, activation="relu") for units in (5, 7, 21, 30, 5)]
    + [tf.keras.layers.Dense(1, activation="sigmoid")]
)

In [None]:
m2.summary()

In [None]:
m2.compile(loss="binary_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [None]:
history_m2 = m2.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid))

In [None]:
y_pred_m2 = m2.predict(X_test)
y_pred_m2

In [None]:
# to_binary_predictions is defined once, in the base-model evaluation cell; it is
# reused here instead of being redefined.
# Keep the raw sigmoid scores and store the hard 0/1 labels in a separate variable:
# ROC AUC must be computed on scores, not on thresholded labels.
y_test_labels = np.asarray(y_test).astype(int).ravel()
y_score_m2 = np.asarray(y_pred_m2).ravel()
y_pred_m2_classes = to_binary_predictions(y_score_m2)

print(y_pred_m2_classes)

# Test-set metrics
accuracy = accuracy_score(y_test_labels, y_pred_m2_classes)
precision = precision_score(y_test_labels, y_pred_m2_classes)
recall = recall_score(y_test_labels, y_pred_m2_classes)
f1 = f1_score(y_test_labels, y_pred_m2_classes)
f2 = fbeta_score(y_test_labels, y_pred_m2_classes, beta=2)
roc_auc = roc_auc_score(y_test_labels, y_score_m2)

# These figures come from the held-out test split, so the banner says so.
print("--- MÉTRICAS DEL MODELO (Prueba) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

# Confusion matrix; labels are fixed so the matrix is always 2x2 and matches the
# tick labels even if one class is never predicted.
conf_matrix = confusion_matrix(y_test_labels, y_pred_m2_classes, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels = ['No Setosa', 'Setosa'], yticklabels= ['No Setosa', 'Setosa'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Test')
plt.show()

**modelo 4 capas y (8,20,35,46) neuronas**

In [None]:
# Seed TensorFlow and reset Keras' global state before building the model.
tf.random.set_seed(42)
tf.keras.backend.clear_session()

# 4 input features -> hidden ReLU layers of 8, 20, 35 and 46 units -> 1 sigmoid output.
m3 = tf.keras.Sequential(
    [tf.keras.Input(shape=(4,))]
    + [tf.keras.layers.Dense(units, activation="relu") for units in (8, 20, 35, 46)]
    + [tf.keras.layers.Dense(1, activation="sigmoid")]
)

In [None]:
m3.summary()

In [None]:
m3.compile(loss="binary_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])

In [None]:
history_m3 = m3.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid))

In [None]:
y_pred_m3 = m3.predict(X_test)
y_pred_m3

In [None]:
# to_binary_predictions is defined once, in the base-model evaluation cell; it is
# reused here instead of being redefined.
# Keep the raw sigmoid scores and store the hard 0/1 labels in a separate variable:
# ROC AUC must be computed on scores, not on thresholded labels.
y_test_labels = np.asarray(y_test).astype(int).ravel()
y_score_m3 = np.asarray(y_pred_m3).ravel()
y_pred_m3_classes = to_binary_predictions(y_score_m3)

print(y_pred_m3_classes)

# Test-set metrics
accuracy = accuracy_score(y_test_labels, y_pred_m3_classes)
precision = precision_score(y_test_labels, y_pred_m3_classes)
recall = recall_score(y_test_labels, y_pred_m3_classes)
f1 = f1_score(y_test_labels, y_pred_m3_classes)
f2 = fbeta_score(y_test_labels, y_pred_m3_classes, beta=2)
roc_auc = roc_auc_score(y_test_labels, y_score_m3)

# These figures come from the held-out test split, so the banner says so.
print("--- MÉTRICAS DEL MODELO (Prueba) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

# Confusion matrix; labels are fixed so the matrix is always 2x2 and matches the
# tick labels even if one class is never predicted.
conf_matrix = confusion_matrix(y_test_labels, y_pred_m3_classes, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels = ['No Setosa', 'Setosa'], yticklabels= ['No Setosa', 'Setosa'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Test')
plt.show()

## Punto 2: Clasificación con redes neuronales - Datos heart

## Modelo Base

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

In [None]:
df = pd.read_csv('heart.csv')

In [None]:
df.shape

In [None]:
df.isna().sum()

**Seleccion de variables:**
Se selecciona el siguiente subconjunto de variables:

categóricas numéricas: 'sex', 'exang', 'fbs'

categórica string: 'thal'

numéricas: 'age', 'trestbps', 'chol'

In [None]:
# Preview only: drop() returns a new DataFrame and the result is not assigned, so
# `df` still holds every column after this cell. The actual column selection
# happens further down, where df is re-indexed with feats_ordered + ['target'].
df.drop(columns=['cp', 'restecg','ca','thalach', 'oldpeak', 'slope'])

In [None]:
cat_int_feats = ['sex', 'fbs', 'exang']
cat_str_feats = ['thal']
num_feats = ['age', 'trestbps', 'chol']

In [None]:
feats_ordered = cat_int_feats+cat_str_feats+num_feats

In [None]:
df = df[feats_ordered+['target']]

In [None]:
df.head()

Separación de conjuntos:

In [None]:
train = df.sample(frac=0.8, random_state=100)
train.head()

In [None]:
train.shape

In [None]:
test = df.drop(train.index)
test.head()

In [None]:
test.shape

In [None]:
val = train.sample(frac=0.2, random_state=100)

In [None]:
val.shape

In [None]:
# Remove the validation rows from the training frame. errors="ignore" keeps the
# cell re-runnable: on a second execution the rows are already gone and a plain
# drop() would raise KeyError.
train = train.drop(val.index, errors="ignore")

In [None]:
print(train.shape)
print(val.shape)
print(test.shape)

Función para convertir de dataframe (pandas) a dataset (tensorflow), separando características y etiquetas

In [None]:
def dataframe_to_dataset(dataframe, shuffle=True):
    """Convert a pandas DataFrame into a ``tf.data.Dataset`` of ``(features, label)`` pairs.

    Args:
        dataframe: DataFrame holding the feature columns plus a ``"target"`` column.
            It is copied first, so the caller's frame is left untouched.
        shuffle: When True (the default, matching the previous behaviour) the rows
            are shuffled with a buffer spanning the whole frame. Pass False for data
            whose order must stay stable between iterations, e.g. an evaluation set
            whose predictions are later compared against its labels.

    Returns:
        Unbatched dataset whose elements are ``({column_name: value}, label)``.
    """
    features = dataframe.copy()
    labels = features.pop("target")
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if shuffle:
        # NOTE: Dataset.shuffle reshuffles on every iteration by default, so two
        # separate passes over the returned dataset can see the rows in different orders.
        ds = ds.shuffle(buffer_size=len(features))
    return ds

In [None]:
train_ds = dataframe_to_dataset(train)
val_ds = dataframe_to_dataset(val)
test_ds = dataframe_to_dataset(test)

In [None]:
type(train_ds)

In [None]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

In [None]:
batch_size = 32
train_ds = train_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)

In [None]:
def encode_numerical_feature(feature, name, dataset):
    """Normalise one numerical input with statistics learned from ``dataset``.

    Args:
        feature: Model input tensor to normalise.
        name: Key of the feature inside each element's feature dict.
        dataset: Dataset of ``(features_dict, label)`` elements used to adapt the layer.

    Returns:
        The output of the adapted ``Normalization`` layer applied to ``feature``.
    """
    # Keep only the requested column and append a trailing axis of size 1.
    column_ds = dataset.map(lambda feats, _label: tf.expand_dims(feats[name], -1))

    # adapt() learns the column's statistics (mean, variance) from the data.
    scaler = keras.layers.Normalization()
    scaler.adapt(column_ds)

    return scaler(feature)

In [None]:
def encode_categorical_feature(feature, name, dataset, is_string):
    """Encode one categorical input as 0/1 indicator (dummy) variables.

    Args:
        feature: Model input tensor to encode.
        name: Key of the feature inside each element's feature dict.
        dataset: Dataset of ``(features_dict, label)`` elements used to learn the vocabulary.
        is_string: True to use ``StringLookup``, False to use ``IntegerLookup``.

    Returns:
        The output of the adapted lookup layer applied to ``feature``.
    """
    # Keep only the requested column and append a trailing axis of size 1.
    column_ds = dataset.map(lambda feats, _label: tf.expand_dims(feats[name], -1))

    # The lookup layer is created with output_mode="binary" so that it returns
    # 0/1 indicator variables for the category values.
    if is_string:
        lookup = keras.layers.StringLookup(output_mode="binary")
    else:
        lookup = keras.layers.IntegerLookup(output_mode="binary")

    # adapt() learns the set of values the categorical variable takes.
    lookup.adapt(column_ds)

    return lookup(feature)

In [None]:
# One int64 input of shape (1,) per integer-coded categorical feature.
inputs = [keras.Input(shape=(1,), name=feat, dtype="int64") for feat in cat_int_feats]

In [None]:
# One string input of shape (1,) per string-coded categorical feature.
inputs.extend(keras.Input(shape=(1,), name=feat, dtype="string") for feat in cat_str_feats)

In [None]:
# One input of shape (1,) with the default dtype per numerical feature.
inputs.extend(keras.Input(shape=(1,), name=feat) for feat in num_feats)

In [None]:
for i in inputs:
   print(i)

In [None]:
feats_encoded=[]

In [None]:
# The integer categorical inputs occupy the first positions of `inputs`, in the
# same order as cat_int_feats, so the two sequences can be walked in lockstep.
for feat, feat_input in zip(cat_int_feats, inputs):
    feats_encoded.append(encode_categorical_feature(feat_input, feat, train_ds, False))

In [None]:
len_feats = len(feats_encoded)
len_feats

In [None]:
# The string categorical inputs start at offset len_feats inside `inputs`
# (right after the inputs that have already been encoded).
for feat, feat_input in zip(cat_str_feats, inputs[len_feats:]):
    feats_encoded.append(encode_categorical_feature(feat_input, feat, train_ds, True))

In [None]:
len_feats = len(feats_encoded)
len_feats

In [None]:
# The numerical inputs start at offset len_feats inside `inputs`
# (right after the inputs that have already been encoded).
for feat, feat_input in zip(num_feats, inputs[len_feats:]):
    feats_encoded.append(encode_numerical_feature(feat_input, feat, train_ds))

In [None]:
for i in feats_encoded:
  print(i)

Creamos una capa concatenando todas las variables codificadas

In [None]:
all_feats = keras.layers.concatenate(feats_encoded)

In [None]:
type(all_feats)

Agregamos una capa densa con 32 neuronas y función de activación relu

In [None]:
model_layers = keras.layers.Dense(32, activation='relu')(all_feats)

Agregamos la capa de salida con una neurona y función de activación sigmoide

In [None]:
model_layers = keras.layers.Dense(1, activation='sigmoid')(model_layers)

Creamos el modelo con las capas ya creadas y las variables de entrada

In [None]:
model = keras.Model(inputs, model_layers)

Compilamos el modelo, definiendo optimizador, función de pérdida y métricas adicionales a capturar

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

Aseguramos que Keras use TensorFlow como backend, para asegurar que el modelo pueda usar strings como entradas

In [None]:
import os
# NOTE(review): KERAS_BACKEND is normally read when keras is first imported, and
# keras was already imported earlier in this notebook, so this assignment probably
# has no effect on the running session. Confirm, and move it above the first
# keras/tensorflow import if the backend really needs to be forced.
os.environ["KERAS_BACKEND"] = "tensorflow"

## Entrenamiento

In [None]:
hostory = model.fit(train_ds, epochs=100, validation_data=val_ds)

In [None]:
print(test_ds)
y_pred_modelo= model.predict(test_ds)
print(len(y_pred_modelo))
y_pred_modelo


In [None]:
# Training vs. validation loss per epoch for the heart model.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(hostory.history[curve], label=curve)
ax.set_xlabel('Epoch')
ax.set_ylabel('Error')
ax.legend()

In [None]:
# Collect labels and predicted scores in ONE pass over test_ds. The dataset is
# built with shuffle(), so iterating it twice (once inside predict(), once to read
# the labels) can yield the rows in different orders and misalign y_true with the
# predictions.
y_true_batches = []
y_score_batches = []
for batch_features, batch_labels in test_ds:
    y_true_batches.append(batch_labels.numpy())
    y_score_batches.append(np.asarray(model.predict_on_batch(batch_features)))

y_true = np.concatenate(y_true_batches, axis=0)
y_score_modelo = np.concatenate(y_score_batches, axis=0).ravel()
print(y_true)
print(len(y_true))

# to_binary_predictions is defined once, in the base-model evaluation cell of
# Punto 1; it is reused here instead of being redefined.
# Hard 0/1 labels are kept separate from the scores because ROC AUC needs the scores.
y_pred_modelo = to_binary_predictions(y_score_modelo)

print(y_pred_modelo)

# Test-set metrics
accuracy = accuracy_score(y_true, y_pred_modelo)
precision = precision_score(y_true, y_pred_modelo)
recall = recall_score(y_true, y_pred_modelo)
f1 = f1_score(y_true, y_pred_modelo)
f2 = fbeta_score(y_true, y_pred_modelo, beta=2)
roc_auc = roc_auc_score(y_true, y_score_modelo)

# These figures come from the held-out test split, so the banner says so.
print("--- MÉTRICAS DEL MODELO (Prueba) ---")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"F2 Score: {f2:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

# Confusion matrix; labels are fixed so the matrix is always 2x2 even if one
# class is never predicted.
conf_matrix = confusion_matrix(y_true, y_pred_modelo, labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Matriz de Confusión - Test')
plt.show()