In [None]:
# Encadenar iterables
from itertools import chain

# Proporciona una barra de progreso rápida
from tqdm import tqdm

# Selección aleatoria de una lista sin repetición
from random import sample

# Interfaz para hacer gráficos y visualizaciones
import matplotlib.pyplot as plt

# Computación científica
import numpy as np

# Manipulación de datos
import pandas as pd

# Para guardar y cargar modelos
from joblib import dump, load

# Extraer parches (pequeños subconjuntos de imágenes) de imágenes
from sklearn.feature_extraction.image import PatchExtractor

# data: conjunto de datos de muestra y funciones de carga
# color: convertir imágenes entre espacios de color
# feature: funciones para identificar y extraer características de imágenes
from skimage import data, color, feature

# Cambiar el tamaño de una imagen
from skimage.transform import resize, rescale

# Train test split
from sklearn.model_selection import train_test_split

# Descarga y carga en memoria un conjunto de datos de imágenes de caras de personas famosas
from sklearn.datasets import fetch_lfw_people

# Modelos
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


# Validación cruzada
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold

# Matriz de confusión
from sklearn.metrics import confusion_matrix

# La curva ROC
from sklearn.metrics import roc_curve , auc

# Métricas
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Métricas custom
from sklearn.metrics import make_scorer

# Download csv
from google.colab import files

## Funciones auxiliares

In [None]:
# Extract random patches (sub-regions) from an image
def extract_patches(img, N, scale=1.0, patch_size=(62,47), random_state=0):
    """Randomly sample ``N`` patches from ``img`` and return them at ``patch_size``.

    Args:
        img: 2-D image array (indexed as ``img.shape[0]`` rows, ``img.shape[1]`` columns).
        N: maximum number of patches to extract (passed to ``PatchExtractor`` as ``max_patches``).
        scale: factor applied to ``patch_size`` to decide how large a region is cut
            from the image before it is resized back to ``patch_size``.
        patch_size: ``(height, width)`` of the returned patches.
        random_state: seed forwarded to ``PatchExtractor``.

    Returns:
        Array of patches, each of shape ``patch_size``.
    """
    # Size of the region cut from the image: the scaled patch size, clamped so
    # it never exceeds the image itself
    H = img.shape[0]
    W = img.shape[1]
    H_patch = min(H, int(scale * patch_size[0]))
    W_patch = min(W, int(scale * patch_size[1]))
    extracted_patch_size = (H_patch, W_patch)

    # Extractor configured with the computed region size, the maximum number
    # of patches and the random seed
    extractor = PatchExtractor(patch_size=extracted_patch_size, max_patches=N, random_state=random_state)

    # PatchExtractor works on a batch of images, hence the extra leading axis
    patches = extractor.transform(img[np.newaxis])

    # Resize back to patch_size whenever the extracted region differs from it.
    # Besides the scale != 1 case this also covers scale == 1 with an image
    # smaller than patch_size, where the clamp above shrinks the region and
    # the patches would otherwise come back with a different shape than the
    # rest of the dataset.
    if scale != 1 or extracted_patch_size != tuple(patch_size):
        patches = np.array([resize(patch, patch_size) for patch in patches])

    return patches

In [None]:
def non_max_suppression(indices, Ni, Nj, overlapThresh):
    """Greedy non-maximum suppression over equally sized windows.

    Args:
        indices: array of shape (n, 2) holding the first corner of each window;
            column 0 is extended by ``Ni`` and column 1 by ``Nj`` to get the
            opposite corner.
        Ni: window extent along column 0 of ``indices``.
        Nj: window extent along column 1 of ``indices``.
        overlapThresh: windows whose overlap ratio with an already kept window
            exceeds this value are discarded.

    Returns:
        The kept rows of ``indices`` cast to int, or an empty list when
        ``indices`` is empty.
    """
    # Nothing to suppress
    if len(indices) == 0:
        return []

    # Integer input is promoted to float before the arithmetic below
    if indices.dtype.kind == "i":
        indices = indices.astype("float")

    # Opposite corners of every window
    x1 = indices[:, 0]
    y1 = indices[:, 1]
    x2 = x1 + Ni
    y2 = y1 + Nj

    # Window areas, and candidate order (ascending far edge along column 1)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    remaining = np.argsort(y2)

    kept = []
    while remaining.size > 0:
        # The last candidate in the ordering is always kept
        tail = remaining.size - 1
        current = remaining[tail]
        others = remaining[:tail]
        kept.append(current)

        # Intersection rectangle between the kept window and each other candidate
        inter_x1 = np.maximum(x1[current], x1[others])
        inter_y1 = np.maximum(y1[current], y1[others])
        inter_x2 = np.minimum(x2[current], x2[others])
        inter_y2 = np.minimum(y2[current], y2[others])

        inter_w = np.maximum(0, inter_x2 - inter_x1 + 1)
        inter_h = np.maximum(0, inter_y2 - inter_y1 + 1)

        # Intersection area relative to the area of each other candidate
        ratio = (inter_w * inter_h) / area[others]

        # Drop the kept window itself plus every candidate that overlaps too much
        suppressed = np.where(ratio > overlapThresh)[0]
        remaining = np.delete(remaining, np.concatenate(([tail], suppressed)))

    return indices[kept].astype("int")

In [None]:
# True Positive Rate: TP / (TP + FN)
def tpr_scorer(clf, X, y):
  """Score ``clf`` on ``(X, y)`` by the fraction of actual positives it predicts as positive.

  Assumes a binary problem where row/column 1 of the confusion matrix is the
  positive class.
  """
  positive_row = confusion_matrix(y, clf.predict(X))[1]
  true_pos, false_neg = positive_row[1], positive_row[0]
  return true_pos / (true_pos + false_neg)

# False Positive Rate: FP / (TN + FP)
def fpr_scorer(clf, X, y):
  """Score ``clf`` on ``(X, y)`` by the fraction of actual negatives it predicts as positive.

  Assumes a binary problem where row/column 0 of the confusion matrix is the
  negative class.
  """
  negative_row = confusion_matrix(y, clf.predict(X))[0]
  true_neg, false_pos = negative_row[0], negative_row[1]
  return false_pos / (true_neg + false_pos)

# True Negative Rate: TN / (TN + FP)
def tnr_scorer(clf, X, y):
  """Score ``clf`` on ``(X, y)`` by the fraction of actual negatives it predicts as negative.

  Assumes a binary problem where row/column 0 of the confusion matrix is the
  negative class.
  """
  negative_row = confusion_matrix(y, clf.predict(X))[0]
  true_neg, false_pos = negative_row[0], negative_row[1]
  return true_neg / (true_neg + false_pos)

# False Negative Rate: FN / (FN + TP)
def fnr_scorer(clf, X, y):
  """Score ``clf`` on ``(X, y)`` by the fraction of actual positives it predicts as negative.

  Assumes a binary problem where row/column 1 of the confusion matrix is the
  positive class.
  """
  positive_row = confusion_matrix(y, clf.predict(X))[1]
  false_neg, true_pos = positive_row[0], positive_row[1]
  return false_neg / (false_neg + true_pos)


In [None]:
# Slide a fixed-size window over an image and yield every patch with its position.
def sliding_window(img,
                   patch_size=(62,47),  # (height, width) of the yielded patches
                   istep=2,  # Step along i (rows, vertical direction)
                   jstep=2,  # Step along j (columns, horizontal direction)
                   scale=1.0):  # Factor applied to patch_size to get the window size
    """Generate ``((i, j), patch)`` pairs for every window position in ``img``.

    Args:
        img: 2-D image array.
        patch_size: ``(height, width)`` of each yielded patch.
        istep: row increment between consecutive windows.
        jstep: column increment between consecutive windows.
        scale: the window cut from the image measures ``int(scale * patch_size)``;
            when ``scale != 1`` the cut-out is resized back to ``patch_size``.

    Yields:
        ``(i, j)`` (top-left corner of the window) and the corresponding patch.
    """
    # Window height (Ni) and width (Nj) after applying the scale factor
    Ni, Nj = (int(scale * s) for s in patch_size)

    # The "+ 1" makes the last fully contained window (i == H - Ni, j == W - Nj)
    # part of the scan.
    for i in range(0, img.shape[0] - Ni + 1, istep):
        # The column bound must use the window WIDTH (Nj); using Ni here left
        # a strip on the right of the image unscanned whenever Ni > Nj.
        for j in range(0, img.shape[1] - Nj + 1, jstep):

            # Cut the window out of the image
            patch = img[i:i + Ni, j:j + Nj]

            # Bring scaled windows back to the nominal patch size
            if scale != 1:
                patch = resize(patch, patch_size)

            yield (i, j), patch

## Dataset de rostros (LFW)

In [None]:
# Load the Labeled Faces in the Wild (LFW) dataset
faces = fetch_lfw_people()
# The face images are the positive (face) examples of the detector
positive_patches = faces.images
# Cell output: shape of the image stack (presumably (n_images, height, width))
positive_patches.shape

In [None]:
# Split the face images into train (90%) and test (10%) subsets with a fixed seed
positive_patches_train, positive_patches_test = train_test_split(
    positive_patches,
    test_size=0.1,
    random_state=42
)

## Para subir las fotos que usa la siguiente celda

In [None]:
# Colab-only: open the upload dialog to copy local files into the runtime.
# NOTE(review): presumably these are the numbered background photos
# (0.jpg, 1.jpg, ...) read by the backgrounds cell — confirm.
from google.colab import files
uploaded = files.upload()


## Dataset de fondos

In [None]:
# Sample images bundled with scikit-image (skimage.data), used as face-free backgrounds
imgs = ['camera',
        'text',
        'coins',
        'moon',
        'page',
        'clock',
        'immunohistochemistry',
        'chelsea',
        'coffee',
        'hubble_deep_field'
        ]

backgrounds = []
for name in imgs:
    img = getattr(data, name)()
    if len(img.shape) == 3 and img.shape[2] == 3:  # Convert only RGB images to grayscale
        img = color.rgb2gray(img)
    backgrounds.append(img)

# Additional background photos expected in the working directory as 0.jpg ... 30.jpg
for i in range(31):
    filename = str(i)+'.jpg'
    img = plt.imread(filename)
    # Same RGB check as above: a JPEG that is already single-channel would make
    # an unconditional rgb2gray call fail.
    if len(img.shape) == 3 and img.shape[2] == 3:
        img = color.rgb2gray(img)
    backgrounds.append(img)

print(len(backgrounds))

In [None]:
# Colab-only: mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

## Definimos los datos y las HOG a usar en el experimento

In [None]:
# Rescale factor applied to the face images before feature extraction
resolution = 1
# Scale factors at which background patches are cut out
scales = [0.5,1,2,4,8]
# Target ratio of negative (background) to positive (face) examples per split
proportion_train = 10
proportion_test = 100
# Patches requested per (background image, scale) pair so that the total number
# of negatives is roughly proportion * number of positives
num_patches_train = int((proportion_train * len(positive_patches_train))/(len(scales) * len(backgrounds)))
num_patches_test = int((proportion_test * len(positive_patches_test))/(len(scales) * len(backgrounds)))

# HOG descriptor parameters
orientations = 3
pixels_per_cell = (8, 8)
cells_per_block = (3, 3)

# Tag encoding every setting above; used as prefix of the exported CSV file names
experiment_name = '_R_' + str(resolution)
experiment_name += '_S_' + str(scales)
experiment_name += '_PTrain_' + str(proportion_train)
experiment_name += '_PTest_' + str(proportion_test)
experiment_name += '_O_' + str(orientations)
experiment_name += '_C_' + str(pixels_per_cell)
experiment_name += '_B_' + str(cells_per_block)

print(experiment_name)

In [None]:
# Rescale every face image by the `resolution` factor.
# NOTE: the results overwrite the input arrays, so re-running this cell
# rescales the already rescaled images again.

# Train
positive_patches_train = np.array(
    [rescale(face, resolution) for face in tqdm(positive_patches_train)]
)

# Test
positive_patches_test = np.array(
    [rescale(face, resolution) for face in tqdm(positive_patches_test)]
)

In [None]:
# Shape of one face image, read from the original (not rescaled) stack; used
# later to size the detection rectangles.
# NOTE(review): this ignores `resolution`; with resolution != 1 it would not
# match the rescaled training patches — confirm intent.
size = positive_patches[0].shape


In [None]:
# Extract the negative examples: random patches from every background image at every scale

# Train (seed 42)
negative_patches_train = np.vstack(
    [extract_patches(im, num_patches_train, scale, random_state=42)
    for im in tqdm(backgrounds, desc='Procesando imágenes train')
    for scale in scales]
    )

# Test (seed 0, so the sampled patch positions differ from the train ones)
negative_patches_test = np.vstack(
    [extract_patches(im, num_patches_test, scale, random_state=0)
    for im in tqdm(backgrounds, desc='Procesando imágenes test')
    for scale in scales]
    )

In [None]:
# Build the feature matrix (one HOG descriptor per patch) and the label vector.
# Positives come first in the chain, so the first len(positives) labels are 1.

# Train
X_train = np.array(
    [feature.hog(image=im,
                 orientations=orientations,
                 pixels_per_cell=pixels_per_cell,
                 cells_per_block=cells_per_block)
    for im in tqdm(chain(positive_patches_train, negative_patches_train))]
    )
y_train = np.zeros(X_train.shape[0])
y_train[:positive_patches_train.shape[0]] = 1

# Test
X_test = np.array(
    [feature.hog(image=im,
                 orientations=orientations,
                 pixels_per_cell=pixels_per_cell,
                 cells_per_block=cells_per_block)
    for im in tqdm(chain(positive_patches_test, negative_patches_test))]
    )
y_test = np.zeros(X_test.shape[0])
y_test[:positive_patches_test.shape[0]] = 1

In [None]:
# Sanity check: feature matrices and label vectors must have matching first dimensions
print('Shape X_train: ', X_train.shape)
print('Shape y_train: ', y_train.shape)
print('Shape X_test: ', X_test.shape)
print('Shape y_test: ', y_test.shape)

In [None]:
# Train a model, evaluate it on the test split, export the metrics and plot the ROC curve
def executeModel(model, is_keras=False):
  """Fit ``model`` on the training split and report its test-set performance.

  Reads the notebook globals ``X_train``, ``y_train``, ``X_test``, ``y_test``
  and ``experiment_name``. Displays a metrics table, writes it to
  ``<experiment_name>_<str(model)>.csv``, triggers a Colab download of that
  file, and draws the ROC curve (full view and a zoom on the top-left corner)
  with the threshold that maximises the geometric mean of TPR and 1 - FPR.

  Args:
    model: estimator to train. With ``is_keras=False`` it must offer
      ``fit``/``predict``/``predict_proba``; with ``is_keras=True`` its
      ``predict`` must return one probability per sample.
    is_keras: whether ``model`` is a Keras model.

  Returns:
    None.
  """
  if is_keras:
    model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=0)
    # Keras `predict` returns the probabilities directly
    y_pred_proba = model.predict(X_test).ravel()
    # Hard labels through the usual 0.5 decision threshold
    y_pred = (y_pred_proba > 0.5).astype(int)
  else:
    # scikit-learn estimators
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:,1]

  # Metrics
  acc = accuracy_score(y_test, y_pred)
  b_acc = balanced_accuracy_score(y_test, y_pred)
  prec = precision_score(y_test,y_pred,average='macro')
  rec = recall_score(y_test, y_pred, average='macro')
  f1 = f1_score(y_test, y_pred)
  auc_value = roc_auc_score(y_test, y_pred_proba)

  # Rates derived from the confusion matrix of the hard labels computed above.
  # Calling `model.predict` again (as the *_scorer helpers do) would return
  # probabilities for Keras models and break the confusion matrix, and it
  # repeats the prediction four times for scikit-learn models.
  cm = confusion_matrix(y_test, y_pred)
  tpr_value = cm[1,1]/(cm[1,1]+cm[1,0])
  fpr_value = cm[0,1]/(cm[0,0]+cm[0,1])
  tnr_value = cm[0,0]/(cm[0,0]+cm[0,1])
  fnr_value = cm[1,0]/(cm[1,0]+cm[1,1])

  # Collect everything in a dataframe
  results = pd.DataFrame(
      data={
          'Métrica': ['Accuracy', 'Precision', 'Recall', 'F1', 'B_Accuracy', 'AUC', 'TPR', 'FPR', 'TNR', 'FNR'],
          'Valor': [acc, prec, rec, f1, b_acc, auc_value, tpr_value, fpr_value, tnr_value, fnr_value]
      }
    )

  display(results)

  # Export the table as CSV and download it from Colab
  csv_name = experiment_name + '_' + str(model) + '.csv'
  results.to_csv(csv_name, header=False)
  files.download(csv_name)

  # ROC curve and operating point maximising sqrt(TPR * (1 - FPR))
  fig, ax = plt.subplots(1,2,figsize=(8, 8))

  roc_fpr, roc_tpr, thresholds = roc_curve(y_test, y_pred_proba)
  gmean = np.sqrt(roc_tpr * (1 - roc_fpr))
  index = np.argmax(gmean)
  thresholdOpt = round(thresholds[index], ndigits = 4)
  fprOpt = round(roc_fpr[index], ndigits = 4)
  tprOpt = round(roc_tpr[index], ndigits = 4)

  # Left panel: full ROC curve
  ax[0].step(
      roc_fpr,
      roc_tpr,
      lw=1,
      alpha=1,
  )

  ax[0].plot(
      fprOpt,
      tprOpt,
      marker = 'o'
  )

  ax[0].set(
      xlim=[-0.05, 1.05],
      ylim=[-0.05, 1.05],
      xlabel="False Positive Rate",
      ylabel="True Positive Rate",
      title="Curva ROC",
  )
  ax[0].axis("square")

  # Right panel: zoom on the top-left corner of the curve
  ax[1].set_aspect('equal')
  ax[1].set_xlim([-0.05, 0.1])
  ax[1].set_xbound(lower=-0.05, upper=0.1)
  ax[1].set_ylim([0.85,1])
  ax[1].set_ybound(lower=0.85, upper=1.0)

  ax[1].step(
      roc_fpr,
      roc_tpr,
      lw=1,
      alpha=1,
  )

  ax[1].plot(
      fprOpt,
      tprOpt,
      marker = 'o'
  )

  ax[1].set(
      xlabel="False Positive Rate",
      ylabel="True Positive Rate",
      title="Zoom",
  )

  plt.tight_layout()
  plt.show()

  print(f'Umbral óptimo: {thresholdOpt}')
  print(f'FPR: {fprOpt}, TPR: {tprOpt}')


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from google.colab import files
import numpy as np

def executeKerasModel(model, X_train, y_train, X_test, y_test, epochs=10, batch_size=10):
    """Train a Keras binary classifier and report its test-set performance.

    Fits ``model`` on ``(X_train, y_train)``, thresholds its predicted
    probabilities at 0.5, displays a metrics table, writes that table to
    ``<experiment_name>_<str(model)>.csv`` (``experiment_name`` is a notebook
    global), triggers a Colab download of the file, and plots the ROC curve
    (full view and zoom) with the threshold maximising sqrt(TPR * (1 - FPR)).

    Args:
        model: compiled Keras model whose ``predict`` returns one probability per sample.
        X_train, y_train: training features and labels.
        X_test, y_test: evaluation features and labels.
        epochs: number of training epochs.
        batch_size: training batch size.

    Returns:
        None.
    """
    # Training
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    # Probabilities and the hard labels derived from them
    scores = model.predict(X_test).ravel()
    hard_labels = (scores > 0.5).astype(int)

    # Metrics
    accuracy = accuracy_score(y_test, hard_labels)
    balanced = balanced_accuracy_score(y_test, hard_labels)
    precision = precision_score(y_test, hard_labels, average='macro')
    recall = recall_score(y_test, hard_labels, average='macro')
    f1_value = f1_score(y_test, hard_labels)
    auc_value = roc_auc_score(y_test, scores)

    # Metrics table
    metric_names = ['Accuracy', 'Precision', 'Recall', 'F1', 'B_Accuracy', 'AUC']
    metric_values = [accuracy, precision, recall, f1_value, balanced, auc_value]
    results = pd.DataFrame(data={'Métrica': metric_names, 'Valor': metric_values})

    display(results)

    # CSV export followed by a Colab download
    csv_name = experiment_name + '_' + str(model) + '.csv'
    results.to_csv(csv_name, header=False)
    files.download(csv_name)

    # ROC curve and the operating point with the largest geometric mean
    roc_fpr, roc_tpr, roc_thresholds = roc_curve(y_test, scores)
    best = np.argmax(np.sqrt(roc_tpr * (1 - roc_fpr)))
    best_threshold = round(roc_thresholds[best], ndigits=4)
    best_fpr = round(roc_fpr[best], ndigits=4)
    best_tpr = round(roc_tpr[best], ndigits=4)

    fig, (full_ax, zoom_ax) = plt.subplots(1, 2, figsize=(8, 8))

    # Left panel: whole curve
    full_ax.step(roc_fpr, roc_tpr, lw=1, alpha=1)
    full_ax.plot(best_fpr, best_tpr, marker='o')
    full_ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], xlabel="False Positive Rate", ylabel="True Positive Rate", title="Curva ROC")
    full_ax.axis("square")

    # Right panel: zoom on the top-left corner
    zoom_ax.set_aspect('equal')
    zoom_ax.set_xlim([-0.05, 0.1])
    zoom_ax.set_xbound(lower=-0.05, upper=0.1)
    zoom_ax.set_ylim([0.85, 1])
    zoom_ax.set_ybound(lower=0.85, upper=1.0)
    zoom_ax.step(roc_fpr, roc_tpr, lw=1, alpha=1)
    zoom_ax.plot(best_fpr, best_tpr, marker='o')
    zoom_ax.set(xlabel="False Positive Rate", ylabel="True Positive Rate", title="Zoom")

    plt.tight_layout()
    plt.show()

    print(f'Umbral óptimo: {best_threshold}')
    print(f'FPR: {best_fpr}, TPR: {best_tpr}')

# Uso de la función
# model = [tu modelo de Keras definido aquí]
# executeKerasModel(model, X_train, y_train, X_test, y_test)




## Acá ejecutamos los modelos

In [None]:
# Run logistic regression (C=1, up to 1000 solver iterations)
model = LogisticRegression(C=1, max_iter=1000)
executeModel(model)

In [None]:
# Run a decision tree (entropy criterion, depth limited to 11, fixed seed)
model = DecisionTreeClassifier(criterion = 'entropy', max_depth=11, random_state=42)
executeModel(model)

In [None]:
# Run Adaptive Boosting; the seed is fixed (as in the decision tree cell) so the run is reproducible
model = AdaBoostClassifier(n_estimators=100, learning_rate=0.2, random_state=42)
executeModel(model)

In [None]:
# Run Gradient Boosting with early stopping. The seed is fixed so the internal
# validation split (validation_fraction) and the trees are reproducible.
model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.2, n_iter_no_change=5, tol=1e-4, validation_fraction=0.1, random_state=42)
executeModel(model)

In [None]:
# Run Bagging; the seed is fixed so the bootstrap samples are reproducible
model = BaggingClassifier(n_estimators=10, random_state=42)
executeModel(model)

In [None]:
# Run the deep-learning model: a small fully connected binary classifier

# Number of HOG features per sample
input_dimension = X_train.shape[1]

# Only the first layer declares the input size; the following layers take
# their input from the previous layer, so `input_dim` does not belong on them.
model = Sequential([
    Dense(64, input_dim=input_dimension, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
executeKerasModel(model, X_train, y_train, X_test, y_test)


In [None]:
# Run a random forest; the seed is fixed so the run is reproducible.
# This is the last model trained, so it is the `model` used by the detection
# and saving cells further down.
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
executeModel(model)

## Test con varios rostros


In [None]:
# Load the test photo, convert it to grayscale and halve its resolution
test_image = plt.imread('Central.jpg')
test_image = color.rgb2gray(test_image)
test_image = rescale(test_image,0.5)
# Cell output: shape of the prepared image
test_image.shape

In [None]:
# Show the image with a reference rectangle in the top-left corner to pick,
# by eye, a window scale that matches the size of the faces
fig, ax = plt.subplots()
ax.imshow(test_image, cmap='gray')

# Window scale relative to the face patch size
scale = 0.6
Ni, Nj = (int(scale * s) for s in size)

# Rectangle takes (x, y), width, height -> width is Nj (columns), height is Ni (rows)
ax.add_patch(plt.Rectangle((0, 0), Nj, Ni, edgecolor='red', alpha=1, lw=1, facecolor='none'))
plt.show()

In [None]:
# Run the sliding window over the test image.
# zip(*...) splits the generated ((i, j), patch) pairs into positions and patches.
indices, patches = zip(*sliding_window(test_image, scale=scale))

# HOG descriptor of every patch, with the same parameters used for training
patches_hog = np.array([feature.hog(patch,
                                    orientations=orientations,
                                    pixels_per_cell=pixels_per_cell,
                                    cells_per_block=cells_per_block) for patch in patches])

# Cell output: shape of the HOG feature array
patches_hog.shape

In [None]:
# Classify every window with `model`, i.e. whichever model cell was executed last
labels = model.predict(patches_hog).astype(int)
# Cell output: number of windows classified as face
labels.sum()

In [None]:
# Window size in pixels at the chosen scale
Ni, Nj = (int(scale*s) for s in size)
indices = np.array(indices)
# Keep the windows classified as face, then merge overlapping ones
# (overlap threshold 0.3)
detecciones = indices[labels == 1]
detecciones = non_max_suppression(np.array(detecciones),Ni,Nj, 0.3)

# Draw the surviving detections on the image
fig, ax = plt.subplots()
ax.imshow(test_image, cmap='gray')
ax.axis('off')

# Positions are (row i, column j); Rectangle expects (x, y) = (j, i)
for i, j in detecciones:
    ax.add_patch(plt.Rectangle((j, i), Nj, Ni, edgecolor='red',
                               alpha=1, lw=1, facecolor='none'))

plt.savefig('test_central.png')

## Guardamos el modelo

In [None]:
from joblib import dump, load

# Persist the current `model` (whichever model cell was executed last) to BestModel.joblib
dump(model, "BestModel" + '.joblib')