# **Trabajo Práctico Final - Visión por Computadora - 2024**

**Sistema de clasificacion de animales**

---
**Alumno: Gustavo Fontana**

**Legajo: F-3749/4**



## **Librerías**

In [None]:
!pip install ultralytics
!pip install fiftyone
!pip install comet_ml
!pip install ultralytics --upgrade
!pip install torch torchvision torchaudio

# Clonar el repositorio de YOLOv5
!git clone https://github.com/ultralytics/yolov5.git
%cd yolov5

# Instalar dependencias del repositorio
!pip install -r requirements.txt

In [1]:
#from ultralytics import YOLO
import json
import torch
from comet_ml import Experiment
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from keras.layers import Input, BatchNormalization, Flatten, Dropout
from tensorflow.keras.layers import Dense, Conv2D, Activation, MaxPooling2D, GlobalMaxPooling2D, Rescaling
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gdown
import zipfile

## **Dataset**

In [3]:
# Descargar el archivo
file_id = '1XPFQgqnRm915SONRgWPSrZaUAfz1lQFn'
destination = '/content/dataset.zip'

gdown.download(f'https://drive.google.com/uc?id={file_id}', destination, quiet=False)

# Descomprimir y guardar en el entorno
with zipfile.ZipFile(destination, 'r') as zip_ref:
    zip_ref.extractall('/content/dataset')

Downloading...
From (original): https://drive.google.com/uc?id=1XPFQgqnRm915SONRgWPSrZaUAfz1lQFn
From (redirected): https://drive.google.com/uc?id=1XPFQgqnRm915SONRgWPSrZaUAfz1lQFn&confirm=t&uuid=6a497bbe-77e4-4b05-9f83-7a85654014f0
To: /content/dataset.zip
100%|██████████| 2.82G/2.82G [00:20<00:00, 135MB/s]


In [4]:
classifiction_dataset = '/content/dataset'

In [5]:
# Tamaño del lote (batch size)
batch_size = 32

# Tamaño de las imágenes
image_height = 416
image_width = 416

# Número de clases
num_classes = 38

In [6]:
# Divisiones del dataset

# Conjunto de entrenamiento
train_dataset = tf.keras.utils.image_dataset_from_directory(
    classifiction_dataset,
    validation_split=0.3,
    subset="training",
    seed=123,
    image_size=(image_height, image_width),
    batch_size=32,
    label_mode="categorical"
)

# Conjunto de validación
val_dataset = tf.keras.utils.image_dataset_from_directory(
    classifiction_dataset,
    validation_split=0.15,
    subset="validation",
    seed=123,
    image_size=(image_height, image_width),
    batch_size=32,
    label_mode="categorical"
)

# Conjunto de prueba
test_dataset = tf.keras.utils.image_dataset_from_directory(
    classifiction_dataset,
    validation_split=0.15,
    subset="validation",
    seed=123,
    image_size=(image_height, image_width),
    batch_size=32,
    label_mode="categorical"
)

Found 8301 files belonging to 38 classes.
Using 5811 files for training.
Found 8301 files belonging to 38 classes.
Using 1245 files for validation.
Found 8301 files belonging to 38 classes.
Using 1245 files for validation.


## **Información del dataset**

In [7]:
# Obtener los nombres de las clases
class_names = train_dataset.class_names
num_classes = len(class_names)

print(f'El total de clases del dataset es {num_classes}')
print('Las clases son:')
for class_ in class_names:
  print(f' {class_}')

El total de clases del dataset es 38
Las clases son:
 Bear
 Bull
 Camel
 Cheetah
 Chicken
 Crocodile
 Deer
 Duck
 Elephant
 Fox
 Frog
 Goat
 Goose
 Hippopotamus
 Horse
 Jaguar
 Kangaroo
 Koala
 Leopard
 Lion
 Lizard
 Lynx
 Monkey
 Owl
 Panda
 Penguin
 Pig
 Rabbit
 Raccoon
 Raven
 Rhinoceros
 Sheep
 Snake
 Spider
 Squirrel
 Tiger
 Tortoise
 Zebra


### **Balance del dataset**

In [11]:
# Obtener la frecuencias de las clases
labels_freq = {}

# Itera sobre cada subdirectorio en TRAIN_DIRECTORY
for class_ in os.listdir(classifiction_dataset):
    class_path = os.path.join(classifiction_dataset, class_)

    if os.path:
        labels_freq[class_] = len(os.listdir(class_path))

# Crear el DataFrame a partir del diccionario
df_frequencies = pd.DataFrame(list(labels_freq.items()), columns=['Clase', 'Frec_abs'])
df_frequencies['Frec_rel'] = round(df_frequencies['Frec_abs']/sum(df_frequencies['Frec_abs']),2)

In [12]:
# Frecuncias por clase
df_frequencies

Unnamed: 0,Clase,Frec_abs,Frec_rel
0,Bear,87,0.01
1,Bull,47,0.01
2,Camel,67,0.01
3,Cheetah,132,0.02
4,Chicken,388,0.05
5,Crocodile,108,0.01
6,Duck,542,0.07
7,Elephant,155,0.02
8,Frog,1,0.0
9,Fox,148,0.02


## **Modelos**

### **Modelo de clasificacion**

In [8]:
# Aplicar caché, mezcla y precarga para el conjunto de entrenamiento
train_prefetch = train_dataset.cache().shuffle(5811).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Aplicar caché y precarga para el conjunto de validación
val_prefetch = val_dataset.cache().prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [27]:
# Construir el modelo
def build_model(input_shape, num_classes):
    base_model = tf.keras.applications.EfficientNetB0(input_shape=input_shape,
                                                      include_top=False,
                                                      weights='imagenet')
    i = Input(shape=input_shape)
    base_model.trainable = False

    x = Rescaling(1./255)(i)

    x = tf.keras.applications.efficientnet.preprocess_input(i)

    x = base_model(x, training=False)

    # Agregar capas convolucionales adicionales
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((1, 1))(x)

    x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((1, 1))(x)

    # Global Max Pooling
    x = GlobalMaxPooling2D()(x)
    x = Dropout(0.6)(x)
    x = BatchNormalization()(x)

    x = Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(i, x)

    model.compile(
    optimizer='adam',
    loss="categorical_crossentropy",
    metrics=['accuracy'])

    return model

In [31]:
# Obtener las clases del dataset de entrenamiento
train_labels = np.concatenate([y.numpy() for x, y in train_dataset], axis=0)
train_labels = np.argmax(train_labels, axis=1)

# Calcular los pesos de clase
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_labels),
    y=train_labels
)

# Crear un diccionario de pesos por clase
class_weight_dict = dict(zip(np.unique(train_labels), class_weights))

In [None]:
print("Building model")

backbone_model = build_model((image_height, image_width, 3), num_classes)

# Callback
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, verbose=0, mode='max')

# Número de épocas de entrenamiento
epochs = 100

# Entrena el modelo
backbone_model_history = backbone_model.fit(
    train_prefetch,
    validation_data=val_prefetch,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping],
    class_weight=class_weight_dict
)

In [None]:
# Rutas para guardar el modelo entrenado
path_to_save_model = '/content/drive/MyDrive/Vision 2'

# Guardar el modelo completo
backbone_model.save(f'{path_to_save_model}/backbone_model.keras')

# Guardar el history en formato JSON
with open(f'{path_to_save_model}/backbone_model_history.json', 'w') as f:
    json.dump(backbone_model_history.history, f)

In [None]:
# Cargar el modelo guardado
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/Vision 2/backbone_model.keras')

### **Modelo de detección**

In [None]:
# Api para el seguimiento de la evolución del modelo
os.environ['COMET_API_KEY'] = ''

In [None]:
# Entrenar el modelo GPU
!python train.py \
--img 416 \
--batch 32 \
--epochs 250 \
--data /content/drive/MyDrive/Vision/eval/train_config.yml \
--weights yolov5n.pt \
--device 0 \
--project /content/drive/MyDrive/Vision/model \
--name animal_detector \
--cache \