# Reporte y Entrenamiento del Modelo para el taller 1

## Samuel Lopera Torres

## Inteligencia Artificial

## Escuela de Ciencias e Ingenieria

### **Sobre los Hiperparámetros**

Tras varios experimentos, se definió la utilización de una tasa de aprendizaje (learning rate) de 0.1, 50 épocas de entrenamiento y una probabilidad de dropout de 0.2. Gracias a la regularización, el modelo es algo más resistente a cambios en la tasa de aprendizaje. En los experimentos realizados, utilizar una tasa menor resultaba en quedarse atascado en un mínimo local de la función de costo, mientras que utilizar tasas mayores y tiempos de entrenamiento más largos resultaba en sobreentrenamiento (una diferencia de 30 puntos porcentuales en la precisión del modelo entre los conjuntos de entrenamiento y de validación).

### **Estructura de la Red**

La estructura de la red se inspira en la de las redes profundas, buscando comprimir (en la medida de lo posible) el input inicial, con el fin de facilitar la salida de las clases para el modelo.

En terminos generales, la estructura es la siguiente.

Input (imagen aplanada en vector 150\*150) -> Capa Lineal (150\*150,11250) -> Batchnorm -> ReLu -> Dropout 0.2 ->
Capa Lineal (11250,5625) -> Capa Lineal (5625,1125) -> Capa Lineal (1125, 256) -> Batchnorm -> ReLu -> Dropout 0.2 ->
Capa Lineal (256,128) -> Capa Lineal (128,64) -> Batchnorm -> ReLu -> Dropout 0.2 ->
Capa Lineal (64,6) -> Output

Se toma en consideración que el modelo no va a presentar tasas de precisión altas, debido a que la utilización exclusiva de capas lineales no es el acercamiento óptimo para la clasificación de imágenes, por lo que una precisión mayor al 40% se considera un punto de parada decente para el modelo.

### **Otras Consideraciones**

- Para facilitar la lectura del código, las clases que permiten la implementación de la red neuronal están en módulos en la carpeta raíz del proyecto; fueron realizadas a mano durante el minitorch workshop, presentado por el profesor como insumo para esta práctica.

- Los datos del modelo están en la carpeta data (importados de forma manual, debido a que el trabajo no fue realizado en kaggle.com).

In [1]:
import torch
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from tqdm import tqdm
import numpy as np
from torch.utils.data import DataLoader, random_split, Subset, Dataset
from torch.utils.data import random_split
from Net import *
from Linear import *
from CrossEntropyFromLogits import *
from ReLU import *
from Dropout import *
from BatchNorm import *

# Preprocessing applied to every image before it reaches the network.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # collapse RGB to a single channel
    transforms.Resize((150, 150)),                # force a fixed 150x150 resolution
    transforms.ToTensor(),                        # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.5], std=[0.5]),  # (x - 0.5) / 0.5: rescales [0, 1] to [-1, 1]
])

# Load the full labelled training set from disk.
train_path = 'data/seg_train/seg_train'
full_trainset = datasets.ImageFolder(root=train_path, transform=transform)

# 80/20 train/validation split. The split is seeded so that re-running this
# cell always yields the same partition; otherwise validation samples from a
# previous run could end up in the training subset of the next one.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_trainset))
val_size = len(full_trainset) - train_size
trainset, valset = random_split(
    full_trainset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# DataLoaders: only the training data is shuffled.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
valloader = DataLoader(valset, batch_size=64, shuffle=False)

# Held-out test data, preprocessed exactly like the training data.
testpath = 'data/seg_test/seg_test'
testset = datasets.ImageFolder(root=testpath, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Width of the flattened 150x150 input vector and number of output classes.
n_features = 150 * 150
n_classes = 6

net = Net()

# Layers in forward order. Each group ends with BatchNorm -> ReLU -> Dropout,
# except the final Linear layer, which emits the raw class logits.
layer_stack = [
    # Group 1: input projection
    Linear(n_features, 11250, device=device),
    BatchNorm1D(n_features=11250, device=device),
    ReLU(device=device),
    Dropout(p=0.2, device=device),
    # Group 2: three stacked linear reductions
    Linear(11250, 5625, device=device),
    Linear(5625, 1125, device=device),
    Linear(1125, 256, device=device),
    BatchNorm1D(n_features=256, device=device),
    ReLU(device=device),
    Dropout(p=0.2, device=device),
    # Group 3: two stacked linear reductions
    Linear(256, 128, device=device),
    Linear(128, 64, device=device),
    BatchNorm1D(n_features=64, device=device),
    ReLU(device=device),
    Dropout(p=0.2, device=device),
    # Output layer
    Linear(64, n_classes, device=device),
]
for layer in layer_stack:
    net.add(layer)

# Loss object used for training and evaluation.
CELoss = CrossEntropyFromLogits()

num_epochs = 50
learning_rate = 0.1
history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
batch_losses = []  # per-batch training losses, used for the first plot


def _evaluate(loader, desc):
    """Score `net` on every batch of `loader` without updating its weights.

    Switches the network to evaluation mode when it exposes `eval()`.

    Args:
        loader: iterable of (images, labels) batches that supports len().
        desc: label shown on the tqdm progress bar.

    Returns:
        (mean of the per-batch losses, fraction of correctly classified samples).
    """
    if hasattr(net, "eval"):
        net.eval()
    loss_sum, correct, samples = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            X = images.view(images.size(0), -1).to(device)  # flatten each image to a vector
            Y = labels.to(device)

            Z = net.forward(X)
            loss_sum += CELoss.forward(Z, Y).item()

            _, predicted = torch.max(Z, 1)
            correct += (predicted == Y).sum().item()
            samples += Y.size(0)
    return loss_sum / len(loader), correct / samples


for epoch in range(1, num_epochs + 1):
    # -------- TRAIN --------
    if hasattr(net, "train"):
        net.train()
    running_loss, tot_correct, tot_samples = 0.0, 0, 0
    total_batches = len(trainloader)
    # Refresh the progress-bar statistics roughly ten times per epoch.
    report_every = max(1, total_batches // 10)

    pbar = tqdm(trainloader, desc=f"Epoch {epoch}/{num_epochs} [Train]")
    for batch_idx, (images, labels) in enumerate(pbar, 1):
        X = images.view(images.size(0), -1).to(device)  # flatten each image to a vector
        Y = labels.to(device)

        # Forward
        Z = net.forward(X)
        loss = CELoss.forward(Z, Y)

        # Backward + update (manual backpropagation through the hand-written layers)
        dZ = CELoss.backward(n_classes)
        net.backward(dZ)
        net.update(learning_rate)

        # Stats: read the scalar loss once and reuse it everywhere below.
        loss_value = loss.item()
        running_loss += loss_value
        batch_losses.append(loss_value)
        _, predicted = torch.max(Z, 1)
        tot_correct += (predicted == Y).sum().item()
        tot_samples += Y.size(0)

        if batch_idx % report_every == 0:
            pbar.set_postfix(loss=f"{loss_value:.4f}",
                             acc=f"{tot_correct / max(1, tot_samples):.4f}")

    train_loss = running_loss / total_batches
    train_acc = tot_correct / tot_samples

    # -------- VALIDATION --------
    val_loss, val_acc = _evaluate(valloader, f"Epoch {epoch}/{num_epochs} [Val]")

    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)

    print(f"Epoch {epoch}/{num_epochs} | "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

# -------- OPTIONAL TEST --------
# Skipped when no test loader has been defined in this session.
if 'testloader' in globals() and testloader is not None:
    test_loss, test_acc = _evaluate(testloader, "[Test]")
    print(f"[Test] Loss: {test_loss:.4f} | Acc: {test_acc:.4f}")

# -------- PLOTS --------
# Training loss for every individual batch.
plt.figure()
plt.plot(np.array(batch_losses))
plt.xlabel('Batch')
plt.ylabel('Loss')
plt.title('Training Loss (per batch)')
plt.show()

# Train vs. validation loss per epoch.
plt.figure()
plt.plot(history["train_loss"], label='Train')
plt.plot(history["val_loss"], label='Val')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Loss per Epoch')
plt.show()

# Train vs. validation accuracy per epoch.
plt.figure()
plt.plot(history["train_acc"], label='Train')
plt.plot(history["val_acc"], label='Val')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Accuracy per Epoch')
plt.show()

KeyboardInterrupt: 

Como se puede observar en el output del entrenamiento, el modelo llega a una precisión mayor al 40% en los sets de prueba y validación. Pese a que las gráficas de la función de pérdida y de la precisión indican que utilizar una tasa de aprendizaje menor pudo haber sido beneficioso, se considera este rendimiento lo suficientemente bueno para el modelo.

In [9]:
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch

# Directory that holds the images to run predictions on.
image_directory = 'data/seg_pred/seg_pred'

# Hand-picked file names (relative to image_directory) that will be predicted.
image_files = ['6234.jpg', '22288.jpg', '20529.jpg', '21440.jpg', '5982.jpg', '7737.jpg', '2081.jpg',
               '10054.jpg', '3919.jpg', '21838.jpg', '6399.jpg', '9960.jpg', '3757.jpg', '9131.jpg',
               '9062.jpg', '4489.jpg', '3417.jpg', '6074.jpg', '7894.jpg', '10305.jpg', '4407.jpg',
               '17720.jpg', '15979.jpg', '8983.jpg', '9697.jpg', '21590.jpg', '2628.jpg', '22604.jpg',
               '16202.jpg', '3363.jpg', '13333.jpg', '14395.jpg', '6943.jpg', '3228.jpg', '12132.jpg',
               '8075.jpg', '6365.jpg', '6459.jpg', '7690.jpg', '21966.jpg', '23566.jpg', '13087.jpg',
               '4772.jpg', '21145.jpg', '6925.jpg', '10201.jpg', '15764.jpg', '7918.jpg', '16401.jpg',
               '20789.jpg', '11617.jpg', '23258.jpg', '19651.jpg', '6800.jpg', '14376.jpg', '20321.jpg',
               '12267.jpg', '18227.jpg', '4765.jpg', '22270.jpg', '21588.jpg', '6209.jpg', '5068.jpg',
               '11529.jpg', '6229.jpg', '1749.jpg', '15360.jpg', '1995.jpg', '24068.jpg', '18048.jpg',
               '12334.jpg', '20429.jpg', '3537.jpg', '21946.jpg', '2278.jpg', '11901.jpg', '12675.jpg',
               '20308.jpg', '18399.jpg', '8929.jpg', '1971.jpg', '6248.jpg', '16993.jpg', '1383.jpg',
               '9887.jpg', '20381.jpg', '20760.jpg', '11087.jpg', '6513.jpg', '18013.jpg', '22118.jpg',
               '19382.jpg', '11848.jpg', '9486.jpg', '6493.jpg', '5734.jpg', '23082.jpg', '6797.jpg',
               '20987.jpg', '18935.jpg', '6794.jpg', '13614.jpg', '11682.jpg', '19792.jpg', '22945.jpg',
               '21493.jpg', '15578.jpg', '7516.jpg', '12205.jpg', '6952.jpg', '12941.jpg', '6059.jpg',
               '24065.jpg', '23412.jpg', '22798.jpg', '13598.jpg', '19727.jpg', '5882.jpg', '20042.jpg',
               '2532.jpg', '11849.jpg', '22108.jpg', '785.jpg', '2213.jpg', '18482.jpg', '18708.jpg',
               '9336.jpg', '1777.jpg', '16360.jpg', '11895.jpg', '6924.jpg', '212.jpg', '19998.jpg',
               '22445.jpg', '19165.jpg', '12838.jpg', '792.jpg', '2057.jpg', '21582.jpg', '17763.jpg',
               '16406.jpg', '14737.jpg', '5158.jpg', '23924.jpg', '19596.jpg', '19079.jpg', '6917.jpg',
               '10651.jpg', '10880.jpg', '17832.jpg', '2094.jpg', '16519.jpg', '2302.jpg', '21495.jpg',
               '12421.jpg', '20578.jpg', '14919.jpg', '14476.jpg', '23881.jpg', '21809.jpg', '5458.jpg',
               '2028.jpg', '14539.jpg', '4166.jpg', '1173.jpg', '3729.jpg', '6.jpg', '6941.jpg',
               '8369.jpg', '13476.jpg', '7986.jpg', '20990.jpg', '1211.jpg', '8979.jpg', '8852.jpg',
               '12464.jpg', '17836.jpg', '18626.jpg', '6715.jpg', '149.jpg', '22422.jpg', '23415.jpg',
               '11422.jpg', '11678.jpg', '1644.jpg', '4494.jpg', '14730.jpg', '20839.jpg', '2640.jpg',
               '436.jpg', '17103.jpg', '12141.jpg', '19654.jpg', '4439.jpg', '76.jpg', '21585.jpg',
               '2844.jpg', '15027.jpg', '9257.jpg', '23424.jpg']

# Custom dataset
class SelectedFilesDataset(Dataset):
    """Dataset over an explicit list of image files inside one directory.

    Each item is an `(image, file_name)` pair: the file name is returned in
    place of a label so predictions can be matched back to their source file.
    """

    def __init__(self, root_dir, file_names, transform=None):
        """Store the dataset configuration; no file is opened here.

        Args:
            root_dir: directory that contains the image files.
            file_names: sequence of file names relative to `root_dir`.
            transform: optional callable applied to each loaded RGB image.
        """
        self.root_dir = root_dir
        self.file_names = file_names
        self.transform = transform

    def __len__(self):
        """Return the number of selected files."""
        return len(self.file_names)

    def __getitem__(self, idx):
        """Load the image at position `idx` and return `(image, file_name)`."""
        file_name = self.file_names[idx]
        file_path = os.path.join(self.root_dir, file_name)
        # The context manager releases the file handle deterministically;
        # convert() returns an independent in-memory image, so it stays
        # usable after the file has been closed.
        with Image.open(file_path) as raw:
            image = raw.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, file_name

# Build dataset/loader over the selected files, reusing the same preprocessing
# transform that was applied to the training images.
comp_test_dataset = SelectedFilesDataset(image_directory, image_files, transform=transform)
comp_test_loader = DataLoader(comp_test_dataset, batch_size=100, shuffle=False)

# Make sure the network is in evaluation mode before predicting. If the
# training cell was interrupted, the network would otherwise still be in
# training mode here (mirrors the guard used by the training/evaluation code).
if hasattr(net, "eval"):
    net.eval()

# Predict
preds, ids = [], []
with torch.no_grad():
    for images, names in comp_test_loader:
        X = images.view(images.shape[0], -1).to(device)  # flatten each image to a vector
        Z = net.forward(X)
        _, predicted = torch.max(Z, 1)  # index of the highest-scoring class per image
        preds.extend(predicted.detach().cpu().tolist())
        ids.extend(names)

# Save to CSV: one row per image, pairing the file name with its predicted class index.
df = pd.DataFrame({'id': ids, 'pred': preds})
df.to_csv("predictions.csv", index=False)
print("✅ CSV file 'predictions.csv' created successfully.")


✅ CSV file 'predictions.csv' created successfully.
