## Instalación de librerías y packages

In [6]:
!pip install wandb -qU
!pip install torcheval-nightly
!pip install torchinfo
!pip install torchmetrics
!pip install onnxruntime
!pip install onnxscript
!pip install onnx
!pip install torchvision
!pip install matplotlib
!pip install scikit-learn



Importación de librerías y selección de la GPU

In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import wandb
import time
import onnxruntime
import shutil
import os

from torch.cuda.amp import GradScaler, autocast 

from torchinfo import summary
from torchmetrics import Accuracy
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
import torch

# Query CUDA availability once and reuse the answer for the device-name lookup.
cuda_ok = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if cuda_ok else "N/A"

print("CUDA disponible:", cuda_ok)
print("Número de GPUs:", torch.cuda.device_count())
print("Nombre de la GPU:", gpu_name)

CUDA disponible: True
Número de GPUs: 1
Nombre de la GPU: NVIDIA GeForce RTX 3070 Laptop GPU


## Carga del dataset y pre-procesamiento de imágenes

In [9]:
# Name of the folder that holds the train/test split (same value as OUTPUT_DIR below).
# NOTE(review): no later cell visible in this notebook reads `path` — confirm it is still needed.
path = 'FloreView_split'

In [None]:
# Split the photos of the original FloreView folder into per-class train/test subsets.

INPUT_DIR = "FloreView"
OUTPUT_DIR = "FloreView_split"
train_ratio = 0.7  # 70% train

# Sub-folder names must match the ones read by the conversion and ImageFolder cells.
SPLIT_DIRS = {"train": "train_split", "test": "test_split"}
# Compared against the lower-cased file name, so upper-case variants are covered too.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg')

# Create the destination folders
for split_dir in SPLIT_DIRS.values():
    os.makedirs(os.path.join(OUTPUT_DIR, split_dir), exist_ok=True)

# Walk the classes; sorting makes the seeded split independent of the
# arbitrary order in which os.listdir returns entries.
for class_name in sorted(os.listdir(INPUT_DIR)):
    class_path = os.path.join(INPUT_DIR, class_name)
    if not os.path.isdir(class_path):
        continue

    images = sorted(f for f in os.listdir(class_path) if f.lower().endswith(IMAGE_EXTENSIONS))
    if len(images) < 2:
        # train_test_split cannot produce two non-empty subsets from fewer than 2 samples.
        print(f"Skipping class '{class_name}': found {len(images)} image(s), need at least 2")
        continue

    train_imgs, test_imgs = train_test_split(images, train_size=train_ratio, random_state=42)

    for split_key, split_imgs in (("train", train_imgs), ("test", test_imgs)):
        split_class_dir = os.path.join(OUTPUT_DIR, SPLIT_DIRS[split_key], class_name)
        os.makedirs(split_class_dir, exist_ok=True)

        for img in split_imgs:
            src = os.path.join(class_path, img)
            dst = os.path.join(split_class_dir, img)
            shutil.copy2(src, dst)  # copy2 also preserves file metadata

print(" División completada y archivos copiados a 'FloreView_split'")


In [11]:
# Convert every JPEG of the split folders to PNG so that all images share one format.

import os
from PIL import Image

base_path = 'FloreView_split'

# Compared against the lower-cased file name, so '.JPG' / '.JPEG' match as well.
JPEG_EXTENSIONS = ('.jpg', '.jpeg')

# Process both train_split and test_split
for split in ['train_split', 'test_split']:
    split_path = os.path.join(base_path, split)
    if not os.path.isdir(split_path):
        # os.walk yields nothing for a missing folder; report it instead of
        # silently "converting" zero files.
        print(f"Carpeta no encontrada, se omite: {split_path}")
        continue

    for root, dirs, files in os.walk(split_path):
        for file in files:
            if not file.lower().endswith(JPEG_EXTENSIONS):
                continue

            img_path = os.path.join(root, file)
            new_path = os.path.join(root, os.path.splitext(file)[0] + '.png')

            # The context manager closes the source file before it is deleted.
            with Image.open(img_path) as img:
                # Convert to RGB for compatibility, then store as PNG.
                img.convert('RGB').save(new_path, 'PNG')

            os.remove(img_path)  # drop the original JPEG once the PNG exists

print("Conversión completada.")

Conversión completada.


In [12]:
# Build the local datasets with ImageFolder and wrap them in DataLoaders.

# Steps shared by both pipelines: fixed 512x512 input (adjust if needed) and
# the same per-channel mean/std normalisation.
image_size = (512, 512)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: adds a random rotation of up to 10 degrees as augmentation.
transform_train = transforms.Compose([
    transforms.Resize(image_size),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: no augmentation.
transform_test = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    normalize,
])

# Datasets: one sub-folder per class
train_dataset = ImageFolder(root='FloreView_split/train_split', transform=transform_train)
test_dataset = ImageFolder(root='FloreView_split/test_split', transform=transform_test)

# DataLoaders: only the training data is shuffled
loader_options = dict(batch_size=64, num_workers=4)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# Show the class names
print("Clases:", train_dataset.classes)


Clases: ['Apple_iPhone12', 'Apple_iPhone13mini', 'Apple_iPhone8Plus', 'Apple_iPhoneSE', 'Apple_iPhoneX', 'DOOGEE_S96Pro', 'Google_Pixel3a', 'Google_Pixel5', 'Huawei_Mate10Lite', 'Huawei_Mate10Pro', 'Huawei_Nova5T', 'Huawei_P30Lite', 'Huawei_P8Lite', 'Huawei_P9Lite', 'LG_G4c', 'LG_G7ThinQ', 'LG_V50ThinQ', 'Motorola_MotoG', 'Motorola_MotoG5', 'Motorola_MotoG5SPlus', 'Motorola_MotoG9Plus', 'OnePlus_6T', 'OnePlus_8T', 'Samsung_GalaxyA12', 'Samsung_GalaxyA40', 'Samsung_GalaxyA52s', 'Samsung_GalaxyNote8', 'Samsung_GalaxyS10', 'Samsung_GalaxyS10+', 'Samsung_GalaxyS20+', 'Samsung_GalaxyS21+', 'Samsung_GalaxyS6', 'Sony_XperiaM2', 'Xiaomi_MiA2Lite', 'Xiaomi_MiMix3', 'Xiaomi_Redmi5Plus', 'Xiaomi_RedmiNote8', 'Xiaomi_RedmiNote8T', 'Xiaomi_RedmiNote9']


## Definición del modelo (ResNet50 modificado)



In [None]:
class Model(torch.nn.Module):
    """ResNet-50 backbone (final classifier removed) followed by a three-layer linear head.

    Args:
        outputs: Number of output classes (size of the last linear layer).
        pretrained: If True, load the ImageNet weights ``IMAGENET1K_V1`` (the set the
            legacy ``pretrained=True`` flag selected); if False, start from random weights.
        freeze: If True, the backbone parameters are excluded from gradient updates
            so only the linear head is trained.
    """

    def __init__(self, outputs = 39, pretrained = True, freeze = True):
        super().__init__()

        # `weights=` replaces the deprecated `pretrained=` argument of torchvision.
        weights = torchvision.models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
        resnet50 = torchvision.models.resnet50(weights=weights)
        # Keep every child module except the last one (the original `fc` classifier).
        self.resnet50 = torch.nn.Sequential(*list(resnet50.children())[:-1])

        if freeze:
            for param in self.resnet50.parameters():
                param.requires_grad = False

        # NOTE(review): there is no non-linearity between these layers, so the head is
        # mathematically a single linear map; left as is to stay compatible with
        # checkpoints saved from this architecture.
        self.fc = torch.nn.Linear(2048, 512)
        self.fc1 = torch.nn.Linear(512, 256)
        self.fc2 = torch.nn.Linear(256, outputs)

    def forward(self, batch):
        """Return the class logits for a batch of images."""
        batch = self.resnet50(batch)
        # Flatten the pooled features to (batch, features) for the linear head.
        batch = batch.view(batch.shape[0], -1)
        batch = self.fc(batch)
        batch = self.fc1(batch)
        batch = self.fc2(batch)
        return batch


## Implementación y entrenamiento

In [None]:
def fit(model, trainloader, testloader, epochs, lr, num_classes=39):
    """Train ``model`` with Adam and validate it after every epoch.

    Metrics are logged to Weights & Biases, the weights with the best validation
    accuracy are written to ``best_model.pth`` and training stops early once the
    validation loss has not improved for 10 consecutive epochs.

    Args:
        model: Module mapping a batch of inputs to class logits.
        trainloader: Iterable of ``(inputs, targets)`` training batches.
        testloader: Iterable of ``(inputs, targets)`` batches used for validation.
        epochs: Maximum number of epochs.
        lr: Learning rate for Adam.
        num_classes: Number of target classes used by the accuracy metrics.

    Reads the module-level ``device`` string ("cuda" or "cpu").
    """
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    # Float16 autocast and loss scaling are only used when running on CUDA.
    use_amp = device == "cuda"
    scaler = torch.amp.GradScaler(enabled=use_amp)

    # Best-model tracking
    best_val_accuracy = 0.0
    best_model_path = 'best_model.pth'

    # SECURITY: the API key must not live in the notebook. wandb.login() picks it up
    # from the WANDB_API_KEY environment variable or a previous `wandb login`.
    # Any key that was ever committed here should be considered leaked and rotated.
    wandb.login()

    wandb.init(
        project="camera-identification",
        name="finetuning_resnet50",
        config={
            "dataset": "FloreView",
            "architecture": "Resnet50",
            "batch_size": getattr(trainloader, "batch_size", None),
            "epochs": epochs,
            "optimizer": "Adam",
            "learning_rate": lr,
        },
    )

    # Separate metric objects so training and validation statistics never mix.
    train_accuracy = Accuracy(num_classes=num_classes, average='micro', task='multiclass').to(device)
    val_accuracy = Accuracy(num_classes=num_classes, average='micro', task='multiclass').to(device)

    # Convergence criteria
    patience = 10
    best_loss = float('inf')
    epochs_without_improvement = 0
    convergence_start_time = time.time()

    try:
        for epoch in range(epochs):
            # ---- training ----
            model.train()
            train_loss_sum, train_samples = 0.0, 0

            for x, y in trainloader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()

                with torch.autocast(device_type=device, dtype=torch.float16, enabled=use_amp):
                    y_hat = model(x)
                    loss = criterion(y_hat, y)  # scalar tensor

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                train_loss_sum += loss.item() * x.size(0)
                train_samples += x.size(0)
                # Accumulate over every batch instead of scoring only the last one.
                train_accuracy.update(y_hat.detach(), y)

            train_loss = train_loss_sum / max(train_samples, 1)  # mean loss per sample
            accT = train_accuracy.compute().item()
            train_accuracy.reset()

            # ---- validation ----
            model.eval()
            val_loss_sum, val_samples, val_batches = 0.0, 0, 0
            start_time_testing = time.time()

            with torch.no_grad():  # no autograd graph needed for evaluation
                for x, y in testloader:
                    x, y = x.to(device), y.to(device)
                    y_hat = model(x)
                    loss = criterion(y_hat, y)
                    val_loss_sum += loss.item() * x.size(0)
                    val_samples += x.size(0)
                    val_batches += 1
                    val_accuracy.update(y_hat, y)

            total_time = time.time() - start_time_testing
            # Validation batches processed per second.
            iterations_per_second_testing = val_batches / total_time if total_time > 0 else float('inf')

            val_loss = val_loss_sum / max(val_samples, 1)
            accV = val_accuracy.compute().item()
            val_accuracy.reset()

            wandb.log({
                "epoch": epoch,
                "accuracyTraining": accT,
                "lossTraining": train_loss,
                "accuracyValidation": accV,
                "lossValidation": val_loss,
            })

            if accV > best_val_accuracy:
                best_val_accuracy = accV
                torch.save(model.state_dict(), best_model_path)
                print(f'Improved model saved on epoch {epoch+1} with accuracy: {accV:.4f}')

            print(f"Epoch {epoch+1}/{epochs} training_loss {train_loss:.5f} val_loss {val_loss:.5f} "
                  f"training_accuracy {accT:.5f} val_accuracy {accV:.5f}")
            print(f"Iterations per second for testing: {iterations_per_second_testing:.2f} ")

            # Early stopping is checked last so the final epoch is still logged and saved.
            if val_loss < best_loss:
                best_loss = val_loss
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1

            if epochs_without_improvement >= patience:
                print("Convergence reached.")
                break
    finally:
        # Close the W&B run even if training is interrupted or fails.
        wandb.finish()

    convergence_time = time.time() - convergence_start_time
    print(f"Convergence time: {convergence_time:.2f} seconds")

Arquitectura del modelo e instanciamiento

In [15]:
# Instantiate the ResNet-50 based model with its default arguments and display a
# torchinfo layer summary for a single 3x512x512 input.
resnet50mod = Model()
summary(resnet50mod, input_size=(1, 3, 512, 512))



Layer (type:depth-idx)                        Output Shape              Param #
Model                                         [1, 39]                   --
├─Sequential: 1-1                             [1, 2048, 1, 1]           --
│    └─Conv2d: 2-1                            [1, 64, 256, 256]         (9,408)
│    └─BatchNorm2d: 2-2                       [1, 64, 256, 256]         (128)
│    └─ReLU: 2-3                              [1, 64, 256, 256]         --
│    └─MaxPool2d: 2-4                         [1, 64, 128, 128]         --
│    └─Sequential: 2-5                        [1, 256, 128, 128]        --
│    │    └─Bottleneck: 3-1                   [1, 256, 128, 128]        (75,008)
│    │    └─Bottleneck: 3-2                   [1, 256, 128, 128]        (70,400)
│    │    └─Bottleneck: 3-3                   [1, 256, 128, 128]        (70,400)
│    └─Sequential: 2-6                        [1, 512, 64, 64]          --
│    │    └─Bottleneck: 3-4                   [1, 512, 64, 64]       

Entrenamiento y validación del modelo

In [None]:
# Train and validate the ResNet-50 based model: at most 200 epochs, learning rate 3e-3.
fit(model=resnet50mod, trainloader=train_loader, testloader=test_loader, epochs=200, lr=3e-3)

## Definición del modelo (EfficientNetV2-M Modificado)

In [None]:
import torch
import torchvision
import wandb
import time

from torchmetrics import Accuracy
from tqdm import tqdm
from data import data_loaders

# The folder name must match the directory created by the split cell exactly
# ('FloreView_split'); paths are case-sensitive on Linux/macOS file systems.
train_dataloader, test_dataloader = data_loaders('FloreView_split', batch_size=24, num_workers=4)

print(f"Total training batches: {len(train_dataloader)}")

class Model2(torch.nn.Module):
    """EfficientNetV2-M backbone (final classifier removed) followed by a three-layer linear head.

    Args:
        outputs: Number of output classes (size of the last linear layer).
        pretrained: If True, load the ImageNet weights ``IMAGENET1K_V1`` (the set the
            legacy ``pretrained=True`` flag selected); if False, start from random weights.
        freeze: If True, the backbone parameters are excluded from gradient updates.
    """

    # The constructor must be spelled with double underscores; otherwise Python never
    # calls it and the module ends up without any layers.
    def __init__(self, outputs = 39, pretrained = True, freeze = False):
        super().__init__()

        # `weights=` replaces the deprecated `pretrained=` argument of torchvision.
        weights = torchvision.models.EfficientNet_V2_M_Weights.IMAGENET1K_V1 if pretrained else None
        efficientnet = torchvision.models.efficientnet_v2_m(weights=weights)
        # Keep every child module except the last one (the original classifier).
        self.efficientnet = torch.nn.Sequential(*list(efficientnet.children())[:-1])

        if freeze:
            for param in self.efficientnet.parameters():
                param.requires_grad = False

        self.fc = torch.nn.Linear(1280, 512)
        self.fc1 = torch.nn.Linear(512, 256)
        self.fc2 = torch.nn.Linear(256, outputs)

    def forward(self, batch):
        """Return the class logits for a batch of images."""
        batch = self.efficientnet(batch)
        # Flatten the backbone output to (batch, features) for the linear head.
        batch = batch.view(batch.shape[0], -1)
        batch = self.fc(batch)
        batch = self.fc1(batch)
        batch = self.fc2(batch)
        return batch

In [None]:
def validate_one_epoch(model, dataloader, criterion, device, epoch, num_classes=39):
    """Evaluate ``model`` on ``dataloader`` and log loss and accuracy to W&B.

    Args:
        model: Module mapping a batch of inputs to class logits.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(logits, targets)`` returning a scalar
            tensor; it is treated as the mean loss of the batch.
        device: Device the batches are moved to.
        epoch: Epoch index, logged as ``val/epoch``.
        num_classes: Number of classes for the accuracy metric.

    Returns:
        The mean validation loss per sample.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    accuracy = Accuracy(num_classes=num_classes, average='micro', task='multiclass').to(device)
    model.eval()
    loss_sum = 0.0
    sample_count = 0

    with torch.no_grad():
        for x, y in tqdm(dataloader):
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y)
            accuracy.update(y_hat, y)  # accumulate over the whole loader
            # Weight each batch by its size so a smaller last batch does not
            # count as much as a full one.
            batch_size = x.size(0)
            loss_sum += loss.item() * batch_size
            sample_count += batch_size

    if sample_count == 0:
        raise ValueError("validate_one_epoch received an empty dataloader")

    avg_loss = loss_sum / sample_count
    avg_acc = float(accuracy.compute())  # accuracy over every sample seen above

    wandb.log({
        "val/loss": avg_loss,
        "val/epoch": epoch,
        "val/accuracy": avg_acc
    })
    return avg_loss

In [None]:
def train_one_epoch(model, dataloader, criterion, optimizer, device, epoch):
    """Run one optimisation pass over ``dataloader``.

    For every batch the loss, the global L2 norm of the gradients and a step
    counter (``epoch * len(dataloader) + batch index``) are logged to W&B.

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    loss_sum = 0.0

    progress = tqdm(dataloader, desc=f"Train Epoch {epoch}")
    for step_in_epoch, (inputs, targets) in enumerate(progress):
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Global gradient norm: square root of the summed squared per-parameter L2 norms.
        squared_norms = [
            param.grad.data.norm(2).item() ** 2
            for param in model.parameters()
            if param.grad is not None
        ]
        grad_norm = sum(squared_norms, 0.0) ** 0.5

        loss_value = batch_loss.item()
        loss_sum += loss_value
        wandb.log({
            "train/batch_loss": loss_value,
            "train/grad_norm": grad_norm,
            "train/step": epoch * len(dataloader) + step_in_epoch
        })

    return loss_sum / len(dataloader)

In [None]:
def fit(model, trainloader, validloader, epochs, lr, num_classes=39):
    """Train ``model`` with early stopping on the validation loss.

    Each epoch calls ``train_one_epoch`` and ``validate_one_epoch``. The weights with
    the lowest validation loss are written to ``best_model.pth`` and training stops
    after 10 consecutive epochs without improvement.

    NOTE(review): this definition shadows the earlier ``fit`` of this notebook, whose
    third parameter is named ``testloader``.

    Args:
        model: Module to train.
        trainloader: Loader with the training batches.
        validloader: Loader with the validation batches.
        epochs: Maximum number of epochs.
        lr: Learning rate for Adam.
        num_classes: Number of target classes; only recorded in the W&B run config here.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    # Best-model tracking / early stopping state
    best_model_path = "best_model.pth"
    patience, no_improve = 10, 0
    best_val_loss = float("inf")

    # SECURITY: the API key must not live in the notebook. wandb.login() picks it up
    # from the WANDB_API_KEY environment variable or a previous `wandb login`.
    # Any key that was ever committed here should be considered leaked and rotated.
    wandb.login()

    # Describe the run with the values actually used instead of constants copied
    # from another experiment.
    architecture = type(model).__name__
    wandb.init(
        project="camera-identification",
        name=f"finetuning_{architecture.lower()}",
        config={
            "dataset": "FloreView",
            "architecture": architecture,
            "batch_size": getattr(trainloader, "batch_size", None),
            "epochs": epochs,
            "optimizer": "Adam",
            "learning_rate": lr,
            "num_classes": num_classes,
        })

    start_time = time.time()
    try:
        for epoch in range(1, epochs + 1):
            train_loss = train_one_epoch(model, trainloader, criterion, optimizer, device, epoch)
            wandb.log({"train/loss": train_loss, "train/epoch": epoch})

            val_loss = validate_one_epoch(model, validloader, criterion, device, epoch)

            print(f"Epoch {epoch}/{epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                no_improve = 0
                torch.save(model.state_dict(), best_model_path)  # save best model
            else:
                no_improve += 1
                if no_improve >= patience:
                    print("Early stopping triggered!")
                    break
    finally:
        # Close the W&B run even if training is interrupted or fails.
        wandb.finish()

    total_time = time.time() - start_time
    print(f"Total training time: {total_time:.1f}s")

# Entry point: train the EfficientNetV2-M based model. The guard needs the
# double-underscore names; `_name_` is undefined and raises NameError.
if __name__ == '__main__':
    model = Model2()
    fit(model, train_dataloader, test_dataloader, epochs=200, lr=1e-3)
    print("Training complete.")
    print("Model saved as 'best_model.pth'.")