## Instalación de librerías y packages

In [1]:
!pip install wandb -qU
!pip install torcheval-nightly
!pip install torchinfo
!pip install datasets
!pip install torchmetrics
!pip install onnxruntime
!pip install onnxscript
!pip install onnx

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.1/309.1 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torcheval-nightly
  Downloading torcheval_nightly-2024.7.31-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval_nightly-2024.7.31-py3-none-any.whl (192 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.4/192.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torcheval-nightly
Successfully installed torcheval-nightly-2024.7.31
Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-no

Importación de librerías y gpu

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import wandb
import time
import onnxruntime
from google.colab import files
from torchinfo import summary
from torchmetrics import Accuracy
device = "cuda" if torch.cuda.is_available() else "cpu"

## Carga del dataset y pre-procesamiento de imágenes



Se usará el dataset creado a partir de las imágenes recopiladas sobre los materiales escogidos para el reconocimiento, este fue clasificado, organizado y publicado en la plataforma **HuggingFace**

In [3]:
from datasets import load_dataset

dataset = load_dataset('Sneider-Exe/Material_classification_2U')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/2.67k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [4]:
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# Definir transformaciones
transform = transforms.Compose([
    transforms.ToTensor(), #transforma la imagen a tensor
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) #normaliza la imagen (rgb-media/desviación)
])

transformImage = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(), #transforma la imagen a tensor
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) #normaliza la imagen (rgb-media/desviación)
])

# Crear una clase de dataset personalizada de datos en hugging a datos compatibles pytorch
class CustomDataset(Dataset):
    def __init__(self, hf_dataset, transform=None):
        self.dataset = hf_dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        item = self.dataset[idx]
        image = item['image']
        label = item['label']

        if self.transform:
            image = self.transform(image)

        return image, label

# Crear datasets para entrenamiento y prueba
trainset = CustomDataset(dataset['train'], transform=transform)
testset = CustomDataset(dataset['test'], transform=transform)

# Crear DataLoaders en lotes
batch_size = 64
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)



## Definición del modelo (Resnet50 Modificado)



In [5]:
class Model(torch.nn.Module):
  # se congelan las capas convolucionales y los pesos pre entrenados se mantienen
    def __init__(self, outputs = 5, pretrained = True, freeze = True):
      super().__init__()

      resnet50 = torchvision.models.resnet50(pretrained = pretrained)
      # se descargó resnet50 pre-entrenado
      self.resnet50 = torch.nn.Sequential(*list(resnet50.children())[:-1])

      if freeze:
        for param in self.resnet50.parameters():
          param.requires_grad=False
      self.fc = torch.nn.Linear(2048, 512)
      self.fc1 = torch.nn.Linear(512, 256)
      self.fc2 = torch.nn.Linear(256, 5)



    def forward(self, batch):
      batch = self.resnet50(batch)
      batch = batch.view(batch.shape[0], -1)
      batch = self.fc(batch)
      batch = self.fc1(batch)
      batch = self.fc2(batch)
      return batch


## Implementación y entrenamiento

Definición de la función de entrenamiento

In [17]:
def fit(model, trainloader, testloader, epochs, lr):

    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    scaler = torch.amp.GradScaler()

    #Definición para el mejor modelo
    best_val_accuracy = 0.0
    best_model_path = 'best_model.pth'

    wandb.login(key="c1f88d30aa7c210b3af22b7c2e3c81c62c623319")
    wandb.init(
        project="Clasificación de Materiales"
    )
    accuracy = Accuracy(num_classes=5, average='micro',task='multiclass').to(device)  # Micro promedio para el accuracy
    accuracy_class = Accuracy(num_classes=5, average=None,task='multiclass').to(device)

    #Criterios de convergencia
    patience = 10
    best_loss = float('inf')
    epochs_without_improvement = 0
    convergence_start_time = time.time()

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for x, y in trainloader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()


            with torch.autocast(device_type=device, dtype=torch.float16):
                y_hat = model(x)
                loss = criterion(y_hat, y) #es un tensor escalar loss

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss +=loss.item()*x.size(0)#.item convierte el tensor en un num estandar
            #size(0) es el tamaño del primer eje de x -> # de muestras - para calcular el loss por lote

        # Calcular las métricas
        accT = accuracy(y_hat, y)
        accTC= accuracy_class(y_hat, y)

        wandb.log({"epoch": epoch, "accuracyTraining": accT, "lossTraining": train_loss})

        start_time_testing = time.time()
        model.eval()
        val_loss = 0.0

        for x, y in testloader:
            x, y = x.to(device), y.to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y)
            val_loss+=loss.item()*x.size(0)

        end_time_testing = time.time()
        total_time = end_time_testing - start_time_testing
        iterations_per_second_testing = epochs / total_time #Cálculo de iteraciones por segundo para el testing loop

        if val_loss < best_loss:
          best_loss = val_loss
          epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print("Convergence reached.")
            break

        accV = accuracy(y_hat, y)
        accVC = accuracy_class(y_hat, y)

        if accV > best_val_accuracy:
          best_val_accuracy = accV
          torch.save(model.state_dict(), best_model_path)
          print(f'Improved model saved on epoch {epoch+1} with accuracy: {accV:.4f}')

        wandb.log({"epoch": epoch, "accuracyValidation": accV, "lossValidation": val_loss})

        print(f"Epoch {epoch+1}/{epochs} training_loss {train_loss:.5f} val_loss {val_loss:.5f} "
              f"training_accuracy {accT:.5f} val_accuracy {accV:.5f} brick_T_accuracy {accTC[0].item():.5f} metal_T_accuracy {accTC[1].item():.5f} "
              f"paper_T_accuracy {accTC[2].item():.5f} plastic_T_accuracy {accTC[3].item():.5f} wood_T_accuracy {accTC[4].item():.5f} "
              f"brick_V_accuracy {accVC[0].item():.5f} metal_V_accuracy {accVC[1].item():.5f} "
              f"paper_V_accuracy {accVC[2].item():.5f} plastic_V_accuracy {accVC[3].item():.5f} wood_V_accuracy {accVC[4].item():.5f} ")
        print(f"Iterations per second for testing: {iterations_per_second_testing:.2f} ")

    wandb.finish()
    convergence_end_time = time.time()
    convergence_time = convergence_end_time - convergence_start_time
    print(f"Convergence time: {convergence_time:.2f} seconds")

Arquitectura del modelo e instanciamiento

In [18]:
resnet50mod = Model()
summary(resnet50mod, input_size=(1, 3, 512, 512))



Layer (type:depth-idx)                        Output Shape              Param #
Model                                         [1, 5]                    --
├─Sequential: 1-1                             [1, 2048, 1, 1]           --
│    └─Conv2d: 2-1                            [1, 64, 256, 256]         (9,408)
│    └─BatchNorm2d: 2-2                       [1, 64, 256, 256]         (128)
│    └─ReLU: 2-3                              [1, 64, 256, 256]         --
│    └─MaxPool2d: 2-4                         [1, 64, 128, 128]         --
│    └─Sequential: 2-5                        [1, 256, 128, 128]        --
│    │    └─Bottleneck: 3-1                   [1, 256, 128, 128]        (75,008)
│    │    └─Bottleneck: 3-2                   [1, 256, 128, 128]        (70,400)
│    │    └─Bottleneck: 3-3                   [1, 256, 128, 128]        (70,400)
│    └─Sequential: 2-6                        [1, 512, 64, 64]          --
│    │    └─Bottleneck: 3-4                   [1, 512, 64, 64]       

Entrenamiento y validación del modelo

In [19]:
fit(model=resnet50mod, trainloader=trainloader, testloader=testloader, epochs=30, lr=3e-3)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112743833333778, max=1.0…

Improved model saved on epoch 1 with accuracy: 0.2667
Epoch 1/30 training_loss 276.94327 val_loss 78.02407 training_accuracy 0.44643 val_accuracy 0.26667 brick_T_accuracy 1.00000 metal_T_accuracy 0.00000 paper_T_accuracy 0.00000 plastic_T_accuracy 0.66667 wood_T_accuracy 0.87500 brick_V_accuracy 1.00000 metal_V_accuracy 0.00000 paper_V_accuracy 0.00000 plastic_V_accuracy 0.33333 wood_V_accuracy 0.00000 
Iterations per second for testing: 86.71 
Improved model saved on epoch 2 with accuracy: 0.3333
Epoch 2/30 training_loss 343.86388 val_loss 228.96881 training_accuracy 0.44643 val_accuracy 0.33333 brick_T_accuracy 0.00000 metal_T_accuracy 0.00000 paper_T_accuracy 1.00000 plastic_T_accuracy 1.00000 wood_T_accuracy 0.25000 brick_V_accuracy 0.00000 metal_V_accuracy 1.00000 paper_V_accuracy 0.66667 plastic_V_accuracy 0.00000 wood_V_accuracy 0.00000 
Iterations per second for testing: 77.96 
Epoch 3/30 training_loss 933.53374 val_loss 233.15044 training_accuracy 0.41071 val_accuracy 0.33333 

VBox(children=(Label(value='0.022 MB of 0.022 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracyTraining,▁▁▁▂▄▂▅▅█▇▇▇████████████████
accuracyValidation,▂▂▂▁▄▄▄▅▆▇██▇█▇▇██▇▇██▇▇▇▇▇
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
lossTraining,▃▄███▃▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lossValidation,▂▅▅█▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracyTraining,1.0
accuracyValidation,0.9
epoch,27.0
lossTraining,0.19514
lossValidation,11.1695


Convergence time: 41.23 seconds


Comprobación del mejor modelo guardado

In [22]:
best_model = Model()
best_model.to(device)
best_model_path = 'best_model.pth'
criterion = torch.nn.CrossEntropyLoss()

best_model.load_state_dict(torch.load('best_model.pth', weights_only=True))

accuracy = Accuracy(num_classes=5, average='micro',task='multiclass').to(device)  # Micro promedio para el accuracy

best_model.eval()
val_loss = 0.0
for x, y in testloader:
          x, y = x.to(device), y.to(device)
          y_hat = best_model(x)

          loss = criterion(y_hat, y)
          val_loss+=loss.item()*x.size(0)
accV = accuracy(y_hat, y)

print(f"val_accuracy {accV:.5f}")

val_accuracy 1.00000


## Conexión del modelo para el deployment web

In [24]:
model = best_model
model.cpu()
input = torch.randn(1, 3, 512, 512)

input_names = ['actual_input_1']+["learned_%d"% i for i in range(16)]
output_names = ['output_1']

torch.onnx.export(model, input, "model.onnx", verbose=True, input_names=input_names, output_names=output_names)

ort_session = onnxruntime.InferenceSession("model.onnx", providers=['CPUExecutionProvider'])

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

onnxruntime_input = {ort_session.get_inputs()[0].name: to_numpy(input)}

onnxruntime_outputs = ort_session.run(None, onnxruntime_input)

print("Resultados de la inferencia:", onnxruntime_outputs[0].shape[0])

model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient calculations
    torch_outputs = model(input)
torch_outputs_np = torch_outputs.detach().numpy()

assert len(torch_outputs_np) == len(onnxruntime_outputs[0])

for torch_output, onnxruntime_output in zip(torch_outputs_np, onnxruntime_outputs):
    torch.testing.assert_close(torch_output, onnxruntime_output[0])

print("PyTorch and ONNX Runtime output matched!")
print(f"Output length: {len(onnxruntime_outputs)}")
print(f"Sample output: {onnxruntime_outputs}")

Resultados de la inferencia: 1
PyTorch and ONNX Runtime output matched!
Output length: 1
Sample output: [array([[ 1.8588467, -3.3509178,  1.3225751, -3.8026237,  8.652672 ]],
      dtype=float32)]


In [25]:
torch.save(best_model.state_dict(), "model.bin")

In [26]:
from transformers import PretrainedConfig

class ModelConfig(PretrainedConfig):
    model_type = "resnet"
    outputs = 5

    def __init__(self, outputs=5, **kwargs):
        super().__init__(**kwargs)
        self.outputs = outputs

config = ModelConfig(outputs=5)
config.save_pretrained("./")

In [27]:
import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Cargar el modelo ONNX
model_fp32 = 'model.onnx'
model_quant = 'model_quantized.onnx'

# Cuantizar el modelo
quantize_dynamic(model_fp32, model_quant, weight_type=QuantType.QInt8)



## Estadísticas y pruebas del modelo

Prueba del modelo con una imagen extraída de internet

In [42]:
image_path = '/content/metal_collar.jpg'
image = Image.open(image_path)
best_model.eval()
image = transformImage(image)
image = image.unsqueeze(0)
y_hat = best_model(image)
softmax = torch.nn.Softmax(dim=1)
prob_tensor = softmax(y_hat)

labels = ['Brick', 'Metal', 'Paper', 'Plastic', 'Wood']
probabilities = prob_tensor.squeeze().tolist()  # Convertir el tensor a lista

print("Probabilities for each class:")
for label, prob in zip(labels, probabilities):
    print(f"{label}: {prob:.2f}")


Probabilities for each class:
Brick: 0.13
Metal: 0.84
Paper: 0.00
Plastic: 0.03
Wood: 0.00


Cálculo de memoria en MB utilizada durante la ejecución del modelo

In [31]:
# Liberar memoria previamente usada
torch.cuda.empty_cache()

# Medir memoria inicial
torch.cuda.reset_peak_memory_stats()
start_mem = torch.cuda.memory_allocated()

# Validación
best_model.eval()
for x, y in testloader:
          x, y = x.to(device), y.to(device)

# Medir memoria después de la ejecución
end_mem = torch.cuda.memory_allocated()
max_mem = torch.cuda.max_memory_allocated()

# Conversión de bytes a MB
end_mem_mb = (end_mem - start_mem) / (1024 ** 2)
max_mem_mb = max_mem / (1024 ** 2)

print(f"Memory used after execution: {end_mem_mb:.2f} MB")
print(f"Peak memory used during execution: {max_mem_mb:.2f} MB")

Memory used after execution: 22.50 MB
Peak memory used during execution: 327.41 MB


Cálculo de FLOPs y conversión a GFLOPs

In [32]:
model_summary = summary(resnet50mod, input_size=(1, 3, 512, 512))
FLOPs = model_summary.total_mult_adds / 1e9
print(f"Total FLOPs: {FLOPs:.3f} GFLOPs")

Total FLOPs: 21.354 GFLOPs
