In [None]:
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")

model.train(data="dataset_split", epochs=50, imgsz=224, augment=True)

In [None]:
model.val()

In [88]:
from ultralytics import YOLO
import os

# Carpeta con imágenes
IMAGE_FOLDER = "Images_test"
model = YOLO("runs/classify/train/weights/best.pt")

# Obtener todas las imágenes de la carpeta (jpg, png)
imagenes = [
    f for f in os.listdir(IMAGE_FOLDER) if f.lower().endswith((".jpg", ".jpeg", ".png"))
]

for img_name in imagenes:
    img_path = os.path.join(IMAGE_FOLDER, img_name)
    print("🔍 Probando en imagen:", img_name)

    results = model.predict(source=img_path)

    for r in results:
        # Obtener la clase y la confianza top1
        clase = r.names[r.probs.top1]
        conf = r.probs.top1conf.item()

        if clase == "bird":
            print(f"✅ Hay un ave con confianza {conf:.2f}")
        else:
            print(f"❌ No hay ave (confianza {conf:.2f})")

    print("-" * 40)  # Separador entre imágenes

🔍 Probando en imagen: 10.jpeg

image 1/1 d:\2025\Tesis\ML_PYNQ\Images_test\10.jpeg: 224x224 bird 1.00, no_bird 0.00, 41.4ms
Speed: 38.9ms preprocess, 41.4ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)
✅ Hay un ave con confianza 1.00
----------------------------------------
🔍 Probando en imagen: 11.jpg

image 1/1 d:\2025\Tesis\ML_PYNQ\Images_test\11.jpg: 224x224 bird 1.00, no_bird 0.00, 9.0ms
Speed: 14.8ms preprocess, 9.0ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)
✅ Hay un ave con confianza 1.00
----------------------------------------
🔍 Probando en imagen: 12.jpg

image 1/1 d:\2025\Tesis\ML_PYNQ\Images_test\12.jpg: 224x224 bird 0.87, no_bird 0.13, 8.3ms
Speed: 13.5ms preprocess, 8.3ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)
✅ Hay un ave con confianza 0.87
----------------------------------------
🔍 Probando en imagen: 13.jpg

image 1/1 d:\2025\Tesis\ML_PYNQ\Images_test\13.jpg: 224x224 bird 1.00, no_bird 0.00, 8.2ms
Sp

In [3]:
model = YOLO("runs/classify/train/weights/best.pt")
model.info()  # Mostrar información del modelo

YOLOv8n-cls summary: 56 layers, 1,440,850 parameters, 0 gradients, 3.4 GFLOPs


(56, 1440850, 0, 3.3619968)

In [7]:
import torch
from ultralytics import YOLO

model = YOLO("runs/classify/train/weights/best.pt")
pt_model = model.model  

for name, param in pt_model.state_dict().items():
    print(name, param.shape)

model.0.conv.weight torch.Size([16, 3, 3, 3])
model.0.bn.weight torch.Size([16])
model.0.bn.bias torch.Size([16])
model.0.bn.running_mean torch.Size([16])
model.0.bn.running_var torch.Size([16])
model.0.bn.num_batches_tracked torch.Size([])
model.1.conv.weight torch.Size([32, 16, 3, 3])
model.1.bn.weight torch.Size([32])
model.1.bn.bias torch.Size([32])
model.1.bn.running_mean torch.Size([32])
model.1.bn.running_var torch.Size([32])
model.1.bn.num_batches_tracked torch.Size([])
model.2.cv1.conv.weight torch.Size([32, 32, 1, 1])
model.2.cv1.bn.weight torch.Size([32])
model.2.cv1.bn.bias torch.Size([32])
model.2.cv1.bn.running_mean torch.Size([32])
model.2.cv1.bn.running_var torch.Size([32])
model.2.cv1.bn.num_batches_tracked torch.Size([])
model.2.cv2.conv.weight torch.Size([32, 48, 1, 1])
model.2.cv2.bn.weight torch.Size([32])
model.2.cv2.bn.bias torch.Size([32])
model.2.cv2.bn.running_mean torch.Size([32])
model.2.cv2.bn.running_var torch.Size([32])
model.2.cv2.bn.num_batches_tracked 

In [11]:
import torch
from PIL import Image
from torchvision import transforms

# Imagen de prueba
img_path = "Images_test/paseo-en-catamaran-insonoro.jpg"
img = Image.open(img_path).convert("RGB")

# Transformaciones que espera YOLO
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]  # Tamaño típico de entrada
)

input_tensor = transform(img).unsqueeze(0)  # Añadir batch dimension
# Inferencia
pt_model.eval()
with torch.no_grad():
    output = pt_model(input_tensor)  # output es un tuple: (logits, ...)

# En YOLOv8, el primer elemento del tuple suele ser logits
logits = output[0]

# Ahora sí aplicas softmax
probs = torch.softmax(logits, dim=1)
top1_prob, top1_class = torch.max(probs, dim=1)

print("Clase predicha:", top1_class.item())
print("Confianza:", top1_prob.item())

Clase predicha: 1
Confianza: 0.7302632331848145


In [15]:
print(pt_model)

ClassificationModel(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (1): Conv(
      (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act): SiLU(inplace=True)
    )
    (2): C2f(
      (cv1): Conv(
        (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (cv2): Conv(
        (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
   

In [1]:
from PIL import Image
img = Image.open("Images_test/paseo-en-catamaran-insonoro.jpg")
print(img.mode)

RGB


In [90]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN, self).__init__()
    
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, padding=1)        
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.fc1 = nn.Linear(128 * 4 * 4, 100)  # Ajusta según el tamaño de entrada
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(100, 2) # 2 clases: ave y no ave
    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = self.pool1(x)

        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = F.relu(self.conv3(x))
        x = self.pool3(x)

        x = F.relu(self.conv4(x))
        x = self.pool4(x)
        
        x = x.view(x.size(0), -1)  
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x

In [91]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Transformaciones: ajustar tamaño y normalizar
transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.3, saturation=0.3),
        transforms.RandomResizedCrop(64, scale=(0.8, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)
val_dataset = ImageFolder(root="dataset_split/val", transform=val_transform)


train_dataset = ImageFolder(root="dataset_split/train", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [92]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CustomCNN().to(device)

criterion = nn.CrossEntropyLoss()  # Para clasificación multiclase
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", factor=0.5, patience=3
)

In [93]:
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    val_acc = 100 * correct / total
    print(
        f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss/len(train_loader):.4f} - Acc: {100*correct/total:.2f}%"
    )

    scheduler.step(val_acc)

Epoch 1/30 - Loss: 0.6842 - Acc: 52.88%
Epoch 2/30 - Loss: 0.6557 - Acc: 63.62%
Epoch 3/30 - Loss: 0.5953 - Acc: 70.38%
Epoch 4/30 - Loss: 0.5911 - Acc: 68.38%
Epoch 5/30 - Loss: 0.5354 - Acc: 73.88%
Epoch 6/30 - Loss: 0.5505 - Acc: 73.25%
Epoch 7/30 - Loss: 0.5504 - Acc: 76.75%
Epoch 8/30 - Loss: 0.5485 - Acc: 75.25%
Epoch 9/30 - Loss: 0.5129 - Acc: 76.50%
Epoch 10/30 - Loss: 0.5067 - Acc: 74.88%
Epoch 11/30 - Loss: 0.5057 - Acc: 76.62%
Epoch 12/30 - Loss: 0.4708 - Acc: 77.62%
Epoch 13/30 - Loss: 0.4845 - Acc: 76.62%
Epoch 14/30 - Loss: 0.4774 - Acc: 78.88%
Epoch 15/30 - Loss: 0.4553 - Acc: 79.50%
Epoch 16/30 - Loss: 0.4535 - Acc: 79.88%
Epoch 17/30 - Loss: 0.4496 - Acc: 79.75%
Epoch 18/30 - Loss: 0.4326 - Acc: 80.62%
Epoch 19/30 - Loss: 0.4569 - Acc: 79.75%
Epoch 20/30 - Loss: 0.4239 - Acc: 81.75%
Epoch 21/30 - Loss: 0.4288 - Acc: 80.88%
Epoch 22/30 - Loss: 0.4350 - Acc: 80.50%
Epoch 23/30 - Loss: 0.4195 - Acc: 81.50%
Epoch 24/30 - Loss: 0.4263 - Acc: 81.12%
Epoch 25/30 - Loss: 0.406

In [94]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()


val_acc = 100 * correct / total
print(f"Validation Accuracy: {val_acc:.2f}%")

Validation Accuracy: 78.11%


In [95]:
import os
from PIL import Image

folder = "Images_test"
model.eval()

with torch.no_grad():
    for filename in os.listdir(folder):
        if filename.endswith((".jpg", ".png", ".jpeg")):  # filtra solo imágenes
            img_path = os.path.join(folder, filename)
            img = Image.open(img_path)
            img = transform(img).unsqueeze(0).to(device)  # agrega batch dimension

            output = model(img)
            pred_class = output.argmax(dim=1).item()

            print(f"{filename} --> Predicción: {train_dataset.classes[pred_class]}")

10.jpeg --> Predicción: bird
11.jpg --> Predicción: no_bird
12.jpg --> Predicción: bird
13.jpg --> Predicción: bird
14.jpg --> Predicción: bird
16.jpg --> Predicción: no_bird
17.jpg --> Predicción: no_bird
3.jpg --> Predicción: no_bird
349664072_639634714750284_7488197136792291295_n.jpg --> Predicción: bird
4.jpg --> Predicción: bird
5.jpg --> Predicción: no_bird
6.jpeg --> Predicción: bird
65a9d7da0d6bb119203b1c13.jpg --> Predicción: bird
7.jpeg --> Predicción: bird
8.jpg --> Predicción: bird
9.jpg --> Predicción: bird
images.jpg --> Predicción: no_bird
images_1.jpeg --> Predicción: bird
orig-1437426411440.jpg --> Predicción: bird
pantanos-scaled.jpg --> Predicción: bird
paseo-en-catamaran-insonoro.jpg --> Predicción: no_bird
