In [None]:
#!pip install torch torchvision opencv-python albumentations matplotlib scikit-learn pillow

In [111]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
import os
import numpy as np
import matplotlib as plt
import cv2
from pathlib import Path

In [None]:
# Notebook-level hyperparameters.
# NOTE(review): in the visible cells none of these names is read again; the
# training cell further down defines its own DATASET_DIR / BATCH_SIZE /
# IMG_SIZE / NUM_EPOCHS and setup_training() uses its own defaults. Confirm
# whether another cell still depends on them before removing or renaming.
img_size = 416
batch_size = 4
NUM_CLASSES = 2 
LEARNING_RATE = 0.001
num_epochs = 50

# Data paths
img_dir = 'all_dataset_images'  # Replace with the path to your images
label_dir = 'final_human_boat_dataset'  # Replace with the path to your annotations


In [None]:
class HumanBoatDataset(Dataset):
    """Images paired with YOLO-format text annotations.

    Every ``<stem>.jpg`` found directly inside ``images_dir`` is matched with
    ``labels_dir/<stem>.txt``. Each non-empty label line holding exactly five
    whitespace-separated fields is read as
    ``class_id x_center y_center width height``; lines with any other number
    of fields are skipped. A missing label file yields an empty target.

    Args:
        images_dir: Directory containing the ``.jpg`` images.
        labels_dir: Directory containing one ``.txt`` label file per image.
        img_size: Stored as ``self.img_size``; this class does not resize by
            itself (resizing is left to ``transform``).
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, images_dir, labels_dir, img_size=640, transform=None):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        self.img_size = img_size
        self.transform = transform

        # glob() yields entries in filesystem order, which differs between
        # machines; sorting keeps the index -> image mapping reproducible.
        self.image_files = sorted(self.images_dir.glob('*.jpg'))

    def __len__(self):
        """Return the number of images found in ``images_dir``."""
        return len(self.image_files)

    def _load_boxes(self, label_path):
        """Parse one label file into a ``(num_boxes, 5)`` float32 tensor.

        Columns are ``class_id, x_center, y_center, width, height`` exactly as
        written in the file. Returns a ``(0, 5)`` tensor when the file does not
        exist or contains no valid line.

        Raises:
            ValueError: If a five-field line contains a non-numeric value; the
                message names the file and line number.
        """
        label_path = Path(label_path)
        rows = []

        if label_path.exists():
            with open(label_path, 'r') as f:
                # Iterate the file lazily instead of materialising readlines().
                for line_no, line in enumerate(f, start=1):
                    parts = line.split()
                    if len(parts) != 5:
                        # Blank or malformed line: ignored, as before.
                        continue
                    try:
                        class_id = int(parts[0])
                        coords = [float(value) for value in parts[1:]]
                    except ValueError as exc:
                        raise ValueError(
                            f"{label_path}:{line_no}: invalid annotation {line.strip()!r}"
                        ) from exc
                    rows.append([class_id, *coords])

        if rows:
            return torch.tensor(rows, dtype=torch.float32)
        return torch.zeros((0, 5), dtype=torch.float32)

    def __getitem__(self, idx):
        """Return ``(image, boxes)`` for the image at position ``idx``.

        ``image`` is the RGB PIL image, or the result of ``transform`` when one
        was given. ``boxes`` is the tensor produced by ``_load_boxes``.
        """
        img_path = self.image_files[idx]

        # Image.open is lazy and keeps the file handle open; convert() loads
        # the pixels, so the handle can be released right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        boxes = self._load_boxes(self.labels_dir / f"{img_path.stem}.txt")

        if self.transform:
            image = self.transform(image)

        return image, boxes


In [115]:
class SimpleYOLO(nn.Module):
    """Small convolutional detector with a single 1x1 YOLO-style head.

    The backbone is four ``Conv3x3 -> BatchNorm -> ReLU`` stages with channel
    widths 64, 128, 256 and 512. The first three stages end in a 2x2 max-pool;
    the last one ends in an adaptive average pool that always produces a
    20x20 feature map. The head emits ``3 * (5 + num_classes)`` channels per
    grid cell (3 anchors, each with x, y, w, h, confidence and class scores).

    Args:
        num_classes: Number of object classes predicted per anchor.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.num_classes = num_classes

        # Simple backbone (swap in a YOLOv5 backbone if needed). Layers are
        # appended to one flat Sequential so their indices stay 0..15.
        channel_plan = (3, 64, 128, 256, 512)
        final_stage = len(channel_plan) - 2
        layers = []
        for stage, (c_in, c_out) in enumerate(zip(channel_plan[:-1], channel_plan[1:])):
            layers.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU())
            if stage == final_stage:
                # Fixed 20x20 grid whatever the input size; a 640x640 input
                # reaches this layer as 80x80 after the three max-pools.
                layers.append(nn.AdaptiveAvgPool2d((20, 20)))
            else:
                layers.append(nn.MaxPool2d(2))
        self.backbone = nn.Sequential(*layers)

        # YOLO head: 3 anchors per grid cell, each with 5 + num_classes values.
        anchors_per_cell = 3
        values_per_anchor = 5 + num_classes
        self.head = nn.Conv2d(512, anchors_per_cell * values_per_anchor, 1)

    def forward(self, x):
        """Return the raw head output for the image batch ``x``."""
        return self.head(self.backbone(x))

In [116]:
class YOLOLoss(nn.Module):
    """Simplified grid-based YOLO loss that actually depends on the predictions.

    The previous placeholder returned a fresh constant tensor, so
    ``loss.backward()`` never produced gradients for the model. This version
    computes a differentiable loss from the head output.

    Conventions (defined by this loss):
        * ``predictions`` has shape ``(B, A * (5 + C), H, W)``; channels are
          grouped per anchor as ``x, y, w, h, conf, class_0 .. class_{C-1}``.
          ``C`` is inferred from the channel count and ``num_anchors``.
        * ``targets`` is a sequence of ``B`` tensors of shape ``(N_i, 5)`` with
          rows ``class_id, x_center, y_center, width, height``; coordinates
          are clamped to ``[0, 1]``, i.e. treated as normalized to the image.
        * A box belongs to the grid cell containing its center. No anchor
          priors exist in this file, so every anchor slot of that cell is
          trained on the box.
        * ``sigmoid(x), sigmoid(y)`` are regressed to the center offset inside
          the cell and ``sigmoid(w), sigmoid(h)`` to the normalized size.

    The loss is ``BCE(objectness) + lambda_coord * MSE(box) + BCE(classes)``;
    the last two terms are only present when the batch contains boxes.

    Args:
        num_anchors: Number of anchors per grid cell in the predictions.
        lambda_coord: Weight of the box-regression term.
    """

    def __init__(self, num_anchors=3, lambda_coord=5.0):
        super().__init__()
        self.num_anchors = num_anchors
        self.lambda_coord = lambda_coord
        self.mse_loss = nn.MSELoss()
        self.bce_loss = nn.BCEWithLogitsLoss()

    def forward(self, predictions, targets):
        """Return the scalar loss for one batch.

        Raises:
            ValueError: If ``predictions`` is not 4-D, its channel count is
                not ``num_anchors * (5 + C)`` with ``C >= 1``, the number of
                target entries differs from the batch size, or a class id is
                outside ``[0, C)``.
        """
        if predictions.dim() != 4:
            raise ValueError(
                f"predictions must be 4-D (B, C, H, W), got shape {tuple(predictions.shape)}"
            )
        batch, channels, grid_h, grid_w = predictions.shape
        if channels % self.num_anchors != 0 or channels // self.num_anchors <= 5:
            raise ValueError(
                f"cannot split {channels} channels into {self.num_anchors} anchors "
                "of 5 + num_classes values"
            )
        attrs = channels // self.num_anchors
        num_classes = attrs - 5
        if len(targets) != batch:
            raise ValueError(
                f"expected {batch} target entries, got {len(targets)}"
            )

        # (B, A * attrs, H, W) -> (B, H, W, A, attrs)
        pred = predictions.reshape(batch, self.num_anchors, attrs, grid_h, grid_w)
        pred = pred.permute(0, 3, 4, 1, 2)
        obj_logits = pred[..., 4]
        obj_target = torch.zeros(obj_logits.shape, dtype=pred.dtype, device=pred.device)

        # Flatten the per-image boxes into one table plus a sample index.
        rows, sample_idx = [], []
        for i, boxes in enumerate(targets):
            boxes = torch.as_tensor(boxes, dtype=pred.dtype, device=pred.device)
            if boxes.numel() == 0:
                continue
            boxes = boxes.reshape(-1, 5)
            rows.append(boxes)
            sample_idx.append(
                torch.full((boxes.shape[0],), i, dtype=torch.long, device=pred.device)
            )

        if not rows:
            # No objects in the whole batch: only push objectness towards 0.
            return self.bce_loss(obj_logits, obj_target)

        boxes = torch.cat(rows, dim=0)
        b = torch.cat(sample_idx, dim=0)
        num_boxes = boxes.shape[0]

        cls = boxes[:, 0].long()
        if bool(((cls < 0) | (cls >= num_classes)).any()):
            raise ValueError(
                f"class ids must be in [0, {num_classes}), got values outside that range"
            )

        centers = boxes[:, 1:3].clamp(0.0, 1.0)
        sizes = boxes[:, 3:5].clamp(0.0, 1.0)
        gx = centers[:, 0] * grid_w
        gy = centers[:, 1] * grid_h
        # A center exactly on the right/bottom edge falls into the last cell.
        col = gx.long().clamp(max=grid_w - 1)
        row = gy.long().clamp(max=grid_h - 1)
        box_target = torch.stack(
            [gx - col.to(gx.dtype), gy - row.to(gy.dtype), sizes[:, 0], sizes[:, 1]],
            dim=1,
        )

        # Objectness: 1 for every anchor of a cell that owns a box, else 0.
        obj_target[b, row, col] = 1.0
        obj_loss = self.bce_loss(obj_logits, obj_target)

        cell_pred = pred[b, row, col]  # (num_boxes, A, attrs)

        box_pred = torch.sigmoid(cell_pred[..., :4])
        box_loss = self.mse_loss(box_pred, box_target.unsqueeze(1).expand_as(box_pred))

        cls_logits = cell_pred[..., 5:]
        cls_onehot = torch.zeros(
            (num_boxes, num_classes), dtype=pred.dtype, device=pred.device
        )
        cls_onehot[torch.arange(num_boxes, device=pred.device), cls] = 1.0
        cls_loss = self.bce_loss(
            cls_logits, cls_onehot.unsqueeze(1).expand_as(cls_logits)
        )

        return obj_loss + self.lambda_coord * box_loss + cls_loss

In [117]:
def collate_fn(batch):
    """
    Collate samples whose images share a shape but whose box counts differ.

    Images are stacked into one tensor; the per-image targets are returned
    unchanged as a tuple, in batch order.
    """
    image_seq, target_seq = tuple(zip(*batch))
    return torch.stack(image_seq), target_seq

In [118]:
# Training configuration
def setup_training(dataset_dir, batch_size=16, img_size=640, num_epochs=100,
                   num_classes=2, learning_rate=0.001, num_workers=4):
    """
    Build everything the training loop needs.

    Expects ``dataset_dir`` to contain ``train/images``, ``train/labels``,
    ``val/images`` and ``val/labels``.

    Args:
        dataset_dir: Root folder of the dataset.
        batch_size: Batch size for both loaders.
        img_size: Images are resized to ``img_size x img_size``.
        num_epochs: Accepted for backward compatibility; not used here (the
            caller owns the epoch loop).
        num_classes: Number of classes for the model head. Defaults to 2
            (0: bathers, 1: boats).
        learning_rate: Learning rate passed to Adam.
        num_workers: Worker processes per DataLoader. Pass 0 to load in the
            main process.

    Returns:
        Tuple ``(model, train_loader, val_loader, criterion, optimizer,
        scheduler, device)``.
    """

    # Same preprocessing for train and validation: resize, tensor conversion,
    # per-channel normalization.
    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Datasets
    train_dataset = HumanBoatDataset(
        images_dir=f"{dataset_dir}/train/images",
        labels_dir=f"{dataset_dir}/train/labels",
        img_size=img_size,
        transform=transform
    )

    val_dataset = HumanBoatDataset(
        images_dir=f"{dataset_dir}/val/images",
        labels_dir=f"{dataset_dir}/val/labels",
        img_size=img_size,
        transform=transform
    )

    # DataLoaders; collate_fn keeps the variable-length targets per image.
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    # Model
    model = SimpleYOLO(num_classes=num_classes)

    # Loss and optimizer; the learning rate drops by 10x every 30 scheduler steps.
    criterion = YOLOLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    # Use the GPU when available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    return model, train_loader, val_loader, criterion, optimizer, scheduler, device

In [119]:
def train_one_epoch(model, train_loader, criterion, optimizer, device):
    """
    Train the model for one epoch and return the mean batch loss.

    Args:
        model: Module to train; switched to train mode.
        train_loader: Sized iterable yielding ``(images, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer stepping the model parameters.
        device: Device the images and targets are moved to.

    Returns:
        Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``train_loader`` has no batches (previously this
            surfaced as a ZeroDivisionError after the loop).
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError("train_loader is empty: there are no batches to train on")

    model.train()
    total_loss = 0.0

    for batch_idx, (images, targets) in enumerate(train_loader):
        images = images.to(device)

        # Targets must live on the same device as the outputs for any loss
        # that actually reads them. A list/tuple of tensors is handed on as a
        # tuple; other target types are forwarded untouched.
        if torch.is_tensor(targets):
            targets = targets.to(device)
        elif isinstance(targets, (list, tuple)):
            targets = tuple(
                t.to(device) if torch.is_tensor(t) else t for t in targets
            )

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss

        if batch_idx % 10 == 0:
            print(f'Batch {batch_idx}/{num_batches}, Loss: {batch_loss:.4f}')

    return total_loss / num_batches

In [120]:
def validate(model, val_loader, criterion, device):
    """
    Evaluate the model on the validation loader and return the mean batch loss.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module to evaluate.
        val_loader: Sized iterable yielding ``(images, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            tensor.
        device: Device the images and targets are moved to.

    Returns:
        Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``val_loader`` has no batches (previously this surfaced
            as a ZeroDivisionError after the loop).
    """
    num_batches = len(val_loader)
    if num_batches == 0:
        raise ValueError("val_loader is empty: there are no batches to validate on")

    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        for images, targets in val_loader:
            images = images.to(device)

            # Keep targets on the same device as the outputs. A list/tuple of
            # tensors is handed on as a tuple; other types pass through.
            if torch.is_tensor(targets):
                targets = targets.to(device)
            elif isinstance(targets, (list, tuple)):
                targets = tuple(
                    t.to(device) if torch.is_tensor(t) else t for t in targets
                )

            outputs = model(images)
            total_loss += criterion(outputs, targets).item()

    return total_loss / num_batches

In [None]:
# Run settings
DATASET_DIR = "dataset_yolo"  # Folder created by the previous script
BATCH_SIZE = 8
IMG_SIZE = 640
NUM_EPOCHS = 50

# Build model, loaders, loss, optimizer, scheduler and pick the device
(model, train_loader, val_loader,
 criterion, optimizer, scheduler, device) = setup_training(
    dataset_dir=DATASET_DIR,
    batch_size=BATCH_SIZE,
    img_size=IMG_SIZE,
    num_epochs=NUM_EPOCHS,
)

print(f"Usando device: {device}")
print(f"Dataset train: {len(train_loader.dataset)} imagens")
print(f"Dataset val: {len(val_loader.dataset)} imagens")

# Training loop: one training pass and one validation pass per epoch; the
# weights are written to disk whenever the validation loss improves.
best_val_loss = float('inf')

for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
    print("-" * 50)

    train_loss = train_one_epoch(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    val_loss = validate(
        model=model,
        val_loader=val_loader,
        criterion=criterion,
        device=device,
    )

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

    print(f"Train Loss: {train_loss:.4f}")
    print(f"Val Loss: {val_loss:.4f}")

    # Keep only the best checkpoint seen so far
    if best_val_loss > val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_yolo_model.pth')
        print("Modelo salvo!")

Usando device: cpu
Dataset train: 2653 imagens
Dataset val: 1048 imagens

Epoch 1/50
--------------------------------------------------
