In [1]:
import torch
import numpy as np
from tqdm import tqdm
import torch.optim as optim
from src.trainer import Trainer
import matplotlib.pyplot as plt
from src.loss import YoloLoss, YoloLossV1
from src.dataset_rob import DroneDetection
from src.model import YoloTiny, count_parameters
from src.utils import display_stats, calculate_map, display_train_eval_batch

In [2]:
DATA_PATH = "/Volumes/Lexar/ML DATA/Detection Dataset/Robflow"
OPTIM_PATH = "models/yolo_optimizer_rob.pth"
MODEL_WEIGHTS_PATH = "models/yolo_model_rob.pth"
STATS_PATH = "stats/yoloy_model_stats_rob.npz"
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

In [3]:
# Training parameters
IM_SIZE = 448
threshold = 0.5

LR = 1e-4
EPOCHS = 50
FACTOR = 0.5
PATIENCE = 5
BATCH_SIZE = 32
NUM_WORKERS = 0
EARLY_STOPPING = PATIENCE * 2

In [4]:
# Model parameters
f = 16
n_reg = 5
p_box = 2
d_fc = 496
n_classes = 2
cell_size = 7
out_channels = (cell_size * cell_size) * (n_classes + n_reg * p_box)

In [5]:
model = YoloTiny(in_channels=3, out_channels=out_channels, f = f, d_fc = d_fc, dropt=0.2, im_size=IM_SIZE)
print(f"Paramètres : {count_parameters(model):,}")

Paramètres : 25,573,132


In [6]:
trainer = Trainer(model=model, threshold=threshold, batch_size=BATCH_SIZE, device=device, B=p_box, C=n_classes, S=cell_size)
criterion = YoloLoss(n_cell=cell_size, n_box=p_box, n_classes=n_classes)
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=FACTOR, patience=PATIENCE,
)
RobDroneDetectionDataset = DroneDetection(data_path=DATA_PATH, i_size=IM_SIZE, cell_size=cell_size, n_box=p_box, n_classes=n_classes)
train_loader, valid_loader = RobDroneDetectionDataset.load_data(batch_size = BATCH_SIZE, n_workers=NUM_WORKERS)

In [7]:
# Lists to store the training and validation statistics
train_losses = []
valid_losses = []
valid_map = []
epochs_mAP = 0
epochs_mAP_50 = 0
epochs_mAP_per_class = 0
best_val_loss = float('inf')
current_patience_step = 0
test_visual_batch = next(iter(valid_loader))

In [8]:
pbar = tqdm(range(EPOCHS), desc="Epochs: ")
# Training loop
for ep in pbar:
    # Set model to training mode and reset statistics at the start of each epoch
    model.train()
    trainer.init_params()
    for data_train in train_loader:
        # Perform training steps
        trainer.train_step(data_train, criterion, optimizer)
    
    epochs_train_loss = trainer.train_loss / len(train_loader)
    # Store training stats for this epoch
    train_losses.append(epochs_train_loss)
    is_compute_map = (ep + 1)%5 == 0
    with torch.no_grad():
        model.eval()
        if is_compute_map:
            images, target = test_visual_batch
            images = images.to(device)
            target = target.to(device)
            predictions = model(images)
            display_train_eval_batch(images, predictions, target, threshold=threshold, im_size=IM_SIZE)

        for eval_batch_idx, data_valid in enumerate(valid_loader):
            trainer.val_step(data_valid, criterion, is_compute_map)
    epochs_val_loss = trainer.val_loss / len(valid_loader)
    scheduler.step(epochs_val_loss)
    if is_compute_map:
        try:
            pred_boxes = torch.cat(trainer.pred_boxes, dim=0)
            true_boxes = torch.cat(trainer.true_boxes, dim=0)
        except RuntimeError as e:
            # If concatenation fails due to shape mismatch, handle accordingly
            print(f"Shape mismatch error: {e}")
            print("Tensor shapes in pred_boxes:", [t.shape for t in trainer.pred_boxes])
            print("Tensor shapes in true_boxes:", [t.shape for t in trainer.true_boxes])
            # You might need to pad or reshape tensors here
            pred_boxes = torch.stack(trainer.pred_boxes, dim=0)
            true_boxes = torch.stack(trainer.true_boxes, dim=0)
            
        results = calculate_map(pred_boxes, true_boxes, n_cell=cell_size, n_classes=n_classes, n_box=p_box)
        epochs_mAP = results['map'].item()
        epochs_mAP_50 = results['map_50'].item()
        epochs_mAP_per_class = results['map_per_class']
        valid_map.append(epochs_mAP)
        
    valid_losses.append(epochs_val_loss)

    pbar.set_postfix({
        'train_loss': f'{epochs_train_loss:.4f}',
        'val_loss': f'{epochs_val_loss:.4f}',
        'best_val_loss': f'{best_val_loss:.4f}',
        'val_AP': f'{epochs_mAP:.4f}',
        'val_mAP@50': f'{epochs_mAP_50:.4f}',
        'val_mAP@perclass': f'{epochs_mAP_per_class}'
    })
    
    if epochs_val_loss < best_val_loss:
        best_val_loss = epochs_val_loss
        current_patience_step = 0
        torch.save(model.state_dict(), MODEL_WEIGHTS_PATH)
    else:
        current_patience_step += 1
        if current_patience_step >= EARLY_STOPPING:
            print("Early stopping triggered")
            model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH))
            break
            
print("Training completed.")

Epochs:   8%|▊         | 4/50 [58:37<11:14:12, 879.41s/it, train_loss=202.5970, val_loss=148.8749, best_val_loss=169.2296, val_AP=0.0000, val_mAP@50=0.0000, val_mAP@perclass=0] 


RuntimeError: MPS backend out of memory (MPS allocated: 8.05 GB, other allocations: 863.70 MB, max allowed: 9.07 GB). Tried to allocate 392.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [None]:
# Save the trained model parameters
torch.save(model.state_dict(), MODEL_WEIGHTS_PATH)
torch.save(optimizer.state_dict(), OPTIM_PATH)
# Save training and validation statistics to a file
np.savez(STATS_PATH, train_losses=np.array(train_losses), valid_losses=np.array(valid_losses), allow_pickle=True)

In [None]:
# Load model training statistics (accuracy and loss)
training_stats  = np.load(STATS_PATH)
# Display the training statistics
display_stats(training_stats)

In [None]:
plt.plot(valid_map)
plt.title('Validation mAp')
plt.show()

In [None]:
with torch.no_grad():
    trainer.init_params()
    model.eval()
    for eval_batch_idx, data_valid in enumerate(valid_loader):
        trainer.val_step(data_valid, criterion, True)
    epochs_val_loss = trainer.val_loss / len(valid_loader)
    try:
        pred_boxes = torch.cat(trainer.pred_boxes, dim=0)
        true_boxes = torch.cat(trainer.true_boxes, dim=0)
    except RuntimeError as e:
        # If concatenation fails due to shape mismatch, handle accordingly
        print(f"Shape mismatch error: {e}")
        print("Tensor shapes in pred_boxes:", [t.shape for t in trainer.pred_boxes])
        print("Tensor shapes in true_boxes:", [t.shape for t in trainer.true_boxes])
        # You might need to pad or reshape tensors here
        pred_boxes = torch.stack(trainer.pred_boxes, dim=0)
        true_boxes = torch.stack(trainer.true_boxes, dim=0)
        
    results = calculate_map(pred_boxes, true_boxes, n_cell=cell_size, n_classes=n_classes, n_box=p_box)
    epochs_mAP = results['map'].item()
    epochs_mAP_50 = results['map_50'].item()
    epochs_mAP_per_class = results['map_per_class']
# Print evaluation results
print(f'\nTest set: Average loss: {epochs_val_loss:.4f}, mAp: {epochs_mAP}, mAp@50: {epochs_mAP_50}, mAp/class: {epochs_mAP_per_class}\n')