In [1]:
import os
import logging
import itertools
import torch
import sys
sys.path.append("./")
from torch.utils.data import DataLoader, ConcatDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR
from mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite
import mobilenetv1_ssd_config
from ssd import MatchPrior
from data_preprocessing import TrainAugmentation, TestTransform
from misc import str2bool, Timer, freeze_net_layers, store_labels
from voc_dataset import VOCDataset
from multibox_loss import MultiboxLoss

In [2]:
# Send INFO-level records to stdout. Without any configuration the root logger
# sits at WARNING, so the logging.info(...) calls in this notebook would be
# silently dropped.
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format="%(asctime)s - %(message)s")
# basicConfig does nothing when the root logger already has handlers, so set
# the level explicitly as well.
logging.getLogger().setLevel(logging.INFO)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    # Let cuDNN benchmark and pick convolution algorithms.
    torch.backends.cudnn.benchmark = True
    logging.info("Use Cuda.")

In [3]:
def train(loader, net, criterion, optimizer, device, debug_steps=100, epoch=-1):
    """Run one optimisation pass over ``loader`` and log windowed average losses.

    Args:
        loader: Iterable yielding ``(images, boxes, labels)`` batches; each
            element is moved to ``device`` with ``.to(device)``.
        net: Model called as ``net(images)``; must return
            ``(confidence, locations)``. It is switched to training mode.
        criterion: Callable invoked as
            ``criterion(confidence, locations, labels, boxes)`` that returns
            ``(regression_loss, classification_loss)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called once
            per batch.
        device: Device the batch tensors are moved to.
        debug_steps: A log line is emitted whenever the batch index is a
            non-zero multiple of this value. Values ``<= 0`` disable logging.
        epoch: Epoch number, used only in the log message.

    Returns:
        None.
    """
    net.train(True)
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    # Number of batches accumulated since the last log line. The first window
    # covers indices 0..debug_steps (debug_steps + 1 batches), so dividing by
    # debug_steps would overstate the average.
    batches_in_window = 0
    for i, (images, boxes, labels) in enumerate(loader):
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        confidence, locations = net(images)
        regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)  # TODO CHANGE BOXES
        loss = regression_loss + classification_loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
        batches_in_window += 1

        # Guard on debug_steps first so a value of 0 cannot hit the modulo.
        if debug_steps > 0 and i and i % debug_steps == 0:
            avg_loss = running_loss / batches_in_window
            avg_reg_loss = running_regression_loss / batches_in_window
            avg_clf_loss = running_classification_loss / batches_in_window
            logging.info(
                f"Epoch: {epoch}, Step: {i}, " +
                f"Average Loss: {avg_loss:.4f}, " +
                f"Average Regression Loss {avg_reg_loss:.4f}, " +
                f"Average Classification Loss: {avg_clf_loss:.4f}"
            )
            running_loss = 0.0
            running_regression_loss = 0.0
            running_classification_loss = 0.0
            batches_in_window = 0

In [4]:
def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1

        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss

        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num

In [11]:
if __name__ == "__main__":
    # ---- Configuration -----------------------------------------------------
    # NOTE(review): training and validation point at the same directory —
    # confirm this is intended.
    train_datasets_path = r"/home/zhangyouan/桌面/zya/dataset/681/good/VOCdevkit/VOC2007/"
    validation_datasets_path = r"/home/zhangyouan/桌面/zya/dataset/681/good/VOCdevkit/VOC2007/"
    # Short architecture tag; only used to build checkpoint file names. Kept
    # separate from `net` (the model object) so the two are never confused.
    net_name = "mb2-ssd-lite"
    lr = 1e-3
    momentum = 0.9
    weight_decay = 5e-4
    gamma = 0.1
    batch_size = 32
    num_epochs = 200
    validation_epochs = 5  # validate (and checkpoint) every N epochs
    mb2_width_mult = 1.0  # width multiplier for MobileNetV2 (hyperparameter that scales the model width)
    num_workers = 1
    checkpoint_folder = "/home/zhangyouan/桌面/zya/NN_net/network/SSD/IMX_681_ssd_mobilenet_git/pytorch/detection/gesture_detection/ipynb/models/"
    scheduler_name = "cosine"
    t_max = 120  # T_max value for the cosine annealing scheduler
    debug_steps = 100

    def create_net(num):
        """Build the MobileNetV2 SSD-Lite model for ``num`` classes."""
        return create_mobilenetv2_ssd_lite(num, width_mult=mb2_width_mult)

    # ---- Data --------------------------------------------------------------
    config = mobilenetv1_ssd_config
    train_transform = TrainAugmentation(config.image_size, config.image_mean, config.image_std)
    target_transform = MatchPrior(config.priors, config.center_variance, config.size_variance, 0.5)
    test_transform = TestTransform(config.image_size, config.image_mean, config.image_std)

    train_dataset = VOCDataset(train_datasets_path, transform=train_transform, target_transform=target_transform)
    val_dataset = VOCDataset(validation_datasets_path, transform=test_transform, target_transform=target_transform, is_test=True)
    train_loader = DataLoader(train_dataset, batch_size, num_workers=num_workers, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size, num_workers=num_workers, shuffle=False)

    # The label file and the checkpoints are both written into
    # checkpoint_folder, so make sure it exists before the first write.
    os.makedirs(checkpoint_folder, exist_ok=True)
    label_file = os.path.join(checkpoint_folder, "voc-model-labels.txt")
    store_labels(label_file, train_dataset.class_names)

    # ---- Model, loss, optimizer ---------------------------------------------
    # The class count comes from the dataset rather than a hard-coded value.
    num_classes = len(train_dataset.class_names)
    net = create_net(num_classes)
    last_epoch = -1

    base_net_lr = lr
    extra_layers_lr = lr

    # Three parameter groups; the header group has no explicit 'lr' and so
    # uses the optimizer's default `lr`.
    params = [
        {'params': net.base_net.parameters(), 'lr': base_net_lr},
        {'params': itertools.chain(
            net.source_layer_add_ons.parameters(),
            net.extras.parameters()
        ), 'lr': extra_layers_lr},
        {'params': itertools.chain(
            net.regression_headers.parameters(),
            net.classification_headers.parameters()
        )}
    ]
    net.to(DEVICE)
    criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3, center_variance=0.1, size_variance=0.2, device=DEVICE)
    optimizer = torch.optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
    if scheduler_name == 'cosine':
        scheduler = CosineAnnealingLR(optimizer, t_max, last_epoch=last_epoch)
    else:
        # Fail early instead of calling .step() on something that is not a scheduler.
        raise ValueError(f"Unsupported scheduler: {scheduler_name!r}")

    # ---- Training loop -------------------------------------------------------
    for epoch in range(last_epoch + 1, num_epochs):
        train(train_loader, net, criterion, optimizer,
              device=DEVICE, debug_steps=debug_steps, epoch=epoch)
        # Step the LR schedule after this epoch's optimizer updates. Stepping
        # first would skip the schedule's initial learning rate (PyTorch >= 1.1).
        scheduler.step()

        if epoch % validation_epochs == 0 or epoch == num_epochs - 1:
            val_loss, val_regression_loss, val_classification_loss = test(val_loader, net, criterion, DEVICE)
            logging.info(
                f"Epoch: {epoch}, " +
                f"Validation Loss: {val_loss:.4f}, " +
                f"Validation Regression Loss {val_regression_loss:.4f}, " +
                f"Validation Classification Loss: {val_classification_loss:.4f}"
            )
            # Build the file name from the short tag, not from the model object
            # (formatting the module would embed its whole repr in the name).
            model_path = os.path.join(checkpoint_folder, f"{net_name}-Epoch-{epoch}-Loss-{val_loss}.pth")
            net.save(model_path)
            logging.info(f"Saved model {model_path}")

  mode = random.choice(self.sample_options)
