In [1]:
import torch
# ---------- edit ---------- #
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# torch.set_default_tensor_type('torch.cuda.FloatTensor')
# device = torch.device('cpu')
print(device)
# -------------------------- #

In [None]:
import torch.utils.data as data
from voc import make_filepath_list, GetBBoxAndLabel, DataTransform, multiobject_collate_fn
from preprocessDataset import PreprocessVOC2012

rootpath = '/home/masakibandai/object_detection/data/VOCdevkit/VOC2012/'
train_img_list, train_anno_list, val_img_list, val_anno_list = make_filepath_list(rootpath)
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

color_mean = (104, 117, 123)
input_size = 300
train_dataset = PreprocessVOC2012(
    train_img_list,
    train_anno_list,
    phase='train',
    transform=DataTransform(input_size, color_mean),
    get_bbox_label=GetBBoxAndLabel(voc_classes)
)

val_dataset = PreprocessVOC2012(
    val_img_list,
    val_anno_list,
    phase='val',
    transform=DataTransform(input_size, color_mean),
    get_bbox_label=GetBBoxAndLabel(voc_classes)
)

batch_size = 32

train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=multiobject_collate_fn
)

val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=multiobject_collate_fn
)

dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}


In [None]:
import torch.nn as nn
import torch.nn.init as init
from ssd import SSD
from torchinfo import summary

ssd_cfg = {
    'classes_num': 21,
    'input_size': 300,
    'dbox_num': [4, 6, 6, 6, 4, 4],
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

net = SSD(phase='train', cfg=ssd_cfg)

vgg_weights = torch.load('/home/masakibandai/object_detection/weights/vgg16_reducedfc.pth')
net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)

net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)

summary(
    net,
    input_size=(batch_size, 3, 300, 300),
    col_names=['input_size', 'output_size', 'num_params']
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
SSD                                      [32, 3, 300, 300]         [32, 8732, 4]             --
├─ModuleList: 1-3                        --                        --                        (recursive)
│    └─Conv2d: 2-1                       [32, 3, 300, 300]         [32, 64, 300, 300]        1,792
│    └─ReLU: 2-2                         [32, 64, 300, 300]        [32, 64, 300, 300]        --
│    └─Conv2d: 2-3                       [32, 64, 300, 300]        [32, 64, 300, 300]        36,928
│    └─ReLU: 2-4                         [32, 64, 300, 300]        [32, 64, 300, 300]        --
│    └─MaxPool2d: 2-5                    [32, 64, 300, 300]        [32, 64, 150, 150]        --
│    └─Conv2d: 2-6                       [32, 64, 150, 150]        [32, 128, 150, 150]       73,856
│    └─ReLU: 2-7                         [32, 128, 150, 150]       [32, 128, 150, 150]       --
│    └─Conv2d: 

In [None]:
import torch.optim as optim
from ssd import MultiBoxLoss

criterion = MultiBoxLoss(
    jaccard_thresh=0.5,
    neg_pos=3,
    device=device
)

optimizer = optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4
)



In [None]:
import time
import pandas as pd

def train(net, dataloaders_dict, criterion, optimizer, num_epochs):
    '''
    Parameter:
        net(object): SSD model
        dataloaders_dict(dict of object): dataloader
        criterion(object): loss function
        optimizer(object): optimizer
        num_epochs(object): num learning
    '''

    device = torch.device('cuda:1')
    print(device)
    # ---------edit--------- #
    net = net.to(device)
    net.cuda()
    # ---------------------- #
    print('Start training with {}'.format(torch.cuda.get_device_name()))
    torch.backends.cudnn.benchmark = True
    iteration = 1
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    logs = []

    for epoch in range(num_epochs):
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('----------------------------------------------------------------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('----------------------------------------------------------------------')

        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
            else:
                if ((epoch+1) % 10 == 0):
                    net.eval()
                    print('----------------------------------------------------------------------')
                    print('(validation)')
                else:
                    continue
        
            for images, targets in dataloaders_dict[phase]:
                images = images.to(device)
                targets = [ann.to(device) for ann in targets]
                # ---------edit---------- #
                images.cuda()
                # targets.cuda()
                # ----------------------- #
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase=='train'):
                    outputs = net(images)
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()
                        nn.utils.clip_grad_value_(net.parameters(), clip_value=2.0)
                        optimizer.step()
                        
                        if (iteration % 10 == 0):
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Step({}) loss: {:.4f} -- time: {:.4f} sec.'.format(iteration, loss.item(), duration))
                            t_iter_start = time.time()
                        
                        epoch_train_loss += loss.item()
                        iteration += 1

                    else:
                        epoch_val_loss += loss.item()
        t_epoch_finish = time.time()
        print('----------------------------------------------------------------------')

        print('train_loss: {:.4f} - val_loss: {:.4f}'.format(epoch_train_loss, epoch_val_loss))
        print('time: {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))

        t_epoch_start = time.time()
        log_epoch = {
            'epoch': epoch+1,
            'train_loss': epoch_train_loss,
            'val_loss': epoch_val_loss
        }

        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv('/home/masakibandai/object_detection/epoch_loss.csv')
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        if ((epoch+1) % 10 == 0):
            torch.save(
                net.state_dict(),
                '/home/masakibandai/object_detection/weights/ssd_weights' + str(epoch+1) + '.pth'
            )
            print('--saved weights--')





In [None]:
%%time
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
num_epochs = 50
train(
    net,
    dataloaders_dict,
    criterion,
    optimizer,
    num_epochs
)

cuda:1
Start training with NVIDIA GeForce RTX 3090
----------------------------------------------------------------------
Epoch 1/50
----------------------------------------------------------------------


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0! (when checking argument for argument weight in method wrapper__cudnn_convolution)

In [None]:
device

device(type='cuda', index=1)