In [1]:
import torch
# ---------- edit ---------- #
# Select the compute device first: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Querying the device name raises on hosts without CUDA, so only do it
    # once we know a GPU was actually selected.
    print(torch.cuda.get_device_name(device))
# torch.set_default_tensor_type('torch.cuda.FloatTensor')
# device = torch.device('cpu')
print(device)
# -------------------------- #

NVIDIA GeForce RTX 4090
cuda


In [2]:
import torch.utils.data as data
from voc import make_filepath_list, GetBBoxAndLabel, DataTransform, multiobject_collate_fn
from preprocessDataset import PreprocessVOC2012

# Root directory of the VOC2012 data; the commented line is an alternate location.
# rootpath = '/home/masakibandai/object_detection/data/VOCdevkit/VOC2012/'
rootpath = '/Users/ShimaSef/object_detection/data/VOCdevkit/VOC2012/'
(
    train_img_list,
    train_anno_list,
    val_img_list,
    val_anno_list,
) = make_filepath_list(rootpath)

# Class names handed to GetBBoxAndLabel (20 entries, order preserved).
voc_classes = (
    'aeroplane bicycle bird boat bottle bus car cat chair cow diningtable dog '
    'horse motorbike person pottedplant sheep sofa train tvmonitor'
).split()

# Both values are forwarded to DataTransform.
# NOTE(review): color_mean is presumably a per-channel mean -- confirm in voc.DataTransform.
color_mean = (104, 117, 123)
input_size = 300


def _make_dataset(img_list, anno_list, phase):
    """Build a PreprocessVOC2012 dataset for one split with its own transform objects."""
    return PreprocessVOC2012(
        img_list,
        anno_list,
        phase=phase,
        transform=DataTransform(input_size, color_mean),
        get_bbox_label=GetBBoxAndLabel(voc_classes),
    )


train_dataset = _make_dataset(train_img_list, train_anno_list, 'train')
val_dataset = _make_dataset(val_img_list, val_anno_list, 'val')

batch_size = 32


def _make_loader(dataset, shuffle):
    """Wrap a dataset in a DataLoader that batches with multiobject_collate_fn."""
    return data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=multiobject_collate_fn,
    )


# Only the training split is shuffled.
train_dataloader = _make_loader(train_dataset, True)
val_dataloader = _make_loader(val_dataset, False)

# Keyed by phase name, as expected by the training loop.
dataloaders_dict = dict(train=train_dataloader, val=val_dataloader)


In [3]:
import torch.nn as nn
import torch.nn.init as init
from ssd import SSD
from torchinfo import summary

# Configuration dictionary passed to the SSD constructor.
# Arithmetic note: sum(f * f * n) over feature_maps / dbox_num is 8732, the same
# number that appears in the [32, 8732, 4] output shape reported by summary().
ssd_cfg = {
    'classes_num': 21,
    'input_size': 300,
    'dbox_num': [4, 6, 6, 6, 4, 4],
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

net = SSD(phase='train', cfg=ssd_cfg)
# weightpath = '/home/masakibandai/object_detection/weights/vgg16_reducedfc.pth'
weightpath = '/Users/ShimaSef/object_detection/weights/vgg16_reducedfc.pth'
# Load onto the CPU regardless of the device the checkpoint was saved from, so
# the cell also works on hosts without CUDA. load_state_dict copies the values
# into net.vgg's existing parameters, so the model's own placement is unaffected.
vgg_weights = torch.load(weightpath, map_location='cpu')
net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    """Re-initialise a Conv2d module in place; leave every other module alone.

    Intended for use with ``Module.apply``: the weight is redrawn with
    ``kaiming_normal_`` and the bias, when the layer has one, is set to zero.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.kaiming_normal_(m.weight.data)
    if m.bias is None:
        return
    init.constant_(m.bias, 0.0)

# Re-initialise the sub-networks that did not receive the loaded VGG weights.
for _submodule in (net.extras, net.loc, net.conf):
    _submodule.apply(weights_init)

# Layer-by-layer report for one batch of 3x300x300 inputs; kept as the last
# expression so the notebook renders it.
summary(
    net,
    input_size=(batch_size, 3, 300, 300),
    col_names=['input_size', 'output_size', 'num_params'],
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
SSD                                      [32, 3, 300, 300]         [32, 8732, 4]             --
├─ModuleList: 1-3                        --                        --                        (recursive)
│    └─Conv2d: 2-1                       [32, 3, 300, 300]         [32, 64, 300, 300]        1,792
│    └─ReLU: 2-2                         [32, 64, 300, 300]        [32, 64, 300, 300]        --
│    └─Conv2d: 2-3                       [32, 64, 300, 300]        [32, 64, 300, 300]        36,928
│    └─ReLU: 2-4                         [32, 64, 300, 300]        [32, 64, 300, 300]        --
│    └─MaxPool2d: 2-5                    [32, 64, 300, 300]        [32, 64, 150, 150]        --
│    └─Conv2d: 2-6                       [32, 64, 150, 150]        [32, 128, 150, 150]       73,856
│    └─ReLU: 2-7                         [32, 128, 150, 150]       [32, 128, 150, 150]       --
│    └─Conv2d: 

In [4]:
import torch.optim as optim
from ssd import MultiBoxLoss

# Loss object called as criterion(outputs, targets) by the training loop.
criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)

# ---------- edit ---------- #
# Adam hyper-parameters gathered in one place.
adam_options = dict(
    lr=1e-3,
    # momentum=0.9,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=5e-4,
)
optimizer = optim.Adam(net.parameters(), **adam_options)
# -------------------------- #


In [5]:
import time
import pandas as pd
import os

# Default output locations (the paths the original notebook wrote to).
_DEFAULT_CSV_DIR = '/Users/ShimaSef/object_detection/loss_csv'
_DEFAULT_WEIGHTS_DIR = '/Users/ShimaSef/object_detection/weights'


def _next_run_index(directory, prefix, suffix='', dirs_only=False):
    '''Return 1 + the largest N among entries of `directory` named <prefix><N><suffix>.

    Entries that do not follow the pattern (e.g. '.DS_Store', other weight
    files) are ignored. The directory is created when missing, and 1 is
    returned when no matching entry exists.
    '''
    os.makedirs(directory, exist_ok=True)
    indices = []
    for name in os.listdir(directory):
        if not (name.startswith(prefix) and name.endswith(suffix)):
            continue
        if dirs_only and not os.path.isdir(os.path.join(directory, name)):
            continue
        middle = name[len(prefix):len(name) - len(suffix)]
        if middle.isdecimal():
            indices.append(int(middle))
    return max(indices, default=0) + 1


def train(net, dataloaders_dict, criterion, optimizer, num_epochs,
          csv_dir=None, weights_dir=None):
    '''
    Train `net`, validating and saving weights on every 10th epoch.

    Parameter:
        net(object): SSD model
        dataloaders_dict(dict of object): dataloaders keyed by 'train' and 'val'
        criterion(object): loss function, called as criterion(outputs, targets)
            and returning the pair (loss_l, loss_c)
        optimizer(object): optimizer
        num_epochs(int): number of epochs to run
        csv_dir(str): directory for the per-run loss log 'epoch_loss_<N>.csv';
            defaults to the notebook's original loss_csv directory
        weights_dir(str): directory that receives 'ssd_weights_<N>/';
            defaults to the notebook's original weights directory

    Notes:
        * Uses the module-level `device`.
        * Logged losses are sums of the per-batch losses over the epoch, not
          averages. 'val_loss' is 0.0 for epochs where validation is skipped.
        * <N> is one more than the largest existing run index in the target
          directory; both output locations are resolved before training starts
          so that a bad path fails immediately instead of after an epoch.
    '''
    if csv_dir is None:
        csv_dir = _DEFAULT_CSV_DIR
    if weights_dir is None:
        weights_dir = _DEFAULT_WEIGHTS_DIR

    print(device)
    # ---------edit--------- #
    net.to(device)
    # net.cuda()
    # ---------------------- #

    # Only ask CUDA for a device name when training actually runs on a GPU.
    run_device = torch.device(device)
    if run_device.type == 'cuda':
        device_label = torch.cuda.get_device_name(run_device)
    else:
        device_label = str(run_device)
    print('Start training with {}'.format(device_label))
    torch.backends.cudnn.benchmark = True

    # ---------- edit ---------- #
    # csvpath = '/home/masakibandai/object_detection/epoch_loss.csv'
    # statedictpath = '/home/masakibandai/object_detection/weights/ssd_weights'
    # The run index is read from the same directory the files are written to.
    csv_index = _next_run_index(csv_dir, 'epoch_loss_', '.csv')
    csvpath = os.path.join(csv_dir, 'epoch_loss_{}.csv'.format(csv_index))
    weights_index = _next_run_index(weights_dir, 'ssd_weights_', dirs_only=True)
    run_dir = os.path.join(weights_dir, 'ssd_weights_{}'.format(weights_index))
    os.makedirs(run_dir, exist_ok=True)
    # -------------------------- #

    iteration = 1
    logs = []

    for epoch in range(num_epochs):
        t_epoch_start = time.time()
        t_iter_start = time.time()
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0
        # Validation and checkpointing share the same every-10th-epoch schedule.
        is_tenth_epoch = ((epoch+1) % 10 == 0)

        print('----------------------------------------------------------------------')
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('----------------------------------------------------------------------')

        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
            elif is_tenth_epoch:
                net.eval()
                print('----------------------------------------------------------------------')
                print('(validation)')
            else:
                continue

            for images, targets in dataloaders_dict[phase]:
                images = images.to(device)
                targets = [ann.to(device) for ann in targets]

                # Gradients are tracked only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        # Clamp each gradient element to [-2, 2] before the update.
                        nn.utils.clip_grad_value_(net.parameters(), clip_value=2.0)
                        optimizer.step()

                        if (iteration % 10 == 0):
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('Step({}) loss: {:.4f} -- time: {:.4f} sec.'.format(iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    else:
                        epoch_val_loss += loss.item()

        t_epoch_finish = time.time()
        print('----------------------------------------------------------------------')

        print('train_loss: {:.4f} - val_loss: {:.4f}'.format(epoch_train_loss, epoch_val_loss))
        print('time: {:.4f} sec.'.format(t_epoch_finish - t_epoch_start))
        print('time_left: {:.4f} min'.format((t_epoch_finish-t_epoch_start)*(num_epochs-epoch-1)/60))

        logs.append({
            'epoch': epoch+1,
            'train_loss': epoch_train_loss,
            'val_loss': epoch_val_loss
        })
        # The full log is rewritten every epoch so an interrupted run keeps its history.
        pd.DataFrame(logs).to_csv(csvpath)

        if is_tenth_epoch:
            torch.save(
                net.state_dict(),
                os.path.join(run_dir, 'ssd_weights_{}.pth'.format(epoch+1))
            )
            print('--saved weights--')





In [6]:
%%time
import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "0"
# Total number of epochs handed to train().
num_epochs = 100
train(
    net=net,
    dataloaders_dict=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

cuda
Start training with NVIDIA GeForce RTX 4090
----------------------------------------------------------------------
Epoch 1/100
----------------------------------------------------------------------
Step(10) loss: 14.6157 -- time: 11.7751 sec.
Step(20) loss: 49.1457 -- time: 5.1677 sec.
Step(30) loss: 15.1407 -- time: 5.1734 sec.
Step(40) loss: 13.0464 -- time: 5.0840 sec.
Step(50) loss: 9.5318 -- time: 5.1928 sec.
Step(60) loss: 12.0531 -- time: 5.0445 sec.
Step(70) loss: 10.4175 -- time: 5.1474 sec.
Step(80) loss: 7.7630 -- time: 5.1808 sec.
Step(90) loss: 10.3285 -- time: 5.0385 sec.
Step(100) loss: 8.2770 -- time: 5.0868 sec.
Step(110) loss: 10.6653 -- time: 5.1570 sec.
Step(120) loss: 8.0794 -- time: 5.1786 sec.
Step(130) loss: 7.8079 -- time: 5.2341 sec.
Step(140) loss: 11.0748 -- time: 4.9170 sec.
Step(150) loss: 8.2388 -- time: 5.2333 sec.
Step(160) loss: 10.0891 -- time: 5.1455 sec.
Step(170) loss: 25.9709 -- time: 5.0954 sec.
----------------------------------------------

In [7]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "0"
# print(os.environ['CUDA_LAUNCH_BLOCKING'])
# csvidx = os.listdir('loss_csv')
# print(csvidx)
# csvidx = [int(os.path.splitext(i)[0].split('_')[2]) for i in csvidx]
# csvpath = '/Users/ShimaSef/object_detection/loss_csv/epoch_loss_{}.csv'.format(max(csvidx)+1)
# print(csvpath)
# weightidx = os.listdir('weights')
# weightidx = [int(f.split('_')[2]) for f in weightidx if os.path.isdir(os.path.join('weights', f))]
# if not os.path.exists('/Users/ShimaSef/object_detection/weights/ssd_weights_{}/'.format(max(weightidx)+1)):
#   os.mkdir('/Users/ShimaSef/object_detection/weights/ssd_weights_{}/'.format(max(weightidx)+1))
# max(csvidx)
# print(csvidx)
# weightidx