In [1]:
import torch
import torchvision
from dataset import PASCALVOC
import transforms as T
import copy
from yoloLoss import yoloLoss
import warnings
import time

In [2]:
warnings.filterwarnings('ignore')

In [3]:
device = torch.device('cuda')

In [4]:
def get_transform(train):
    transforms = []
    if train:
        transforms.append(T.ChangeExposure(0.5))
        transforms.append(T.ChangeSaturation(0.5))
        transforms.append(T.RandomScale(0.5))
        transforms.append(T.RandomTranslation(0.5))
    transforms.append(T.Resize(448, 448))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

In [5]:
train_data = PASCALVOC('dataset/VOC2007train', get_transform(train=True))
test_data = PASCALVOC('dataset/VOC2007test', get_transform(train=False))

In [6]:
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True, num_workers=4)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False, num_workers=4)

In [7]:
model = torchvision.models.resnet50(pretrained=False)

In [8]:
model.load_state_dict(torch.load('resnet50-19c8e357.pth'))

<All keys matched successfully>

In [9]:
for param in model.parameters():
    param.requires_grad = False

In [10]:
in_features = model.fc.in_features
model.fc = torch.nn.Sequential(torch.nn.Linear(in_features, 1470), 
                               torch.nn.ReLU(), 
                               torch.nn.Sigmoid())

In [11]:
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [12]:
criterion = yoloLoss(7, 2, 5, 0.5)

In [13]:
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)

In [14]:
num_epochs = 136

In [15]:
print('Start training!')
best_model_wts = copy.deepcopy(model.state_dict())
best_loss = 999999999.0
for epoch in range(num_epochs):
    since = time.time()
    print('-' * 10)
    print('Epoch {0}/{1}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    
    if epoch == 1:
        learning_rate = 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    if epoch == 75:
        learning_rate = 0.001
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    if epoch == 105:
        learning_rate = 0.0001
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
    
    running_loss = 0.0
    model.train()
    for images, targets in train_dataloader:
        images = images.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        outputs = outputs.view(-1, 7, 7, 30)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_data)
    print('Train Loss: {0:.3f}'.format(epoch_loss))
    
    running_loss = 0.0
    model.eval()
    with torch.no_grad():
        for images, targets in test_dataloader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            outputs = outputs.view(-1, 7, 7, 30)
            loss = criterion(outputs, targets)
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / len(test_data)
        print('Test Loss: {0:.3f}'.format(epoch_loss))
        if best_loss > epoch_loss:
            best_loss = epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())
    endd = time.time()
    print('Train & Test Time: {0:.3f}'.format(endd - since))
print('Train complete!')
print('Best loss: {0:.3f}'.format(best_loss))
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'YOLOv1_Resnet50.pth')

Start training!
----------
Epoch 0/135
----------
Train Loss: 13.705
Test Loss: 9.751
Train & Test Time: 108.482
----------
Epoch 1/135
----------
Train Loss: 9.810
Test Loss: 9.252
Train & Test Time: 110.005
----------
Epoch 2/135
----------
Train Loss: 9.387
Test Loss: 8.909
Train & Test Time: 107.472
----------
Epoch 3/135
----------


KeyboardInterrupt: 