In [None]:
import os
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import torchvision

from tqdm import tqdm
from tensorboardX import SummaryWriter

from dataset import voc
from retinanet import model


In [None]:
# consts — every value a reader might want to tune for a run lives in this cell.
tag = 'debug'                               # free-form label; first component of the result directory name
split_name = 'voc-1'                        # image-set prefix; '_train' / '_val' are appended when datasets are built
root_path = '/home/voyager/data/root/voc/'  # NOTE(review): machine-specific absolute path — adjust per host

device_name = 'cuda'
batch_size = 4
epochs = 30
depth = 50        # selects which model.resnet* constructor the model cell calls
lr = 1e-5
patience = 3      # forwarded to ReduceLROnPlateau
image_size = 512  # images are resized to (image_size, image_size)

# info and deps
# Timestamp the run so repeated executions never write into the same directory.
# Requires `from datetime import datetime` in the import cell.
now = datetime.now()

result_path = './{}_{}_{}'.format(
    tag,
    split_name,
    now.strftime('%Y%m%d_%H%M%S')
)

# tensorboardX writer rooted at the per-run result directory.
summary_writer = SummaryWriter(result_path)

In [None]:
# data loader
#
# Built stage by stage: transforms first, then the VOC datasets, then the
# DataLoaders. The train and val pipelines are currently identical except for
# the image-set suffix and shuffling.

# --- transforms ---------------------------------------------------------

# TODO : transforms for train
train_trans = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# TODO : transforms for val
val_trans = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# --- datasets -----------------------------------------------------------

# train
train_set = voc.VOCDetection(root_path, image_set="{}_train".format(split_name), transforms=train_trans)

# val
val_set = voc.VOCDetection(root_path, image_set="{}_val".format(split_name), transforms=val_trans)

# --- loaders ------------------------------------------------------------

# Training batches are reshuffled every epoch; batching is delegated to voc.collate.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, collate_fn=voc.collate)

# Validation keeps dataset order.
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, collate_fn=voc.collate)


In [None]:
# model, loss and optimizer

device = torch.device(device_name)

# Pick the model constructor that matches the configured `depth`.
num_classes = train_set.num_classes()
if depth == 34:
    net = model.resnet34(num_classes, pretrained=True)
elif depth == 50:
    net = model.resnet50(num_classes, pretrained=True)
elif depth == 101:
    net = model.resnet101(num_classes, pretrained=True)
elif depth == 152:
    # Was `model.resnet12`, which does not match this branch.
    # NOTE(review): confirm `resnet152` is the constructor name exposed by retinanet.model.
    net = model.resnet152(num_classes, pretrained=True)
else:
    # Fail loudly here instead of hitting a NameError on `net` further down.
    raise ValueError(
        'Unsupported depth {!r}; expected one of 34, 50, 101, 152'.format(depth)
    )

# Wrap for multi-GPU execution and move to the target device in one step.
net = torch.nn.DataParallel(net).to(device)

optimizer = optim.Adam(net.parameters(), lr=lr)
# Reduces the learning rate when the value passed to scheduler.step() stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=patience,
    verbose=True
)

# train() already sets the `training` flag on the wrapper and every submodule.
net.train()
# `freeze_bn` lives on the wrapped model, hence `.module`.
# NOTE(review): presumably keeps BatchNorm layers in eval mode — confirm in retinanet.model.
net.module.freeze_bn()


In [None]:
# train-val loop
for epoch in range(epochs):
    # Re-apply freeze_bn after train() on every epoch, mirroring the setup cell.
    net.train()
    net.module.freeze_bn()

    epoch_loss = []
    for i, data in enumerate(train_loader):
        try:
            optimizer.zero_grad()

            # The model is called with [images, annotations] and returns the
            # classification and regression loss terms.
            classification_loss, regression_loss = net([
                data['imgs'].to(device),
                data['annos']
            ])

            # Reduce each term to a scalar.
            # NOTE(review): presumably one value per DataParallel replica — confirm.
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()

            loss = classification_loss + regression_loss
            epoch_loss.append(loss.item())

            # Nothing to back-propagate for a zero loss; skip the optimizer step.
            if loss == 0:
                continue

            loss.backward()

            # Clip the gradient norm to 0.1 before stepping.
            torch.nn.utils.clip_grad_norm_(net.parameters(), 0.1)

            optimizer.step()

            # TODO : log

            del classification_loss
            del regression_loss
        except Exception as e:
            # Best-effort: abandon the rest of this epoch, but say where it failed
            # so the error is not mistaken for a normal end of epoch.
            print('epoch {} / batch {} failed: {!r}'.format(epoch, i, e))
            break

    for i, data in enumerate(val_loader):
        # TODO : val
        pass

    # np.mean([]) is NaN; only step the scheduler when at least one batch produced a loss.
    if epoch_loss:
        scheduler.step(np.mean(epoch_loss))

    # TODO : save checkpoint

    # TODO : write summary for tensorboardX
    