In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from models import PNet
from dataset import FaceDataset
from train_utils import *

from config import args

In [2]:
# Normalize computes (x - mean) / std per channel, so with mean = std = 0.5
# values in [0, 1] end up in [-1, 1].
normalize = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)

# Per-sample preprocessing: convert to a tensor first, then normalize.
train_dataset = FaceDataset(
    args.data,
    transforms.Compose([transforms.ToTensor(), normalize]),
)

# Shuffled mini-batch loader; batch size and worker count come from the config.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
    pin_memory=True,
)

In [3]:
# Build the network and the classification loss on the CPU first.
model = PNet()
criterion = nn.CrossEntropyLoss()

# Move both to the configured GPU only when one was requested. The model must
# live on the same device index that the training loop sends its batches to
# (args.gpu), and the loss must stay on the CPU for CPU-only runs.
if args.gpu is not None:
    model = model.cuda(args.gpu)
    criterion = criterion.cuda(args.gpu)

# Create the optimizer after the model has been moved so that it references
# the parameters that are actually trained.
optimizer = torch.optim.SGD(model.parameters(), args.lr)

# Let cuDNN auto-tune convolution algorithms (has no effect on CPU runs).
cudnn.benchmark = True

In [4]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return the average top-1 precision.

    Only the two-class classification output is trained here: the loss is
    ``criterion(logits.view(-1, 2), labels)``. The regression target from the
    loader and the regression output of the model are not used yet.

    Args:
        train_loader: iterable yielding ``(images, labels, reg_target)``
            batches; must support ``len()`` for the progress line.
        model: network called as ``model(images)`` and returning
            ``(logits, prob, reg)``.
        criterion: loss callable applied to ``(logits.view(-1, 2), labels)``.
        optimizer: object whose ``zero_grad()`` / ``step()`` apply the update.
        epoch: epoch index, used only in the progress line.

    Returns:
        float: the ``avg`` value of the top-1 precision meter after the epoch.
    """

    def _to_float(value):
        # Meter values are tensors when ``accuracy`` returns tensors, but may
        # be plain numbers otherwise (presumably also before the first
        # ``update`` call -- TODO confirm against AverageMeter in train_utils).
        return value.item() if hasattr(value, 'item') else float(value)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, labels, reg_target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            images = images.cuda(args.gpu)
            reg_target = reg_target.cuda(args.gpu)
            labels = labels.cuda(args.gpu)

        # compute output; keep the prediction under its own name so the
        # ground-truth regression target from the loader is not overwritten
        logits, prob, reg_pred = model(images)

        loss = criterion(logits.view(-1, 2), labels)

        # measure accuracy and record loss
        batch_size = images.shape[0]
        prec1 = accuracy(logits.view(-1, 2), labels, topk=(1,))
        losses.update(loss.item(), batch_size)
        top1.update(prec1[0], batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1_val:.3f} ({top1_avg:.3f})'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses,
                   top1_val=_to_float(top1.val), top1_avg=_to_float(top1.avg)))
    return _to_float(top1.avg)

In [5]:
# Best training precision seen so far across epochs.
best_prec = 0

for epoch in range(args.epochs):
    # Helper from train_utils; presumably applies the LR schedule for this
    # epoch starting from the base rate args.lr -- confirm in train_utils.
    adjust_learning_rate(optimizer, epoch, args.lr)

    # train for one epoch; `prec` is measured on the training data
    prec = train(train_loader, model, criterion, optimizer, epoch)

    # Update the running best BEFORE saving, so the checkpoint records a
    # best_prec that already includes the epoch it was written for.
    is_best = prec > best_prec
    best_prec = max(prec, best_prec)

    # Second argument flags whether this epoch set a new best.
    save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec': best_prec,
        }, is_best)

Epoch: [0][0/3121]	Time 1.609 (1.609)	Data 0.383 (0.383)	Loss 0.7328 (0.7328)	Prec@1 24.609 (24.609)


Process Process-4:
Process Process-1:
Process Process-3:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/b.lestsov/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/b.lestsov/anaconda3/lib/pyth

Epoch: [0][200/3121]	Time 0.003 (0.018)	Data 0.000 (0.008)	Loss 0.5325 (0.5847)	Prec@1 75.781 (71.560)
Traceback (most recent call last):
  File "/home/b.lestsov/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-19ba260d3e89>", line 7, in <module>
    prec = train(train_loader, model, criterion, optimizer, epoch)
  File "<ipython-input-4-e4791a1f4c45>", line 12, in train
    for i, (input, labels, reg) in enumerate(train_loader):
  File "/home/b.lestsov/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 280, in __next__
    idx, batch = self._get_batch()
  File "/home/b.lestsov/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 259, in _get_batch
    return self.data_queue.get()
  File "/home/b.lestsov/anaconda3/lib/python3.6/queue.py", line 164, in get
    self.not_empty.wait()
  File "/home/b.lestsov/anaconda3/l

KeyboardInterrupt: 

In [7]:
# TODO:
# 1) Add bounding-box regression, using the "part" images (label == -1)
# 2) Add a validation set
# 3) Tune the hyperparameters
# 4) Add data augmentations