In [1]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.autograd import Function, Variable

from PIL import Image

import numpy as np
import math
import matplotlib.pyplot as plt

import cv2

import io
import requests
from torch.nn import functional as F
import pdb

In [2]:
# --- Model -------------------------------------------------------------
# Key into torchvision.models used by main_func() to build the network.
arch = 'resnet18'
pretrained = False

# --- Data --------------------------------------------------------------
# Directory holding the .jpg images (used for both training and validation).
data = '/home/yzy/Desktop/SIBS/SIBS/images/resized_baoxing'
# Directory holding biaoxing_train.txt / biaoxing_test.txt label files.
label_dir = '/home/yzy/Desktop/biaoxing'
batch_size = 64
workers = 4

# --- Optimisation ------------------------------------------------------
lr = 0.1
momentum = 0.9
weight_decay = 1e-3
start_epoch = 0
epochs = 100

# --- Run control -------------------------------------------------------
# Path of a checkpoint to resume from, or None to start from scratch.
resume = None
# When True, main_func() only runs validation and returns.
evaluate = False
distributed = False
gpu = None
# Progress is printed every `print_freq` batches.
print_freq = 50

In [3]:
# Best top-1 validation accuracy observed so far; main_func() updates it.
best_acc1 = 0

# Sample names (file stems) from the label file most recently parsed by myDataset.
names = []

# my model to read jpg
def default_loader(path):
    """Open the image at ``path`` and return it converted to RGB.

    Failures are reported on stdout instead of being raised; in that case the
    function returns ``None``, so callers must be prepared for a missing image.
    """
    try:
        # The context manager closes the underlying file as soon as the
        # converted copy has been produced, instead of leaving it to the GC.
        with Image.open(path) as img:
            return img.convert('RGB')
    except Exception:
        # Narrower than a bare ``except`` so KeyboardInterrupt / SystemExit
        # still propagate and a long run can be interrupted.
        print("Can not open {0}".format(path))
        return None
        
class myDataset(torch.utils.data.Dataset):
    """Map-style dataset of JPEG images listed in a whitespace-separated label file.

    Each usable line of ``img_txt`` has the form ``<name> <label>``; the image
    is expected at ``<img_dir>/<name>.jpg``. Entries whose image file does not
    exist are skipped. Labels in the file are shifted down by one before being
    stored (``int(label) - 1``).

    Side effect: the module-level ``names`` list is replaced with the names of
    the entries kept by the most recently constructed dataset.

    Args:
        img_dir: directory containing the ``.jpg`` files.
        img_txt: path of the label file.
        transform: optional callable applied to each loaded image.
        loader: callable that turns an image path into an image object.
    """

    def __init__(self,img_dir, img_txt, transform = None,loader = default_loader):
        global names
        img_list = []
        img_labels = []
        names = []

        # ``with`` guarantees the label file is closed even if parsing fails.
        with open(img_txt, 'r') as fp:
            for line in fp:
                mylist = line.split()
                # Blank or incomplete lines carry no (name, label) pair.
                if len(mylist) < 2:
                    continue
                file_name = mylist[0] + '.jpg'
                if os.path.exists(os.path.join(img_dir, file_name)):
                    img_list.append(file_name)
                    names.append(mylist[0])
                    # Shift the label from the file down by one.
                    img_labels.append(int(mylist[1]) - 1)

        self.imgs = [os.path.join(img_dir, file) for file in img_list]
        self.labels = img_labels
        self.transform = transform
        self.loader = loader

    def __len__(self):
        """Number of (image, label) pairs kept from the label file."""
        return len(self.imgs)

    def __getitem__(self,index):
        """Return ``(image, label)`` for ``index``; label is a 0-dim int64 tensor.

        If the transform raises, the failure is printed and the untransformed
        image is returned (best-effort behaviour kept from the original code).
        """
        img_path = self.imgs[index]
        label = torch.from_numpy(np.array(self.labels[index],dtype=np.int64))
        img = self.loader(img_path)
        if self.transform is not None:
            try:
                img = self.transform(img)
            except Exception:
                # Do not swallow KeyboardInterrupt / SystemExit.
                print('Cannot transform image: {}'.format(img_path))
        return img,label

def main_func():
    """Build the model, data loaders and optimizer, then train or evaluate.

    All settings are read from the module-level configuration variables
    (``arch``, ``pretrained``, ``lr``, ``data``, ``label_dir``, ``resume``,
    ``evaluate``, ...). When ``evaluate`` is true only one validation pass is
    run. Otherwise the model is trained up to ``epochs`` epochs, validated
    after each one, and a checkpoint is written every epoch via
    ``save_checkpoint``. The global ``best_acc1`` is updated in place.
    """
    global best_acc1

    # Epoch the training loop starts from; replaced by the checkpoint's value
    # when resuming.
    first_epoch = start_epoch

    # create model
    if pretrained:
        # NOTE(review): torchvision's pretrained weights presumably target the
        # default 1000-class head, so combining them with num_classes = 2 may
        # fail when the weights are loaded -- confirm before enabling.
        print("=> using pre-trained model '{}'".format(arch))
        model = models.__dict__[arch](num_classes = 2, pretrained = True)
    else:
        print("=> creating model '{}'".format(arch))
        model = models.__dict__[arch](num_classes = 2)

    if distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if arch.startswith('alexnet') or arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum = momentum,
                                weight_decay = weight_decay)

    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))
            checkpoint = torch.load(resume)
            # The original assigned to an undefined ``args`` object here,
            # which raised NameError and never affected the training loop.
            first_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(resume))

    cudnn.benchmark = True

    # Data loading code: training and validation images live in the same
    # directory and are separated only by their label files.
    traindir = data
    valdir = data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = myDataset(img_dir = traindir,
                              img_txt = label_dir + '/biaoxing_train.txt',
                              transform = transforms.Compose([
                                    transforms.ToTensor(),
                                    normalize,
                              ]))

    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # ``shuffle`` and ``sampler`` are mutually exclusive, so shuffle only when
    # no sampler is in use.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size = batch_size, shuffle = (train_sampler is None),
        num_workers = workers, pin_memory = True, sampler = train_sampler)

    val_loader = torch.utils.data.DataLoader(
        myDataset(valdir, label_dir + '/biaoxing_test.txt', transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size = batch_size, shuffle = False,
        num_workers = workers, pin_memory = True)

    if evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(first_epoch, epochs):
        if distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        acc1, _ = validate(val_loader, model, criterion)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint(model, {
            'epoch': epoch + 1,
            'arch': arch,
            'state_dict': model.state_dict(),
            'best_acc1': best_acc1,
            'optimizer' : optimizer.state_dict(),
        }, is_best)
        
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(images, target)`` batches.
        model: network to train; switched to train mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only in the progress prefix.

    Returns:
        Always ``0`` (placeholder kept for existing callers).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    # accuracy() is called with topk = (1, 2), so the second meter is top-2
    # accuracy; it was previously mislabelled as 'Acc@5'.
    top2 = AverageMeter('Acc@2', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top2],
        prefix = "Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    ret = 0

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(None, non_blocking = True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc2 = accuracy(output, target, topk = (1, 2))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top2.update(acc2[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            progress.display(i)

    return ret
            
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        val_loader: iterable yielding ``(images, target)`` batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.

    Returns:
        ``(top1_avg, 0)``: the running average of top-1 accuracy over the
        loader and a placeholder ``0`` kept for existing callers.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    # accuracy() is called with topk = (1, 2), so the second meter is top-2
    # accuracy; it was previously mislabelled as 'Acc@5'.
    top2 = AverageMeter('Acc@2', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top2],
        prefix = 'Test: ')

    # switch to evaluate mode
    model.eval()

    ret = 0

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            target = target.cuda(None, non_blocking = True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc2 = accuracy(output, target, topk = (1, 2))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top2.update(acc2[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                progress.display(i)

    return top1.avg, ret

def save_checkpoint(model, state, is_best, filename = 'checkpoint.pth.tar'):
    """Persist ``state`` to ``filename``; on a new best also export the model.

    When ``is_best`` is truthy, the checkpoint file is additionally copied to
    'model_best.pth.tar', the model's state dict is written to
    'model_state.pth.tar' and the whole model object to 'model.pth.tar'
    (all relative to the current working directory).
    """
    torch.save(state, filename)
    if not is_best:
        return

    # Snapshot of the full training state at the best epoch.
    shutil.copyfile(filename, 'model_best.pth.tar')
    # Weights only, then the entire model object.
    weights = model.state_dict()
    torch.save(weights, 'model_state.pth.tar')
    torch.save(model, 'model.pth.tar')
        
class AverageMeter(object):
    """Tracks the latest value of a metric together with its weighted mean.

    ``name`` labels the metric and ``fmt`` is the format spec (including the
    leading colon, e.g. ``':6.2f'``) applied to both numbers in ``str()``.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the latest value, the running sum, the count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n = 1):
        """Record ``val`` as the latest value, weighted by ``n`` samples."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Build a template such as '{name} {val:6.2f} ({avg:6.2f})' and fill
        # it in from the instance attributes.
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))
    

class ProgressMeter(object):
    """Prints one tab-separated progress line made of a batch counter and meters.

    Args:
        num_batches: total batch count shown after the slash.
        meters: objects whose ``str()`` is appended to each line.
        prefix: text placed before the batch counter.
    """

    def __init__(self, num_batches, meters, prefix = ""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the ``[batch/total]`` counter and every meter."""
        fields = [self.prefix + self.batch_fmtstr.format(batch)]
        for meter in self.meters:
            fields.append(str(meter))
        print('\t'.join(fields))

    def _get_batch_fmtstr(self, num_batches):
        # Pad the running index to the width of the total, e.g. '[{:2d}/45]'.
        width = len(str(num_batches // 1))
        index_spec = '{:' + str(width) + 'd}'
        total_text = index_spec.format(num_batches)
        return '[{0}/{1}]'.format(index_spec, total_text)
    
def adjust_learning_rate(optimizer, epoch):
    """Apply step decay: the module-level ``lr`` scaled by 0.1 for every 30 full epochs.

    The resulting rate is written to every parameter group of ``optimizer``.
    """
    completed_steps = epoch // 30
    lr_update = lr * (0.1 ** completed_steps)
    for group in optimizer.param_groups:
        group['lr'] = lr_update


def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: score tensor indexed as ``(sample, class)``; the top-k classes
            are taken along dimension 1.
        target: tensor of true class indices, one per sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``, holding
        the percentage (0-100) of samples whose true class is among the k
        highest-scoring predictions.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred: (maxk, batch) after the transpose, best prediction first.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # ``correct`` can be non-contiguous because it derives from a
            # transposed tensor; ``view(-1)`` then raises a RuntimeError,
            # whereas ``reshape`` copies only when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [4]:
# Run the pipeline with the module-level configuration: a single validation
# pass when `evaluate` is true, otherwise training with per-epoch validation
# and checkpointing.
main_func()

=> creating model 'resnet18'
Epoch: [0][ 0/45]	Time 23.281 (23.281)	Data  1.017 ( 1.017)	Loss 6.4125e-01 (6.4125e-01)	Acc@1  67.19 ( 67.19)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.917 ( 1.917)	Loss 6.1918e-01 (6.1918e-01)	Acc@1  68.33 ( 68.33)	Acc@5 100.00 (100.00)
Epoch: [1][ 0/45]	Time  1.877 ( 1.877)	Data  1.496 ( 1.496)	Loss 6.1457e-01 (6.1457e-01)	Acc@1  67.19 ( 67.19)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.441 ( 1.441)	Loss 6.7336e-01 (6.7336e-01)	Acc@1  60.00 ( 60.00)	Acc@5 100.00 (100.00)
Epoch: [2][ 0/45]	Time  1.574 ( 1.574)	Data  1.437 ( 1.437)	Loss 6.3857e-01 (6.3857e-01)	Acc@1  67.19 ( 67.19)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.483 ( 1.483)	Loss 3.8508e+00 (3.8508e+00)	Acc@1  68.33 ( 68.33)	Acc@5 100.00 (100.00)
Epoch: [3][ 0/45]	Time  1.657 ( 1.657)	Data  1.515 ( 1.515)	Loss 4.5745e-01 (4.5745e-01)	Acc@1  79.69 ( 79.69)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.463 ( 1.463)	Loss 3.9633e-01 (3.9633e-01)	Acc@1  83.33 ( 83.33)	Acc@5 100.00 (100.00)
Epoch: [4][ 0/4

Test: [0/1]	Time  1.555 ( 1.555)	Loss 7.0253e-02 (7.0253e-02)	Acc@1  96.67 ( 96.67)	Acc@5 100.00 (100.00)
Epoch: [35][ 0/45]	Time  1.625 ( 1.625)	Data  1.450 ( 1.450)	Loss 4.5821e-03 (4.5821e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.482 ( 1.482)	Loss 1.0248e-01 (1.0248e-01)	Acc@1  95.00 ( 95.00)	Acc@5 100.00 (100.00)
Epoch: [36][ 0/45]	Time  1.615 ( 1.615)	Data  1.474 ( 1.474)	Loss 1.6066e-03 (1.6066e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.445 ( 1.445)	Loss 7.7543e-02 (7.7543e-02)	Acc@1  96.67 ( 96.67)	Acc@5 100.00 (100.00)
Epoch: [37][ 0/45]	Time  1.721 ( 1.721)	Data  1.598 ( 1.598)	Loss 3.5670e-03 (3.5670e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.495 ( 1.495)	Loss 7.9165e-02 (7.9165e-02)	Acc@1  96.67 ( 96.67)	Acc@5 100.00 (100.00)
Epoch: [38][ 0/45]	Time  1.653 ( 1.653)	Data  1.504 ( 1.504)	Loss 1.5228e-03 (1.5228e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.440 ( 1.440)	Loss 1.

Epoch: [69][ 0/45]	Time  1.795 ( 1.795)	Data  1.631 ( 1.631)	Loss 1.9908e-02 (1.9908e-02)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.616 ( 1.616)	Loss 3.2986e-02 (3.2986e-02)	Acc@1  98.33 ( 98.33)	Acc@5 100.00 (100.00)
Epoch: [70][ 0/45]	Time  1.638 ( 1.638)	Data  1.485 ( 1.485)	Loss 4.1335e-03 (4.1335e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.559 ( 1.559)	Loss 3.3368e-02 (3.3368e-02)	Acc@1  98.33 ( 98.33)	Acc@5 100.00 (100.00)
Epoch: [71][ 0/45]	Time  1.662 ( 1.662)	Data  1.519 ( 1.519)	Loss 1.6895e-03 (1.6895e-03)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.588 ( 1.588)	Loss 5.2478e-02 (5.2478e-02)	Acc@1  96.67 ( 96.67)	Acc@5 100.00 (100.00)
Epoch: [72][ 0/45]	Time  1.768 ( 1.768)	Data  1.605 ( 1.605)	Loss 4.1003e-04 (4.1003e-04)	Acc@1 100.00 (100.00)	Acc@5 100.00 (100.00)
Test: [0/1]	Time  1.580 ( 1.580)	Loss 3.7763e-02 (3.7763e-02)	Acc@1  98.33 ( 98.33)	Acc@5 100.00 (100.00)
Epoch: [73][ 0/45]	Time  1.789 ( 1.789)	