In [16]:
import init_paths
import argparse
import os
import time
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import math
import numpy as np
from utils import *
from validation import validate
#import torchvision.models as models
import models
from models.imagenet_resnet import BasicBlock, Bottleneck
from multiprocessing import Pool
#from torchvision.models.resnet import BasicBlock, Bottleneck
import pdb
import wandb

from apex import amp
import copy

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'

In [23]:
# Create the model
arch = 'resnet50'
print("=> creating model '{}'".format(arch))
model = models.__dict__[arch]()
numberofclass = 1000

def init_dist_weights(model):
    for m in model.modules():
        if isinstance(m, BasicBlock):
            m.bn2.weight = nn.Parameter(torch.zeros_like(m.bn2.weight))
        if isinstance(m, Bottleneck):
            m.bn3.weight = nn.Parameter(torch.zeros_like(m.bn3.weight))
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(0, 0.01)

init_dist_weights(model)
# Wrap the model into DataParallel
model.cuda()
model = torch.nn.DataParallel(model)

valdir = os.path.join('/workspace/dataset/ILSVRC2012/val')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=512, shuffle=False,
    num_workers=10, pin_memory=True)


# # Initiate data loaders
# data = '/workspace/dataset/ILSVRC2012-sz/352'
# crop_size = 224
# min_scale = 0.087
# batch_size = 448
# workers = 10
# traindir = os.path.join(data, 'train')
# valdir = os.path.join(data, 'val')

# train_transform = transforms.Compose([
#     transforms.RandomResizedCrop(crop_size, scale=(min_scale, 1.0)),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor()
# ])

# test_transform = transforms.Compose([
#     transforms.Resize(256),
#     transforms.CenterCrop(224),
#     transforms.ToTensor(),
# ])

# train_dataset = datasets.ImageFolder(traindir, train_transform)

# train_loader = torch.utils.data.DataLoader(train_dataset,
#                                            batch_size=batch_size,
#                                            shuffle=True,
#                                            num_workers=workers,
#                                            pin_memory=True,
#                                            sampler=None,
#                                            drop_last=True)

# val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(valdir, test_transform),
#                                          batch_size=batch_size,
#                                          shuffle=False,
#                                          num_workers=workers,
#                                          pin_memory=True,
#                                          drop_last=False)


=> creating model 'resnet50'


In [37]:
resume = ''
if os.path.isfile(resume):
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(resume,
                                                        checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(resume))

=> loading checkpoint 'ResNet50_Baseline_23.68'
=> loaded checkpoint 'ResNet50_Baseline_23.68'
DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_

In [22]:
best_prec1

tensor(77.1940, device='cuda:0')

In [25]:
for exp in range(0,11):
    expname = 'cutmix_d' + str(exp/10)
    resume = 'trained_models/' + expname + '_phase1/model_best.pth.tar'
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
#         optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(resume,
                                                            checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(resume))

    print('the number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))
    print('best prec1: {}'.format(best_prec1))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    cudnn.benchmark = True
    correct_classes = torch.zeros(numberofclass).cuda()
    number_classes = torch.zeros(numberofclass).cuda()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, target) in enumerate(val_loader):
        target = target.cuda()

        output = model(input)
        output = output.data
        topk=(1,)

        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        for c, n in zip(correct.reshape(-1).int(), target):
            correct_classes[n] += c
            number_classes[n] += 1

        utils.progress_bar(i, len(val_loader), 'eval')
    np.savetxt(expname + '_train.csv', np.transpose(correct_classes.cpu()), delimiter=',')
    np.savetxt(expname + '_train_number_classes.csv', np.transpose(number_classes.cpu()), delimiter=',')

=> loading checkpoint 'trained_models/cutmix_d0.0_phase1/model_best.pth.tar'
=> loaded checkpoint 'trained_models/cutmix_d0.0_phase1/model_best.pth.tar' (epoch 100)
the number of model parameters: 25557032


KeyboardInterrupt: 

In [15]:
output.shape, target.shape

(torch.Size([256, 1000]), torch.Size([256]))

In [14]:
import numpy as np
numberofclass = 1000
correct_classes = torch.empty(numberofclass, 2)

In [19]:
correct_classes[500][1]

tensor(1.8177e+31)