In [1]:
import torch
# torch.backends.cudnn.benchmark=False

In [2]:
import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
# Hide every GPU from this process so the cells below run on CPU only.
# NOTE(review): this is executed after `import torch` in the previous cell; it
# presumably still takes effect because CUDA is initialised lazily -- confirm,
# or move this cell above the torch import.
os.environ["CUDA_VISIBLE_DEVICES"]=""

In [3]:
from torchvision.models import resnet18

# Checkpoints of the compressed ResNet-18:
#   mpath -- a fully pickled model object (torch.load returns something with
#            .load_state_dict, see below)
#   spath -- a dict whose 'state_dict' entry is loaded into that model
mpath = "/gpfs/gpfs0/y.gusak/musco_models/resnet18_imagenet/grschedule1/tucker2_vbmf_wf0.8/iter_1-0/beforeft.pth"
spath = "/gpfs/gpfs0/y.gusak/musco_models/resnet18_imagenet/grschedule1/tucker2_vbmf_wf0.8/iter_1-0/best.pth.tar"

# Both loads use map_location='cpu': CUDA_VISIBLE_DEVICES is set to "" earlier
# in this notebook, so a checkpoint that was saved from a GPU could otherwise
# not be restored here.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only point it
# at checkpoints you trust.
mym = torch.load(mpath, map_location = 'cpu')
mym.load_state_dict(torch.load(spath, map_location = 'cpu')['state_dict'])


# Uncompressed torchvision ResNet-18, used as the reference in the timing cells.
m = resnet18(pretrained = True)


In [5]:
device = 'cpu'
# Single random ImageNet-sized input (batch of 1, 3x224x224) used by the timing cells.
x = torch.randn(1, 3, 224, 224).to(device)

# Put both networks in inference mode before timing them: a freshly built
# nn.Module starts in training mode, where BatchNorm normalises with batch
# statistics and overwrites its running statistics on every forward pass.
# .eval() returns the module itself, so the cell still displays `m`.
mym.to(device).eval()
m.to(device).eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [8]:
# Time the forward pass of the reference model `m` on `x`: 10 runs of 20 loops each; -o returns the TimeitResult.
%timeit  -o -n 20 -r 10 m(x)

99.7 ms ± 498 µs per loop (mean ± std. dev. of 10 runs, 20 loops each)


<TimeitResult : 99.7 ms ± 498 µs per loop (mean ± std. dev. of 10 runs, 20 loops each)>

In [9]:
# Same measurement for the compressed model `mym` (identical input and timeit settings).
%timeit  -o -n 20 -r 10 mym(x)

102 ms ± 93.6 µs per loop (mean ± std. dev. of 10 runs, 20 loops each)


<TimeitResult : 102 ms ± 93.6 µs per loop (mean ± std. dev. of 10 runs, 20 loops each)>

In [7]:
# NOTE(review): ad-hoc ratio. The operands do not match the timings recorded
# above (99.7 ms and 102 ms) -- presumably left over from an earlier run;
# recompute from the TimeitResult objects or remove this cell.
10.2/8.16

1.25

In [4]:
from collections import OrderedDict, defaultdict

import torch
import numpy as np

import sys
sys.path.append('../../ReducedOrderNet/src')
from models.dcp.pruned_resnet import PrunedResNet

sys.path.append('../')
from flopco import FlopCo
import dataloaders

from mobilenetv2 import MobileNetV2


def count_params(model):
    """Return the total number of scalar elements over ``model.parameters()``."""
    return sum(tensor.numel() for tensor in model.parameters())

## DCP models

In [5]:
def load_dcpmodel(initialdcpmodels_path, depth, pruning_rate):
    """Build a ``PrunedResNet`` and fill it with weights read from disk.

    The weights file is ``<initialdcpmodels_path>/resnet<depth>_pruned<pruning_rate>.pth``
    and is loaded onto the CPU. Entries from the file override the model's own
    state-dict entries; entries missing from the file keep their initial values.

    Args:
        initialdcpmodels_path: directory that contains the weight files.
        depth: value forwarded to ``PrunedResNet(depth=...)`` and used in the file name.
        pruning_rate: value forwarded to ``PrunedResNet(pruning_rate=...)`` and used
            in the file name.

    Returns:
        The model, switched to eval mode, with the merged weights loaded.
    """
    weights_file = "{}/resnet{}_pruned{}.pth".format(initialdcpmodels_path, depth, pruning_rate)

    net = PrunedResNet(depth=depth, pruning_rate = pruning_rate, num_classes=1000)
    net.eval()

    # Start from the model's own state dict so keys absent from the file survive.
    merged_state = net.state_dict()
    merged_state.update(torch.load(weights_file, map_location = 'cpu'))
    net.load_state_dict(merged_state)

    return net

def get_dcp_flops_params(initialdcpmodels_path):
    """Collect FLOPs and parameter counts for every DCP-pruned ResNet variant.

    Iterates over depths 18 and 50 and pruning rates 0.3, 0.5 and 0.7, loading
    each model with ``load_dcpmodel``.

    Args:
        initialdcpmodels_path: directory passed through to ``load_dcpmodel``.

    Returns:
        ``(flops, params)``: two mappings keyed by ``'resnet<depth>_pruned<rate>'``,
        holding ``FlopCo(model).total_flops`` and ``count_params(model)``.
    """
    # defaultdict() without a factory behaves like a plain dict; kept so the
    # returned type stays the same.
    flops, params = defaultdict(), defaultdict()

    variants = [(depth, rate) for depth in [18, 50] for rate in [0.3, 0.5, 0.7]]
    for depth, pruning_rate in variants:
        key = 'resnet{}_pruned{}'.format(depth, pruning_rate)
        net = load_dcpmodel(initialdcpmodels_path, depth, pruning_rate)

        flops[key] = FlopCo(net).total_flops
        params[key] = count_params(net)

    return flops, params

## MobileNet models

In [6]:
def load_mbmodel(mb_profile, mb_path, path_to_mbnet):
    """Build a ``MobileNetV2`` for ``mb_profile`` and load checkpoint weights into it.

    The checkpoint at ``<path_to_mbnet>/<mb_path>`` is read onto the CPU and its
    ``'state_dict'`` entry is merged over the model's own state dict. A leading
    ``'module.'`` in a key is removed so the key matches the bare model;
    presumably that prefix comes from a ``DataParallel`` wrapper used when the
    checkpoint was saved -- TODO confirm.

    Args:
        mb_profile: value forwarded to ``MobileNetV2(profile=...)``.
        mb_path: checkpoint file name, relative to ``path_to_mbnet``.
        path_to_mbnet: directory that contains the checkpoint.

    Returns:
        The ``MobileNetV2`` instance with the checkpoint weights loaded.

    Raises:
        KeyError: if the checkpoint has no ``'state_dict'`` entry.
    """
    mbnet = MobileNetV2(profile = mb_profile)

    checkpoint = torch.load('{}/{}'.format(path_to_mbnet, mb_path),
                            map_location='cpu')

    # Remove only an exact leading 'module.'. The previous k.strip('.module')
    # treated its argument as a *set of characters* and stripped them from both
    # ends, which corrupts keys such as 'module.dense.weight' (-> 'nse.weight')
    # or '...num_batches_tracked' (-> '...num_batches_track').
    prefix = 'module.'
    pretrained_dict = {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in checkpoint['state_dict'].items()
    }

    # Start from the model's own state so keys absent from the checkpoint survive.
    model_dict = mbnet.state_dict()
    model_dict.update(pretrained_dict)
    mbnet.load_state_dict(model_dict)

    return mbnet



def get_mb_flops_params(mb_profiles, mb_paths, path_to_mbnet):
    """Collect FLOPs and parameter counts for a list of MobileNetV2 checkpoints.

    ``mb_profiles`` and ``mb_paths`` are paired element-wise and processed from
    the last pair to the first, so the returned mappings are filled in reverse
    input order.

    Args:
        mb_profiles: sequence of profile names; each is passed to ``load_mbmodel``
            and used as the key in the returned mappings.
        mb_paths: sequence of checkpoint file names, parallel to ``mb_profiles``.
        path_to_mbnet: directory that contains the checkpoints.

    Returns:
        ``(mb_flops, mb_params)``: two mappings keyed by profile name, holding
        ``FlopCo(model=...).total_flops`` and ``count_params(model)``.
    """
    # defaultdict() without a factory behaves like a plain dict; kept so the
    # returned type stays the same.
    mb_flops = defaultdict()
    mb_params = defaultdict()

    # Use the `mb_paths` argument. The body used to read a notebook-level global
    # named `mb_pathes`, so the argument was silently ignored and the function
    # failed with NameError wherever that global did not exist.
    for mb_profile, mb_path in zip(mb_profiles[::-1], mb_paths[::-1]):
        mbnet = load_mbmodel(mb_profile, mb_path, path_to_mbnet)

        mb_flops[mb_profile] = FlopCo(model=mbnet).total_flops
        mb_params[mb_profile] = count_params(mbnet)

    return mb_flops, mb_params

## Compute Flops

In [7]:
def get_baseline_flops_params(model_names, initialmodels_path):
    """Collect FLOPs and parameter counts for fully pickled baseline models.

    Each model is read from ``<initialmodels_path>/<model_name>.pth``.

    Args:
        model_names: iterable of model names (file stems); also the keys of the
            returned mappings.
        initialmodels_path: directory that contains the ``.pth`` files.

    Returns:
        ``(flops, params)``: two mappings keyed by model name, holding
        ``FlopCo(model).total_flops`` and ``count_params(model)``.
    """
    # defaultdict() without a factory behaves like a plain dict; kept so the
    # returned type stays the same.
    flops = defaultdict()
    params = defaultdict()

    for model_name in model_names:
        # map_location='cpu' matches the other loaders in this notebook and
        # lets checkpoints saved from a GPU be restored on a CPU-only host.
        # NOTE(review): torch.load unpickles arbitrary Python objects -- only
        # point it at checkpoints you trust.
        m = torch.load('{}/{}.pth'.format(initialmodels_path, model_name),
                       map_location='cpu')

        flops[model_name] = FlopCo(m).total_flops
        params[model_name] = count_params(m)

    return flops, params

      

In [8]:
# --- Checkpoint locations ----------------------------------------------------
initialdcpmodels_path = "/gpfs/gpfs0/y.gusak/pretrained/DCP/imagenet"
path_to_mbnet = "/gpfs/gpfs0/y.gusak/pretrained/amc-compressed-models/checkpoints/torch"
initialmodels_path = "/gpfs/gpfs0/y.gusak/pretrained/torchvision/models_torch_1-0-1"

# --- Model identifiers ---------------------------------------------------------
# One name per (depth, pruning rate) pair, depth-major.
dcp_names = [
    'resnet{}_pruned{}'.format(depth, pruning_rate)
    for depth in [18, 50]
    for pruning_rate in [0.3, 0.5, 0.7]
]
baseline_names = ['resnet18_imagenet', 'resnet50_imagenet']

# MobileNetV2 profiles and their checkpoint files (parallel lists).
mb_profiles = ['normal', '0.7flops']
mb_pathes = [
    'mobilenetv2_imagenet_71.814.pth.tar',
    'mobilenetv2_imagenet_0.7amc_70.854.pth.tar',
    # 'mobilenet_imagenet_0.5flops_70.5.pth.tar',
    # 'mobilenet_imagenet_0.5time_70.2.pth.tar',
]

In [9]:
# Compute FLOPs and parameter counts for the three model families
# (DCP-pruned ResNets, MobileNetV2 checkpoints, unpruned baselines).
# Each call returns a (flops, params) pair of mappings keyed by model/profile name.
dcp_flops, dcp_params = get_dcp_flops_params(initialdcpmodels_path) 
mb_flops, mb_params = get_mb_flops_params(mb_profiles, mb_pathes, path_to_mbnet)
baseline_flops, baseline_params = get_baseline_flops_params(baseline_names, initialmodels_path)

In [10]:
# For each DCP model print: name, baseline FLOPs / model FLOPs, baseline params / model params.
# The baseline is chosen by the first tag found in the model name ('18' is checked before '50').
tag_to_baseline = (('18', 'resnet18_imagenet'), ('50', 'resnet50_imagenet'))

for k in dcp_flops.keys():
    baseline = next((name for tag, name in tag_to_baseline if tag in k), None)
    if baseline is None:
        # Neither tag occurs in the name: nothing is printed for this model.
        continue
    print(k, baseline_flops[baseline]/dcp_flops[k],
          baseline_params[baseline]/dcp_params[k])

resnet18_pruned0.3 1.3774212305389981 1.3898005071497461
resnet18_pruned0.5 1.854434829531902 1.8869707383028758
resnet18_pruned0.7 2.78150178438904 2.915578870074985
resnet50_pruned0.3 1.5504817817680177 1.5014889144231705
resnet50_pruned0.5 2.230428380489107 2.06406983633482
resnet50_pruned0.7 3.410479885382879 2.932876381888326


In [11]:
# Display the FLOP counts computed for the DCP-pruned models.
dcp_flops

defaultdict(None,
            {'resnet18_pruned0.3': 2644232198,
             'resnet18_pruned0.5': 1964060160,
             'resnet18_pruned0.7': 1309444268,
             'resnet50_pruned0.3': 5310827889,
             'resnet50_pruned0.5': 3691820800,
             'resnet50_pruned0.7': 2414423238})

In [12]:
# For every MobileNetV2 profile print how many times more FLOPs / parameters each
# baseline and each DCP model has (reference value divided by MobileNet value).
reference_groups = (
    (baseline_names, baseline_flops, baseline_params),
    (dcp_names, dcp_flops, dcp_params),
)

for k in mb_flops.keys():
    print(k)

    for names, ref_flops, ref_params in reference_groups:
        for name in names:
            print('{}/{}'.format(name, k), ref_flops[name]/mb_flops[k], ref_params[name]/mb_params[k])

0.7flops
resnet18_imagenet/0.7flops 8.253542389808542 5.0163723376498535
resnet50_imagenet/0.7flops 18.659625329192522 10.967402946952115
resnet18_pruned0.3/0.7flops 5.992024957085097 3.6094189862814297
resnet18_pruned0.5/0.7flops 4.450705011775425 2.65842614081103
resnet18_pruned0.7/0.7flops 2.967297175982737 1.7205407780722595
resnet50_pruned0.3/0.7flops 12.034727236791465 7.304351595006935
resnet50_pruned0.5/0.7flops 8.365937903417764 5.3134844344351215
resnet50_pruned0.7/0.7flops 5.471260924061333 3.7394698987929305
normal
resnet18_imagenet/normal 5.797294745083786 3.335217948044893
resnet50_imagenet/normal 13.106535685784424 7.291858875302721
resnet18_pruned0.3/normal 4.208803099989426 2.399781789463353
resnet18_pruned0.5/normal 3.1261787433895125 1.7674984992319263
resnet18_pruned0.7/normal 2.084231898617016 1.1439299352444254
resnet50_pruned0.3/normal 8.45320199173125 4.8564187222814414
resnet50_pruned0.5/normal 5.876241443318754 3.5327578296725246
resnet50_pruned0.7/normal 3.84

## Evaluate

In [16]:
# Data-loading configuration for the evaluation cells below.
# DATA_ROOT = "/workspace/raid/data/datasets"
DATA_ROOT = "/gpfs/gpfs0/e.ponomarev/"
dataset_name = 'imagenet'

bs = 128
num_workers = 16

# Only the validation split is needed. `dataset_name` is now passed to the
# loader; previously it was assigned but unused because the call hard-coded
# 'imagenet', so changing the variable had no effect.
loaders = OrderedDict()
loaders["valid"] = dataloaders.get_loader(batch_size=bs,
                                    data_name = dataset_name,
                                    data_root = DATA_ROOT,
                                    num_workers = num_workers, 
                                    pin_memory = True)['val']
# loaders["train"] = dataloaders.get_loader(batch_size=bs,
#                                 data_name = dataset_name,
#                                 data_root = DATA_ROOT,
#                                 num_workers = num_workers, 
#                                 pin_memory = True)['train']

Building imagenet data loader with 16 workers


In [17]:
# Same two checkpoint paths as in the timing section at the top of the notebook.
# NOTE(review): duplicated literals -- consider defining them once.
mpath = "/gpfs/gpfs0/y.gusak/musco_models/resnet18_imagenet/grschedule1/tucker2_vbmf_wf0.8/iter_1-0/beforeft.pth"
spath = "/gpfs/gpfs0/y.gusak/musco_models/resnet18_imagenet/grschedule1/tucker2_vbmf_wf0.8/iter_1-0/best.pth.tar"

# Unpickle the whole model object (no map_location given, so tensors are
# restored to whatever device they were saved from).
mym = torch.load(mpath)

In [18]:
# Overwrite the model's weights with the 'state_dict' entry of the second checkpoint (read onto the CPU).
mym.load_state_dict(torch.load(spath, map_location = 'cpu')['state_dict'])

In [19]:
import train_validate

In [22]:
# Run the project's validation routine on the validation loader, on the GPU.
# NOTE(review): the returned triple shown in the output is presumably (Prec@1, Prec@5, loss) -- confirm in train_validate.
train_validate.validate(loaders['valid'], mym, device = 'cuda')

Test: [1/391]	ElapsedTime 0.097490 (0.097490)	Loss 0.6316 (0.6316)	Prec@1 85.937500 (85.937500)	Prec@5 94.531250 (94.531250)

Test: [10/391]	ElapsedTime 0.004148 (0.134069)	Loss 0.4596 (0.6559)	Prec@1 89.062500 (83.906250)	Prec@5 96.875000 (94.921875)

Test: [20/391]	ElapsedTime 0.004322 (0.129873)	Loss 1.2478 (0.9756)	Prec@1 68.750000 (75.781250)	Prec@5 89.062500 (92.031250)

Test: [30/391]	ElapsedTime 0.015177 (0.126860)	Loss 0.8981 (1.1152)	Prec@1 74.218750 (71.718750)	Prec@5 96.093750 (90.755211)

Test: [40/391]	ElapsedTime 0.360417 (0.118999)	Loss 0.7321 (1.0302)	Prec@1 79.687500 (74.296875)	Prec@5 94.531250 (91.464844)

Test: [50/391]	ElapsedTime 0.004002 (0.111278)	Loss 1.2641 (1.0533)	Prec@1 72.656250 (74.000000)	Prec@5 85.937500 (90.968750)

Test: [60/391]	ElapsedTime 0.033031 (0.111871)	Loss 1.1231 (0.9838)	Prec@1 67.968750 (75.755211)	Prec@5 92.968750 (91.679695)

Test: [70/391]	ElapsedTime 0.004336 (0.100378)	Loss 0.6969 (1.0178)	Prec@1 79.687500 (74.486610)	Prec@5 94.53125

(tensor(66.8380, device='cuda:0'),
 tensor(87.5400, device='cuda:0'),
 1.3564993466186523)

In [12]:
import  torch.nn.functional as F

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            
            if i%20 == 0:
                print(i)
            
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    del data
    torch.cuda.empty_cache()
    
    
def test_dcp_models(initialdcpmodels_path, device = 'cpu'):
    
    for depth in [18, 50]:
        for pruning_rate in [0.3, 0.5, 0.7]:
            model_name = 'resnet{}_pruned{}'.format(depth, pruning_rate)
            print("{}".format(model_name))
            
            model = load_dcpmodel(initialdcpmodels_path, depth, pruning_rate)
            
            test(model.to(device), device, loaders['valid'])
            
            del model
            torch.cuda.empty_cache()

In [13]:
# Evaluate all DCP-pruned models on the GPU (the recorded run was interrupted during resnet50_pruned0.3).
test_dcp_models(initialdcpmodels_path, 'cuda')

resnet18_pruned0.3
0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380

Test set: Average loss: -0.1136, Accuracy: 34591/50000 (69%)

resnet18_pruned0.5
0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380

Test set: Average loss: -0.1116, Accuracy: 33660/50000 (67%)

resnet18_pruned0.7
0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380

Test set: Average loss: -0.1086, Accuracy: 32045/50000 (64%)

resnet50_pruned0.3
0
20
40
60


KeyboardInterrupt: 

In [None]:
# Shell escape: show current GPU utilisation.
# NOTE(review): the NameError recorded below does not appear to come from this
# command (it never references `torch`) -- presumably stale output; clear it before sharing.
!nvidia-smi

NameError: name 'torch' is not defined