In [1]:
import torch.nn as nn
import torch

import utils
from vgg import Vgg16

import argparse
import os
import sys
import time
import re
''
import numpy as np
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import torch.onnx

from MobileStyleNet import MobileStyleNet
from transformer_net import TransformerNet
import torch.autograd.profiler as profiler

In [2]:
# Smoke test: push one random 1x3x256x256 batch through a freshly constructed
# (untrained) MobileStyleNet on the CPU and display the raw result.
device = "cpu"
model = MobileStyleNet()
model = model.to(device)
img = torch.rand(1, 3, 256, 256).to(device)
model(img)

tensor([[[[-0.3234, -0.8915, -0.3468,  ..., -0.5997,  0.0661, -0.4455],
          [-0.4115, -0.7342, -0.5517,  ..., -0.6160, -0.4807, -0.7328],
          [-0.4218, -0.6166, -0.4249,  ..., -0.4423, -0.1550, -1.0208],
          ...,
          [-0.6320, -0.6064, -0.6522,  ..., -0.4617, -0.2905, -0.0110],
          [-0.2738, -0.6949, -0.3121,  ..., -0.0756, -0.1705,  0.5775],
          [-0.7915, -0.8221, -0.6593,  ..., -0.0921, -0.5124,  0.1107]],

         [[ 0.7309, -0.0973, -0.1697,  ...,  0.4622,  0.4867,  0.0511],
          [ 0.0990,  0.3719,  0.4291,  ...,  0.5338,  0.6029,  0.0825],
          [ 0.0463, -0.0603,  0.5326,  ...,  0.8698,  0.5337,  0.7298],
          ...,
          [ 0.3435,  0.1452,  0.6654,  ...,  0.9428,  0.5697,  0.2085],
          [ 0.6817,  0.0702,  0.0489,  ..., -0.1669,  0.3009,  0.0747],
          [ 0.5535,  0.2660, -0.0136,  ...,  0.1331,  0.0502,  0.3991]],

         [[-0.2663,  0.2883, -0.2050,  ..., -0.0705,  0.3005,  0.4059],
          [-0.4334,  0.4653, -

In [4]:
%%timeit
# Timed statement: one forward pass of `model` on `img` (not wrapped in torch.no_grad()).
model(img)

96.6 ms ± 4.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Style Transfer

In [7]:
# Stylize one content image with a trained MobileStyleNet checkpoint and save
# the result to "test4.jpg".

# Prefer the GPU, but fall back to the CPU so the cell also runs on machines
# without CUDA instead of raising on the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "model_mobile_5e"
content_image = utils.load_image("/home/kevin/Pictures/neuseeland.jpg", scale=2)

content_transform = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    # Multiply the tensor values by 255 (presumably restoring the 0-255 range
    # the network was trained on -- confirm against the training script).
    transforms.Lambda(lambda x: x.mul(255))
])
# unsqueeze(0) adds the leading batch dimension before moving the image to `device`.
content_image = content_transform(content_image).unsqueeze(0).to(device)

with torch.no_grad():
    model = MobileStyleNet()
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # saved from a GPU run still loads on a CPU-only machine.
    state_dict = torch.load(model_name, map_location=device)
    # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
    for k in list(state_dict.keys()):
        if re.search(r'in\d+\.running_(mean|var)$', k):
            del state_dict[k]
    model.load_state_dict(state_dict)
    model.to(device)
    # Switch to inference mode so any train/eval-dependent layers behave
    # deterministically while stylizing.
    model.eval()
    output = model(content_image).cpu()

# Save the first (only) image of the output batch.
utils.save_image("test4.jpg", output[0])

# Performance Measurement

In [2]:
# Baseline for the timing comparison: an untrained TransformerNet and a random
# 1x3x256x256 input, both placed on the CPU.
device = "cpu"
model = TransformerNet()
model = model.to(device)
img = torch.rand(1, 3, 256, 256).to(device)

In [3]:
%%timeit
# Timed statement: one forward pass of `model` on `img` (not wrapped in torch.no_grad()).
model(img)

182 ms ± 24.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### MobileStyleNet

In [4]:
# Second model for the comparison: an untrained MobileStyleNet moved to `device`.
mobilenet = MobileStyleNet()
mobilenet = mobilenet.to(device)

In [5]:
%%timeit
# Timed statement: one forward pass of `mobilenet` on `img` (not wrapped in torch.no_grad()).
mobilenet(img)

102 ms ± 4.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
# Profile a single forward pass of `model`; the "model_inference" label groups
# the whole call under one row in the report.
with profiler.profile(record_shapes=True) as prof, profiler.record_function("model_inference"):
    model(img)

# Show the ten most expensive operators, ranked by total CPU time.
print(
    prof.key_averages().table(
        sort_by="cpu_time_total",
        row_limit=10,
    )
)

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 model_inference         1.62%       3.244ms        99.98%     200.688ms     200.688ms             1  
                    aten::conv2d         0.05%     101.101us        63.96%     128.381ms       8.024ms            16  
               aten::convolution         0.05%     108.442us        63.91%     128.279ms       8.017ms            16  
              aten::_convolution         0.14%     290.809us        63.85%     128.171ms       8.011ms            16  
        aten::mkldnn_convolution        63.61%     127.685ms        63.70%     127.855ms       7.991ms            16  
          aten::reflection_pad2d        21.61%  

In [6]:
# Profile a single forward pass of `mobilenet`; the "model_inference" label
# groups the whole call under one row in the report.
with profiler.profile(record_shapes=True) as prof, profiler.record_function("model_inference"):
    mobilenet(img)

# Show the ten most expensive operators, ranked by total CPU time.
print(
    prof.key_averages().table(
        sort_by="cpu_time_total",
        row_limit=10,
    )
)

--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                            Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg    # of Calls  
--------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  
                 model_inference         1.68%       2.628ms        99.98%     156.315ms     156.315ms             1  
                    aten::conv2d         0.05%      84.772us        56.31%      88.044ms       5.870ms            15  
               aten::convolution         0.06%      87.785us        56.26%      87.959ms       5.864ms            15  
              aten::_convolution         0.15%     233.064us        56.20%      87.871ms       5.858ms            15  
        aten::mkldnn_convolution        55.95%      87.471ms        56.04%      87.619ms       5.841ms            15  
          aten::reflection_pad2d        14.87%  