In [1]:
import glob
import os

import numpy as np
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from PIL import Image
from torchvision import transforms

In [2]:
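# For reference: the complete, untruncated AlexNet classifier head. The class
# below keeps only its leading layers (up to fc6 or fc7).
# self.classifier = nn.Sequential(
#     nn.Dropout(),
#     nn.Linear(256 * 6 * 6, 4096),
#     nn.ReLU(inplace=True),
#     nn.Dropout(),
#     nn.Linear(4096, 4096),
#     nn.ReLU(inplace=True),
#     nn.Linear(4096, num_classes),
# )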
class AlexNet(nn.Module):
    """AlexNet cut off after a fully connected layer, used as a feature extractor.

    The convolutional stack is followed by a classifier head that ends right
    after the ReLU of the first ('fc6') or the second ('fc7') 4096-unit linear
    layer, so ``forward`` yields 4096 activations per sample.

    Args:
        output_layer: 'fc6' (default) or 'fc7'. Any other value trips an
            assertion.
    """

    def __init__(self, output_layer='fc6'):
        super(AlexNet, self).__init__()

        conv_stack = [
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        # Only 'fc6' and 'fc7' are valid cut points.
        stop_at_fc6 = output_layer == 'fc6'
        if not stop_at_fc6:
            assert output_layer == 'fc7'

        # Layers shared by both variants (indices 0-2 of the classifier).
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
        ]
        if not stop_at_fc6:
            # fc7 appends the second fully connected stage (indices 3-5).
            head += [
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(inplace=True),
            ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run the conv stack, flatten to 256*6*6 values per sample, apply the head."""
        feature_maps = self.features(x)
        # view() raises unless each sample holds exactly 256*6*6 values here.
        flattened = feature_maps.view(feature_maps.size(0), 256 * 6 * 6)
        return self.classifier(flattened)

def alexnet(pretrained=False, **kwargs):
    r"""Build the truncated AlexNet, optionally initialised from pretrained weights.

    Architecture reference: the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded unchanged to the ``AlexNet`` constructor.
    """
    net = AlexNet(**kwargs)
    if not pretrained:
        return net

    own_state = net.state_dict()
    checkpoint = model_zoo.load_url('https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth')
    # Every parameter of the truncated network must exist in the checkpoint.
    assert all(name in checkpoint for name in own_state)
    # The checkpoint also holds entries this network does not have; copy only the matching ones.
    matching = {name: tensor for name, tensor in checkpoint.items() if name in own_state}
    own_state.update(matching)
    net.load_state_dict(own_state)
    return net

In [3]:
# Build the network truncated at fc6 and load the pretrained weights into it.
alexnet_fc6 = alexnet(pretrained=True, output_layer='fc6')

In [4]:
# Switch to evaluation mode (the classifier contains a Dropout layer).
# eval() returns the module, so the cell output is its layer listing.
alexnet_fc6.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
  )
)

In [5]:
# Build the network truncated at fc7 and load the pretrained weights into it.
alexnet_fc7 = alexnet(pretrained=True, output_layer='fc7')

In [6]:
# Switch to evaluation mode (the classifier contains Dropout layers).
# eval() returns the module, so the cell output is its layer listing.
alexnet_fc7.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096,

In [7]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable

In [8]:
class RemoveAlphaChannel(object):
    """Transform that reduces a 4-channel, channel-first image to its first 3 channels.

    Inputs that already have 3 channels are returned unchanged; any other
    channel count fails the assertion.
    """

    def __call__(self, image_tensor):
        channel_count = image_tensor.shape[0]
        # Keep the first three planes only when a fourth one is present.
        trimmed = image_tensor[:3] if channel_count == 4 else image_tensor
        assert trimmed.shape[0] == 3
        return trimmed

In [9]:
class ImageDataset(Dataset):
    """Artwork images from disk, preprocessed for the AlexNet feature extractors.

    Every ``*.jpg`` under ``/mnt/workspace/Ugallery/images/`` is one sample, and
    the integer artwork id is parsed from the file name (``<id>.jpg``).

    Each item is a dict with:
        'id':    the artwork id (int)
        'image': a normalized 3x224x224 tensor
    """

    def __init__(self):
        # glob returns files in arbitrary order; sort so row order is reproducible.
        self.image_files = sorted(glob.glob("/mnt/workspace/Ugallery/images/*.jpg"))
        # File names are "<artwork id>.jpg".
        self.artwork_ids = [
            int(os.path.splitext(os.path.basename(path))[0])
            for path in self.image_files
        ]
        # The image is stretched straight to 224x224 (aspect ratio is not preserved).
        # The alternative "resize to 256, then center-crop 224" was left disabled.
        # NOTE: later torchvision releases rename Scale to Resize.
        self.transform = transforms.Compose([
            transforms.Scale([224,224]),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            ),
            # Images are already RGB when they get here; kept as a final 3-channel check.
            RemoveAlphaChannel()
        ])

    def __len__(self):
        """Number of image files found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load, convert and preprocess the image at position ``idx``."""
        # Convert to RGB while loading. Without this a CMYK JPEG reaches the
        # pipeline with 4 channels and its K plane is dropped as if it were
        # alpha (wrong colours), and a greyscale JPEG fails the 3-channel check.
        # The context manager also closes the file handle deterministically.
        with Image.open(self.image_files[idx]) as raw_image:
            rgb_image = raw_image.convert('RGB')
        return {'id': self.artwork_ids[idx],
                'image': self.transform(rgb_image)}

In [10]:
# Index the image files; images themselves are only opened in __getitem__.
image_dataset = ImageDataset()

In [11]:
# Batches of up to 1024 images, loaded by 4 worker processes.
dataloader = DataLoader(image_dataset, batch_size=1024, num_workers=4)

In [12]:
# Preallocate one row per image. np.empty leaves the memory uninitialized,
# so every row must be written by the extraction loop before the arrays are used.
N = len(image_dataset)
output_ids = np.empty((N,), dtype=int)
# 4096 is the width of both the fc6 and the fc7 layer; dtype=float is float64.
output_fc6 = np.empty((N,4096), dtype=float)
output_fc7 = np.empty((N,4096), dtype=float)

In [13]:
from time import time

In [14]:
# Run every batch through both networks and store ids and activations row by row.
offset = 0
start_time = time()
# Pure inference: with autograd disabled no graph or intermediate activations
# are retained while the batches pass through the two networks.
with torch.no_grad():
    for i_batch, batch in enumerate(dataloader):
        assert offset < N

        batch_ids = batch['id'].numpy()
        # Tensors can be fed to a module directly; the Variable wrapper is not needed.
        batch_var = batch['image']
        batch_size = len(batch_ids)

        # Both extractors see the same preprocessed batch.
        batch_fc6 = alexnet_fc6(batch_var).numpy()
        batch_fc7 = alexnet_fc7(batch_var).numpy()

        output_ids[offset:offset+batch_size] = batch_ids
        output_fc6[offset:offset+batch_size] = batch_fc6
        output_fc7[offset:offset+batch_size] = batch_fc7

        elapsed_time = time() - start_time
        offset += batch_size

        print('i_batch = %d, offset = %d, batch_size = %d, elapsed_time = %.2f' % (i_batch, offset, batch_size, elapsed_time))

# The buffers come from np.empty, so an unfilled row would silently hold garbage.
assert offset == N, 'filled %d of %d rows' % (offset, N)

i_batch = 0, offset = 1024, batch_size = 1024, elapsed_time = 39.50
i_batch = 1, offset = 2048, batch_size = 1024, elapsed_time = 68.87
i_batch = 2, offset = 3072, batch_size = 1024, elapsed_time = 98.30
i_batch = 3, offset = 4096, batch_size = 1024, elapsed_time = 128.13
i_batch = 4, offset = 5120, batch_size = 1024, elapsed_time = 157.83
i_batch = 5, offset = 6144, batch_size = 1024, elapsed_time = 186.23
i_batch = 6, offset = 7168, batch_size = 1024, elapsed_time = 214.57
i_batch = 7, offset = 8192, batch_size = 1024, elapsed_time = 242.88
i_batch = 8, offset = 9216, batch_size = 1024, elapsed_time = 271.32
i_batch = 9, offset = 10240, batch_size = 1024, elapsed_time = 299.65
i_batch = 10, offset = 11264, batch_size = 1024, elapsed_time = 327.93
i_batch = 11, offset = 12288, batch_size = 1024, elapsed_time = 356.21
i_batch = 12, offset = 13297, batch_size = 1009, elapsed_time = 384.07


In [15]:
# Quick sanity check: overall mean activation of each feature matrix.
output_fc6.mean(), output_fc7.mean()

(0.8294545454770808, 0.2906904877248657)

In [16]:
import os
dir_path = "/mnt/workspace/Ugallery/AlexNet/"
os.makedirs(dir_path, exist_ok=True)
# np.save writes the actual .npy format that the file names promise.
# ndarray.dump would store a pickle instead, which np.load refuses to read
# unless allow_pickle=True is passed.
np.save(os.path.join(dir_path, "fc6.npy"), output_fc6)
np.save(os.path.join(dir_path, "fc7.npy"), output_fc7)
np.save(os.path.join(dir_path, "ids.npy"), output_ids)