In [1]:
# Copyright 2021 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

<img src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png" style="width: 90px; float: right;">

# Movie Poster Feature Extraction with ResNet

In this notebook, we will use a pretrained ResNet-50 network to extract image features from the movie poster images. 

Note: this notebook should be executed from within the `nvidia_resnet50` container, built as follows
```
git clone https://github.com/NVIDIA/DeepLearningExamples
cd DeepLearningExamples/PyTorch/Classification/ConvNets
docker build . -t nvidia_resnet50
```

Start the container, mounting the current directory:

```
nvidia-docker run --rm --net=host -it -v $PWD:/workspace --ipc=host nvidia_resnet50
```

Then from within the container:

```
cd /workspace
jupyter-lab --allow-root --ip='0.0.0.0'

```

## Download a pretrained ResNet-50 from NVIDIA GPU cloud

In [1]:
from PIL import Image
import argparse
import numpy as np
import json
import torch
from torch.cuda.amp import autocast
import torch.backends.cudnn as cudnn

from image_classification import models
import torchvision.transforms as transforms

In [2]:
from image_classification.models import (
    resnet50,
    resnext101_32x4d,
    se_resnext101_32x4d,
    efficientnet_b0,
    efficientnet_b4,
    efficientnet_widese_b0,
    efficientnet_widese_b4,
    efficientnet_quant_b0,
    efficientnet_quant_b4,
)

In [3]:
def available_models():
    models = {
        m.name: m
        for m in [
            resnet50,
            resnext101_32x4d,
            se_resnext101_32x4d,
            efficientnet_b0,
            efficientnet_b4,
            efficientnet_widese_b0,
            efficientnet_widese_b4,
            efficientnet_quant_b0,
            efficientnet_quant_b4,
        ]
    }
    return models

In [4]:
def load_jpeg_from_file(path, image_size, cuda=True):
    img_transforms = transforms.Compose(
        [
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
        ]
    )

    img = img_transforms(Image.open(path))
    with torch.no_grad():
        # mean and std are not multiplied by 255 as they are in training script
        # torch dataloader reads data into bytes whereas loading directly
        # through PIL creates a tensor with floats in [0,1] range
        mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

        if cuda:
            mean = mean.cuda()
            std = std.cuda()
            img = img.cuda()
        img = img.float()

        if img.shape[0] == 1: #mono image
            #pad channels
            img = img.repeat([3, 1, 1])
        input = img.unsqueeze(0).sub_(mean).div_(std)

    return input

In [5]:
def check_quant_weight_correctness(checkpoint_path, model):
    state_dict = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    state_dict = {k[len("module."):] if k.startswith("module.") else k: v for k, v in state_dict.items()}
    quantizers_sd_keys = {f'{n[0]}._amax' for n in model.named_modules() if 'quantizer' in n[0]}
    sd_all_keys = quantizers_sd_keys | set(model.state_dict().keys())
    assert set(state_dict.keys()) == sd_all_keys, (f'Passed quantized architecture, but following keys are missing in '
                                                   f'checkpoint: {list(sd_all_keys - set(state_dict.keys()))}')

In [6]:
!pip install ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [7]:
imgnet_classes = np.array(json.load(open("./LOC_synset_mapping.json", "r")))

model_args = {}
model_args["pretrained_from_file"] = './nvidia_resnet50_200821.pth.tar'
model = available_models()['resnet50'](model_args)

model = model.cuda()
model.eval()

Downloading: "https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/files/nvidia_resnet50_200821.pth.tar" to /root/.cache/torch/hub/checkpoints/nvidia_resnet50_200821.pth.tar


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=102491118.0), HTML(value='')))




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Extract features for all movies

Next, we will extract feature for all movie posters, using the last layer just before the classification head, containing 2048 float values.

In [12]:
import glob

filelist = glob.glob('./poster_small/*.jpg')
len(filelist)

61951

In [13]:
filelist[:10]

['./poster_small/0105812.jpg',
 './poster_small/0103893.jpg',
 './poster_small/0104684.jpg',
 './poster_small/0103905.jpg',
 './poster_small/0104815.jpg',
 './poster_small/0105477.jpg',
 './poster_small/0104291.jpg',
 './poster_small/0104040.jpg',
 './poster_small/0105046.jpg',
 './poster_small/0103759.jpg']

In [14]:
from tqdm import tqdm

batchsize = 64
num_bathces = len(filelist)//batchsize
batches = np.array_split(filelist, num_bathces)


In [17]:
### strip the last layer
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])

feature_dict = {}
error = 0
for batch in tqdm(batches):
    inputs = []
    imgs = []
    for i, f in enumerate(batch):
        try:
            img = load_jpeg_from_file(f, 224, cuda=True)
            imgs.append(f.split('/')[-1].split('.')[0])
            inputs.append(img.squeeze())
        except Exception as e:
            print(e)
            error +=1
    features = feature_extractor(torch.stack(inputs, dim=0)).cpu().detach().numpy().squeeze()  
    for i, f in enumerate(imgs):
        feature_dict[f] =features[i,:]

print('Unable to extract features for %d images'%error)

  9%|▉         | 87/967 [00:18<03:04,  4.76it/s]

cannot identify image file './poster_small/0112453.jpg'


 31%|███       | 300/967 [00:52<01:42,  6.48it/s]

cannot identify image file './poster_small/0059758.jpg'


 37%|███▋      | 353/967 [01:09<07:15,  1.41it/s]

cannot identify image file './poster_small/0202415.jpg'


 91%|█████████▏| 884/967 [06:01<00:57,  1.45it/s]

cannot identify image file './poster_small/0303151.jpg'


100%|██████████| 967/967 [06:44<00:00,  2.39it/s]

Erro: 4





In [19]:
import pickle
with open('movies_poster_features.pkl', 'wb') as f:
    pickle.dump({"feature_dict": feature_dict}, f, protocol=pickle.HIGHEST_PROTOCOL)

In [20]:
len(feature_dict)

61947