# Vectorize Images

In [1]:
import os
import glob
import csv
import pickle

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
from torch.nn import functional as nn_F
from torchvision.models import resnet34
from torchvision.transforms import functional as tran_F

#### Parameters

In [2]:
# Every dataset directory lives under $DATASETS/logos; a missing DATASETS
# variable raises KeyError here, before any work is done.
_logos_root = os.path.join(os.environ['DATASETS'], 'logos')

# Raw crops / exemplars, plus their 128x128 counterparts.
logos_path = os.path.join(_logos_root, 'logos', 'all_crops')
logos_128_path = os.path.join(_logos_root, 'logos', 'all_crops_128x128')
exemplars_path = os.path.join(_logos_root, 'exemplars')
exemplars_128_path = os.path.join(_logos_root, 'exemplars_128x128')

# Name of the layer whose output is captured, and the length of that output.
activation_layer = 'avgpool'
model_out = 512

# NOTE(review): hard-coded to CUDA device index 1; adjust on machines with a
# different GPU layout.
device = 'cuda:1'

#### Load and hook model

In [3]:
def hook_activation(model, activation_dic, activation_layer):
    """Register a forward hook that captures the output of one named layer.

    Args:
        model: ``torch.nn.Module`` to instrument.
        activation_dic: Dict that receives the captured output under the key
            ``activation_layer`` after every forward pass.
        activation_layer: Name of the sub-module to hook, as reported by
            ``model.named_modules()`` (e.g. ``'avgpool'``; dotted names address
            nested modules).

    Raises:
        ValueError: If ``model`` has no sub-module called ``activation_layer``.
    """
    # Resolve the layer from its name so the dictionary key and the hooked
    # module always refer to the same layer (previously `avgpool` was
    # hard-coded regardless of `activation_layer`).
    modules = dict(model.named_modules())
    if activation_layer not in modules:
        raise ValueError(
            'model has no sub-module named {!r}'.format(activation_layer))
    modules[activation_layer].register_forward_hook(
        save_activation(activation_dic, activation_layer))

def save_activation(activation_dic, activation_layer):
    """Build a forward hook that stores a layer's output in ``activation_dic``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by forward hooks. On every call it overwrites
    ``activation_dic[activation_layer]`` with ``output`` detached from the
    autograd graph, and returns ``None``.
    """
    def _store(module, inputs, output):
        # Only the output is kept; `module` and `inputs` are unused.
        captured = output.detach()
        activation_dic[activation_layer] = captured

    return _store

In [4]:
# Dict the forward hook writes the captured activation into.
activation_dic = {}

# Pretrained ResNet-34, switched to eval mode and moved to `device`.
model = resnet34(pretrained=True)
model.eval()
model = model.to(device)

# From here on, every forward pass refreshes activation_dic[activation_layer].
hook_activation(model, activation_dic, activation_layer)

In [5]:
def img_to_tensor(image):
    """Turn a single image into a float32 batch of one on ``device``.

    ``image`` is passed to torchvision's ``to_tensor``; the result gets a
    leading batch dimension and is moved to the module-level ``device``.

    NOTE(review): no mean/std normalization is applied before the tensor is
    fed to the pretrained network -- confirm this is intended.
    """
    tensor = tran_F.to_tensor(image)
    batch = tensor.unsqueeze(0)
    return batch.to(device, dtype=torch.float)

def get_activation(activation_dic, activation_layer):
    """Return the captured activation as an L2-normalized NumPy array.

    Reads ``activation_dic[activation_layer]``, normalizes it to unit L2 norm
    along dimension 1, drops every size-1 dimension and copies the result to
    host memory.
    """
    captured = activation_dic[activation_layer]
    unit = nn_F.normalize(captured, p=2, dim=1)
    flat = unit.squeeze()
    return flat.detach().cpu().numpy()

In [6]:
# Smoke test: push one random image through the hooked model and check that
# the captured activation has the expected length.
# The array is uint8 so that `to_tensor` rescales it to [0, 1] the same way it
# does for real images (an int64 array would be passed through unscaled), and
# the exclusive upper bound is 256 so that 255 can actually occur.
x = np.random.randint(0, 256, (64, 48, 3), dtype=np.uint8)

# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    model(img_to_tensor(x))
activation = get_activation(activation_dic, activation_layer)

print(np.shape(activation.squeeze()))
activation[:3]

(512,)


array([0.        , 0.        , 0.00054843], dtype=float32)

#### Vectorize images

In [7]:
def vectorize_images(path, model_out, model, activation_dic, activation_layer, resize=False):
    """Compute one activation vector per ``*.jpg`` image found directly in ``path``.

    Each image is loaded as RGB, optionally resized to 128x128 and pushed
    through ``model``; the activation that the forward hook left in
    ``activation_dic`` is then stored as one row of the result.

    Args:
        path: Directory searched (non-recursively) for ``*.jpg`` files.
        model_out: Length of the activation vector produced for one image.
        model: Callable network whose forward pass fills
            ``activation_dic[activation_layer]`` through a registered hook.
        activation_dic: Dict the hook writes into.
        activation_layer: Key under which the activation is read.
        resize: When true, resize every image to 128x128 before the forward
            pass.

    Returns:
        ``(vectors, filenames)`` where ``vectors`` is a float64 array of shape
        ``(len(filenames), model_out)`` and ``filenames`` is the sorted list of
        image paths; row ``i`` belongs to ``filenames[i]``.
    """
    # Sorted so the row order is reproducible across runs and machines; glob
    # itself returns matches in arbitrary order.
    filenames = sorted(glob.glob(os.path.join(path, '*.jpg')))
    vectors = np.zeros((len(filenames), model_out))

    # Inference only: skip autograd bookkeeping for every forward pass.
    with torch.no_grad():
        for idx, img_path in enumerate(tqdm(filenames)):
            # Load image; the context manager releases the file handle as soon
            # as the RGB copy has been made.
            with Image.open(img_path) as raw:
                image = raw.convert('RGB')
            if resize:
                image = image.resize((128, 128))

            # Vectorize image: the return value of the forward pass is unused,
            # the hook stores the activation that is read back right after.
            model(img_to_tensor(image))
            vectors[idx, :] = get_activation(activation_dic, activation_layer)

    return vectors, filenames

In [8]:
def clean_name(name):
    """Extract an ASCII-only brand label from an image file path.

    The brand is the part of the file name that precedes the first underscore,
    e.g. ``'/data/logos/nike_0001.jpg'`` -> ``'nike'``. A file name without an
    underscore is returned whole. Every character that cannot be encoded as
    ASCII is replaced by ``'?'``.

    Original author's note: running
    ``' '.join(sorted(set(''.join(list(set(brands))))))`` over the brand names
    listed the characters ``& ' + - 1 2 3 4 ?``, the lowercase letters a-z and
    the lone surrogates U+DCBC, U+DCC3 and U+DCFC (presumably undecodable
    file-name bytes -- verify against the dataset).

    Args:
        name: Path (or bare file name) of an image.

    Returns:
        The brand label as a plain ASCII ``str``.
    """
    # os.path.basename uses the platform's path separators instead of
    # assuming '/'.
    filename = os.path.basename(name)
    brand, _, _ = filename.partition('_')
    return brand.encode('ascii', 'replace').decode()

In [9]:
def save_df(vectors, filenames, path):
    """Pickle a brand/vector table to ``path + '.zip'``.

    Builds a DataFrame with two columns: ``brand`` (``clean_name`` applied to
    each entry of ``filenames``) and ``img_vec`` (one element per item obtained
    by iterating over ``vectors``), then writes it with ``DataFrame.to_pickle``.
    """
    rows = list(vectors)
    labels = list(map(clean_name, filenames))
    table = pd.DataFrame({'brand': labels, 'img_vec': rows})
    # Save data:
    table.to_pickle(path + '.zip')

#### Process logos

In [10]:
# Vectorize every logo crop at its native resolution, then pickle the
# brand/vector table to `logos_path + '.zip'`.
logos_vectors, logos_filenames = vectorize_images(
    path=logos_path,
    model_out=model_out,
    model=model,
    activation_dic=activation_dic,
    activation_layer=activation_layer,
)
save_df(vectors=logos_vectors, filenames=logos_filenames, path=logos_path)

100%|██████████| 76017/76017 [10:00<00:00, 126.49it/s]


#### Process logos (128x128)

In [11]:
# Vectorize the 128x128 logo crops, then pickle the brand/vector table to
# `logos_128_path + '.zip'`.
# NOTE(review): the directory name suggests these crops are already 128x128,
# in which case resize=True changes nothing -- confirm.
logos_128_vectors, logos_128_filenames = vectorize_images(
    path=logos_128_path,
    model_out=model_out,
    model=model,
    activation_dic=activation_dic,
    activation_layer=activation_layer,
    resize=True,
)
save_df(vectors=logos_128_vectors, filenames=logos_128_filenames, path=logos_128_path)

100%|██████████| 76017/76017 [10:19<00:00, 122.66it/s]


#### Process exemplars

In [12]:
# Vectorize every exemplar image at its native resolution, then pickle the
# brand/vector table to `exemplars_path + '.zip'`.
exemplars_vectors, exemplars_filenames = vectorize_images(
    path=exemplars_path,
    model_out=model_out,
    model=model,
    activation_dic=activation_dic,
    activation_layer=activation_layer,
)
save_df(vectors=exemplars_vectors, filenames=exemplars_filenames, path=exemplars_path)

100%|██████████| 352/352 [00:04<00:00, 74.69it/s] 


#### Process exemplars (128x128)

In [13]:
# Vectorize the 128x128 exemplar images, then pickle the brand/vector table to
# `exemplars_128_path + '.zip'`.
# NOTE(review): the directory name suggests these images are already 128x128,
# in which case resize=True changes nothing -- confirm.
exemplars_128_vectors, exemplars_128_filenames = vectorize_images(
    path=exemplars_128_path,
    model_out=model_out,
    model=model,
    activation_dic=activation_dic,
    activation_layer=activation_layer,
    resize=True,
)
save_df(
    vectors=exemplars_128_vectors,
    filenames=exemplars_128_filenames,
    path=exemplars_128_path,
)

100%|██████████| 352/352 [00:06<00:00, 57.68it/s]
