# Vectorize Images

In [1]:
import os
import glob
import csv
import pickle

import numpy as np
import pandas as pd
from PIL import Image

import torch
from torchvision.models import resnet34
from torchvision.transforms import functional as F

#### Parameters

In [2]:
# Which crop collection to vectorize: one of {all_crops, all_crops_128x128}.
name = 'all_crops'

# Root of the logos dataset, resolved under the directory named by the
# DATASETS environment variable (a KeyError is raised if it is not set).
path = os.path.join(os.environ['DATASETS'], 'logos', 'logos')

# Directory containing the selected crops.
logos_path = os.path.join(path, name)

# Every JPEG crop in that directory, in sorted order so that row indices
# computed later are reproducible between runs.
logos_filenames = glob.glob(logos_path + '/*.jpg')
logos_filenames.sort()

#### Load and hook model

In [3]:
def hook_activation(model, name):
    """Register a forward hook on ``model.avgpool`` that records its output.

    The hook is built by ``save_activation(name)``, so each forward pass
    stores the layer's output under the key ``name``.
    """
    recorder = save_activation(name)
    model.avgpool.register_forward_hook(recorder)

def save_activation(name):
    """Build a forward hook that stores a layer's output under ``name``.

    The returned callable has the ``(module, inputs, output)`` signature of a
    forward hook. When invoked it writes the detached ``output`` into the
    module-level ``activation`` mapping, replacing any previous entry for
    ``name``.
    """
    def _store(module, inputs, output):
        # Detach so the stored tensor does not keep the autograd graph alive.
        detached = output.detach()
        activation[name] = detached
    return _store

In [4]:
# Shared store filled by the forward hook: layer name -> latest output tensor.
activation = {}

# Pretrained ResNet-34 on GPU 3, used purely as a fixed feature extractor.
model = resnet34(pretrained=True).to('cuda:3')

# A freshly constructed model is in training mode, where BatchNorm normalizes
# with per-batch statistics and updates its running averages on every forward
# pass. Switch to eval mode so the extracted vectors are deterministic and do
# not drift with the order in which images are processed.
model.eval()

# Capture the output of the global average-pooling layer on every forward pass.
hook_activation(model, 'avgpool')

In [5]:
def img_to_tensor(img):
    """Turn a single image into a float batch of size one on ``cuda:3``.

    ``img`` is handed to ``F.to_tensor``, so it must be something that
    function accepts (a PIL image or a NumPy array). A leading batch
    dimension is added before the tensor is moved to the GPU as
    ``torch.float``.
    """
    tensor = F.to_tensor(img)
    batch = tensor.unsqueeze(0)
    return batch.to('cuda:3', dtype=torch.float)

def get_activation(model, name):
    """Return the activation stored under ``name`` as a NumPy array.

    The tensor is read from the module-level ``activation`` mapping, has its
    size-one dimensions squeezed away, and is copied to the CPU. ``model`` is
    accepted but not used.
    """
    stored = activation[name]
    squeezed = stored.squeeze().detach()
    return squeezed.cpu().numpy()

In [6]:
# Smoke test: push one random image through the network and inspect the
# feature vector captured by the avgpool hook.
# The array is uint8 on purpose: F.to_tensor only rescales uint8 arrays to
# [0, 1], which matches what the PIL images used later will look like.
x = np.random.randint(0, 256, (640, 480, 3), dtype=np.uint8)
model(img_to_tensor(x))

# Keep the result under its own name. Rebinding `activation` here would
# replace the dict that the forward hook writes into, and every subsequent
# forward pass would fail inside the hook.
features = get_activation(model, 'avgpool')

print(np.shape(features.squeeze()))
features[:3]

(512,)


array([0.9466233, 0.8789756, 0.9460537], dtype=float32)

#### Vectorize images

In [7]:
# One 512-dimensional avgpool feature vector per logo image.
vectors = np.zeros((len(logos_filenames), 512))

# Inference only: fixed BatchNorm statistics and no autograd bookkeeping.
model.eval()
with torch.no_grad():
    for idx, logo in enumerate(logos_filenames):

        # Load image: force three channels and a common 128x128 size. The
        # context manager closes the file handle once the pixels are copied.
        with Image.open(logo) as raw:
            img = raw.convert('RGB').resize((128, 128))

        # Vectorize image: img_to_tensor does the tensor conversion, batching
        # and device transfer. Converting beforehand as well would hand
        # F.to_tensor a tensor, which it rejects with a TypeError.
        model(img_to_tensor(img))
        vectors[idx, :] = get_activation(model, 'avgpool')




In [8]:
def clean_name(name):
    """Return the ASCII-only brand name encoded in a logo file path.

    The brand is the part of the file name (the text after the last ``/``)
    that precedes the first underscore. Any character that cannot be
    encoded as ASCII is replaced with ``?``.

    A note left by the original author lists the characters seen in brand
    names: lowercase letters, the digits 1-4, spaces, the symbols
    ``& ' + - ?`` and a few non-ASCII code points, which is why the lossy
    ASCII round-trip is applied.
    """
    basename = name.rsplit('/', 1)[-1]
    brand, _, _ = basename.partition('_')
    ascii_bytes = brand.encode('ascii', errors='replace')
    return ascii_bytes.decode()

#### Save data

In [9]:
# One DataFrame row per logo: the cleaned brand name plus its feature vector.
brands = [clean_name(p) for p in logos_filenames]
vectors_list = list(vectors)  # iterating the 2-D array yields one row per logo
logos_df = pd.DataFrame({'brand': brands, 'img_vec': vectors_list})

# Pickle the table next to the crops directory. pandas infers zip compression
# from the '.zip' suffix.
logos_df.to_pickle(logos_path + '.zip')


