# Exemplo de "Bag of Visual Words"

Vocês estão recebendo este código do professor e devem dar o crédito devido, para que não se caracterize a situação de tentar passar esforço dos outros como sendo seu (a.k.a. plágio). Divirtam-se!

In [1]:
import cv2
import os
import os.path
import numpy as np

# Root directory of the dataset; each category name is joined onto it to
# locate that category's image folder.
DATA_DIR = 'data//pkmn'
# Category (sub-directory) names. The label vectors built later give +1 to the
# first category and -1 to the second, so the order here matters.
CATEGORY_LIST = ['dalmatian', 'Faces_easy']
# Number of files taken from each category for the training set.
NUM_IMAGES_TRAIN_PER_CATEGORY = 30
# Number of files taken from each category for the test set.
NUM_IMAGES_TEST_PER_CATEGORY = 15
# Number of k-means clusters requested when building the visual vocabulary.
NUM_CLUSTERS = 300

def get_images_from_category(category, num_train, num_test, data_dir):
    """Split the files of one category folder into train and test path lists.

    Args:
        category: name of the sub-directory of ``data_dir`` to list.
        num_train: maximum number of files assigned to the training list.
        num_test: maximum number of files assigned to the test list.
        data_dir: root directory that contains the category sub-directory.

    Returns:
        A ``(filenames_train, filenames_test)`` tuple of lists of paths. The
        first ``num_train`` entries (in sorted name order) go to training and
        the next ``num_test`` go to test; if the folder holds fewer files the
        lists are simply shorter.
    """
    # Use the data_dir argument rather than the module-level DATA_DIR so the
    # function honours what the caller passed in.
    category_dir = os.path.join(data_dir, category)
    # os.listdir returns entries in arbitrary order; sorting makes the
    # train/test split reproducible across runs and machines.
    selected = sorted(os.listdir(category_dir))[:num_train + num_test]
    paths = [os.path.join(category_dir, filename) for filename in selected]
    return paths[:num_train], paths[num_train:]

def get_images_from_category_list(category_list, num_train, num_test, data_dir):
    """Collect train and test image paths for several categories.

    Calls ``get_images_from_category`` once per entry of ``category_list``
    (forwarding ``num_train``, ``num_test`` and ``data_dir``) and concatenates
    the per-category results in the order the categories are given.

    Returns:
        A ``(train_paths, test_paths)`` tuple of flat lists.
    """
    train_paths, test_paths = [], []
    for name in category_list:
        cat_train, cat_test = get_images_from_category(name, num_train, num_test, data_dir)
        train_paths += cat_train
        test_paths += cat_test
    return train_paths, test_paths

def cria_vocabulario(imagens, num_clusters):
    """Build the visual vocabulary by k-means clustering of KAZE descriptors.

    Args:
        imagens: iterable of image file paths; each file is read as grayscale.
        num_clusters: number of clusters given to ``cv2.BOWKMeansTrainer``.

    Returns:
        The vocabulary returned by ``BOWKMeansTrainer.cluster()``.

    Raises:
        ValueError: if ``cv2.imread`` cannot read one of the files.
    """
    km = cv2.BOWKMeansTrainer(num_clusters)
    kaze = cv2.KAZE_create()
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise ValueError('Could not read image: {}'.format(p))
        # Pass no mask so the whole image is searched. OpenCV masks must be
        # 8-bit; the float64 array from np.ones(img.shape) is not a valid mask.
        _, desc = kaze.detectAndCompute(img, None)
        if desc is None or len(desc) == 0:
            # An image without keypoints contributes nothing to the vocabulary
            # and km.add() would reject an empty descriptor set.
            continue
        km.add(desc)
    return km.cluster()

def representa(vocab, img):
    """Compute the bag-of-visual-words descriptor of a single image.

    KAZE keypoints are detected on ``img`` and handed to a
    ``cv2.BOWImgDescriptorExtractor`` (KAZE extractor + FLANN matcher) whose
    vocabulary is set to ``vocab``.

    Args:
        vocab: visual vocabulary to match descriptors against.
        img: image already loaded in memory.

    Returns:
        Whatever ``BOWImgDescriptorExtractor.compute`` returns for the image.
    """
    detector = cv2.KAZE_create()
    extractor = cv2.BOWImgDescriptorExtractor(detector, cv2.FlannBasedMatcher())
    extractor.setVocabulary(vocab)
    keypoints = detector.detect(img)
    return extractor.compute(img, keypoints)

def transforma_imagens(imagens, vocab):
    """Turn a list of image paths into a matrix of bag-of-words features.

    Args:
        imagens: iterable of image file paths; each file is read as grayscale.
        vocab: visual vocabulary forwarded to ``representa``.

    Returns:
        A NumPy array with one flattened descriptor per input path, in the
        same order as ``imagens`` so rows stay aligned with the labels.

    Raises:
        ValueError: if ``cv2.imread`` cannot read one of the files.
    """
    X = []
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals failure by returning None instead of raising.
            raise ValueError('Could not read image: {}'.format(p))
        hist = representa(vocab, img)
        if hist is None:
            # No keypoints were found, so no visual word occurs in the image:
            # use an all-zero histogram (one bin per vocabulary entry) rather
            # than crashing, which keeps the row count equal to the path count.
            hist = np.zeros(len(vocab), dtype=np.float32)
        X.append(np.asarray(hist).flatten())
    return np.array(X)

In [2]:
# Gather the train/test file paths for every category in CATEGORY_LIST.
imagens_train, imagens_test = get_images_from_category_list(CATEGORY_LIST, 
                                                            NUM_IMAGES_TRAIN_PER_CATEGORY, 
                                                            NUM_IMAGES_TEST_PER_CATEGORY, 
                                                            DATA_DIR)
# The vocabulary is built from the training images only.
vocab = cria_vocabulario(imagens_train, NUM_CLUSTERS)
# Encode both splits against that same vocabulary.
X_train = transforma_imagens(imagens_train, vocab)
X_test = transforma_imagens(imagens_test, vocab)
# Labels: +1 for the first block of images, -1 for the second block.
# NOTE(review): this assumes exactly two categories and that each category
# supplied the full requested number of files — confirm against the dataset.
y_train = np.hstack([np.ones(NUM_IMAGES_TRAIN_PER_CATEGORY), -np.ones(NUM_IMAGES_TRAIN_PER_CATEGORY)])
y_test = np.hstack([np.ones(NUM_IMAGES_TEST_PER_CATEGORY), -np.ones(NUM_IMAGES_TEST_PER_CATEGORY)])

In [3]:
# Sanity check: the feature matrices and label vectors should have the same
# number of rows within each split.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(60, 300)
(30, 300)
(60,)
(30,)


In [4]:
# Load one example image. No IMREAD_GRAYSCALE flag is passed here, unlike the
# reads done when building the vocabulary and the feature matrices.
im = cv2.imread(os.path.join(DATA_DIR, 'dalmatian', 'image_0021.jpg'))
# Show the image in an OpenCV window and wait for a key press before closing.
cv2.imshow('Exemplo de imagem', im)
print('Pressione qualquer tecla para continuar.')
cv2.waitKey(0)
cv2.destroyAllWindows()

# Compute and print the bag-of-visual-words descriptor of the example image.
a = representa(vocab, im)
print(a)

Pressione qualquer tecla para continuar.
[[0.00364299 0.00910747 0.00364299 0.01092896 0.01092896 0.
  0.         0.         0.00728597 0.         0.00364299 0.
  0.00364299 0.         0.00546448 0.00546448 0.00546448 0.00364299
  0.00182149 0.00182149 0.         0.         0.00364299 0.00364299
  0.00182149 0.00546448 0.00182149 0.00364299 0.         0.
  0.00364299 0.00182149 0.         0.00910747 0.         0.00364299
  0.00182149 0.00546448 0.         0.00182149 0.01275046 0.00182149
  0.         0.         0.         0.         0.00728597 0.00182149
  0.00546448 0.00364299 0.         0.00728597 0.         0.00182149
  0.00728597 0.00546448 0.00546448 0.01639344 0.         0.
  0.00728597 0.         0.02003643 0.         0.         0.00728597
  0.00182149 0.         0.00546448 0.00364299 0.00182149 0.
  0.00182149 0.00182149 0.00364299 0.         0.00728597 0.
  0.         0.         0.00182149 0.00364299 0.00364299 0.00182149
  0.00364299 0.00182149 0.         0.00910747 0.       

## Bibliografia:
- Modelo Bag of Visual Words produzido por Fábio Ayres.
- Dataset: [Pokémon Gen One](https://www.kaggle.com/thedagger/pokemon-generation-one/data) da plataforma Kaggle.com
