# Exemplo de "Bag of Visual Words"

Vocês estão recebendo este código do professor e devem dar o crédito devido, para que não se caracterize a situação de tentar passar esforço dos outros como sendo seu (a.k.a. plágio). Divirtam-se!

In [36]:
!pip install opencv-contrib-python
import cv2
import os
import os.path
import numpy as np

DATA_DIR = 'data'
CATEGORY_LIST = ['Charmander', 'Bulbasaur']
NUM_IMAGES_TRAIN_PER_CATEGORY = 50
NUM_IMAGES_TEST_PER_CATEGORY = 10
NUM_CLUSTERS = 200

def get_images_from_category(category, num_train, num_test, data_dir):
    category_dir = os.path.join(DATA_DIR, category)
    num_total = num_train + num_test
    filenames_train = []
    filenames_test = []
    for k, filename in enumerate(os.listdir(category_dir)):
        if k < num_train:
            filenames_train.append(os.path.join(category_dir, filename))
        elif k < num_total:
            filenames_test.append(os.path.join(category_dir, filename))
        else:
            break
    return filenames_train, filenames_test

def get_images_from_category_list(category_list, num_train, num_test, data_dir):
    filenames_train_all = []
    filenames_test_all = []
    for category in category_list:
        filenames_train, filenames_test = get_images_from_category(category, num_train, num_test, data_dir)
        filenames_train_all.extend(filenames_train)
        filenames_test_all.extend(filenames_test)
    return filenames_train_all, filenames_test_all

def cria_vocabulario(imagens, num_clusters):
    km = cv2.BOWKMeansTrainer(num_clusters)
    akaze = cv2.KAZE_create()
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        mask = np.ones(img.shape)
        kp, desc = akaze.detectAndCompute(img, mask)
        km.add(desc)
    return km.cluster()

def representa(vocab, img):
    kaze = cv2.KAZE_create()
    kp = kaze.detect(img)
    bowdesc = cv2.BOWImgDescriptorExtractor(kaze, cv2.FlannBasedMatcher())
    bowdesc.setVocabulary(vocab)
    return bowdesc.compute(img, kp)

def transforma_imagens(imagens, vocab):
    X = []
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        X.append(representa(vocab, img).flatten())
    return np.array(X)



In [37]:
imagens_train, imagens_test = get_images_from_category_list(CATEGORY_LIST, 
                                                            NUM_IMAGES_TRAIN_PER_CATEGORY, 
                                                            NUM_IMAGES_TEST_PER_CATEGORY, 
                                                            DATA_DIR)
vocab = cria_vocabulario(imagens_train, NUM_CLUSTERS)
X_train = transforma_imagens(imagens_train, vocab)
X_test = transforma_imagens(imagens_test, vocab)
y_train = np.hstack([np.ones(NUM_IMAGES_TRAIN_PER_CATEGORY), -np.ones(NUM_IMAGES_TRAIN_PER_CATEGORY)])
y_test = np.hstack([np.ones(NUM_IMAGES_TEST_PER_CATEGORY), -np.ones(NUM_IMAGES_TEST_PER_CATEGORY)])

In [38]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(100, 200)
(20, 200)
(100,)
(20,)


In [39]:
im = cv2.imread(os.path.join(DATA_DIR, 'Abra', '0282b2f3a22745f1a436054ea15a0ae5.jpg'))
cv2.imshow('Exemplo de imagem', im)
print('Pressione qualquer tecla para continuar.')
cv2.waitKey(0)
cv2.destroyAllWindows()

a = representa(vocab, im)
print(a)

Pressione qualquer tecla para continuar.
[[0.         0.         0.00898204 0.         0.00299401 0.
  0.01197605 0.         0.01197605 0.         0.00299401 0.01497006
  0.         0.         0.00299401 0.00898204 0.00598802 0.
  0.02095808 0.00598802 0.00598802 0.         0.         0.00598802
  0.00598802 0.01796407 0.         0.00898204 0.         0.
  0.00598802 0.         0.00299401 0.00299401 0.         0.
  0.00598802 0.         0.01796407 0.00299401 0.         0.
  0.00898204 0.         0.         0.         0.00299401 0.00299401
  0.01497006 0.         0.         0.00898204 0.00299401 0.
  0.         0.         0.         0.         0.00299401 0.
  0.01197605 0.         0.00898204 0.         0.         0.
  0.00598802 0.00299401 0.         0.         0.         0.00299401
  0.00598802 0.         0.         0.         0.00299401 0.03592815
  0.00299401 0.00598802 0.         0.02095808 0.         0.
  0.         0.         0.         0.         0.00898204 0.
  0.         0.    

In [40]:
from sklearn.ensemble import RandomForestClassifier
# Set random seed
np.random.seed(0)

# Create a random forest Classifier. By convention, clf means 'Classifier'
clf = RandomForestClassifier(n_jobs=2, random_state=0)

# Train the Classifier to take the training features and learn how they relate
# to the training y (the species)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)



0.95

## Bibliografia:
- Modelo Bag of Visual Words produzido por Fábio Ayres.
- Dataset: [Pokémon Gen One](https://www.kaggle.com/thedagger/pokemon-generation-one/data) da plataforma Kaggle.com
