# Exemplo de "Bag of Visual Words"

Vocês estão recebendo este código do professor e devem dar o crédito devido, para que não se caracterize a situação de tentar passar esforço dos outros como sendo seu (a.k.a. plágio). Divirtam-se!

In [6]:
!pip install opencv-contrib-python
import cv2
import os
import os.path
import numpy as np
import math

# Root folder; the loader below looks for one sub-folder per category inside it.
DATA_DIR = 'data_resized'
# Category names: each one is used both as a sub-folder name and as a class label.
CATEGORY_LIST = ['Pikachu', 'Mewtwo', 'Charmander', 'Bulbasaur', 'Squirtle']
# Number of image files per category assigned to the training and test splits.
NUM_IMAGES_TRAIN_PER_CATEGORY = 50
NUM_IMAGES_TEST_PER_CATEGORY = 15
# Cluster count handed to the BoW k-means trainer when the vocabulary is built.
NUM_CLUSTERS = 90
# Label accumulators: get_images_from_category() appends one category name per
# selected file to these module-level lists.
YTRAIN = []
YTEST = []

def get_images_from_category(category, num_train, num_test, data_dir):
    """Split the files of one category folder into train and test path lists.

    The first ``num_train`` entries of ``data_dir/category`` (in sorted name
    order) become training paths and the next ``num_test`` entries become
    test paths; any remaining entries are ignored.

    Side effects: appends ``category`` once per selected file to the
    module-level ``YTRAIN`` / ``YTEST`` lists, and prints every training path.

    Args:
        category: Sub-folder name, also used as the label.
        num_train: Maximum number of training files to take.
        num_test: Maximum number of test files to take.
        data_dir: Root folder that contains the category sub-folder.

    Returns:
        Tuple ``(filenames_train, filenames_test)`` of path lists.
    """
    global YTRAIN
    global YTEST

    # Honour the caller-supplied root; the DATA_DIR global was used here
    # before, which silently ignored the ``data_dir`` argument.
    category_dir = os.path.join(data_dir, category)
    num_total = num_train + num_test
    filenames_train = []
    filenames_test = []

    # os.listdir() returns entries in arbitrary order, so sort them to make
    # the train/test split reproducible across runs and platforms.
    for k, filename in enumerate(sorted(os.listdir(category_dir))):
        if k >= num_total:
            break
        path = os.path.join(category_dir, filename)
        if k < num_train:
            filenames_train.append(path)
            YTRAIN.append(category)
            print(path)
        else:
            filenames_test.append(path)
            YTEST.append(category)
    return filenames_train, filenames_test

def get_images_from_category_list(category_list, num_train, num_test, data_dir):
    """Gather the train and test image paths of every category.

    Calls ``get_images_from_category`` once per entry of ``category_list``
    (forwarding ``num_train``, ``num_test`` and ``data_dir`` unchanged) and
    concatenates the results in category order.

    Returns:
        Tuple ``(train_paths, test_paths)`` covering all categories.
    """
    train_paths, test_paths = [], []
    for name in category_list:
        cat_train, cat_test = get_images_from_category(name, num_train, num_test, data_dir)
        train_paths += cat_train
        test_paths += cat_test
    return train_paths, test_paths

def cria_vocabulario(imagens, num_clusters):
    """Build the visual vocabulary by clustering KAZE descriptors.

    Every image is loaded as grayscale, its KAZE descriptors are added to a
    ``cv2.BOWKMeansTrainer`` and the trainer's clustering result is returned.

    Args:
        imagens: Iterable of image file paths.
        num_clusters: Cluster count passed to ``cv2.BOWKMeansTrainer``.

    Returns:
        The result of ``BOWKMeansTrainer.cluster()`` (the vocabulary).

    Raises:
        OSError: If an image file cannot be read.
    """
    km = cv2.BOWKMeansTrainer(num_clusters)
    kaze = cv2.KAZE_create()
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError('could not read image: {}'.format(p))
        # No mask means "search the whole image". OpenCV requires masks to be
        # 8-bit; the float64 all-ones array used here before did not comply.
        _, desc = kaze.detectAndCompute(img, None)
        if desc is None or len(desc) == 0:
            # No keypoints found: nothing to contribute to the vocabulary.
            continue
        km.add(desc)
    return km.cluster()

def representa(vocab, img):
    """Compute the bag-of-visual-words descriptor of one image.

    Keypoints are detected with KAZE and encoded against ``vocab`` through a
    ``cv2.BOWImgDescriptorExtractor`` that uses a FLANN-based matcher.

    Args:
        vocab: Vocabulary as returned by ``cria_vocabulario``.
        img: Image array already loaded (the callers pass grayscale images).

    Returns:
        The descriptor produced by ``BOWImgDescriptorExtractor.compute``. If
        the extractor yields nothing, an all-zero float32 array of shape
        ``(1, len(vocab))`` is returned so callers can still flatten it.
    """
    kaze = cv2.KAZE_create()
    kp = kaze.detect(img)
    bowdesc = cv2.BOWImgDescriptorExtractor(kaze, cv2.FlannBasedMatcher())
    bowdesc.setVocabulary(vocab)
    hist = bowdesc.compute(img, kp)
    if hist is None:
        # NOTE(review): compute() appears to return None when no keypoints
        # were found; an empty histogram (no visual word observed) keeps the
        # feature matrix rectangular. Assumes one bin per vocabulary row.
        return np.zeros((1, len(vocab)), dtype=np.float32)
    return hist

def transforma_imagens(imagens, vocab):
    """Encode a list of image files as a matrix of bag-of-words descriptors.

    Args:
        imagens: Iterable of image file paths.
        vocab: Vocabulary passed on to ``representa``.

    Returns:
        ``np.ndarray`` with one flattened descriptor per image, in input order.

    Raises:
        OSError: If an image file cannot be read.
    """
    X = []
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None on failure; fail here with the file
            # name rather than with an opaque error inside OpenCV. Skipping
            # the file would misalign the rows with the label lists.
            raise OSError('could not read image: {}'.format(p))
        X.append(representa(vocab, img).flatten())
    return np.array(X)



In [7]:
# The loader appends labels to the module-level YTRAIN / YTEST lists, so empty
# them first; otherwise re-running this cell leaves more labels than images.
YTRAIN.clear()
YTEST.clear()
imagens_train, imagens_test = get_images_from_category_list(
    CATEGORY_LIST,
    NUM_IMAGES_TRAIN_PER_CATEGORY,
    NUM_IMAGES_TEST_PER_CATEGORY,
    DATA_DIR,
)
# The vocabulary is learned from the training images only, then both splits
# are encoded against it.
vocab = cria_vocabulario(imagens_train, NUM_CLUSTERS)
X_train = transforma_imagens(imagens_train, vocab)
X_test = transforma_imagens(imagens_test, vocab)
# One label per image row. The previous +1/-1 vectors described only two
# classes and did not match the number of rows for the five categories.
y_train = np.array(YTRAIN)
y_test = np.array(YTEST)

data_resized\Pikachu\6975.png
data_resized\Pikachu\6976.png
data_resized\Pikachu\6977.png
data_resized\Pikachu\6978.png
data_resized\Pikachu\6979.png
data_resized\Pikachu\6980.png
data_resized\Pikachu\6981.png
data_resized\Pikachu\6982.png
data_resized\Pikachu\6983.png
data_resized\Pikachu\6984.png
data_resized\Pikachu\6985.png
data_resized\Pikachu\6986.png
data_resized\Pikachu\6987.png
data_resized\Pikachu\6988.png
data_resized\Pikachu\6989.png
data_resized\Pikachu\6990.png
data_resized\Pikachu\6991.png
data_resized\Pikachu\6992.png
data_resized\Pikachu\6993.png
data_resized\Pikachu\6994.png
data_resized\Pikachu\6995.png
data_resized\Pikachu\6996.png
data_resized\Pikachu\6997.png
data_resized\Pikachu\6998.png
data_resized\Pikachu\6999.png
data_resized\Pikachu\7000.png
data_resized\Pikachu\7001.png
data_resized\Pikachu\7002.png
data_resized\Pikachu\7003.png
data_resized\Pikachu\7004.png
data_resized\Pikachu\7005.png
data_resized\Pikachu\7006.png
data_resized\Pikachu\7007.png
data_resiz

In [8]:
from sklearn.ensemble import RandomForestClassifier
# Seed NumPy's global random generator so the run is repeatable.
np.random.seed(0)

# Random forest with 500 trees and a fixed random_state; n_jobs=-1 asks
# scikit-learn to use every available processor.
clf = RandomForestClassifier(n_estimators=500, random_state=0, n_jobs=-1)

# Fit on the bag-of-words features, using the category names collected in
# YTRAIN as labels.
clf.fit(X_train, YTRAIN)
# Score on the held-out images; as the last expression of the cell, the
# notebook displays the value.
clf.score(X_test, YTEST)

0.7866666666666666

## Bibliografia:
- Modelo Bag of Visual Words produzido por Fábio Ayres.
- Dataset: [Pokémon Gen One](https://www.kaggle.com/thedagger/pokemon-generation-one/data) da plataforma Kaggle.com


In [9]:
# Unused leftover, kept for reference:
# np.fromstring(palavra, dtype = int)

# Text form of the training matrix: str() of the nested Python list.
palavra = str(X_train.tolist())

# Write the text to disk. file.write() returns the number of characters
# written, so `conteudo` holds a count, not the content.
with open ('image_text.txt', 'w') as image:
    conteudo = image.write(palavra)
    
# Read the same file back into `content` as a single string.
with open('image_text.txt' , 'r') as image:
    content = image.read()


In [60]:
# Root folder and category name for the extra images to classify. The loader
# below joins the two with os.path.join, so files are read from the 'Testes'
# folder inside 'Testes'.
DATA_DIR2 = 'Testes'
# NOTE: this is a plain string, not a list; iterating over it yields single
# characters.
CATEGORY_LIST2 = 'Testes'
# Intended number of images to load from that folder.
NUM_IMAGES_TRAIN_PER_CATEGORY2 = 5
# NOTE(review): not referenced by the visible code, which reuses the existing
# `vocab` when encoding these images.
NUM_CLUSTERS2 = 9
# Label accumulator filled by the loader below.
YTRAIN2 = []


def get_images_from_category(category, num_train, data_dir):
    """Return up to ``num_train`` file paths from ``data_dir/category``.

    NOTE: this redefines the four-parameter function of the same name from
    the first cell of the notebook.

    Side effect: appends ``category`` once per selected file to the
    module-level ``YTRAIN2`` list.

    Args:
        category: Sub-folder name, also used as the label.
        num_train: Maximum number of files to take.
        data_dir: Root folder that contains the category sub-folder.

    Returns:
        List of selected file paths, in sorted name order.
    """
    global YTRAIN2

    # Build the folder from the arguments; it was hard-wired to the
    # DATA_DIR2 / CATEGORY_LIST2 globals before.
    category_dir = os.path.join(data_dir, category)
    filenames_train2 = []
    # os.listdir() order is arbitrary, so sort for a reproducible selection.
    for k, filename in enumerate(sorted(os.listdir(category_dir))):
        # Strict bound: the former ``k <= num_train`` took one file too many.
        if k >= num_train:
            break
        filenames_train2.append(os.path.join(category_dir, filename))
        YTRAIN2.append(category)
    return filenames_train2


def get_images_from_category_list2(category_list, num_train, data_dir):
    """Collect image paths for one or more categories.

    Args:
        category_list: Iterable of category names, or a single name given as
            a string.
        num_train: Maximum number of files per category.
        data_dir: Root folder that contains the category sub-folders.

    Returns:
        List with the selected paths of every category, in category order.
    """
    # A bare string would be iterated character by character, scanning the
    # folder once per letter; treat it as a single category instead.
    if isinstance(category_list, str):
        category_list = [category_list]
    filenames_train_all2 = []
    for category in category_list:
        filenames_train_all2.extend(
            get_images_from_category(category, num_train, data_dir)
        )
    return filenames_train_all2

def cria_vocabulario(imagens, num_clusters):
    """Cluster the KAZE descriptors of the given images into a vocabulary.

    Args:
        imagens: Iterable of image file paths, loaded as grayscale.
        num_clusters: Cluster count passed to ``cv2.BOWKMeansTrainer``.

    Returns:
        The result of ``BOWKMeansTrainer.cluster()`` (the vocabulary).

    Raises:
        OSError: If an image file cannot be read.
    """
    km = cv2.BOWKMeansTrainer(num_clusters)
    kaze = cv2.KAZE_create()
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None on failure rather than raising.
            raise OSError('could not read image: {}'.format(p))
        # Passing no mask searches the whole image. A mask must be 8-bit for
        # OpenCV, which the former float64 np.ones array was not.
        _, desc = kaze.detectAndCompute(img, None)
        if desc is None or len(desc) == 0:
            # Images without keypoints add nothing to the vocabulary.
            continue
        km.add(desc)
    return km.cluster()

def representa(vocab, img):
    """Encode one image as a bag-of-visual-words descriptor.

    Detects KAZE keypoints and runs ``cv2.BOWImgDescriptorExtractor`` (with a
    FLANN-based matcher) against ``vocab``.

    Args:
        vocab: Vocabulary to encode against.
        img: Image array already loaded by the caller.

    Returns:
        The descriptor from ``BOWImgDescriptorExtractor.compute``, or an
        all-zero float32 array of shape ``(1, len(vocab))`` when the extractor
        yields nothing.
    """
    kaze = cv2.KAZE_create()
    kp = kaze.detect(img)
    bowdesc = cv2.BOWImgDescriptorExtractor(kaze, cv2.FlannBasedMatcher())
    bowdesc.setVocabulary(vocab)
    hist = bowdesc.compute(img, kp)
    if hist is None:
        # NOTE(review): compute() appears to return None for images without
        # keypoints; fall back to an empty histogram so callers can still
        # flatten the result. Assumes one bin per vocabulary row.
        return np.zeros((1, len(vocab)), dtype=np.float32)
    return hist

def transforma_imagens(imagens, vocab):
    """Turn image files into a matrix of bag-of-words descriptors.

    Args:
        imagens: Iterable of image file paths.
        vocab: Vocabulary passed on to ``representa``.

    Returns:
        ``np.ndarray`` with one flattened descriptor per image, in input order.

    Raises:
        OSError: If an image file cannot be read.
    """
    X2 = []
    for p in imagens:
        img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None on failure; report the offending file
            # instead of letting OpenCV fail later on a None image.
            raise OSError('could not read image: {}'.format(p))
        X2.append(representa(vocab, img).flatten())
    return np.array(X2)

In [None]:
# Collect the paths of the extra images to classify.
imagens_train2 = get_images_from_category_list2(
    CATEGORY_LIST2,
    NUM_IMAGES_TRAIN_PER_CATEGORY2,
    DATA_DIR2,
)
# Encode them with the same `vocab` that produced the features clf was fit on.
X_train2 = transforma_imagens(imagens_train2, vocab)
# Last expression of the cell: the notebook displays the predicted labels.
clf.predict(X_train2)