# Bag Of Visual Words Approach

In [1]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import random
import pylab as pl
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
import pickle
import pandas as pd

In [21]:
# Vocabulary size: number of k-means clusters (visual words) used for BOVW.
number_clusters = 512

# Number of CIFAR-10 training batches to include for training.
train_number = 1

# Class names indexed by the dataset's numeric label: Labels[label] gives the
# class/type of object for an integer label.
Labels = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

# Unpacking and Reading the images and labels of the dataset

In [3]:
def unpickle(file):
    """Load one pickled image batch and return its images and labels.

    Parameters
    ----------
    file : str
        Path to a pickled batch whose payload is a mapping with a ``'data'``
        entry (one flattened 3*32*32 row per image) and a ``'labels'`` entry.

    Returns
    -------
    tuple
        ``(imgs, labels)`` where ``imgs`` has shape ``(n, 32, 32, 3)``
        (channels moved to the last axis) and ``labels`` is the stored
        ``'labels'`` object, returned unchanged.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(file, 'rb') as fo:
        batch = pickle.load(fo, encoding='latin1')

    data = batch['data']
    # Rows are stored channel-first (3, 32, 32); transpose to (32, 32, 3).
    imgs = data.reshape(len(data), 3, 32, 32).transpose(0, 2, 3, 1)
    labels = batch['labels']
    return (imgs, labels)

In [4]:
def load_data_batches(train_number, data_dir='./cifar-10-batches-py'):
    """Load the first ``train_number`` training batches.

    Parameters
    ----------
    train_number : int
        How many batches to load; files ``data_batch_1`` through
        ``data_batch_<train_number>`` are read. ``0`` loads nothing.
    data_dir : str, optional
        Directory containing the ``data_batch_*`` files. Defaults to the
        location the notebook originally hard-coded.

    Returns
    -------
    tuple
        ``(imgs, labels)``: two parallel lists with one entry per batch, each
        entry being what ``unpickle`` returned for that batch.
    """
    imgs = []
    labels = []
    for batch_index in range(1, train_number + 1):
        batch_path = '{}/data_batch_{}'.format(data_dir, batch_index)
        batch_imgs, batch_labels = unpickle(batch_path)
        imgs.append(batch_imgs)
        labels.append(batch_labels)

    return imgs, labels

# Extracting the descriptors of the images

In [5]:
def get_descriptors(imgs):
    """Extract SIFT descriptors from every image and stack them row-wise.

    Parameters
    ----------
    imgs : iterable
        RGB images; each is converted to grayscale before detection.

    Returns
    -------
    numpy.ndarray
        All descriptor rows concatenated along axis 0. Images for which the
        extractor yields no descriptor array are skipped. If no image yields
        descriptors, an empty 1-D array (``np.asarray([])``) is returned.
    """
    extractor = cv2.SIFT_create()
    collected = []

    for img in imgs:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, descs = extractor.detectAndCompute(gray, None)
        # Anything that is not an ndarray (e.g. None) means "no descriptors".
        if isinstance(descs, np.ndarray):
            collected.append(descs)

    if not collected:
        return np.asarray([])

    # Concatenate once instead of re-copying the growing array per image.
    return np.concatenate(collected, axis=0)

In [6]:
def get_descriptors_for_data(imgs):
    """Pool the descriptors of several image batches into one array.

    Parameters
    ----------
    imgs : iterable
        Batches of images; each batch is passed to ``get_descriptors``.

    Returns
    -------
    numpy.ndarray
        Descriptor rows of all batches concatenated along axis 0, in batch
        order. Batches that produce no descriptors are skipped, so a
        descriptor-less batch no longer breaks the concatenation with a
        dimension mismatch. If every batch is empty, an empty 1-D array
        (``np.asarray([])``) is returned.
    """
    per_batch = [get_descriptors(batch) for batch in imgs]
    non_empty = [descs for descs in per_batch if descs.shape[0] > 0]

    if not non_empty:
        return np.asarray([])

    # Single concatenation avoids re-copying the accumulated array per batch.
    return np.concatenate(non_empty, axis=0)

# Creating the Vocabulary

In [7]:
def get_words(descriptors, n_clusters=None, random_state=None):
    """Fit the k-means vocabulary (visual words) on pooled descriptors.

    Parameters
    ----------
    descriptors : array-like
        Descriptor rows to cluster.
    n_clusters : int, optional
        Vocabulary size. When omitted, the module-level ``number_clusters``
        is used, which matches the original behaviour.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so the vocabulary can be made reproducible.
        The default ``None`` leaves the initialisation unseeded, as before.

    Returns
    -------
    KMeans
        The fitted clustering model.
    """
    if n_clusters is None:
        n_clusters = number_clusters

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(descriptors)

    return kmeans

# Constructing Bag Of KeyPoints/Histograms

In [8]:
def build_histograms(imgs, words, number_clusters):
    """Build one visual-word count histogram per image.

    Parameters
    ----------
    imgs : iterable
        RGB images; each is converted to grayscale before SIFT extraction.
    words : object
        Fitted vocabulary exposing ``predict(descriptors)`` that returns one
        cluster index per descriptor row.
    number_clusters : int
        Histogram length (number of visual words).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_images, number_clusters)``. Images with
        no descriptors get an all-zero row. Empty input yields shape
        ``(0, number_clusters)``.

    Raises
    ------
    IndexError
        If ``words`` predicts an index ``>= number_clusters``.
    """
    extractor = cv2.SIFT_create()
    rows = []
    for img in imgs:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        _, descs = extractor.detectAndCompute(gray, None)

        if isinstance(descs, np.ndarray) and len(descs) > 0:
            assignments = words.predict(descs)
            # Count occurrences of each visual word in one vectorised call.
            counts = np.bincount(assignments, minlength=number_clusters)
            if counts.shape[0] != number_clusters:
                # bincount grows past minlength when an index is out of range.
                raise IndexError(
                    'predicted visual-word index out of range for '
                    'number_clusters={}'.format(number_clusters)
                )
        else:
            counts = np.zeros(number_clusters, dtype=np.int64)

        rows.append(counts)

    # Explicit reshape keeps the result 2-D even when there are no images.
    return np.asarray(rows, dtype=np.int64).reshape(len(rows), number_clusters)

# Extracting the Test and Train Data

In [9]:
def get_training_test_data(words, imgs, labels, number_clusters,
                           test_batch_path='./cifar-10-batches-py/test_batch'):
    """Build histogram features for the training batches and the test batch.

    Parameters
    ----------
    words : object
        Fitted vocabulary passed through to ``build_histograms``.
    imgs : iterable
        Training image batches (one entry per batch).
    labels : iterable
        Label sequences parallel to ``imgs``. They are read only; the
        caller's lists are never modified.
    number_clusters : int
        Histogram length passed through to ``build_histograms``.
    test_batch_path : str, optional
        Path of the pickled test batch. Defaults to the location the notebook
        originally hard-coded.

    Returns
    -------
    tuple
        ``(training_set, training_labels, test_set, test_labels)`` where the
        sets are histogram arrays and the labels are NumPy arrays.
    """
    batch_histograms = []
    training_labels = []  # fresh list, so extending it cannot touch `labels`

    for batch_imgs, batch_labels in zip(imgs, labels):
        # Every batch gets the full argument list (the multi-batch path
        # previously omitted `words` and `number_clusters`).
        batch_histograms.append(build_histograms(batch_imgs, words, number_clusters))
        training_labels.extend(batch_labels)

    # Drop empty batches so their shape cannot clash during concatenation.
    non_empty = [hist for hist in batch_histograms if hist.shape[0] > 0]
    if non_empty:
        training_set = np.concatenate(non_empty, axis=0)
    else:
        training_set = np.asarray([])

    test_imgs, test_labels = unpickle(test_batch_path)
    test_set = build_histograms(test_imgs, words, number_clusters)

    return training_set, np.asarray(training_labels), test_set, np.asarray(test_labels)


In [10]:
def train(svm_c, svm_kernel, svm_gamma, training_set, training_labels):
    """Fit and return an ``svm.SVC`` classifier.

    ``svm_c``, ``svm_kernel`` and ``svm_gamma`` are forwarded as the ``C``,
    ``kernel`` and ``gamma`` arguments; the classifier is then fitted on
    ``training_set`` / ``training_labels``.
    """
    classifier = svm.SVC(kernel=svm_kernel, gamma=svm_gamma, C=svm_c)
    # fit() returns the estimator itself, so the fitted model is returned.
    return classifier.fit(training_set, training_labels)

In [11]:
# Load the configured number of training batches.
imgs, labels = load_data_batches(train_number)
# Pool the SIFT descriptors of all loaded batches.
descriptors = get_descriptors_for_data(imgs)
# Fit the k-means vocabulary (visual words) on the pooled descriptors.
words = get_words(descriptors)
# Convert the training batches and the test batch into per-image histograms.
training_set, train_labels, test_set, test_labels = get_training_test_data(words, imgs, labels, number_clusters)

# Models
# 1. SVM

In [12]:
# Fit an SVM with C=0.01 and a linear kernel on the training histograms.
# NOTE(review): gamma=0.01 is passed through but presumably has no effect
# with kernel='linear' — confirm against the SVC documentation.
model = train(0.01, 'linear', 0.01, training_set, train_labels)

In [13]:
# Score the SVM on the test histograms; the bare name displays the value.
accuracy = model.score(test_set, test_labels)
accuracy

0.2676

# 2. Logistic Regression

In [14]:
def train1(max_iter, training_set, training_labels):
    """Fit and return a ``LogisticRegression`` classifier.

    Parameters
    ----------
    max_iter : int
        Iteration cap forwarded to ``LogisticRegression``. The argument was
        previously ignored in favour of a hard-coded 1000.
    training_set : array-like
        Feature rows to fit on.
    training_labels : array-like
        Target label for each feature row.

    Returns
    -------
    LogisticRegression
        The fitted model.
    """
    model = LogisticRegression(max_iter=max_iter)
    model.fit(training_set, training_labels)

    return model

In [15]:
# Fit logistic regression on the training histograms; 1000 is the value
# supplied for train1's max_iter parameter.
model1 = train1(1000, training_set, train_labels)

In [16]:
# Score logistic regression on the test histograms; the bare name displays it.
accuracy1 = model1.score(test_set, test_labels)
accuracy1

0.2505

# 3. K-Nearest Neighbors

In [17]:
def train2(n_neighbors, training_set, training_labels):
    """Fit and return a ``KNeighborsClassifier``.

    ``n_neighbors`` is forwarded to the classifier, which is then fitted on
    ``training_set`` / ``training_labels``.
    """
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    # fit() returns the estimator itself, so the fitted model is returned.
    return knn.fit(training_set, training_labels)

In [18]:
# Fit k-nearest neighbours with n_neighbors=5 on the training histograms.
model2 = train2(5, training_set, train_labels)

In [19]:
# Score k-nearest neighbours on the test histograms; the bare name displays it.
accuracy2 = model2.score(test_set, test_labels)
accuracy2

0.1218