<a href="https://colab.research.google.com/github/ShoaibSheriff/Computer-Vision/blob/master/Scene%20Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# import packages here
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import itertools


from sklearn.feature_extraction import image


In [0]:
class_names = [name[11:] for name in glob.glob('data/train/*')]
class_names = dict(zip(xrange(len(class_names)), class_names))

def load_dataset(path, num_per_class=-1):
    data = []
    labels = []
    for id, class_name in class_names.iteritems():
        img_path_class = glob.glob(path + class_name + '/*.jpg')
        if num_per_class > 0:
            img_path_class = img_path_class[:num_per_class]
        labels.extend([id]*len(img_path_class))
        for filename in img_path_class:
            data.append(cv2.imread(filename, 0))
    return data, labels

# load training dataset
train_data, train_label = load_dataset('data/train/')
train_num = len(train_label)

# load testing dataset
test_data, test_label = load_dataset('data/test/', 100)
test_num = len(test_label)

## Tiny Image Representation + Nearest Neighbor Classifier


In [0]:
%%time
from sklearn import preprocessing
tiny_img_dim = 16

# load training dataset
train_data, train_label = load_dataset('data/train/')
train_num = len(train_label)

# load testing dataset
test_data, test_label = load_dataset('data/test/')
test_num = len(test_label)


# feature extraction
def extract_feat(raw_data):
    feat_dim = tiny_img_dim * tiny_img_dim
    feat = np.zeros((len(raw_data), feat_dim), dtype=np.float32)
    for i in xrange(feat.shape[0]):
        tiny_img_i = cv2.resize(raw_data[i], (tiny_img_dim, tiny_img_dim))
        temp = np.reshape(tiny_img_i, (1, tiny_img_dim * tiny_img_dim))
        feat[i] = (temp - np.mean(temp)) / np.std(temp)
    return feat

train_feat = extract_feat(train_data)
test_feat = extract_feat(test_data)

# from sklearn import preprocessing
# train_feat = preprocessing.normalize(train_feat)
# test_feat = preprocessing.normalize(test_feat)


from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(8)

def train(X, Y):
    knn.fit(X, Y)
    return knn

model = train(train_feat, train_label)

# prediction: take feature and model, return label
def predict(model, x):
    return model.predict(x) # dummy implementation

predictions = predict(model, test_feat)

accuracy = sum(np.array(predictions) == test_label) / float(test_num)

pred1, label1 = predictions, test_label
print "The accuracy of my dummy model is {:.2f}%".format(accuracy*100)

The accuracy of my dummy model is 22.24%
CPU times: user 5.89 s, sys: 393 ms, total: 6.28 s
Wall time: 11.9 s


## Metrics
Accuracy : 22.24%
Time : 5.44 s

## Bag of SIFT Representation + Nearest Neighbor Classifer


In [0]:
readFromPickle = True

import pickle
sift = cv2.xfeatures2d.SIFT_create()

from sklearn.feature_extraction.image import extract_patches_2d

# load training dataset
train_data, train_label = load_dataset('data/train/')
train_num = len(train_label)

# load testing dataset
test_data, test_label = load_dataset('data/test/')
test_num = len(test_label)


def buildDictonary(X_train) :
    
    #all sift features across training images
    total_sift_features = np.zeros(shape=(0,128))
    step_size = 24

    for img_idx in range(len(X_train)) :
#     for img_idx in range(300) :
        img = X_train[img_idx]

#         patches = extract_patches_2d(img, (step_size, step_size), max_patches = 1000)
#         kp = [cv2.KeyPoint(x, y, step_size) for y in range(0, patches.shape[0], step_size) 
#                         for x in range(0, patches.shape[1], step_size)]
    
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size) 
                        for y in range(0, img.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        keypoints, dense_feat = sift.compute(img, kp)
        total_sift_features = np.vstack([total_sift_features, dense_feat])
        
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters = 80, random_state = 0,  n_jobs = 3)
    kmeans.fit(total_sift_features)
    return kmeans;


def loadDict(f_name) :

    f = open(f_name, 'rb')
    obj = pickle.load(f)
    f.close()
    return obj

def saveDict(model, f_name) :

    f = open(f_name, 'wb')
    pickle.dump(model, f)
    f.close()
    return

# Disable this is readin from model.
if (readFromPickle == False) :
    model = buildDictonary(train_data)
    saveDict(model, 'dict_step_24_cl_80.pkl')

if (readFromPickle == True) :
    model = loadDict('dict_step_24_cl_80.pkl')
    

def getBagOfWordsRepresentation(dictionary, X) :
    
    total_sift_features_per_image = []
    X_bow_per_image = np.zeros((0, 80))
    
    step_size = 4

    for img_idx in range(len(X)) :
#     for img_idx in range(300) :

        img = X[img_idx]
    
    
#         patches = extract_patches_2d(img, (step_size, step_size), max_patches = 1000)

#         kp = [cv2.KeyPoint(x, y, step_size) for y in range(0, patches.shape[0], step_size) 
#                         for x in range(0, patches.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size) 
                        for y in range(0, img.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        keypoints, dense_feat = sift.compute(img, kp)
        X_bows =  dictionary.predict(dense_feat)
        hist, bins = np.histogram(X_bows, bins=80)
        hist = hist.astype('float32')
        hist = (hist - hist.mean()) / hist.std()
#         hist = hist/np.sum(hist)
        
        X_bow_per_image = np.vstack([X_bow_per_image, hist])

    return X_bow_per_image
   

In [0]:
# load training dataset
train_data, train_label = load_dataset('data/train/', 70)
train_num = len(train_label)

In [0]:
%%time
X_train_bow = getBagOfWordsRepresentation(model, train_data)
print(X_train_bow.shape)

In [0]:
%%time
X_test_bow = getBagOfWordsRepresentation(model, test_data)
print(X_test_bow.shape)

(2985, 80)
CPU times: user 16min 5s, sys: 7min 53s, total: 23min 59s
Wall time: 6min 11s


In [0]:
%%time
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(4)
def train(model, X, Y):
    knn.fit(X, Y)
    return knn

model = train(knn, X_train_bow, train_label)

# prediction: take feature and model, return label
def predict(model, x):
    return model.predict(x) # dummy implementation

predictions = predict(model, X_test_bow)

accuracy = sum(np.array(predictions) == test_label) / float(test_num)

pred2, label2 = predictions, test_label
print "The accuracy of my dummy model is {:.2f}%".format(accuracy * 100)

The accuracy of my dummy model is 51.42%
CPU times: user 336 ms, sys: 0 ns, total: 336 ms
Wall time: 336 ms


## Metrics
Accuracy : 51.42% <br>
Best value for k = 80(k means), step size = 24(dense keypoints), k(knn) = 4. For training phase, step size of 4 is used with 70% data(for memory issues).<br>
Time to build dictionary : 35min 17s <br>
Time to get train data bag of words representation : 7min 59s <br>
Time to get test data bag of words representation : 23min 59s <br>
Time to predict with knn :  336 ms <br>

## Bag of SIFT Representation + one-vs-all SVMs

In [0]:
readFromPickle = True

import pickle
sift = cv2.xfeatures2d.SIFT_create()

from sklearn.feature_extraction.image import extract_patches_2d

# load training dataset
train_data, train_label = load_dataset('data/train/')
train_num = len(train_label)

# load testing dataset
test_data, test_label = load_dataset('data/test/')
test_num = len(test_label)


def buildDictonary(X_train) :
    
    #all sift features across training images
    total_sift_features = np.zeros(shape=(0,128))
    step_size = 24

    for img_idx in range(len(X_train)) :
#     for img_idx in range(300) :
        img = X_train[img_idx]

#         patches = extract_patches_2d(img, (step_size, step_size), max_patches = 1000)
#         kp = [cv2.KeyPoint(x, y, step_size) for y in range(0, patches.shape[0], step_size) 
#                         for x in range(0, patches.shape[1], step_size)]
    
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size) 
                        for y in range(0, img.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        keypoints, dense_feat = sift.compute(img, kp)
        total_sift_features = np.vstack([total_sift_features, dense_feat])
        
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters = 80, random_state = 0,  n_jobs = 3)
    kmeans.fit(total_sift_features)
    return kmeans;


def loadDict(f_name) :

    f = open(f_name, 'rb')
    obj = pickle.load(f)
    f.close()
    return obj

def saveDict(model, f_name) :

    f = open(f_name, 'wb')
    pickle.dump(model, f)
    f.close()
    return

# Disable this is readin from model.
if (readFromPickle == False) :
    model = buildDictonary(train_data)
    saveDict(model, 'dict_step_24_cl_80.pkl')

if (readFromPickle == True) :
    model = loadDict('dict_step_24_cl_80.pkl')
    

def getBagOfWordsRepresentation(dictionary, X) :
    
    total_sift_features_per_image = []
    X_bow_per_image = np.zeros((0, 80))
    
    step_size = 4

    for img_idx in range(len(X)) :
#     for img_idx in range(300) :

        img = X[img_idx]
    
    
#         patches = extract_patches_2d(img, (step_size, step_size), max_patches = 1000)

#         kp = [cv2.KeyPoint(x, y, step_size) for y in range(0, patches.shape[0], step_size) 
#                         for x in range(0, patches.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        kp = [cv2.KeyPoint(x, y, step_size) for x in range(0, img.shape[0], step_size) 
                        for y in range(0, img.shape[1], step_size)]
        
        #676 kps per image. Each descriptor is 128
        keypoints, dense_feat = sift.compute(img, kp)
        X_bows =  dictionary.predict(dense_feat)
        hist, bins = np.histogram(X_bows, bins=80)
        hist = hist.astype('float32')
        hist = (hist - hist.mean()) / hist.std()
#         hist = hist/np.sum(hist)
        
        X_bow_per_image = np.vstack([X_bow_per_image, hist])

    return X_bow_per_image

ends here
CPU times: user 2.7 s, sys: 151 ms, total: 2.85 s
Wall time: 2.85 s


In [0]:
# load training dataset
train_data, train_label = load_dataset('data/train/', 70)
train_num = len(train_label)

In [0]:
%%time
X_train_bow = getBagOfWordsRepresentation(model, train_data)
print(X_train_bow.shape)

(1050, 80)
CPU times: user 5min 42s, sys: 2min 47s, total: 8min 29s
Wall time: 2min 11s


In [0]:
%%time
X_test_bow = getBagOfWordsRepresentation(model, test_data)
print(X_test_bow.shape)

(2985, 80)
CPU times: user 16min 50s, sys: 7min 48s, total: 24min 38s
Wall time: 6min 23s


In [0]:
%%time
# train_labels_split = []
# for i in range(len(train_label)):
#     train_labels_split.append(encodeLabelsForClass(train_label, i))

# from sklearn.metrics.pairwise import chi2_kernel
# K_tr = chi2_kernel([X_train_bow, X_train_bow], gamma=10)
# K_tst = chi2_kernel(X_test_bow, gamma=10)


# training a linear SVM classifier 
from sklearn.svm import  SVC 
svm_model_linear = SVC(kernel='linear', C = 1.19)
svm_model_linear.fit(X_train_bow, train_label) 
svm_predictions = svm_model_linear.predict(X_test_bow) 

accuracy = sum(np.array(svm_predictions) == test_label) / float(test_num)
print "The accuracy of  model is {:.2f}%".format(accuracy*100)

pred3, label3 = svm_predictions, test_label

The accuracy of  model is 61.07%
CPU times: user 382 ms, sys: 59.1 ms, total: 441 ms
Wall time: 382 ms


## Metrics
Accuracy : 61.07% <br>
Found best value for C = 1.19, step size = 24 and k = 80. For training phase, step size of 4 is used with 70% data(for memory issues)..(due to memory issues)<br>
Time to build dictionary : 35min 17s <br>
Time to get train data bag of words representation : 10min 59s<br>
Time to get test data bag of words representation : 20min 31s <br>
Time to predict with knn :  335 ms <br>