In [2]:
pip install opencv-contrib-python==3.4.2.17

Collecting opencv-contrib-python==3.4.2.17
[?25l  Downloading https://files.pythonhosted.org/packages/12/32/8d32d40cd35e61c80cb112ef5e8dbdcfbb06124f36a765df98517a12e753/opencv_contrib_python-3.4.2.17-cp37-cp37m-manylinux1_x86_64.whl (30.6MB)
[K     |████████████████████████████████| 30.6MB 153kB/s 
Installing collected packages: opencv-contrib-python
  Found existing installation: opencv-contrib-python 4.1.2.30
    Uninstalling opencv-contrib-python-4.1.2.30:
      Successfully uninstalled opencv-contrib-python-4.1.2.30
Successfully installed opencv-contrib-python-3.4.2.17


In [3]:
# import packages here
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob
import itertools
import time
import zipfile
import torch
import torchvision
import gc
import pickle
from sklearn import svm
from skimage import color
from skimage import io
from torch.utils.data import Dataset, DataLoader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score

print(cv2.__version__) # verify OpenCV version

3.4.2


## Data Preparation

In [7]:
class_names = [name[13:] for name in glob.glob('./data/train/*')]
class_names = dict(zip(range(len(class_names)), class_names))
print("class_names: %s " % class_names)
n_train_samples_per_class = 150
n_test_samples_per_class = 50

# To load images from the path provided
def load_dataset(path, num_per_class=-1):
    data = []
    labels = []
    for id, class_name in class_names.items():
        print("Loading images from class: %s" % id)
        img_path_class = glob.glob(path + class_name + '/*.jpg')
        if num_per_class > 0:
            img_path_class = img_path_class[:num_per_class]
        labels.extend([id]*len(img_path_class))
        for filename in img_path_class:
            data.append(cv2.imread(filename, 0))
    return data, labels

# load training dataset
# train_data, train_label = load_dataset('./data/train/')
train_data, train_label = load_dataset('./data/train/', n_train_samples_per_class)
n_train = len(train_label)
print("n_train: %s" % n_train)

# load testing dataset
# test_data, test_label = load_dataset('./data/test/')
test_data, test_label = load_dataset('./data/test/', n_test_samples_per_class)
n_test = len(test_label)
print("n_test: %s" % n_test)

class_names: {0: 'Forest', 1: 'Industrial', 2: 'Flower', 3: 'Coast', 4: 'InsideCity', 5: 'Office', 6: 'Bedroom', 7: 'Highway', 8: 'Street', 9: 'TallBuilding', 10: 'LivingRoom', 11: 'Suburb', 12: 'OpenCountry', 13: 'Mountain', 14: 'Kitchen', 15: 'Store'} 
Loading images from class: 0
Loading images from class: 1
Loading images from class: 2
Loading images from class: 3
Loading images from class: 4
Loading images from class: 5
Loading images from class: 6
Loading images from class: 7
Loading images from class: 8
Loading images from class: 9
Loading images from class: 10
Loading images from class: 11
Loading images from class: 12
Loading images from class: 13
Loading images from class: 14
Loading images from class: 15
n_train: 2400
Loading images from class: 0
Loading images from class: 1
Loading images from class: 2
Loading images from class: 3
Loading images from class: 4
Loading images from class: 5
Loading images from class: 6
Loading images from class: 7
Loading images from class: 8


In [8]:
# As loading the data from the source for the first time is time consuming, so you can pkl or save the data in a compact way such that subsequent data loading is faster
# Save intermediate image data into disk
file = open('train.pkl','wb')
pickle.dump(train_data, file)
pickle.dump(train_label, file)
file.close()

file = open('test.pkl','wb')
pickle.dump(test_data, file)
pickle.dump(test_label, file)
file.close()

In [9]:
# Load intermediate image data from disk
file = open('train.pkl', 'rb')
train_data = pickle.load(file)
train_label = pickle.load(file)
file.close()

file = open('test.pkl', 'rb')
test_data = pickle.load(file)
test_label = pickle.load(file)
file.close()

print(len(train_data), len(train_label)) # Verify number of training samples
print(len(test_data), len(test_label))   # Verify number of testing samples

2400 2400
400 400


In [10]:
# plt.imshow(train_data[1], cmap='gray') # Verify image
img_new_size = (240, 240)

# resizing the images
train_data = list(map(lambda x: cv2.resize(x, img_new_size), train_data))
train_data = np.stack(train_data)
train_label = np.array(train_label)

test_data = list(map(lambda x: cv2.resize(x, img_new_size), test_data))
test_data = np.stack(test_data)
test_label = np.array(test_label)


In [None]:
# # Verify image
# plt.imshow(cv2.resize(train_data[1], img_new_size), cmap='gray')
# print(train_data[0].dtype)

In [11]:
n_train = len(train_label)
n_test = len(test_label)

# feature extraction
def extract_feat(raw_data):
    print(len(raw_data))
    feat_dim = 1000
    feat = np.zeros((len(raw_data), feat_dim), dtype=np.float32)
    for i in np.arange(feat.shape[0]):
        feat[i] = np.reshape(raw_data[i], (raw_data[i].size))[:feat_dim] # dummy implemtation
    print("feat",len(feat))
    
    return feat

train_feat = extract_feat(train_data)
test_feat = extract_feat(test_data)

# model training: take feature and label, return model
def train(X, Y):
    return 0 # dummy implementation

# prediction: take feature and model, return label
def predict(model, x):
    return np.random.randint(16) # dummy implementation

# evaluation
predictions = [-1]*len(test_feat)
for i in np.arange(n_test):
    predictions[i] = predict(None, test_feat[i])
    
accuracy = sum(np.array(predictions) == test_label) / float(n_test)

print("The accuracy of my dummy model is {:.2f}%".format(accuracy*100))

2400
feat 2400
400
feat 400
The accuracy of my dummy model is 8.25%


## Tiny Image Representation + Nearest Neighbor Classifier

**resize the image to 16x16** and the tiny image is made to have zero mean and unit length (normalization).

In [12]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.neighbors import NearestNeighbors

def resize_data(train_data, test_data):
    # for resizing the input images
    img_s = 16
    img_new_size = (img_s, img_s)
    # resizing the images
    train_X = list(map(lambda x: cv2.resize(x, img_new_size).flatten(), train_data))
    train_X = np.stack(train_X)
    test_X = list(map(lambda x: cv2.resize(x, img_new_size).flatten(), test_data))
    test_X = np.stack(test_X)

    # for normalizing the images by moving the mean to zero and unit length
    train_X = train_X.astype(float)
    test_X = test_X.astype(float)
    for i in range(train_X.shape[0]):
        train_X[i] = (train_X[i]-train_X[i].mean())/train_X[i].std()

    for i in range(test_X.shape[0]):
        test_X[i] = (test_X[i]-test_X[i].mean())/test_X[i].std()

    return train_X, test_X

def tiny_image_predict(train_data, train_label, test_data, test_label):
    train_X, test_X = resize_data(train_data, test_data)
    # K-nearest neighbors classifier model
    # model = KNeighborsClassifier(n_neighbors=27, algorithm='ball_tree', weights= 'distance')
    model = KNeighborsClassifier(n_neighbors=45)

    # training the model
    model.fit(train_X, train_label)
    # prediction using the trained model
    predicted_labels = model.predict(test_X)
    return predicted_labels
  
pred1 = tiny_image_predict(train_data, train_label, test_data, test_label)
label1 = test_label
# accuracy of the model
prediction_accuracy = accuracy_score(pred1, test_label)
print("Accuracy: {:.2f}%".format(prediction_accuracy * 100))

# pred1, label1 = # train_and_test(...

Accuracy: 22.50%


## Bag of SIFT Representation + Nearest Neighbor Classifer
A vocabulary of visual words is formed by sampling many local features from our training set and then cluster them with k-means. The number of k-means clusters is the size of our vocabulary and the size of our features. For example clustering many SIFT descriptors into k=50 clusters. For any new SIFT feature, we can figure out which region it belongs to with reference to the centroids of our original clusters. Those centroids are our visual word vocabulary.

For each image SIFT descriptors are sampled and instead of storing hundreds of SIFT descriptors, we simply count how many SIFT descriptors fall into each cluster in our visual word vocabulary. This is done by finding the nearest neighbor k-means centroid for every SIFT feature. Thus, if we have a vocabulary of 50 visual words, and we detect 220 distinct SIFT features in an image, our bag of SIFT representation will be a histogram of 50 dimensions where each bin counts how many times a SIFT descriptor was assigned to that cluster. The total of all the bin-counts is 220. The histogram should be normalized so that image size does not dramatically change the bag of features magnitude.

Using the Bag of SIFT feature representation of the images a KNN classifier is trained.

In [None]:
import gc
import cv2
from sklearn.cluster import KMeans
import time

# To generate keypoints of an image
def generate_keypoints(ref_img, step_size = 10):
    # keypoint generation from step_size
    keypoints = [cv2.KeyPoint(x_cord, y_cord, step_size) for y_cord in range(0, ref_img.shape[0], step_size) 
                                               for x_cord in range(0, ref_img.shape[1], step_size)]
    return keypoints

#To extract sift descriptors from the given images using key points  
def extract_sift_descriptors(train_data, test_data, key_points):
    train_descriptor_list = []
    test_descriptor_list = []
    # initializing sift extractor
    sift_feature_extractor = cv2.xfeatures2d.SIFT_create()

    # extracting descriptors for the selected keypoints from train and test data
    for image in train_data:
      kp, descriptors = sift_feature_extractor.compute(image, keypoints)
      train_descriptor_list.append(descriptors)

    for image in test_data:
      kp, descriptors = sift_feature_extractor.compute(image, keypoints)
      test_descriptor_list.append(descriptors)

    # garbage collection just in case
    gc.collect()
      
    # modifying the shape  
    train_descriptor_array = np.array(train_descriptor_list[0])
    for des in train_descriptor_list[1:]:
      train_descriptor_array = np.vstack((train_descriptor_array, des))
    
    test_descriptor_array = np.array(test_descriptor_list[0])
    for des in test_descriptor_list[1:]:
      test_descriptor_array = np.vstack((test_descriptor_array, des)) 

    return train_descriptor_list, train_descriptor_array, test_descriptor_list, test_descriptor_array
# pred2, label2 = # train_and_test(...

In [None]:
# clustering the descriptors using KMeans
def cluster_descriptors(train_descriptor_array, test_descriptor_array, n_clusters = 50):
    kmeans_model = KMeans(n_clusters = n_clusters)
    train_sift_centroids = kmeans_model.fit_predict(train_descriptor_array)  
    test_sift_centroids = kmeans_model.predict(test_descriptor_array)

    return train_sift_centroids, test_sift_centroids

In [None]:
from sklearn.preprocessing import StandardScaler   

# generating histograms from descriptor cluster labels
def generate_sift_histograms(train_data, train_descriptor_array, train_descriptor_list, test_data, test_descriptor_array, test_descriptor_list, n_clusters=50):
    # clustering the descriptors using KMeans
    kmeans_model = KMeans(n_clusters = n_clusters)
    train_sift_centroids = kmeans_model.fit_predict(train_descriptor_array)  
    test_sift_centroids = kmeans_model.predict(test_descriptor_array)
    
    image_count = train_data.shape[0]
    # Training histogram
    train_histogram = np.array([np.zeros(n_clusters) for i in range(image_count)])
    old_count = 0
    # looping over the images
    for i in range(image_count):
      image_desc_count = len(train_descriptor_list[i])
      # updating the histogram array
      for j in range(image_desc_count):
        idx = train_sift_centroids[old_count + j]
        train_histogram[i][idx] = train_histogram[i][idx] + 1
      old_count = old_count + image_desc_count

    image_count = test_data.shape[0]
    # Testing histogram
    test_histogram = np.array([np.zeros(n_clusters) for i in range(image_count)])
    old_count = 0
    # looping over the images
    for i in range(image_count):
      image_desc_count = len(test_descriptor_list[i])
      # updating the histogram array
      for j in range(image_desc_count):
        idx = test_sift_centroids[old_count + j]
        test_histogram[i][idx] = test_histogram[i][idx] + 1
      old_count = old_count + image_desc_count

    # Normalizing the values
    scale = StandardScaler().fit(train_histogram)
    train_histogram = scale.transform(train_histogram)
    scale = StandardScaler().fit(test_histogram)
    test_histogram = scale.transform(test_histogram)
    
    return train_histogram, test_histogram, kmeans_model

In [None]:
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score

keypoints = generate_keypoints(train_data[0], 10)
train_descriptor_list, train_descriptor_array, test_descriptor_list, test_descriptor_array = extract_sift_descriptors(train_data, test_data, keypoints)
train_histogram, test_histogram, kmeans_model = generate_sift_histograms(train_data, train_descriptor_array, train_descriptor_list, test_data, test_descriptor_array, test_descriptor_list, 50)

# Initializing the model
n_neighbors = 48
model = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='ball_tree', weights= 'distance')

# training the model
model.fit(train_histogram, train_label)
# prediction using the trained model
pred2 = model.predict(test_histogram)
label2 = test_label
prediction_accuracy = accuracy_score(pred2, test_label)

# accuracy of the model
print("Accuracy: {:.2f}%".format(prediction_accuracy * 100))


Accuracy: 54.50%


##Bag of SIFT Representation + one-vs-all SVMs

We train 16 binary, one-vs-all SVMs (ex: 'forest' vs 'non-forest').

In [None]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC

def predict_SVM(train_histogram, train_label, test_histogram, test_label, C = 0.0068, max_iter=20000):
    # getting unique label and its count
    unique_labels = np.unique(train_label)
    num_labels = unique_labels.shape[0]

    train_size = train_histogram.shape[0]
    test_size = test_histogram.shape[0]
    model_list = dict.fromkeys(unique_labels)

    # initializing and training binary SVM classifiers
    for i in range(num_labels):
        category = unique_labels[i]
        category_indices = train_label != category
        labels = np.ones(train_size)
        labels[ category_indices] = 0
        model = LinearSVC(C=C, max_iter=max_iter)
        model.fit(train_histogram, labels)
        # storing the models
        model_list[category] = model

    # prediction on the test data    
    predict_labels = []
    for i in range(test_size):
      score_dict = dict.fromkeys(unique_labels)
      for j in unique_labels:
        score_dict[j] = model_list[j].decision_function(test_histogram[i].reshape(1,test_histogram[i].shape[0]))[0]
      # selecting the label with max votes
      predict_labels.append(sorted(score_dict.items(), key=lambda score_dict: score_dict[1], reverse=True)[0][0])  
    
    return predict_labels
    
# training the SVM classifier
predict_labels = predict_SVM(train_histogram, train_label, test_histogram, test_label)
# calculating the prediction accuracy
prediction_accuracy = accuracy_score(np.asarray(predict_labels), test_label)
pred3 = predict_labels
label3 = test_label
# accuracy of the model
print("Accuracy: {:.2f}%".format(prediction_accuracy * 100))   

# pred3, label3 = # train_and_test(...

Accuracy: 65.25%
