In [11]:
import cv2
import numpy as np
import os
from scipy.cluster.vq import kmeans, vq
from sklearn.preprocessing import StandardScaler

In [12]:
def imglist(path):
    """Return the joined path of every entry that os.listdir reports for `path`.

    Entries are returned in the order produced by os.listdir; no filtering
    by file type is performed.
    """
    entries = []
    for name in os.listdir(path):
        entries.append(os.path.join(path, name))
    return entries

In [54]:
def getAllRectangles(im_x, im_y, step, window):
    """Enumerate every square patch position of a sliding window.

    The window is moved in increments of `step` along both axes. Positions
    are listed with x varying fastest (all x offsets for y == 0 first, then
    the next y offset, and so on).

    Parameters
    ----------
    im_x, im_y : int
        Extent of the image along the x and y directions of the patch grid.
    step : int
        Distance between consecutive patch origins; must be positive.
    window : int
        Side length of the square patch.

    Returns
    -------
    list of [x, y, window, window]
        One entry per patch that lies completely inside the image. The list
        is empty when the window does not fit along either direction.

    Raises
    ------
    ValueError
        If `step` is not positive (the scan would never terminate).
    """
    if step <= 0:
        raise ValueError("step must be positive, got {!r}".format(step))

    # Largest origin along each axis at which the window still fits entirely.
    # A negative value means the window is larger than the image on that axis,
    # in which case no patch is produced instead of an out-of-bounds one.
    last_x = im_x - window
    last_y = im_y - window

    rectangle = []
    y = 0
    while y <= last_y:
        x = 0
        while x <= last_x:
            rectangle.append([x, y, window, window])
            x += step
        y += step
    return rectangle

In [55]:
def featureExtraction(im, rectangle):
    """Flatten each rectangular patch of an image into a feature vector.

    Parameters
    ----------
    im : array-like
        Image with at least two dimensions; it is converted with np.asarray
        before slicing.
    rectangle : iterable of [x, y, w, h]
        Patch descriptors. x is the offset along axis 0 and y the offset
        along axis 1; w and h are the patch extents along those same axes.

    Returns
    -------
    list of list
        One flattened patch (row-major order, via ravel) per rectangle, in
        the order the rectangles were given.
    """
    im = np.asarray(im)
    # The second axis must be sliced with h; slicing both axes with w ignored
    # the height and returned the wrong patch for non-square rectangles.
    return [im[x:x + w, y:y + h].ravel().tolist() for x, y, w, h in rectangle]

In [60]:
def descriptorFormation(descriptor_list):
    """Stack the descriptors of all images into one float array.

    Parameters
    ----------
    descriptor_list : sequence of (image_path, descriptors)
        `descriptors` is a 2-D array-like with one row per patch. Only the
        second element of each pair is used.

    Returns
    -------
    numpy.ndarray
        All descriptor rows stacked vertically in input order, cast to float.

    Raises
    ------
    ValueError
        If `descriptor_list` is empty.
    """
    if len(descriptor_list) == 0:
        raise ValueError("descriptor_list is empty; nothing to stack")

    # A single vstack over all images avoids re-copying the growing array on
    # every iteration, and always yields an ndarray (a lone image previously
    # left a plain list here, which has no .astype).
    descriptors = np.vstack([descriptor for _, descriptor in descriptor_list])
    print("Descriptors stacked successfully!")
    descriptors_float = descriptors.astype(float)
    return descriptors_float

In [57]:
def getDescriptors(image_paths, step=4, window=8):
    """Extract dense patch descriptors from every image in `image_paths`.

    Each image is read as grayscale, covered with a sliding square window
    (see getAllRectangles) and every patch is flattened into a vector
    (see featureExtraction).

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the images to process.
    step : int, optional
        Stride of the sliding window. Defaults to 4.
    window : int, optional
        Side length of the square patch. Defaults to 8.

    Returns
    -------
    list of (image_path, feature)
        One pair per input path, in input order; `feature` is the list of
        flattened patches of that image.

    Raises
    ------
    ValueError
        If an image cannot be read.
    """
    descriptor_list = []

    for image_path in image_paths:
        im = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals an unreadable/missing file by returning None
        # rather than raising, so fail here with the offending path.
        if im is None:
            raise ValueError("Could not read image: {}".format(image_path))
        m, n = im.shape
        rectangle = getAllRectangles(m, n, step, window)
        feature = featureExtraction(im, rectangle)
        descriptor_list.append((image_path, feature))
    print("Feature extraction done!")
    return descriptor_list

In [58]:
def quantisation(image_paths, descriptor_list, vocabulary=None, n_words=None):
    """Build one visual-word histogram per image.

    Every descriptor of an image is assigned to its nearest code word with
    scipy's `vq`, and the number of descriptors per code word is counted.

    Parameters
    ----------
    image_paths : sequence
        Image paths; only the length is used (one histogram row per path).
    descriptor_list : sequence of (image_path, descriptors)
        Per-image descriptors, aligned with `image_paths`.
    vocabulary : array-like, optional
        Code book passed to `vq`. When omitted, the notebook-level `voc`
        is used.
    n_words : int, optional
        Number of histogram bins. When omitted it is the notebook-level `k`
        if `vocabulary` was also omitted, otherwise `len(vocabulary)`.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (len(image_paths), n_words) holding the counts.

    Raises
    ------
    ValueError
        If `image_paths` and `descriptor_list` differ in length.
    """
    if vocabulary is None:
        # Backward-compatible fallback to the globals defined in the notebook
        vocabulary = voc
        if n_words is None:
            n_words = k
    elif n_words is None:
        n_words = len(vocabulary)

    if len(image_paths) != len(descriptor_list):
        raise ValueError(
            "image_paths and descriptor_list must have the same length "
            "({} != {})".format(len(image_paths), len(descriptor_list))
        )

    im_features = np.zeros((len(image_paths), n_words), "float32")
    for i in range(len(image_paths)):
        observations = np.asarray(descriptor_list[i][1], dtype=float)
        words, _ = vq(observations, vocabulary)
        # Count occurrences of every code word in one vectorised call
        im_features[i] = np.bincount(words, minlength=n_words)
    return im_features

In [59]:
train_path = 'images/training'
# os.listdir returns entries in arbitrary order, so sort the class folders to
# make the class-id assignment reproducible between runs. Non-directory
# entries are skipped because imglist would fail when listing them.
training_names = sorted(
    name for name in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, name))
)
image_paths = []
# Class label for all images (parallel to image_paths)
image_classes = []
class_id = 0

for training_name in training_names:
    # Named class_dir so the builtin dir() is not shadowed
    class_dir = os.path.join(train_path, training_name)
    class_path = imglist(class_dir)
    image_paths += class_path
    image_classes += [class_id] * len(class_path)
    class_id += 1

descriptor_list = getDescriptors(image_paths)

Feature extraction done!


In [52]:
#print(len(descriptor_list[0][1]))
#print(descriptor_list[0][1][-1:])

3185
[[34, 37, 31, 26, 28, 26, 20, 23, 41, 44, 43, 35, 32, 36, 31, 26, 42, 38, 41, 40, 33, 32, 32, 30, 49, 44, 39, 42, 41, 31, 31, 34, 45, 51, 46, 41, 45, 38, 35, 36, 37, 41, 45, 36, 33, 33, 33, 29, 49, 43, 48, 49, 42, 39, 39, 32, 51, 45, 40, 48, 46, 38, 37, 35]]


In [None]:
# Stack the per-image patch descriptors from descriptor_list into one float array
descriptors_float = descriptorFormation(descriptor_list)

In [9]:
# Number of clusters requested from k-means; also used by quantisation as the
# number of histogram bins.
k = 500  
# Build the code book from all training descriptors. Per the SciPy docs the
# third positional argument of kmeans is `iter` and the second return value is
# the distortion of the returned code book.
# NOTE(review): no random seed is set, so `voc` may differ between runs — confirm
# whether reproducibility is required here.
voc, variance = kmeans(descriptors_float, k, 1) 

In [10]:
# Build one visual-word histogram per training image
im_features = quantisation(image_paths, descriptor_list)

# Fit the scaler on the training histograms, then scale them with it
stdSlr = StandardScaler()
stdSlr.fit(im_features)
im_features = stdSlr.transform(im_features)

### The Next Block has to be replaced with the One for all Classifier

In [11]:
#data ->im_features
#label ->np.array(image_classes)
#Depending on the accuracy, you can try tweaking the value of k too, but 500 seems to be fine as per the problem statement.
#During our call, I mentioned planning to sample only a few features. That was a mistake in what I inferred from the problem.
#We must use all the rectangular features. That is what has been done now.

### To Validate using Test data

In [12]:
test_path = 'images/testing'
testing_names = os.listdir(test_path)

# Placeholders for test labels; they are not populated in this cell
test_image_classes = []
test_class_id = 0

# Every entry of the testing folder is treated as one image path
test_image_paths = [os.path.join(test_path, entry) for entry in testing_names]

test_descriptor_list = getDescriptors(test_image_paths)
test_descriptors_float = descriptorFormation(test_descriptor_list)

Image features extracted for image 1......
Image features extracted for image 2......
Image features extracted for image 3......
Image features extracted for image 4......
Image features extracted for image 5......
Image features extracted for image 6......
Image features extracted for image 7......
Image features extracted for image 8......
Image features extracted for image 9......
Image features extracted for image 10......
Image features extracted for image 11......
Image features extracted for image 12......
Image features extracted for image 13......
Image features extracted for image 14......
Image features extracted for image 15......
Image features extracted for image 16......
Image features extracted for image 17......
Image features extracted for image 18......
Image features extracted for image 19......
Image features extracted for image 20......
Image features extracted for image 21......
Image features extracted for image 22......
Image features extracted for image 23....

In [13]:
# Quantise the TEST descriptors; passing image_paths/descriptor_list here
# would just rebuild the training histograms under the test variable name.
test_im_features = quantisation(test_image_paths, test_descriptor_list)
# Apply the scaler that was fitted on the training histograms
test_im_features = stdSlr.transform(test_im_features)