In [None]:
import glob
import sys
import os

from sklearn import svm
import numpy as np
from scipy.misc import imread

import hog

In [69]:
def get_set(small=True):
    """
    Select the directory naming scheme of the dataset variant to use.

    Returns a 3-tuple ``(folder_suffix, train_name, test_name)``:
    ``('_64x128_H96', 'train', 'test')`` for the small images dataset
    (``small`` truthy), otherwise ``('', 'Train', 'Test')`` for the normal one.

    NOTE(review): a later cell defines another ``get_set`` with a different
    signature, which replaces this one once that cell has run; re-running
    cells out of order can therefore pick up the wrong function.
    """
    naming_by_variant = {
        True: ('_64x128_H96', 'train', 'test'),
        False: ('', 'Train', 'Test'),
    }
    return naming_by_variant[bool(small)]


# Dataset root directory (relative path) and the naming scheme of the
# normal (non-small) dataset variant used by the rest of the notebook.
folder = 'INRIAPerson'
(folder_suffix, train_suffix, test_suffix) = get_set(small=False)

In [75]:
def get_N_images(folder, folder_suffix, phase='train', validity='pos', N=None, verbose=False,
                 pattern='*.png'):
    """
    Load the features of up to N images from one dataset split.

    Files are searched in ``<folder>/<phase><folder_suffix>/<validity>/`` and
    processed in sorted path order, so the subset selected by ``N`` and the
    order of the returned samples are identical on every run and platform.

    Parameters
    ----------
    folder : str
        Root directory of the dataset.
    folder_suffix : str
        Suffix appended to ``phase`` to build the split directory name.
    phase : str
        Name of the split directory (before the suffix).
    validity : str
        Sub-directory to read ('pos' or 'neg' in this notebook).
    N : int or None
        Maximum number of images to load. None loads every matching file;
        zero or a negative value loads nothing.
    verbose : bool
        When true, print a progress line every 50 images and a final
        'Finished' line (also printed when the ``N`` limit stops the loop).
    pattern : str
        Glob pattern applied to the file names. Files that do not match are
        silently skipped.
        NOTE(review): confirm the dataset holds no images in another format,
        otherwise they are left out of the set with the default pattern.

    Returns
    -------
    list
        One entry per loaded image, as returned by ``get_image``.
    """
    search_glob = os.path.join(folder, phase + folder_suffix, validity, pattern)
    # glob returns paths in arbitrary, OS-dependent order: sort them so that
    # results (and the subset chosen by N) are reproducible.
    img_paths = sorted(glob.glob(search_glob))
    array = []

    for n, img_path in enumerate(img_paths):
        if N is not None and n >= N:
            # Stop loading but still fall through to the final message.
            break
        if verbose and n % 50 == 0:
            print('{}: {}'.format(validity, n))
        array.append(get_image(img_path))

    if verbose:
        print('Finished {} - {}'.format(phase, validity))
    return array


def get_image(img_path):
    """
    Compute the HOG feature descriptor of the image stored at 'img_path'.

    Thin wrapper: the path is handed to ``hog.get_hog_features`` and its
    result is returned unchanged. Reading the file is presumably done by
    that helper (not visible in this notebook).
    """
    hog_features = hog.get_hog_features(img_path)
    return hog_features


def get_N_targets(validity='pos', N=100):
    """
    Build the y-target column for N samples that all belong to one class.

    Returns a float array of shape (N, 1), filled with 1 when 'validity' is
    'pos' (human present) and with 0 for any other value.
    """
    label = 1.0 if validity == 'pos' else 0.0
    return np.full((N, 1), label)


def get_set(folder, folder_suffix, phase='train', N=None):
    """
    Load one dataset split and return it as ``(set_x, set_y)``.

    The 'pos' samples are loaded first, then the 'neg' ones, each limited to
    N images (None = all). ``set_x`` stacks their features into a float32
    array; ``set_y`` is the matching target column built by
    ``get_N_targets`` (1 for 'pos' rows, 0 for 'neg' rows), in the same order.

    NOTE(review): this definition reuses the name of the earlier
    ``get_set(small=True)`` helper and replaces it once this cell has run.
    """
    print('Loading in {}/{}{}'.format(folder, phase, folder_suffix))

    samples = []
    target_blocks = []
    for validity in ('pos', 'neg'):
        loaded = get_N_images(folder, folder_suffix, phase=phase, validity=validity,
                              verbose=True, N=N)
        samples.extend(loaded)
        # One target row per image actually loaded for this class.
        target_blocks.append(get_N_targets(validity=validity, N=len(loaded)))

    set_x = np.array(samples).astype('float32')
    set_y = np.concatenate(target_blocks)

    return set_x, set_y


def test_accuracy(clf, test_x, test_y):
    predictions = clf.predict(test_x)
    return sum(1 if real == prediction else 0
               for real, prediction in zip(test_y, predictions)) / len(test_x)

In [72]:
train_x, train_y = get_set(folder, folder_suffix, phase=train_suffix, N=None)
print(train_x.shape, train_y.shape)

pos: 0
pos: 50
pos: 100
pos: 150
pos: 200
pos: 250
pos: 300
pos: 350
pos: 400
pos: 450
pos: 500
pos: 550
pos: 600
Finished Train - pos
neg: 0
neg: 50
neg: 100
neg: 150
neg: 200
neg: 250
neg: 300
neg: 350
neg: 400
neg: 450
neg: 500
neg: 550
neg: 600
neg: 650
neg: 700
neg: 750
neg: 800
neg: 850
neg: 900
Finished Train - neg
(1526, 3780) (1526, 1)


In [73]:
clf = svm.LinearSVC(C=0.01)
clf.fit(train_x, train_y.ravel())

LinearSVC(C=0.01, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [74]:
test_x, test_y = get_set(folder, folder_suffix, phase=test_suffix, N=None)
print(test_x.shape, test_y.shape)

pos: 0
pos: 50
pos: 100
pos: 150
pos: 200
pos: 250
Finished Test - pos
neg: 0
neg: 50
neg: 100
neg: 150
neg: 200
neg: 250
Finished Test - neg
(588, 3780) (588, 1)


In [76]:
test_accuracy(clf, test_x, test_y)

0.8367346938775511