In [1]:
import glob
import sys
import os

from sklearn import svm
import numpy as np
from scipy.misc import imread

import hog

In [2]:
# Dataset location. `folder` is the dataset root directory; `folder_suffix`
# is appended to the phase name ('train' / 'test') to form the name of the
# sub-directory that holds the images for that phase.
folder, folder_suffix = 'INRIAPerson', '_64x128_H96'

In [48]:
def get_N_images(folder, folder_suffix, phase='train', validity='pos', N=None, verbose=False):
    """Compute features for up to ``N`` PNG images of one class in one split.

    Images are looked up with the glob pattern
    ``<folder>/<phase><folder_suffix>/<validity>/*.png`` and every matching
    path is passed to ``get_image``.

    Args:
        folder: Root directory of the dataset.
        folder_suffix: Suffix appended to ``phase`` to build the split
            directory name.
        phase: Split name used as the directory prefix (e.g. 'train', 'test').
        validity: Class sub-directory to read (e.g. 'pos', 'neg').
        N: Maximum number of images to process. ``None`` processes every
            match; a value <= 0 yields an empty list.
        verbose: When true, print a progress line every 50 images and a
            closing line once loading stops.

    Returns:
        list: One ``get_image`` result per processed image, in sorted path
        order. Empty when nothing matches the pattern.
    """
    path_regex = os.path.join(folder, phase + folder_suffix, validity, '*.png')
    array = []

    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # makes both the subset selected by N and the resulting row order
    # reproducible across runs and machines.
    for n, img_path in enumerate(sorted(glob.glob(path_regex))):
        if N is not None and n >= N:
            # Leave the loop (rather than returning) so the closing verbose
            # message is also printed when the limit cuts loading short.
            break
        if verbose and n % 50 == 0:
            print('{}: {}'.format(validity, n))
        array.append(get_image(img_path))

    if verbose:
        print('Finished {} - {}'.format(phase, validity))
    return array


def get_image(img_path):
    """Return the features for the image stored at ``img_path``.

    Thin wrapper: the path is forwarded unchanged to
    ``hog.get_hog_features`` and its result is returned as-is.
    """
    features = hog.get_hog_features(img_path)
    return features


def get_N_targets(validity='pos', N=100):
    """Build a column vector of ``N`` identical class labels.

    Args:
        validity: 'pos' selects label 1.0; any other value selects 0.0.
        N: Number of labels (rows) to generate.

    Returns:
        numpy.ndarray: float array of shape ``(N, 1)`` filled with the label.
    """
    label = 1.0 if validity == 'pos' else 0.0
    return np.full((N, 1), label)

In [51]:
# Load the features of every training image, positives first and then
# negatives (N=None means no limit on the number of images).
train_x_pos, train_x_neg = (
    get_N_images(folder, folder_suffix, phase='train', validity=label, N=None, verbose=True)
    for label in ('pos', 'neg')
)

# Stack both classes into one float32 matrix: positive rows, then negative rows.
train_x = np.array([*train_x_pos, *train_x_neg]).astype('float32')

# Matching labels in the same row order: one per positive image, then one
# per negative image.
train_y = np.concatenate([
    get_N_targets(validity=label, N=len(images))
    for label, images in (('pos', train_x_pos), ('neg', train_x_neg))
])

pos: 0
pos: 50
pos: 100
pos: 150
pos: 200
pos: 250
pos: 300
pos: 350
pos: 400
pos: 450
pos: 500
pos: 550
pos: 600
pos: 650
pos: 700
pos: 750
pos: 800
pos: 850
pos: 900
pos: 950
pos: 1000
pos: 1050
pos: 1100
pos: 1150
pos: 1200
pos: 1250
pos: 1300
pos: 1350
pos: 1400
pos: 1450
pos: 1500
pos: 1550
pos: 1600
pos: 1650
pos: 1700
pos: 1750
pos: 1800
pos: 1850
pos: 1900
pos: 1950
pos: 2000
pos: 2050
pos: 2100
pos: 2150
pos: 2200
pos: 2250
pos: 2300
pos: 2350
pos: 2400
Finished train - pos
neg: 0
neg: 50
neg: 100
neg: 150
neg: 200
neg: 250
neg: 300
neg: 350
neg: 400
neg: 450
neg: 500
neg: 550
neg: 600
neg: 650
neg: 700
neg: 750
neg: 800
neg: 850
neg: 900
Finished train - neg


In [52]:
# Sanity check: the feature matrix and the label column must have the same
# number of rows.
print(*(arr.shape for arr in (train_x, train_y)))

(3328, 3780) (3328, 1)


In [53]:
# Linear SVM with regularisation parameter C=0.01.
clf = svm.LinearSVC(C=0.01)
# train_y is a column of shape (n, 1); flatten it to the 1-D label vector
# that fit() is given. Kept as the last expression so the fitted estimator
# is displayed as the cell output.
clf.fit(train_x, train_y.reshape(-1))

LinearSVC(C=0.01, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [58]:
# Load the features of every test image, positives first and then negatives
# (N=None means no limit on the number of images).
test_x_pos, test_x_neg = (
    get_N_images(folder, folder_suffix, phase='test', validity=label, N=None, verbose=True)
    for label in ('pos', 'neg')
)

# Stack both classes into one float32 matrix: positive rows, then negative rows.
test_x = np.array([*test_x_pos, *test_x_neg]).astype('float32')

# Matching labels in the same row order: one per positive image, then one
# per negative image.
test_y = np.concatenate([
    get_N_targets(validity=label, N=len(images))
    for label, images in (('pos', test_x_pos), ('neg', test_x_neg))
])

pos: 0
pos: 50
pos: 100
pos: 150
pos: 200
pos: 250
pos: 300
pos: 350
pos: 400
pos: 450
pos: 500
pos: 550
pos: 600
pos: 650
pos: 700
pos: 750
pos: 800
pos: 850
pos: 900
pos: 950
pos: 1000
pos: 1050
pos: 1100
Finished test - pos
neg: 0
neg: 50
neg: 100
neg: 150
neg: 200
neg: 250
Finished test - neg


In [60]:
# Sanity check: the feature matrix and the label column must have the same
# number of rows.
print(*(arr.shape for arr in (test_x, test_y)))

(1426, 3780) (1426, 1)


In [61]:
# Predict a class label for every row of the test feature matrix.
predictions = clf.predict(X=test_x)

In [63]:
# Accuracy: fraction of test samples whose predicted label equals the true one.
# test_y is a column of shape (n, 1), so flatten it first; this keeps the
# comparison element-wise instead of broadcasting against the predictions.
np.count_nonzero(test_y.ravel() == predictions) / len(test_x)

0.967741935483871

In [64]:
# Spot check on a single image taken from the negative test folder.
t = 'INRIAPerson/test_64x128_H96/neg/no_person__no_bike_006.png'
# predict() is given a 2-D input: flatten the features and add a leading
# axis so the single sample becomes one row.
clf.predict(np.array(get_image(t)).ravel()[np.newaxis, :])

array([ 0.])

In [65]:
# Spot check on a single image taken from the positive *training* folder.
# NOTE(review): this is a sanity check only, since the path lies inside the
# directory the classifier was trained on.
t = 'INRIAPerson/train_64x128_H96/pos/person_165a.png'
# predict() is given a 2-D input: flatten the features and add a leading
# axis so the single sample becomes one row.
clf.predict(np.array(get_image(t)).ravel()[np.newaxis, :])

array([ 1.])