In [1]:
from utils import load_Caltech256_data
from utils import extract_DenseSift_descriptors
from utils import build_codebook
from utils import input_vector_encoder

import numpy as np
import pandas as pd
import cv2

def build_spatial_pyramid(image, descriptor, level):
    """
    Rebuild the descriptors according to the level of pyramid
    """
    assert 0 <= level <= 2, "Level Error"
    step_size = DSIFT_STEP_SIZE
    from utils import DSIFT_STEP_SIZE as s
    assert s == step_size, "step_size must equal to DSIFT_STEP_SIZE\
                            in utils.extract_DenseSift_descriptors()"
    h = image.shape[0] / step_size
    w = image.shape[1] / step_size
    idx_crop = np.array(range(len(descriptor))).reshape(h,w)
    size = idx_crop.itemsize
    height, width = idx_crop.shape
    bh, bw = 2**(3-level), 2**(3-level)
    shape = (height/bh, width/bw, bh, bw)
    strides = size * np.array([width*bh, bw, width, 1])
    crops = np.lib.stride_tricks.as_strided(
            idx_crop, shape=shape, strides=strides)
    des_idxs = [col_block.flatten().tolist() for row_block in crops
                for col_block in row_block]
    pyramid = []
    for idxs in des_idxs:
        pyramid.append(np.asarray([descriptor[idx] for idx in idxs]))
    return pyramid

def spatial_pyramid_matching(image, descriptor, codebook, level):
    pyramid = []
    if level == 0:
        pyramid += build_spatial_pyramid(image, descriptor, level=0)
        code = [input_vector_encoder(crop, codebook) for crop in pyramid]
        return np.asarray(code).flatten()
    if level == 1:
        pyramid += build_spatial_pyramid(image, descriptor, level=0)
        pyramid += build_spatial_pyramid(image, descriptor, level=1)
        code = [input_vector_encoder(crop, codebook) for crop in pyramid]
        code_level_0 = 0.5 * np.asarray(code[0]).flatten()
        code_level_1 = 0.5 * np.asarray(code[1:]).flatten()
        return np.concatenate((code_level_0, code_level_1))
    if level == 2:
        pyramid += build_spatial_pyramid(image, descriptor, level=0)
        pyramid += build_spatial_pyramid(image, descriptor, level=1)
        pyramid += build_spatial_pyramid(image, descriptor, level=2)
        code = [input_vector_encoder(crop, codebook) for crop in pyramid]
        code_level_0 = 0.25 * np.asarray(code[0]).flatten()
        code_level_1 = 0.25 * np.asarray(code[1:5]).flatten()
        code_level_2 = 0.5 * np.asarray(code[5:]).flatten()
        return np.concatenate((code_level_0, code_level_1, code_level_2))

VOC_SIZE =100
PYRAMID_LEVEL = 2

DSIFT_STEP_SIZE = 4
# DSIFT_STEP_SIZE is related to the function
# extract_DenseSift_descriptors in utils.py


    
PATH = './img_list_mini.csv'
# It contains 20 classes. each class has 50 images
X, y = load_Caltech256_data(PATH)

X = np.asarray(X)
y = np.asarray(y)

N = X.shape[0]
idx = np.array(range(N))
np.random.shuffle(idx)

X = X[idx]
y = y[idx]

N_train = int(len(X)*0.9)
x_train, y_train = X[:N_train], y[:N_train]
x_test, y_test = X[N_train:], y[N_train:]

print 'load dataset: '
print 'x_train: ', x_train.shape
print 'x_test: ', x_test.shape

                                            img_path  label
0  ./256_ObjectCategories/139.megaphone/139_0010.jpg    139
1  ./256_ObjectCategories/139.megaphone/139_0024.jpg    139
2  ./256_ObjectCategories/139.megaphone/139_0075.jpg    139
3  ./256_ObjectCategories/139.megaphone/139_0036.jpg    139
4  ./256_ObjectCategories/139.megaphone/139_0013.jpg    139
load dataset: 
x_train:  (963, 256, 256, 3)
x_test:  (108, 256, 256, 3)


In [2]:
import gc

print "Dense SIFT feature extraction"
# memory...
x_train_feature = [extract_DenseSift_descriptors(img) for img in x_train]
x_train_kp, x_train_des = zip(*x_train_feature)
del x_train_feature
del x_train_kp
gc.collect()

x_test_feature = [extract_DenseSift_descriptors(img) for img in x_test]
x_test_kp, x_test_des = zip(*x_test_feature)
x_test_feature, x_test_kp = None, None
del x_test_feature
del x_test_kp
gc.collect()

print "Train/Test split: {:d}/{:d}".format(len(y_train), len(y_test))
print "Codebook Size: {:d}".format(VOC_SIZE)
print "Pyramid level: {:d}".format(PYRAMID_LEVEL)
print "Building the codebook, it will take some time"
codebook = build_codebook(x_train_des, VOC_SIZE)
import cPickle
# number of samples
N = 1000
with open('./spm_lv1_codebook.pkl','w') as f:
    cPickle.dump(codebook, f)

Dense SIFT feature extraction
Train/Test split: 963/108
Codebook Size: 100
Pyramid level: 2
Building the codebook, it will take some time


In [3]:
import cPickle
with open('./spm_lv1_codebook.pkl','rb') as f:
    x = cPickle.load(f)

print "Spatial Pyramid Matching encoding"
x_train_ = [spatial_pyramid_matching(x_train[i],
                                    x_train_des[i],
                                    codebook,
                                    level=PYRAMID_LEVEL)
                                    for i in range(x_train.shape[0])]

x_test_ = [spatial_pyramid_matching(x_test[i],
                                   x_test_des[i],
                                   codebook,
                                   level=PYRAMID_LEVEL) for i in range(x_test.shape[0])]

x_train_ = np.asarray(x_train_)
x_test_ = np.asarray(x_test_)

Spatial Pyramid Matching encoding


In [4]:
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.grid_search import GridSearchCV
from sklearn import svm
import numpy as np

def svm_classifier(x_train, y_train, x_test=None, y_test=None):
#     if x_test == None and y_test == None:
#         x_train, x_test, y_train, y_test = train_test_split(
#                 x_train, y_train, test_size=0.2, random_state=6)
#         print "Spliting train:{}/test:{} from training data".format(
#                 len(x_train), len(x_test))
    C_range = 10.0 ** np.arange(-3, 3)
    gamma_range = 10.0 ** np.arange(-3, 3)
    param_grid = dict(gamma=gamma_range.tolist(), C=C_range.tolist())

    # Grid search for C, gamma, 5-fold CV
    print("Tuning hyper-parameters\n")
    clf = GridSearchCV(svm.SVC(), param_grid, cv=5, n_jobs=-2)
    clf.fit(x_train, y_train)
    print("Best parameters set found on development set:\n")
    print(clf.best_estimator_)
    print("\nGrid scores on development set:\n")
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, scores.std() * 2, params))
    print("\nDetailed classification report:\n")
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.\n")
    y_true, y_pred = y_test, clf.predict(x_test)
    #print(classification_report(y_true, y_pred, target_names=get_label()))
    print(classification_report(y_true, y_pred))
    print(accuracy_score(y_true, y_pred))



In [5]:
svm_classifier(x_train_, y_train, x_test_, y_test)

Tuning hyper-parameters

Best parameters set found on development set:

SVC(C=10.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

Grid scores on development set:

0.052 (+/-0.003) for {'C': 0.001, 'gamma': 0.001}
0.052 (+/-0.003) for {'C': 0.001, 'gamma': 0.01}
0.052 (+/-0.003) for {'C': 0.001, 'gamma': 0.1}
0.052 (+/-0.003) for {'C': 0.001, 'gamma': 1.0}
0.052 (+/-0.003) for {'C': 0.001, 'gamma': 10.0}
0.052 (+/-0.003) for {'C': 0.001, 'gamma': 100.0}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 0.001}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 0.01}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 0.1}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 1.0}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 10.0}
0.052 (+/-0.003) for {'C': 0.01, 'gamma': 100.0}
0.052 (+/-0.003) for {'C': 0.1, 'gamma': 0.001}
0.052 (+/-0.003) for {'C': 0.1, 'gamma'

### load data multiprocess

In [8]:
import cv2
import sklearn
from sklearn.cluster import KMeans
import scipy.cluster.vq as vq
import numpy as np
import pandas as pd
import multiprocessing
import numpy as np

listpath = './img_list.csv'
datalist = pd.read_csv(listpath)
x, y = [], []
print datalist.head()
def read_img_and_label(i):
    path, label = datalist['img_path'][i], datalist['label'][i]
    img = cv2.imread(path)
    if img is not None:
        if img.shape[:2] != (256,256):
            img = cv2.resize(img, (256,256))
        return (img, label)
#         x.append(img)
#         y.append(int(label))

cores = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=cores)

num = range(len(datalist))
for j in pool.imap_unordered(read_img_and_label, num):
#     print type(j)
#     break
    if type(j) is tuple:
        (img, label) = j
        x.append(img)
        y.append(int(label))
print np.asarray(x).shape
print np.asarray(y).shape

                                            img_path label
0  ./256_ObjectCategories/139.megaphone/139_0010.jpg   139
1  ./256_ObjectCategories/139.megaphone/139_0024.jpg   139
2  ./256_ObjectCategories/139.megaphone/139_0075.jpg   139
3  ./256_ObjectCategories/139.megaphone/139_0036.jpg   139
4  ./256_ObjectCategories/139.megaphone/139_0013.jpg   139
(30607, 256, 256, 3)
(30607,)


In [3]:
print len(x)

0
