## Linear SVM

In [1]:
# `imp` is deprecated since Python 3.4 and removed in 3.12; prefer importlib
# and only fall back to `imp` on interpreters that lack importlib.reload.
try:
    from importlib import reload
except ImportError:
    from imp import reload
import glob
import os

import cv2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

# Directory that receives the pickled scaler / classifier artefacts.
save_to = 'classifiers/'
# exist_ok=True makes this idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(save_to, exist_ok=True)

In [2]:
# Load the images and their labels.
from utils import classifier
reload(classifier)  # re-execute the module so edits to it are picked up

# load_data is handed the BGR->RGB conversion code; presumably it applies it to
# every image it reads -- confirm in utils/classifier.py.
images_rgb, labels = classifier.load_data(cv2.COLOR_BGR2RGB)

vehicle = 8792 samples
non-vehicle = 8968 samples


In [3]:
%%time

from utils import classifier
reload(classifier)  # re-execute the module so edits to it are picked up

# NOTE(review): `resize_to` is not referenced anywhere in this cell; it is kept
# only in case another cell reads it. Presumably it was meant to be passed to
# classifier.get_image_features -- confirm, then wire it in or delete it.
resize_to = (32, 32)


def extract_features(image_rgb):
    """Build the feature vector for one RGB image.

    Concatenates, in this order, the HOG features (computed on all three
    channels of the YCrCb image), the colour-histogram features (computed on
    the HSV image) and the image features (computed on the YCrCb image), as
    returned by the corresponding ``classifier`` helpers.

    Parameters
    ----------
    image_rgb : array
        An image that ``cv2.cvtColor`` can convert with the ``COLOR_RGB2*``
        codes used below.

    Returns
    -------
    numpy.ndarray
        ``np.concatenate`` of the three feature arrays.
    """
    img_y_cr_cb = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2YCR_CB)
    img_hsv = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2HSV)

    hog_feature = classifier.get_hog_features(img_y_cr_cb, hog_channels=[0, 1, 2])
    color_hist_feature = classifier.get_color_hist_features(img_hsv)
    image_feature = classifier.get_image_features(img_y_cr_cb)

    return np.concatenate((hog_feature, color_hist_feature, image_feature))


# One feature vector per image, in the same order as images_rgb. Using a
# comprehension keeps the per-image temporaries out of the kernel namespace.
features_list = [extract_features(image) for image in images_rgb]

CPU times: user 1min 11s, sys: 866 ms, total: 1min 12s
Wall time: 1min 13s


In [4]:
from sklearn.utils import shuffle

# Stack the per-image feature vectors into a single array and shuffle it
# together with the labels; random_state=0 pins the permutation.
X, y = shuffle(np.array(features_list), labels, random_state=0)
# Sanity check on the resulting array shape.
print('X.shape = {}'.format(X.shape))

X.shape = (17760, 8460)


In [5]:
# Scaling
from sklearn.preprocessing import StandardScaler

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; use the standalone package and fall back to the vendored copy only on
# old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE(review): the scaler is fitted on all of X, i.e. before the train/test
# split done in the next cell, so the scaling statistics include the samples
# that later become the test set. Consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Persist the fitted scaler under `save_to`, the same way the classifier is
# saved, so that changing `save_to` moves every artefact together.
joblib.dump(scaler, os.path.join(save_to, 'standard_scaler.pkl'))

['./classifiers/standard_scaler.pkl']

In [6]:
# Hold out a test set from the scaled features; the split is stratified on y
# and seeded so it is repeatable.
from sklearn.model_selection import train_test_split

X_train_origin, X_test, y_train_origin, y_test = train_test_split(
    X_scaled, y, stratify=y, random_state=0
)

### Linear SVM

In [7]:
# Linear SVM: 5-fold stratified cross-validation on the training split
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# One {'acc', 'val_acc'} record per fold; the DataFrame is built once after
# the loop instead of being enlarged cell by cell.
cv_records = []

# NOTE(review): X_train_origin was scaled before this loop with a scaler fitted
# on the whole data set, so each validation fold contributed to the scaling
# statistics.
for i, (train_index, val_index) in enumerate(skf.split(X_train_origin, y_train_origin)):
    print('Start Fold {}'.format(i + 1))
    # perf_counter is a monotonic clock intended for measuring intervals.
    t0_fold = time.perf_counter()

    X_train, y_train = X_train_origin[train_index], y_train_origin[train_index]
    X_val, y_val = X_train_origin[val_index], y_train_origin[val_index]

    # Seeded so that repeated runs give the same fitted model and scores.
    clf = LinearSVC(random_state=0)
    t0_fit = time.perf_counter()
    clf.fit(X_train, y_train)
    print('Fit in {}'.format(time.perf_counter() - t0_fit))

    cv_records.append({
        'acc': clf.score(X_train, y_train),    # accuracy on the fold's training part
        'val_acc': clf.score(X_val, y_val),    # accuracy on the held-out fold
    })

    print('Fold {} in {}'.format(i + 1, time.perf_counter() - t0_fold))

# DataFrame to record cross validation result (one row per fold)
df_cv_linear_svc = pd.DataFrame(cv_records, columns=['acc', 'val_acc'])
print(df_cv_linear_svc)

Start Fold 1
Fit in 8.029785871505737
Fold 1 in 11.607387065887451
Start Fold 2
Fit in 3.3847382068634033
Fold 2 in 4.706390142440796
Start Fold 3
Fit in 3.3817667961120605
Fold 3 in 4.114090204238892
Start Fold 4
Fit in 3.3604509830474854
Fold 4 in 4.089205026626587
Start Fold 5
Fit in 3.2448959350585938
Fold 5 in 3.9707019329071045
   acc   val_acc
0  1.0  0.993246
1  1.0  0.990991
2  1.0  0.992492
3  1.0  0.990240
4  1.0  0.991739


In [8]:
%%time

# Train for test: refit on the complete training split.
# Seeded so that the persisted model is reproducible.
clf = LinearSVC(random_state=0)
clf.fit(X_train_origin, y_train_origin)

# Model persistence
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; use the standalone package and fall back to the vendored copy only on
# old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(clf, os.path.join(save_to, 'linear_svm.pkl'))

CPU times: user 3.59 s, sys: 1.22 s, total: 4.82 s
Wall time: 4.97 s


In [9]:
# Test accuracy: score the classifier on the held-out test split.
# The %time line magic times the whole statement (scoring plus printing).
%time print('test acc = ', clf.score(X_test, y_test))

test acc =  0.994144144144
CPU times: user 102 ms, sys: 440 ms, total: 543 ms
Wall time: 1.18 s
