## Linear SVM

In [1]:
import glob
import os

try:
    # `imp` is deprecated since Python 3.4 and removed in 3.12;
    # importlib.reload is the supported replacement.
    from importlib import reload
except ImportError:
    from imp import reload

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

try:
    # scikit-learn >= 0.23 no longer ships `sklearn.externals.joblib`;
    # the standalone package is installed as a scikit-learn dependency.
    import joblib
except ImportError:
    from sklearn.externals import joblib

%matplotlib inline

# Directory that receives the pickled scaler / classifier artifacts.
save_to = 'classifiers/'
# exist_ok avoids the check-then-create race and makes the cell safely re-runnable.
os.makedirs(save_to, exist_ok=True)

In [2]:
# Load the images
from utils import classifier
reload(classifier)  # re-execute the module so recent edits to it are used

# The colour-conversion code is forwarded to the project loader unchanged.
images_rgb, labels = classifier.load_data(cv2.COLOR_BGR2RGB)

vehicle = 8792 samples
non-vehicle = 8968 samples


In [24]:
%%time

from utils import classifier; reload(classifier)

features_list = []
resize_to = (32, 32)

for image in images_rgb:
    img_y_cr_cb = cv2.cvtColor(image, cv2.COLOR_RGB2YCR_CB)
    img_hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    
    # HOG feature
    hog_feature = classifier.get_hog_features(img_y_cr_cb, hog_channels=[0, 1, 2])
    
    # image feature
    # img_feature = cv2.resize(image, resize_to).ravel()

    # color hist feature
    color_hist_feature = classifier.get_color_hist_features(img_hls)
    # features_list.append(hog_feature)
    features_list.append(np.concatenate((hog_feature, color_hist_feature)))

CPU times: user 1min 12s, sys: 3.28 s, total: 1min 15s
Wall time: 1min 16s


In [26]:
from sklearn.utils import shuffle

# Stack the per-image vectors into one array and shuffle its rows together
# with the labels; the fixed seed keeps the ordering reproducible.
X, y = shuffle(
    np.asarray(features_list),
    labels,
    random_state=0,
)
print('X.shape = {}'.format(X.shape))

X.shape = (17760, 5388)


In [27]:
# Scaling
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler is fitted on the full data set, i.e. before the
# train/test split in the next cell, so held-out samples contribute to the
# mean/variance. Consider fitting on the training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Persist the fitted scaler in the same artifact directory as the classifier.
joblib.dump(scaler, os.path.join(save_to, 'standard_scaler.pkl'))

['./classifiers/standard_scaler.pkl']

In [28]:
# train, test split
from sklearn.model_selection import train_test_split

# Stratify on the labels so both partitions keep the same class balance.
X_train_origin, X_test, y_train_origin, y_test = train_test_split(
    X_scaled,
    y,
    random_state=0,
    stratify=y,
)

### Linear SVM

In [29]:
# Linear SVM
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# One record per fold; the DataFrame is built once after the loop instead of
# being grown cell by cell.
cv_records = []

for i, (train_index, test_index) in enumerate(skf.split(X_train_origin, y_train_origin)):
    print('Start Fold {}'.format(i + 1))
    t0_fold = time.time()

    X_train, y_train = X_train_origin[train_index], y_train_origin[train_index]
    X_val, y_val = X_train_origin[test_index], y_train_origin[test_index]

    # Seeded like every other stochastic step in this notebook so that
    # re-running the cell reproduces the same scores.
    clf = LinearSVC(random_state=0)
    t0_fit = time.time()
    clf.fit(X_train, y_train)
    print('Fit in {}'.format(time.time() - t0_fit))

    cv_records.append({
        'acc': clf.score(X_train, y_train),    # accuracy on the fold's training part
        'val_acc': clf.score(X_val, y_val),    # accuracy on the held-out part
    })

    print('Fold {} in {}'.format(i + 1, time.time() - t0_fold))

# DataFrame to record cross validation result
df_cv_linear_svc = pd.DataFrame(cv_records, columns=['acc', 'val_acc'])
print(df_cv_linear_svc)

Start Fold 1
Fit in 13.358896970748901
Fold 1 in 14.78134799003601
Start Fold 2
Fit in 2.347630023956299
Fold 2 in 2.8840348720550537
Start Fold 3
Fit in 14.323546886444092
Fold 3 in 15.112668991088867
Start Fold 4
Fit in 11.983365058898926
Fold 4 in 12.52513599395752
Start Fold 5
Fit in 4.589630842208862
Fold 5 in 5.416479110717773
   acc   val_acc
0  1.0  0.992495
1  1.0  0.992868
2  1.0  0.989489
3  1.0  0.989114
4  1.0  0.990988


In [30]:
%%time

# Train for test
clf = LinearSVC()
clf.fit(X_train_origin, y_train_origin)

# Model persistence
from sklearn.externals import joblib
joblib.dump(clf, os.path.join(save_to, 'linear_svm.pkl'))

CPU times: user 13.3 s, sys: 1.28 s, total: 14.5 s
Wall time: 15.9 s


In [31]:
# Test accuracy
%time print('test acc = ', clf.score(X_test, y_test))

test acc =  0.993018018018
CPU times: user 62.9 ms, sys: 186 ms, total: 249 ms
Wall time: 454 ms
