## Linear SVM

In [1]:
# `imp` has been deprecated since Python 3.4 and was removed in Python 3.12;
# importlib.reload is the supported replacement. The old import is kept only
# as a fallback for interpreters that do not provide importlib.reload.
try:
    from importlib import reload
except ImportError:
    from imp import reload
import glob
import os

import cv2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

# Directory that receives the pickled models written by the cells below.
save_to = 'classifiers/'
# exist_ok=True makes the cell idempotent and removes the window between
# "check that it is missing" and "create it" that the explicit test had.
os.makedirs(save_to, exist_ok=True)

In [2]:
# Import the project's classifier helpers and reload the module, so that edits
# made to it after the kernel started are picked up.
from utils import classifier
reload(classifier)

# Load the images and their labels; the BGR -> RGB conversion code is handed to
# the loader (presumably applied to each image it reads -- see utils.classifier).
images_rgb, labels = classifier.load_data(cv2.COLOR_BGR2RGB)

vehicle = 8792 samples
non-vehicle = 8968 samples


In [3]:
%%time

features_list = []
resize_to = (32, 32)

for image in images_rgb:
    y_cr_cb = cv2.cvtColor(image, cv2.COLOR_RGB2YCR_CB)
    # hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    # HOG feature
    hog_ycrcb = classifier.get_hog_features(y_cr_cb, hog_channels=[0, 1, 2])
    # image feature
    img_feature = cv2.resize(image, resize_to).ravel()
    features_list.append(np.concatenate((hog_ycrcb, img_feature)))

CPU times: user 59.6 s, sys: 637 ms, total: 1min
Wall time: 1min


In [4]:
from sklearn.utils import shuffle

# Stack the per-image vectors into a single matrix and shuffle it together with
# the labels; the fixed seed keeps the ordering reproducible.
X, y = shuffle(
    np.asarray(features_list),
    labels,
    random_state=0,
)
print('X.shape = {}'.format(X.shape))

X.shape = (17760, 8364)


In [5]:
# Scaling
from sklearn.preprocessing import StandardScaler

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package (a scikit-learn dependency) and
# fall back to the vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE(review): the scaler is fitted on every sample, i.e. before the
# train/test split performed in the next cell, so test-set statistics leak into
# the preprocessing. Fitting on the training split only would avoid that, but
# requires reordering this cell and the split cell together.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Persist the fitted scaler so the same standardisation can be reapplied later.
joblib.dump(scaler, './classifiers/standard_scaler.pkl')

['./classifiers/standard_scaler.pkl']

In [6]:
# Hold out a test set. The split is stratified on the labels and seeded for
# reproducibility; test_size is not passed, so scikit-learn's default applies.
from sklearn.model_selection import train_test_split

(X_train_origin, X_test,
 y_train_origin, y_test) = train_test_split(
    X_scaled, y, stratify=y, random_state=0)

### Linear SVM

In [7]:
# Linear SVM: 5-fold stratified cross-validation on the training split.
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# One {'acc', 'val_acc'} record per fold. The DataFrame is built once after the
# loop instead of being enlarged row by row through .loc.
cv_records = []

for i, (train_index, test_index) in enumerate(skf.split(X_train_origin, y_train_origin)):
    print('Start Fold {}'.format(i + 1))
    t0_fold = time.time()

    # `test_index` selects this fold's validation samples (not the held-out test set).
    X_train, y_train = X_train_origin[train_index], y_train_origin[train_index]
    X_val, y_val = X_train_origin[test_index], y_train_origin[test_index]

    # Seeded like every other stochastic step in this notebook, so that the
    # fold scores are reproducible from run to run.
    clf = LinearSVC(random_state=0)
    t0_fit = time.time()
    clf.fit(X_train, y_train)
    print('Fit in {}'.format(time.time() - t0_fit))

    cv_records.append({
        'acc': clf.score(X_train, y_train),
        'val_acc': clf.score(X_val, y_val),
    })

    print('Fold {} in {}'.format(i + 1, time.time() - t0_fold))

# DataFrame recording the cross validation result (row index = fold number - 1)
df_cv_linear_svc = pd.DataFrame(cv_records, columns=['acc', 'val_acc'])
print(df_cv_linear_svc)

Start Fold 1
Fit in 9.067813873291016
Fold 1 in 11.598341941833496
Start Fold 2
Fit in 6.359969139099121
Fold 2 in 8.272781133651733
Start Fold 3
Fit in 3.7733049392700195
Fold 3 in 4.5337629318237305
Start Fold 4
Fit in 18.730575799942017
Fold 4 in 19.99231219291687
Start Fold 5
Fit in 5.345907926559448
Fold 5 in 6.3388588428497314
   acc   val_acc
0  1.0  0.992120
1  1.0  0.991366
2  1.0  0.990991
3  1.0  0.989489
4  1.0  0.992114


In [8]:
%%time

# Train for test
clf = LinearSVC()
clf.fit(X_train_origin, y_train_origin)

# Model persistence
from sklearn.externals import joblib
joblib.dump(clf, os.path.join(save_to, 'linear_svm.pkl'))

CPU times: user 4.36 s, sys: 2.02 s, total: 6.38 s
Wall time: 7 s


In [9]:
# Test accuracy: score the classifier fitted on the whole training split
# against the held-out test split; %time reports how long that statement takes.
%time print('test acc = ', clf.score(X_test, y_test))

test acc =  0.992117117117
CPU times: user 97.5 ms, sys: 411 ms, total: 509 ms
Wall time: 1.01 s
