## Model Training

In [1]:
from imp import reload
import glob
import os

import cv2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

save_to = 'classifiers/'
if not os.path.exists(save_to):
    os.makedirs(save_to)

In [3]:
%%time

# Load the images
from utils import utils
reload(utils)

images_rgb, labels = utils.load_data(cv2.COLOR_BGR2RGB)

vehicle = 8792 samples
non-vehicle = 8968 samples


### Feature Extraction

In [5]:
%%time

from utils import feature_extraction
reload(feature_extraction)

features_list = []
convert_to = cv2.COLOR_RGB2YCR_CB
orientations = 9
pixels_per_cell = (8, 8)
cells_per_block = (2, 2)
bin_spatial_size = (32, 32)


for image in images_rgb:
    # Color conversion
    img_y_cr_cb = cv2.cvtColor(image, convert_to)

    # HOG feature
    hog_feature = feature_extraction.get_hog_features(
        img_y_cr_cb, 
        hog_channels=[0, 1, 2],
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block
        )

    # color hist feature
    color_hist_feature = feature_extraction.color_hist(img_y_cr_cb)
    
    # image feature
    image_feature = feature_extraction.bin_spatial(
        img_y_cr_cb, 
        bin_spatial_size=bin_spatial_size
    )

    features_list.append(np.concatenate((hog_feature, color_hist_feature, image_feature)))

CPU times: user 1min 12s, sys: 978 ms, total: 1min 13s
Wall time: 1min 14s


In [6]:
from sklearn.utils import shuffle

# Stack the per-image feature vectors into a single array, then shuffle the
# rows and the labels together with a fixed seed so that every sample keeps
# its own label and the order is reproducible.
feature_matrix = np.array(features_list)
X, y = shuffle(feature_matrix, labels, random_state=0)

print('X.shape = {}'.format(X.shape))

X.shape = (17760, 8460)


### Feature Scaling

In [7]:
# Scaling
from sklearn.preprocessing import StandardScaler

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23.  Prefer the standalone package; fall back to the vendored copy only
# on very old installations that do not ship joblib separately.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE(review): the scaler is fitted on the full feature matrix, i.e. before
# the train/test split performed in the next section, so held-out samples
# contribute to the scaling statistics.  Fit on the training split only if a
# strictly unbiased test score is required.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Persist the fitted scaler in the shared output directory (`save_to`), the
# same location the classifier is written to, instead of a hard-coded path.
joblib.dump(scaler, os.path.join(save_to, 'standard_scaler.pkl'))

['./classifiers/standard_scaler.pkl']

### Train, Test Split

In [8]:
# train, test split
from sklearn.model_selection import train_test_split

# Stratified hold-out split with a fixed seed; the split proportions are the
# train_test_split defaults because no size is passed.
(X_train_origin, X_test,
 y_train_origin, y_test) = train_test_split(
    X_scaled,
    y,
    random_state=0,
    stratify=y,
)

### Training Linear SVM
- 5-Fold Cross Validation

In [9]:
# Linear SVM
import time

from sklearn.model_selection import StratifiedKFold
from sklearn.svm import LinearSVC


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Per-fold scores are collected as plain records and converted to a DataFrame
# once after the loop, rather than enlarging an empty DataFrame cell by cell
# with `.loc`.
cv_records = []

for i, (train_index, test_index) in enumerate(skf.split(X_train_origin, y_train_origin)):
    print('Start Fold {}'.format(i + 1))
    t0_fold = time.time()

    # Carve this fold's training / validation subsets out of the training split.
    X_train, y_train = X_train_origin[train_index], y_train_origin[train_index]
    X_val, y_val = X_train_origin[test_index], y_train_origin[test_index]

    # Seeded like every other stochastic step in this notebook so that the
    # cross-validation scores are reproducible across runs.
    clf = LinearSVC(random_state=0)
    t0_fit = time.time()
    clf.fit(X_train, y_train)
    print('Fit in {}'.format(time.time() - t0_fit))

    # Training accuracy next to validation accuracy exposes over-fitting.
    cv_records.append({
        'acc': clf.score(X_train, y_train),
        'val_acc': clf.score(X_val, y_val),
    })

    print('Fold {} in {}'.format(i + 1, time.time() - t0_fold))

# DataFrame to record cross validation result (one row per fold)
df_cv_linear_svc = pd.DataFrame(cv_records, columns=['acc', 'val_acc'])
print(df_cv_linear_svc)

Start Fold 1
Fit in 8.35919713973999
Fold 1 in 12.69511604309082
Start Fold 2
Fit in 3.4131338596343994
Fold 2 in 5.279833793640137
Start Fold 3
Fit in 4.13627290725708
Fold 3 in 4.8702781200408936
Start Fold 4
Fit in 3.1502370834350586
Fold 4 in 3.8338019847869873
Start Fold 5
Fit in 3.2316789627075195
Fold 5 in 3.9542880058288574
   acc   val_acc
0  1.0  0.994371
1  1.0  0.991742
2  1.0  0.993243
3  1.0  0.990616
4  1.0  0.992114


### Test

In [10]:
%%time

# Train for test
clf = LinearSVC()
clf.fit(X_train_origin, y_train_origin)

# Model persistence
from sklearn.externals import joblib
joblib.dump(clf, os.path.join(save_to, 'linear_svm.pkl'))

CPU times: user 4.22 s, sys: 2.7 s, total: 6.91 s
Wall time: 9.34 s


In [11]:
# Test accuracy
%time print('test acc = ', clf.score(X_test, y_test))

test acc =  0.994594594595
CPU times: user 101 ms, sys: 452 ms, total: 554 ms
Wall time: 1.25 s
