# Build a classifer

The goal of this Jupyter notebook is to:
1. Extract features from the raw data set processed in *rawdata_exploration.ipynb* notebook
2. Pre-process the features, and, 
3. Train a classifier to recognise vehicles in an image

In [1]:
# Necessary imports

import numpy as np
import cv2
import glob
import pickle
import time
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from skimage.feature import hog
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint
import pandas as pd

from helper_functions import *

In [2]:
import warnings
warnings.filterwarnings("ignore")

## 1. Feature Extraction

In [3]:
def extract_features(imgs, params):
    '''
    Extracts user specified features (either Raw Color values/ Histogram of Color values/ HOG or a combination of these)
    from a list of images
    :param img([string]): List of image paths
    :param params(dict): Dictionary of different params for feature extraction
    :return: List of features vectors
    '''
    if not params:
        raise Exception('ERROR: Please provide a valid params dict!')
        
    features = []
    for img_path in tqdm(imgs):
        img = mpimg.imread(img_path)
        
        features.append(single_img_features(
            img,
            color_space=params['color_space'],
            spatial_size=params['spatial_size'],
            hist_bins=params['hist_bins'],
            orient=params['orient'],
            pix_per_cell=params['pix_per_cell'],
            cell_per_block=params['cell_per_block'],
            hog_channel=params['hog_channel'],
            spatial_feat=params['spatial_feat'],
            hist_feat=params['hist_feat'],
            hog_feat=params['hog_feat']
        ))
        
    return features

In [4]:
# Define the params dictionary
params = {
    'color_space': 'YCrCb',   # Can be RGB, HSV, LAB, HLS, YUV, YCrCb
    'orient': 9,              # HOG orientations
    'pix_per_cell': 8,        # HOG pixels per cell
    'cell_per_block': 2,      # HOG cells per block
    'spatial_size': (16, 16), # Spatial binning dimensions
    'hist_bins': 24,          # Number of histogram bins
    'hog_channel': 'ALL',     # Can be 0, 1, 2, or "ALL"
    'spatial_feat': True,     # Spatial features on or off
    'hist_feat': True,        # Histogram features on or off
    'hog_feat': True,         # HOG features on or off
}
     

In [None]:
# Load pickled raw data set
with open('classifier_data.p', mode='rb') as f:
    data = pickle.load(f)
    
cars_train = data['cars_train']
notcars_train = data['notcars_train']

cars_test = data['cars_test']
notcars_test = data['notcars_test']

# Extract features
print('Extracting features...')
start = time.time()

cars_features_train = extract_features(cars_train, params)
notcars_features_train = extract_features(notcars_train, params)

cars_features_test = extract_features(cars_test, params)
notcars_features_test = extract_features(notcars_test, params)

end = time.time()

      Time taken to extract features (Spatial, Color Hist, HOG): 112.85023307800293

In [4]:
print('Length of feature vector: {}'.format(len(cars_features_train[0])))

Length of feature vector: 6132


## 2. Pre-process features

In [5]:
X_train = np.vstack([cars_features_train, notcars_features_train]).astype(np.float64) 
X_test = np.vstack([cars_features_test, notcars_features_test]).astype(np.float64) 

y_train = np.hstack([np.ones(len(cars_features_train)), np.zeros(len(notcars_features_train))])
y_test = np.hstack([np.ones(len(cars_features_test)), np.zeros(len(notcars_features_test))])

scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Sanity check
assert(len(X_train) == len(y_train))
assert(len(X_test) == len(y_test))

X_train, y_train = shuffle(X_train, y_train)
X_test, y_test = shuffle(X_test, y_test)

## 3. Train a classifier (Linear Support Vector Machine)

In [6]:
clf = LinearSVC()
# clf = LinearSVC(C = 0.00005)
# parameters = {'C': sp_randint(0, 20)}
# clf = RandomizedSearchCV(lsvc, param_distributions=parameters)

print('Starting training...')
start = time.time()
clf.fit(X_train, y_train)
end = time.time()

print('Training took: {:.4f}'.format(end-start))
print('Training accuracy: {:.4f}'.format(clf.score(X_train, y_train)))
print('Test accuracy: {:.4f}'.format(clf.score(X_test, y_test)))
print()

preds = clf.predict(X_test)
df = pd.DataFrame(confusion_matrix(preds, y_test))
print('Confusion Matrix:')
df

Starting training...
Training took: 15.9724
Training accuracy: 1.0000
Test accuracy: 0.9781

Confusion Matrix:


Unnamed: 0,0,1
0,879,22
1,17,859


In [7]:
# Once happy, pickle the data
try:
    with open('classifier_data.p', mode='wb') as f:
        pickle.dump({
            'clf': clf,
            'scaler': scaler,
            'orient': params['orient'],
            'pix_per_cell': params['pix_per_cell'],
            'cell_per_block': params['cell_per_block'],
            'spatial_size': params['spatial_size'],
            'hist_bins': params['hist_bins'],
            'color_space': params['color_space']
        }, f)
        
except Exception as e:
    print('ERROR: Failed to pickle the classifier and its params with exception: {}'.format(e))
    
print('Successfully pickled the classifier data!')

Successfully pickled the classifier data!
