In [2]:
from matplotlib import pyplot
from keras.datasets import cifar10

import numpy as np
import pandas as pd
import sklearn as sk
from sklearn import metrics, svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, auc, confusion_matrix
import matplotlib.pyplot as plt
import timeit

### Baseline SVM Classifier 

Load and prepare the dataset

In [None]:
# load and prepare data
def load_data():

  # load dataset
  (train_data, train_labels), (test_data, test_labels) = cifar10.load_data()

  # convert from pixel data to floats and normalize from 0-1
  train_data_norm = train_data.astype('float32') / 255.0
  test_data_norm = test_data.astype('float32') / 255.0

  # flatten pixel data to 2d
  train_data_flatten = train_data_norm.reshape(50000,-1)
  test_data_flatten = test_data_norm.reshape(10000,-1)

  # reduce label data to 1d array
  train_labels_1d = train_labels.reshape(-1,)
  test_labels_1d = test_labels.reshape(-1,)

  # choose a subset of the training dataset for training the optimal model
  #val_data = train_data_flatten[41500:, :]
  #val_labels = train_labels_1d[41500:]

  train_data =  train_data_flatten[:, :]
  train_labels = train_labels_1d[:]

  test_data =test_data_flatten[:, :]
  test_labels = test_labels_1d[:]

  return train_data, train_labels, test_data, test_labels

# call function
train_data, train_labels, test_data, test_labels = load_data()
classesName = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
print(train_data.shape)
print(test_data.shape)


Parameter search for optimal SVM model

In [None]:
# indicate number of training observations to perform grid serach on
n = 2000

param_grid = {'C': [10], 'gamma': [0.1], 'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}

svm_grid = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=2)
svm_grid.fit(train_data[:n,:], train_labels[:n])

svm_clf = svm_grid.best_estimator_

In [None]:
print(svm_grid.best_estimator_)

train simple SVM

In [None]:
# define model
svm_clf = svm.SVC(C=10, gamma=0.1, kernel='poly')

# start timer
start = timeit.default_timer()

# fit classifier
svm_clf.fit(train_data, train_labels)

# stop timer
stop = timeit.default_timer()
print('Training Time (Basic SVM): ', stop-start)

test simple SVM on test set

In [None]:
# start timer
start = timeit.default_timer()

# make predictions
predicted = svm_clf.predict(test_data)

# stop timer
stop = timeit.default_timer()
print('Infrence Time (Basic SVM): ', stop-start)

# print a classification report (built in to sklearn)
svm_metrics = metrics.classification_report(test_labels, predicted, digits=5, target_names=classesName)
print("Classification report: \n", svm_metrics)

# display confusion matrix
cm_svm = confusion_matrix(test_labels, predicted)
print(f"Confusion matrix:\n{cm_svm}")


# Open CV Saliency Maps

open CV saliency tutorial: https://www.pyimagesearch.com/2018/07/16/opencv-saliency-detection/

In [15]:
import cv2
from scipy.signal import convolve2d as conv2

In [16]:
# load and prepare data
def load_data_opencv():

  # load dataset
  (train_data, train_labels), (test_data, test_labels) = cifar10.load_data()

  # convert from pixel data to floats and normalize from 0-1
  train_data_norm = train_data.astype('float32') / 255.0
  test_data_norm = test_data.astype('float32') / 255.0

  # flatten pixel data to 2d
  #train_data_flatten = train_data_norm.reshape(50000,-1)
  #test_data_flatten = test_data_norm.reshape(10000,-1)

  # reduce label data to 1d array
  train_labels_1d = train_labels.reshape(-1,)
  test_labels_1d = test_labels.reshape(-1,)

  return train_data_norm, train_labels_1d, test_data_norm, test_labels_1d

# call function
train_data, train_labels, test_data, test_labels = load_data_opencv()
classesName = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

''' this was used for validation
# choose a subset of the training dataset for training the optimal model
val_data = train_data[41500:, :, :, :]
val_labels = train_labels[41500:]

train_data = train_data[:20000, :, :, :]
train_labels = train_labels[:20000]
'''

'\n# choose a subset of the training dataset for training the optimal model\nval_data = train_data[41500:, :, :, :]\nval_labels = train_labels[41500:]\n\ntrain_data = train_data[:20000, :, :, :]\ntrain_labels = train_labels[:20000]\n'

Define saliency function using pretrained module

In [17]:
# generate saliency maps using the OpenCV library
def saliency_spectral(image):
    saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
    (success, saliencyMap) = saliency.computeSaliency(image)
    saliencyMap = (saliencyMap * 255).astype("uint8")
    
    output = image * saliencyMap.reshape(32, 32, 1)

    return saliencyMap, output


# function to find nxn subset of data with most saliency
def maximum_saliency_region(salMap, n, image):
    idx = conv2(salMap, np.ones((n, n), dtype=int), 'same').argmax()
    row, col = np.unravel_index(idx, salMap.shape)
    offset = n//2

    if row < offset:
      row = offset
    if row > (32-offset):
      row = 32-offset
    
    if col < offset:
      col = offset
    if col > (32-offset):
      col = 32-offset
    
    row_start = row - offset
    row_end = row+offset
    col_start = col - offset
    col_end = col + offset

    return image[row_start:row_end, col_start:col_end, :]



Run Saliency Subset method


*   Generate saliency maps per image
*   Run the maximum_saliency_region function to crop each image



In [18]:
n=28

train_size = 50000

saliencyMap = np.zeros((train_size,32,32))
saliencyOutput = np.zeros((train_size,32,32,3))
subset_image_data = np.zeros((train_size, n, n, 3))

# start timer
start = timeit.default_timer()

for i in range(train_size):
  image = train_data[i,:,:,:]
  
  map, _ = saliency_spectral(image)
  saliencyMap[i,:,:] = map
  
  ''' uncomment to do saliency subset calc'''
  # take nxn subset of data with most saliency
  subset_image = maximum_saliency_region(map, n, image)
  subset_image_data[i, :, :, :] = subset_image
  
  ''' uncomment to do center subset calc 
  # take same nxn subset of data for all images to compare
  subset_image = image[16-n//2: 16+n//2, 16-n//2: 16+n//2, :]
  subset_image_data[i, :, :, :] = subset_image
  '''

  ''' uncomment to visualize a few images 
  if i in [4,7,9,20]:
  
    print(classesName[train_labels[i]])
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
    ax1.imshow(image)
    ax2.imshow(map, cmap="gray")
    ax3.imshow(subset_image)
    plt.show()
  
  if i==20:
    break
  '''

# stop timer
stop = timeit.default_timer()

print('Saliency Map Forward Pass Time: ', stop-start)

Saliency Map Forward Pass Time:  106.0933062869999


In [19]:
# reshape saliency maps
subset_image_data = subset_image_data.reshape(train_size,-1) 

Train the same baseline SVM on the new saliency subset data

In [20]:
# define model
svm_clf_sal = SVC(C=10, gamma=0.1, kernel='polynomial')

# start timer
start = timeit.default_timer()

# fit classifier
svm_clf_sal.fit(subset_image_data, train_labels[:train_size])

# stop timer
stop = timeit.default_timer()
print('Training Time (SVM with saliency maps): ', stop-start)

Training Time (SVM with saliency maps):  2422.8266793760004


Run Saliency Subset method on the Validaiton Data

In [None]:
val_size = 8500

saliencyOutput_val = np.zeros((val_size,32,32,3))
saliencyMap_val = np.zeros((val_size,32,32))
subset_image_data_val = np.zeros((val_size,n,n,3))


# start timer
start = timeit.default_timer()

for i in range(val_size):
  image = val_data[i,:,:,:].reshape(32,32,3)
  map, _ = saliency_spectral(image)
  saliencyMap_val[i,:,:] = map

  ''' uncomment to do saliency subset calc
  # take nxn subset of data with most saliency
  subset_image = maximum_saliency_region(map, n, image)
  subset_image_data_val[i, :, :, :] = subset_image
  '''
  ''' uncomment to do center subset calc  '''
  # take same nxn subset of data for all images to compare
  subset_image = image[32-n:, 32-n:, :]
  subset_image_data_val[i, :, :, :] = subset_image


# stop timer
stop = timeit.default_timer()
print('Saliency Map Forward Pass Time: ', stop-start)


subset_image_data_val = subset_image_data_val.reshape(val_size,-1)

Test the SVM with Saliency Subset Feature Selection on Validation data

In [None]:
# start timer
start = timeit.default_timer()

# make predictions
predicted = svm_clf_sal.predict(subset_image_data_val)
print(predicted[:15])
print(val_labels[:15])

# stop timer
stop = timeit.default_timer()
print('Infrence Time (SVM with saliency): ', stop-start)

# print a classification report (built in to sklearn)
svm_metrics = metrics.classification_report(val_labels[:val_size], predicted, digits=5, target_names=classesName)
print("Classification report: \n", svm_metrics)

# display confusion matrix
cm_svm = confusion_matrix(val_labels[:val_size], predicted)
print(f"Confusion matrix:\n{cm_svm}")

[7 3 1 3 7 3 7 0 9 0 8 5 9 6 7]
[4 5 1 9 9 9 7 8 9 8 0 3 2 3 3]
Infrence Time (SVM with saliency):  120.44992922600068
Classification report: 
               precision    recall  f1-score   support

       plane    0.24311   0.29698   0.26736       862
         car    0.19170   0.11253   0.14181       862
        bird    0.14654   0.22553   0.17765       807
         cat    0.16779   0.28506   0.21124       877
        deer    0.22523   0.23952   0.23215       835
         dog    0.19468   0.18172   0.18797       886
        frog    0.23792   0.15986   0.19123       832
       horse    0.15130   0.15385   0.15256       832
        ship    0.35368   0.23231   0.28043       848
       truck    0.16917   0.10477   0.12940       859

    accuracy                        0.19929      8500
   macro avg    0.20811   0.19921   0.19718      8500
weighted avg    0.20822   0.19929   0.19726      8500

Confusion matrix:
[[256  66  77  74  73  47  22  72 132  43]
 [170  97  61  87  44  48  36 133  6

Test the SVM with Saliency Subset Feature Extraction mehtod on the Test Set data

In [None]:
test_size = 10000

# Saliency Subset Featuer Selection

saliencyOutput_test = np.zeros((test_size,32,32,3))
saliencyMap_test = np.zeros((test_size,32,32))
subset_image_data_test = np.zeros((test_size,n,n,3))


# start timer
start = timeit.default_timer()

for i in range(test_size):
  image = test_data[i,:,:,:].reshape(32,32,3)
  map, _ = saliency_spectral(image)
  saliencyMap_test[i,:,:] = map

  ''' uncomment to do saliency subset calc'''
  # take nxn subset of data with most saliency
  subset_image = maximum_saliency_region(map, n, image)
  subset_image_data_test[i, :, :, :] = subset_image

# stop timer
stop = timeit.default_timer()
print('Saliency Map Forward Pass Time: ', stop-start)

subset_image_data_test = subset_image_data_test.reshape(test_size,-1)
print(subset_image_data_test.shape)



# Test SVM

# start timer
start = timeit.default_timer()

# make predictions
predicted = svm_clf_sal.predict(subset_image_data_test)

# stop timer
stop = timeit.default_timer()
print('Infrence Time (SVM with saliency): ', stop-start)

# print a classification report (built in to sklearn)
svm_metrics = metrics.classification_report(test_labels[:test_size], predicted, digits=5, target_names=classesName)
print("Classification report: \n", svm_metrics)

# display confusion matrix
cm_svm = confusion_matrix(test_labels[:test_size], predicted)
print(f"Confusion matrix:\n{cm_svm}")


Saliency Map Forward Pass Time:  29.830723207996925
(10000, 2352)
Infrence Time (SVM with saliency):  807.0102195339969
Classification report: 
               precision    recall  f1-score   support

       plane    0.49954   0.54200   0.51990      1000
         car    0.53774   0.57700   0.55668      1000
        bird    0.34683   0.39400   0.36891      1000
         cat    0.29508   0.32400   0.30887      1000
        deer    0.42684   0.42300   0.42491      1000
         dog    0.37207   0.38100   0.37648      1000
        frog    0.53207   0.44800   0.48643      1000
       horse    0.52183   0.47800   0.49896      1000
        ship    0.59290   0.56800   0.58018      1000
       truck    0.53022   0.46500   0.49547      1000

    accuracy                        0.46000     10000
   macro avg    0.46551   0.46000   0.46168     10000
weighted avg    0.46551   0.46000   0.46168     10000

Confusion matrix:
[[542  49  73  27  50  24  15  45 125  50]
 [ 62 577  22  30  24  18  15  22  