# Window-Based Detection using KAZE Descriptor Feature Matching

In [None]:
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.patches as patches
import cv2
import os
import time
from IPython.display import clear_output
from sklearn.utils import shuffle
from sklearn import metrics as sk_metrics
from sklearn.model_selection import RandomizedSearchCV
import pickle as pkl

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost

from dataset import *
from descriptor import *
from SlidingWindow import *
from BagOfWords import *

In [None]:
# Global Variables
run_sliding_window = False  # gates the OpenCV sliding-window demo cell

curr_wd = os.getcwd()

# Example images live in the same JPEG directory under the working directory
_jpeg_dir = os.path.join(curr_wd, 'datasets/', 'JPEGImages/')
example_path = os.path.join(_jpeg_dir, '004.jpg')
example_full_path = os.path.join(_jpeg_dir, '000.jpg')

# Cached training annotations (pickle file)
train_annot_path = os.path.join(curr_wd, 'cache_anno/', 'train_annots.pkl')

# Paths for training data
goal_dir = os.path.join(curr_wd, 'datasets')

## Sliding Window

To perform window-based detection on an image, we need a sliding-window subroutine that iterates over the windows of that image. The following demonstration shows a multiscale sliding-window routine built on image pyramids.

In [None]:
# Load example image
# Set the default figure size first so the figure created by imshow picks it up
plt.rcParams['figure.figsize'] = (5.0, 5.0)
example_image = plt.imread(example_path)
plt.imshow(example_image)

In [None]:
# Sliding window routine
# Walks every level of the image pyramid and, for each window position, draws
# the current window on a copy of that level and shows it in an OpenCV window.
window_size = (100, 100)
if run_sliding_window:  # Toggle demo using this variable
    try:
        for sub_image in image_pyramid(example_image, scale=1.5):
            for coordinates, window in sliding_window(sub_image, step_size=100, window_size=window_size):
                # NOTE: Can apply sub routine to process each image here

                # Extract coordinates of current bounding box
                y, x, y_end, x_end = coordinates

                # Draw on a copy so the rectangle does not accumulate on the pyramid level
                sub_image_copy = sub_image.copy()
                cv2.rectangle(sub_image_copy, (x, y), (x_end, y_end), (0, 255, 0), 2)
                cv2.imshow("Example", sub_image_copy)
                cv2.waitKey(1)
                time.sleep(0.025)
    finally:
        # Always close the HighGUI window, even if the loop is interrupted;
        # a window left open is what tends to leave the notebook kernel hanging.
        cv2.destroyAllWindows()
        cv2.waitKey(1)  # give the GUI event loop a chance to process the close

# NOTE: The demo opens a native OpenCV window, which may still be unstable from a notebook kernel.
# Only enable it to visualise the results of the sliding window.
# NOTE(review): example_image comes from plt.imread (RGB) while cv2.imshow expects BGR,
# so displayed colours are probably channel-swapped — confirm whether that matters here.

### Load Dataset

Loads the patches from `datasets/train/positives` and `datasets/train/extra` as training data for the classifier model. The patches are of type `ndarray`.
<br> *Detection classes allowed:* </br>
<br> **waldo_face_front** </br>
<br> **waldo_face_side** </br>
<br> **waldo_body_full** </br>
<br> **waldo_body_half** </br>
<br> **wenda_face_front** </br>
<br> **wenda_body_full** </br>
<br> **wenda_body_half** </br>
<br> **wizard_face_front** </br>
<br> **wizard_body_full** </br>
<br> **wizard_body_half** </br>
<br> **other_face_front** </br>
<br> **other_body_full** </br>
<br> **other_body_half**</br>

In [None]:
# Load data
# here we focus on faces
detection_class = ['waldo_face_front']
train_loader, valid_loader = prepare_classification_dataloader(
    detection_class,
    simple=True,
    neg_ratio=0.2,
)

# Materialise both loaders into lists so instances can be indexed and re-iterated
train_instances = list(train_loader)
valid_instances = list(valid_loader)

In [None]:
# Left panel: the first training patch as loaded, titled with its label
plt.rcParams['figure.figsize'] = (12.0, 12.0)
plt.subplot(1, 2, 1)
plt.imshow(train_instances[0][0])
plt.title(f'Original Patch, {train_instances[0][1]!s}')
plt.axis('off')

# Convert to RGB
def to_rgb(img):
    """Return ``img`` with the order of its third axis reversed.

    The first two axes are left untouched. Used here to turn loaded patches
    into RGB (presumably they arrive in BGR channel order — confirm against
    the data loader).
    """
    reversed_channels = (slice(None), slice(None), slice(None, None, -1))
    return img[reversed_channels]

# Split each (image, label) instance into parallel lists, converting every
# image with to_rgb along the way.
train_images = [to_rgb(patch) for patch, _ in train_instances]
train_labels = [label for _, label in train_instances]

valid_images = [to_rgb(patch) for patch, _ in valid_instances]
valid_labels = [label for _, label in valid_instances]

# Right panel: the same first patch after channel conversion, titled with its label
plt.subplot(1, 2, 2)
plt.imshow(train_images[0])
plt.title(f'RGB Converted, {train_labels[0]!s}')
plt.axis('off')

In [None]:
# Extract feature descriptors from training set
# NOTE(review): extract_features is defined outside this notebook; the result
# is treated below as a sized collection of descriptors — confirm its exact shape.
train_features = extract_features(train_images)

In [None]:
# Cluster the keypoints to obtain features
# The clustering result is kept as `bag_of_words` and reused for histogram
# extraction and detection further down.
print('Number of KAZE descriptors:', len(train_features))
bag_of_words = cluster_features(train_features) # Default number of clusters is 50

In [None]:
# Build histogram features for the training images against the bag of words,
# then shuffle histograms and labels together so the pairing is preserved.
# NOTE(review): shuffle is called without random_state, so the order differs between runs.
train_histograms = extract_histograms(train_images, bag_of_words)
train_histograms, train_labels = shuffle(train_histograms, train_labels)

### Train Classifier

After extracting the Bag of Words from the training patches, we train our classifier on the histogram feature vectors of the training patches and test it on our validation patches.

In [None]:
# Report the size of the validation set (nothing is initialised here)
print('Number of validation examples: ', len(valid_images))

In [None]:
# Build histogram features for the validation images using the same bag of
# words as the training set, then shuffle histograms and labels together.
# NOTE(review): shuffle is called without random_state, so the order differs between runs.
valid_histograms = extract_histograms(valid_images, bag_of_words)
valid_histograms, valid_labels = shuffle(valid_histograms, valid_labels)

In [None]:
# Initialize classification model
# SVM Model: support vector classifier with an RBF kernel, fitted on the
# training histograms and their labels
svm = SVC(kernel='rbf')
svm.fit(train_histograms, train_labels)

In [None]:
# Sanity check: number of training labels (shown as the cell's output)
len(train_labels)

In [None]:
# Score the fitted SVM on the validation histograms
valid_predict = svm.predict(valid_histograms)
print(valid_labels)

# Evaluate the three metrics in order against the same labels/predictions
precision, recall, f1_score = (
    metric(y_true=valid_labels, y_pred=valid_predict)
    for metric in (
        sk_metrics.precision_score,
        sk_metrics.recall_score,
        sk_metrics.f1_score,
    )
)
print('Precision: %.3f\nRecall: %.3f\nF1 Score: %.3f' % (precision, recall, f1_score))

In [None]:
# XGBoost Model
# NOTE: XGB requires heavy tuning 
# Gradient-boosted classifier fitted on the training histograms and labels;
# the hyperparameters below are hand-picked, not searched.
xgb = xgboost.XGBClassifier(learning_rate=0.001, n_estimators=250, max_depth=5)
xgb.fit(train_histograms, train_labels)

In [None]:
# Score the fitted XGBoost model on the validation set.
# The validation data in this notebook is named valid_histograms / valid_labels;
# the previous val_histograms / val_labels names were never defined (NameError).
val_predict = xgb.predict(valid_histograms)
precision = sk_metrics.precision_score(y_true=valid_labels, y_pred=val_predict)
recall = sk_metrics.recall_score(y_true=valid_labels, y_pred=val_predict)
f1_score = sk_metrics.f1_score(y_true=valid_labels, y_pred=val_predict)
print('Precision: %.3f\nRecall: %.3f\nF1 Score: %.3f' % (precision, recall, f1_score))

In [None]:
# Random Forest Model
# Fitted with library-default hyperparameters on the training histograms and labels.
# NOTE(review): no random_state is set, so results can vary between runs.
rf = RandomForestClassifier()
rf.fit(train_histograms, train_labels)

In [None]:
# Score the fitted Random Forest on the validation set.
# The validation data in this notebook is named valid_histograms / valid_labels;
# the previous val_histograms / val_labels names were never defined (NameError).
val_predict = rf.predict(valid_histograms)
precision = sk_metrics.precision_score(y_true=valid_labels, y_pred=val_predict)
recall = sk_metrics.recall_score(y_true=valid_labels, y_pred=val_predict)
f1_score = sk_metrics.f1_score(y_true=valid_labels, y_pred=val_predict)
print('Precision: %.3f\nRecall: %.3f\nF1 Score: %.3f' % (precision, recall, f1_score))

### Sliding Window Scoring

We now use our trained classifier to score each window, and keep only the windows whose scores exceed a chosen threshold.

In [None]:
# Visualise window on training image
window_size = (200, 400)
step_size = 500

plt.rcParams['figure.figsize'] = (12.0, 12.0)
example_full = plt.imread(example_full_path)

fig, ax = plt.subplots(1)
ax.imshow(example_full)

# Outline a single window-sized rectangle anchored at (4000, 3500)
rect = patches.Rectangle(
    xy=(4000, 3500),
    width=window_size[0],
    height=window_size[1],
    linewidth=2,
    edgecolor='b',
    facecolor='none',
)
ax.add_patch(rect)

plt.show()

In [None]:
# Perform detection
# Runs the project's detect() helper on the full example image with the bag of
# words and the fitted SVM, then reports how many boxes came back.
# NOTE(review): each detection is later unpacked as (x1, y1, x2, y2) — confirm against detect().
detections = detect(example_full, bag_of_words, svm)
print('Number of Bounding Boxes: ', len(detections))

In [None]:
# Visualise detections
# Nothing earlier in this notebook assigns train_annots, so load it from the
# cached annotation pickle that train_annot_path points to.
# NOTE: unpickling can execute arbitrary code — only load a cache file you generated yourself.
with open(train_annot_path, 'rb') as annot_file:
    train_annots = pkl.load(annot_file)

# NOTE(review): assumes the annotation is laid out as (x1, y1, x2, y2) — confirm against the cache format
ground_truth = train_annots['000']['waldo']

# Draw on a copy so example_full stays clean. The image is shown by matplotlib
# as RGB, so detections appear in blue and the ground truth in green.
clone = example_full.copy()
for (x1, y1, x2, y2) in detections:
    cv2.rectangle(clone, (x1, y1), (x2, y2), (0, 0, 255), thickness=25)
cv2.rectangle(clone, (ground_truth[0], ground_truth[1]), (ground_truth[2], ground_truth[3]), (0, 255, 0), thickness=25)
plt.imshow(clone)