In [1]:
import process as pc
from IPython.core.interactiveshell import InteractiveShell

from sklearn.svm import LinearSVC
#from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score
from skimage import img_as_float
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg
import joblib
import cv2

from sklearn.decomposition import PCA
import numpy as np

InteractiveShell.ast_node_interactivity = "all"
%config Completer.use_jedi = False

In [2]:
positive_images_path = pc.DATA_PATH+"/img_align_celeba"
negative_images_path = pc.DATA_PATH+"/caltech-101/101_ObjectCategories"

pc.extract_dataset(positive_images_path)
bad_imgs_path_list = pc.extract_dataset(negative_images_path, folder="multi_folders")

Dataset already extracted


In [3]:
n_good_images = 20000
n_bad_images_folder=1000
size_w = size_h = 128

#count bad images:
bad_img_list = []
for fld in bad_imgs_path_list:
    tmp_img_list = pc.load_images(negative_images_path+"/"+fld, number_of_images=n_bad_images_folder, random_seed=7)
    bad_img_list += [negative_images_path+"/"+fld+'/'+img for img in tmp_img_list]

len(bad_img_list)




8275

In [4]:
#extract patch from bad images to increase the number of negative samples
bad_patches = pc.extract_patches(bad_img_list, (size_w, size_h), n_patches=25000, random_seed=7)
len(bad_patches)


3


26413

In [5]:
img_list = pc.load_images(positive_images_path, number_of_images=n_good_images, random_seed=7) 
images = np.empty((n_good_images, size_w, size_h), dtype=np.uint8)

for i, img in enumerate(img_list):
    image = cv2.imread(pc.DATA_PATH+"/img_align_celeba/"+img)
    images[i, ...] = pc.process_image(image, resize=True, img_resize=(size_w, size_h))

In [6]:
bad_images = np.empty((len(bad_patches), size_w, size_h), dtype=np.uint8)

for i, img in zip(range(len(bad_patches)), bad_patches):
    bad_images[i, ...] = pc.process_image(img, resize=True, img_resize=(size_w, size_h))

error: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/smooth.dispatch.cpp:617: error: (-215:Assertion failed) !_src.empty() in function 'GaussianBlur'


In [None]:
fig, ax = plt.subplots(10, 10, figsize=(10, 10))
for i in range(10):
    for j in range(10):
        ax[i, j].imshow(images[i*10+j], cmap='gray')
        ax[i, j].axis('off')
plt.show();

In [None]:
fig, ax = plt.subplots(10, 10, figsize=(10, 10))
for i in range(10):
    for j in range(10):
        ax[i, j].imshow(bad_images[i*10+j], cmap='gray')
        ax[i, j].axis('off')
plt.show();

In [None]:
images.shape
bad_images.shape

In [None]:
dataset = np.concatenate((images, bad_images), axis=0)
dataset.shape

In [None]:
features_extractor = "SIFT"
good_samples = n_good_images
bad_samples = bad_images.shape[0]
n_keypoints = 40
n_features = 128#256
if features_extractor == "ORB":
   ftr_shape = n_keypoints*n_features
   init = False
   for i, img in enumerate(dataset):
      _, features = pc.extract_ORB_features(img, n_keypoints=32)
      if features is None:
         if i < good_samples:
               good_samples -= 1
         else:
            bad_samples -= 1
         continue
      features = features.flatten()
      if features.shape[0] == ftr_shape:
         if not init:
            X = np.array([features])
            init = True
         else:
            X = np.append(X, [features], axis=0)
      else:
         if i < good_samples:
            print("good sample")
            good_samples -= 1
         else:
            print("bad sample")
            bad_samples -= 1


if features_extractor == "SIFT":
   ftr_shape = n_keypoints*n_features
   init = False
   for i, img in enumerate(dataset):
      kp, features = pc.extract_SIFT_features(img, n_optimal_keypoints=n_keypoints)
      if len(kp) < n_keypoints:
         if i < good_samples:
               good_samples -= 1
         else:
            bad_samples -= 1
         continue

      features = features.flatten()
      if features.shape[0] == ftr_shape:
         if not init:
            X = np.array([features])
            init = True
         else:
            X = np.append(X, [features], axis=0)
      else:
         if i < good_samples:
            print("good sample")
            good_samples -= 1
         else:
            print("bad sample")
            bad_samples -= 1
         


In [None]:
features.shape[0] == ftr_shape

In [None]:
good_samples
bad_samples
y = np.concatenate((np.ones(good_samples), np.zeros(bad_samples))) 
X.shape
y.shape

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.02, random_state=7)

In [None]:
X_train.shape
y_train.shape

X_train[0]
y_train[0]


In [None]:
pca = PCA(n_components=32, whiten=True, random_state=7)
pca.fit(X_train)
var = np.cumsum(pca.explained_variance_ratio_)
plt.plot(var)
plt.ylim(0, 1.02)
plt.xlabel('Number of components')
plt.ylabel('Explained variance')
plt.show()
var


In [None]:
idx = [i for i in range(len(var)) if var[i] > 0.95]
idx

In [None]:
if 0:
    pca = PCA(n_components=idx[-1], whiten=True, random_state=7)
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    X_train_pca.shape
    y_train.shape
    print("PCA done")
else:
    X_train_pca = X_train
    X_test_pca = X_test

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_pca)
X_test_scaled = scaler.transform(X_test_pca)
X_train_scaled[0]

In [None]:
param_grid = { 'C' : [np.power(10., i) for i in range(-2, 2)],
               'max_iter' : [7500],
               'random_state' : [7] }

grid_searh = GridSearchCV(LinearSVC(), param_grid, cv=5, n_jobs=2)
grid_searh.fit(X_train_pca, y_train)
grid_searh.best_params_

In [None]:
best_svc = grid_searh.best_estimator_

best_svc.score(X_train_pca, y_train)

best_svc.score(X_test_pca, y_test)

In [None]:
y_pred = best_svc.predict(X_train_pca)
accuracy = accuracy_score(y_train, y_pred)
print("Accuracy test:", accuracy)


y_pred = best_svc.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy test:", accuracy)

In [None]:
X_test_pca[0]

In [None]:
pipeline_save_path = pc.DATA_PATH+"/sift_features.pkl"

from sklearn.pipeline import Pipeline

pipeline = Pipeline(
    steps=[
        ('preprocess', pc.process_image),
        ('extract_features', pc.extract_ORB_features),
        ('pca', pca),
        ('svc', best_svc)  
    ])

joblib.dump(
    pipeline, pipeline_save_path
    )

In [None]:
pipeline_save_path = pc.DATA_PATH+"/sift_features.pkl"
image_path = pc.DATA_PATH+"/final/Valentino_Rossi_2017.jpg"

pipeline = joblib.load(pipeline_save_path)

image = mpimg.imread(image_path)

faces = pc.detect_faces(image, pipeline, threshold=0.5, window_size=(128, 128), step_size=(64, 64), n_keypoints=32, resize=False)
