 
# Importing libraries

In [36]:
import os
import glob

import numpy as np
import pandas as pd
import cv2
from skimage.feature import local_binary_pattern
from mahotas.features import haralick

from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC, LinearSVC

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import make_scorer

from tqdm import tqdm_notebook
from matplotlib import pyplot as plt
%matplotlib inline 

In [37]:
TRAIN_PATH = 'data/train'
TEST_PATH = 'data/test'
DEVEL_PATH = 'data/devel'

## Evaluation

Solution will be evaluated based on **Equal Error Rate (ERR)**

In [3]:
from evaluation import equal_error_rate 
err = make_scorer(equal_error_rate,greater_is_better=False)

## Feature extraction 

Using combination of some classical texture methods:  
  
**LBP + YCrCb\HSV Histograms**  
Paper: https://arxiv.org/pdf/1511.06316.pdf https://ieeexplore.ieee.org/document/7791171  
**Haralick texture features**  
Paper: https://ieeexplore.ieee.org/document/7791171  

In [4]:
from preprocessing_lbp_haralick import process_image, extract_features, process_data

In [5]:
train_real_filelist = glob.glob(os.path.join(TRAIN_PATH, 'real/*.jpg'))
train_photo_filelist = glob.glob(os.path.join(TRAIN_PATH, 'photo_attack/*.jpg'))
train_video_filelist = glob.glob(os.path.join(TRAIN_PATH, 'video_attack/*.jpg'))
train_real_features = process_data(train_real_filelist)
train_photo_features = process_data(train_photo_filelist)
train_video_features = process_data(train_video_filelist)










In [11]:
train_data_photo_attack = train_real_features + train_photo_features
train_labels_photo_attack = [0] * len(train_real_features) + [1] * len(train_photo_features)
train_data_video_attack = train_real_features + train_video_features
train_labels_video_attack = [0] * len(train_real_features) + [1] * len(train_video_features)
train_data_all = train_real_features + train_photo_features + train_video_features
train_labels_all = [0] * len(train_real_features) + [1] * len(train_photo_features) + [1] * len(train_video_features)

In [12]:
test_real_filelist = glob.glob(os.path.join(TEST_PATH, 'real/*.jpg'))
test_photo_filelist = glob.glob(os.path.join(TEST_PATH, 'photo_attack/*.jpg'))
test_video_filelist = glob.glob(os.path.join(TEST_PATH, 'video_attack/*.jpg'))
test_real_features = process_data(test_real_filelist)
test_photo_features = process_data(test_photo_filelist)
test_video_features = process_data(test_video_filelist)










In [14]:
test_data_photo_attack = test_real_features + test_photo_features
test_labels_photo_attack = [0] * len(test_real_features) + [1] * len(test_photo_features)
test_data_video_attack = test_real_features + test_video_features
test_labels_video_attack = [0] * len(test_real_features) + [1] * len(test_video_features)
test_data_all = test_real_features +test_photo_features + test_video_features
test_labels_all = [0] * len(test_real_features) + [1] * len(test_photo_features) + [1] * len(test_video_features)

In [30]:
devel_real_filelist = glob.glob(os.path.join(DEVEL_PATH, 'real/*.jpg'))
devel_photo_filelist = glob.glob(os.path.join(DEVEL_PATH, 'photo_attack/*.jpg'))
devel_video_filelist = glob.glob(os.path.join(DEVEL_PATH, 'video_attack/*.jpg'))
devel_real_features = process_data(devel_real_filelist)
devel_photo_features = process_data(devel_photo_filelist)
devel_video_features = process_data(devel_video_filelist)

In [31]:
devel_data_photo_attack = devel_real_features + devel_photo_features
devel_labels_photo_attack = [0] * len(devel_real_features) + [1] * len(devel_photo_features)
devel_data_video_attack = devel_real_features + devel_video_features
devel_labels_video_attack = [0] * len(devel_real_features) + [1] * len(devel_video_features)
devel_data_all = devel_real_features +devel_photo_features + devel_video_features
devel_labels_all = [0] * len(devel_real_features) + [1] * len(devel_photo_features) + [1] * len(devel_video_features)

## Scaling data 

In [375]:
scaler = StandardScaler()
scaled_train_all = scaler.fit_transform(train_data_all)
scaled_test_all = scaler.transform(test_data_all)
scaled_devel_all = scaler.transform(devel_data_all)

scaled_train_data_video_attack = scaler.transform(train_data_video_attack) 
scaled_devel_data_video_attack = scaler.transform(devel_data_video_attack) 
scaled_train_data_photo_attack = scaler.transform(train_data_photo_attack) 
scaled_devel_data_photo_attack = scaler.transform(devel_data_photo_attack) 

## Training simple model

In [192]:
svc = SVC(kernel='linear')
svc_params = {'C': np.logspace(-4, 4, 100),'class_weight':['balanced','']}
svc_grid = GridSearchCV(svc,svc_params,cv = 5,n_jobs = -1,scoring=err,return_train_score = True,verbose=True)
svc_grid.fit(scaled_train_all,train_labels_all)
pd.DataFrame(svc_grid.cv_results_).sort_values(by='mean_test_score',ascending=False)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   28.5s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.8min finished


Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,params,rank_test_score,split0_test_score,split0_train_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
76,0.108909,0.012567,-0.006326,0.000000,0.117681,balanced,"{'C': 0.1176811952434999, 'class_weight': 'bal...",1,-0.001391,-0.000000,...,-0.022207,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.012590,0.001017,0.008135,0.000000
59,0.121475,0.015958,-0.006815,0.000000,0.0220513,,"{'C': 0.022051307399030457, 'class_weight': ''}",2,-0.001391,-0.000000,...,-0.024650,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.012447,0.001262,0.009091,0.000000
71,0.108510,0.014162,-0.006828,0.000000,0.0673415,,"{'C': 0.06734150657750829, 'class_weight': ''}",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.009719,0.001466,0.009117,0.000000
74,0.109906,0.012367,-0.006828,0.000000,0.097701,balanced,"{'C': 0.09770099572992257, 'class_weight': 'ba...",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.009236,0.000798,0.009117,0.000000
73,0.112897,0.013963,-0.006828,0.000000,0.0811131,,"{'C': 0.08111308307896872, 'class_weight': ''}",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.009596,0.002523,0.009117,0.000000
72,0.107911,0.012965,-0.006828,0.000000,0.0811131,balanced,"{'C': 0.08111308307896872, 'class_weight': 'ba...",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.011858,0.000892,0.009117,0.000000
61,0.120876,0.016357,-0.006828,0.000000,0.0265609,,"{'C': 0.026560877829466867, 'class_weight': ''}",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.009084,0.002054,0.009117,0.000000
70,0.113098,0.012766,-0.006828,0.000000,0.0673415,balanced,"{'C': 0.06734150657750829, 'class_weight': 'ba...",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.012450,0.000399,0.009117,0.000000
63,0.114892,0.015359,-0.006828,0.000000,0.0319927,,"{'C': 0.03199267137797385, 'class_weight': ''}",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.010693,0.001850,0.009117,0.000000
65,0.114693,0.013165,-0.006828,0.000000,0.0385353,,"{'C': 0.03853528593710531, 'class_weight': ''}",3,-0.001391,-0.000000,...,-0.024716,-0.000000,-0.000000,-0.000000,-0.002681,-0.000000,0.011301,0.000399,0.009117,0.000000


## Traing AND-rule model

In [420]:
svc_photo = SVC(kernel='linear')
svc_params_photo = {'C': np.logspace(-4, 4, 20),'class_weight':['balanced','']}
svc_grid_photo = GridSearchCV(svc_photo,svc_params_photo,cv = 5,n_jobs = -1,scoring=err,return_train_score = True,verbose=True)
svc_grid_photo.fit(scaled_train_data_photo_attack,train_labels_photo_attack)
pd.DataFrame(svc_grid_photo.cv_results_).sort_values(by='mean_test_score',ascending=False)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   14.2s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   18.9s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:   19.2s finished


Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,params,rank_test_score,split0_test_score,split0_train_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
4,0.39614,0.072806,-0.009721,-0.000349,0.000695193,balanced,"{'C': 0.0006951927961775605, 'class_weight': '...",1,-0.0,-0.0,...,-0.048593,-0.000581,-0.0,-0.0,-0.0,-0.0,0.026622,0.002959,0.019439,0.000465
5,0.363428,0.07121,-0.009721,-0.000233,0.000695193,,"{'C': 0.0006951927961775605, 'class_weight': ''}",1,-0.0,-0.0,...,-0.048593,-0.000581,-0.0,-0.0,-0.0,-0.0,0.012257,0.002492,0.019439,0.000285
6,0.209639,0.039295,-0.01117,-0.000233,0.00183298,balanced,"{'C': 0.0018329807108324356, 'class_weight': '...",3,-0.0,-0.0,...,-0.055838,-0.000581,-0.0,-0.0,-0.0,-0.0,0.008137,0.00257,0.022337,0.000285
7,0.207046,0.039495,-0.012124,-0.000233,0.00183298,,"{'C': 0.0018329807108324356, 'class_weight': ''}",4,-0.0,-0.0,...,-0.060606,-0.000581,-0.0,-0.0,-0.0,-0.0,0.00819,0.00257,0.024245,0.000285
3,0.621139,0.13145,-0.012541,-0.002665,0.000263665,,"{'C': 0.00026366508987303583, 'class_weight': ''}",5,-0.0,-0.002322,...,-0.053435,-0.001162,-0.00232,-0.003478,-0.006944,-0.000581,0.024244,0.006226,0.020607,0.00185
2,0.666019,0.137634,-0.014343,-0.003358,0.000263665,balanced,"{'C': 0.00026366508987303583, 'class_weight': ...",6,-0.0,-0.003478,...,-0.05102,-0.001742,-0.00232,-0.004056,-0.016055,-0.000581,0.026705,0.011579,0.019201,0.002173
8,0.135039,0.021143,-0.019422,-0.000116,0.00483293,balanced,"{'C': 0.004832930238571752, 'class_weight': 'b...",7,-0.0,-0.0,...,-0.097087,-0.0,-0.0,-0.0,-0.0,-0.0,0.010863,0.001323,0.038839,0.000233
9,0.131448,0.021343,-0.020831,-0.000116,0.00483293,,"{'C': 0.004832930238571752, 'class_weight': ''}",8,-0.0,-0.0,...,-0.101449,-0.0,-0.0,-0.0,-0.002681,-0.0,0.009981,0.001017,0.040329,0.000233
10,0.08597,0.012766,-0.024284,0.0,0.0127427,balanced,"{'C': 0.012742749857031334, 'class_weight': 'b...",9,-0.0,-0.0,...,-0.11639,-0.0,-0.00232,-0.0,-0.002681,-0.0,0.005997,0.001466,0.046074,0.0
11,0.087765,0.013564,-0.024284,0.0,0.0127427,,"{'C': 0.012742749857031334, 'class_weight': ''}",9,-0.0,-0.0,...,-0.11639,-0.0,-0.00232,-0.0,-0.002681,-0.0,0.003889,0.001621,0.046074,0.0


In [421]:
svc_video = SVC(kernel='linear')
svc_params_video = {'C': np.logspace(-4, 4, 20),'class_weight':['balanced','']}
svc_grid_video = GridSearchCV(svc,svc_params_video,cv = 5,n_jobs = -1,scoring=err,return_train_score = True,verbose=True)
svc_grid_video.fit(scaled_train_data_video_attack,train_labels_video_attack)
pd.DataFrame(svc_grid_video.cv_results_).sort_values(by='mean_test_score',ascending=False)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 193 out of 200 | elapsed:    8.7s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:    8.8s finished


Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,params,rank_test_score,split0_test_score,split0_train_score,...,split2_test_score,split2_train_score,split3_test_score,split3_train_score,split4_test_score,split4_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,0.539357,0.096941,0.0,0.0,0.0001,balanced,"{'C': 0.0001, 'class_weight': 'balanced'}",1,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.063965,0.009739,0.0,0.0
1,0.446008,0.120279,-0.000692,-0.00052,0.0001,,"{'C': 0.0001, 'class_weight': ''}",2,-0.0,-0.001733,...,-0.0,-0.000867,-0.0,-0.0,-0.0,-0.0,0.059041,0.026407,0.001384,0.000693
3,0.271075,0.058643,-0.002128,0.0,0.000263665,,"{'C': 0.00026366508987303583, 'class_weight': ''}",3,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.023647,0.002475,0.004255,0.0
2,0.297803,0.06383,-0.002653,0.0,0.000263665,balanced,"{'C': 0.00026366508987303583, 'class_weight': ...",4,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.045048,0.017297,0.005305,0.0
5,0.17513,0.03351,-0.017647,0.0,0.000695193,,"{'C': 0.0006951927961775605, 'class_weight': ''}",5,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.030315,0.00684,0.035294,0.0
4,0.165158,0.030917,-0.018978,0.0,0.000695193,balanced,"{'C': 0.0006951927961775605, 'class_weight': '...",6,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.026149,0.003026,0.037956,0.0
7,0.113696,0.015359,-0.022857,0.0,0.00183298,,"{'C': 0.0018329807108324356, 'class_weight': ''}",7,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.016044,0.002239,0.045714,0.0
6,0.095345,0.017952,-0.026168,0.0,0.00183298,balanced,"{'C': 0.0018329807108324356, 'class_weight': '...",8,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.015041,0.006308,0.052336,0.0
8,0.076796,0.01117,-0.026168,0.0,0.00483293,balanced,"{'C': 0.004832930238571752, 'class_weight': 'b...",8,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.015732,0.002129,0.052336,0.0
9,0.065824,0.008777,-0.026168,0.0,0.00483293,,"{'C': 0.004832930238571752, 'class_weight': ''}",8,-0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.010032,0.000399,0.052336,0.0


In [426]:
predicts = np.column_stack((svc_grid_video.predict(scaled_devel_all),svc_grid_photo.predict(scaled_devel_all)))
and_predict = []
for predict in predicts:
    if predict[0] == 0 and predict[1] == 0:
        and_predict.append(0)
    else:
        and_predict.append(1)

In [427]:
equal_error_rate(and_predict,devel_labels_all)

0.01466074414194096

Hypotesis: high false rejection rate

## Traing generalized model 

In [408]:
X_train_photo, X_val_photo, y_train_photo, y_val_photo = train_test_split(
    scaled_train_data_photo_attack, train_labels_photo_attack, test_size=0.26, random_state=42)

In [409]:
X_train_video, X_val_video, y_train_video, y_val_video = train_test_split(
    scaled_train_data_video_attack, train_labels_video_attack, test_size=0.26, random_state=42)

In [410]:
svc_lvl0 = SVC(kernel='linear',class_weight='balanced',C=1,probability=True)
svc_lvl0.fit(X_train_photo,y_train_photo)
svc_photo_pred_lvl0 = np.append(svc_lvl0.predict_proba(X_val_photo)[:,1],svc_lvl0.predict_proba(X_val_video)[:,1])
svc_photo_pred_lvl1 = svc_lvl0.predict_proba(scaled_devel_all)[:,1]

In [411]:
svc_lvl0 = SVC(kernel='linear',class_weight='balanced',C=1,probability=True)
svc_lvl0.fit(X_train_video,y_train_video)
svc_video_pred_lvl0 = np.append(svc_lvl0.predict_proba(X_val_photo)[:,1],svc_lvl0.predict_proba(X_val_video)[:,1])
svc_video_pred_lvl1 = svc_lvl0.predict_proba(scaled_devel_all)[:,1]

In [412]:
svc_pred_lvl0 = np.column_stack((svc_photo_pred_lvl0,svc_video_pred_lvl0))
svc_pred_lvl1 = np.column_stack((svc_photo_pred_lvl1,svc_video_pred_lvl1))
lvl0_labels = np.hstack((y_val_photo,y_val_video))


In [413]:
from sklearn.linear_model import SGDClassifier

In [414]:
reg_lvl1 =  SGDClassifier()
reg_lvl1.fit(svc_pred_lvl0,lvl0_labels)



SGDClassifier(alpha=0.0001, average=False, class_weight=None,
       early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
       l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=None,
       n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
       power_t=0.5, random_state=None, shuffle=True, tol=None,
       validation_fraction=0.1, verbose=0, warm_start=False)

In [415]:
equal_error_rate(reg_lvl1.predict(svc_pred_lvl1),devel_labels_all)

0.0030854193365493082