In [170]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
#from sklearn.experimental import enable_hist_gradient_boosting
#from sklearn.ensemble import HistGradientBoostingClassifier
from joblib import dump, load
from sklearn.naive_bayes import GaussianNB 
from VisionUtils import *
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.multiclass import OneVsRestClassifier
import pandas as pd
from mtcnn.mtcnn import MTCNN
from pathlib import Path
from sklearn.utils import shuffle
from tqdm import tqdm
#import autosklearn.classification

##### !curl https://raw.githubusercontent.com/automl/auto-sklearn/master/requirements.txt | xargs -n 1 -L 1 pip install

In [171]:
def trans(feat1, feat2):
    f1 = [feat1[i1].reshape(-1,1) for i1 in range(len(feat1))]
    f2 = [feat2[i2].reshape(-1,1) for i2 in range(len(feat2))]
    cos_d = np.array([feat_distance_cosine_scalar(f1[i].T, f2[i]) for i in range(len(feat1))])
    cos_d = cos_d.reshape(-1,1)
    sqr_diff = np.power(np.abs(feat1- feat2), 2)
    rat = feat1/feat2
    data = np.hstack([cos_d, sqr_diff, rat])
    return data

train_aug_feat1 = np.load("features/train_aug_feat12.npy")
train_aug_feat2 = np.load("features/train_aug_feat22.npy")
train_aug_labels = np.load("features/train_aug_labels2.npy")

train_parallel_feat1 = np.load("features/train_parallel_feat11.npy")
train_parallel_feat2 = np.load("features/train_parallel_feat21.npy")
train_parallel_labels = np.load("features/train_parallel_labels.npy")

train_spotlight_feat1 = np.load("features/train_spotlight_feat11.npy")
train_spotlight_feat2 = np.load("features/train_spotlight_feat21.npy")
train_spotlight_labels = np.load("features/train_spotlight_labels.npy")

In [172]:
train_feat1 = np.load("features2/train_feat1.npy")
train_feat2 = np.load("features2/train_feat2.npy")
train_labels = np.load("features2/train_labels.npy")

test_feat1 = np.load("features2/test_feat1.npy")
test_feat2 = np.load("features2/test_feat2.npy")
test_labels = np.load("features2/test_labels.npy")

In [173]:
labels = list(train_labels) + list(test_labels)

In [174]:
len(labels)

473600

In [175]:
xtrain = trans(train_feat1, train_feat2)
xtest = trans(test_feat1, test_feat2)

In [176]:
tr_len = len(xtrain)
tst_len = len(xtest)

In [177]:
data = np.vstack([xtrain, xtest])


In [178]:
train_feat1

array([[-1.4152974 , -0.4509149 , -0.15066573, ..., -1.0045165 ,
         0.26249442,  2.1781526 ],
       [-0.46679398, -0.01878167, -0.7963252 , ...,  0.59322894,
         0.25759584,  1.4256115 ],
       [-0.7544749 , -0.15702224, -0.6228995 , ..., -0.40557706,
         0.35926935,  0.22421257],
       ...,
       [-0.24145341, -0.64844954,  0.34552258, ..., -0.11110175,
         1.4846197 ,  0.5854943 ],
       [-1.2245702 , -1.246251  , -0.60789293, ...,  0.07093086,
         0.3926579 , -0.0261232 ],
       [-1.6080754 , -0.6366339 , -0.48899055, ..., -1.1670861 ,
         1.8307773 ,  0.98391616]], dtype=float32)

In [179]:
train_feat1.shape

(355200, 128)

In [180]:
train_feat2.shape

(355200, 128)

In [181]:
test_feat1.shape

(118400, 128)

In [182]:
test_feat2.shape

(118400, 128)

In [183]:
data.shape
#labels.shape

(473600, 257)

In [184]:
len(labels)

473600

In [190]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
f_selector = SelectKBest(chi2, k = int(0.8*(len(data.T))))
data_ = f_selector.fit_transform(scaled_data, labels )

data_df = pd.DataFrame(data_)
image_paths_csv = pd.read_csv("features/image_paths.csv", index_col = False)

all_data_df = pd.concat([image_paths_csv,data_df], axis = 1)

len(all_data_df)

xtrain, xtest, ytrain, ytest = train_test_split(all_data_df, labels, shuffle = True, random_state = 42)

paths_dict = {
    "train_paths1": xtrain["path1"],
    "train_paths2": xtrain["path2"],
    "test_paths1": xtest["path1"],
    "test_paths2": xtest["path2"]
}
xtrain1 = xtrain.drop(["path1", "path2", "Unnamed: 0"], axis = 1).values
xtest1 = xtest.drop(["path1", "path2", "Unnamed: 0"], axis = 1).values

In [191]:
xtrain, xtest = data_[:tr_len], data_[-tst_len:]
ytrain, ytest = labels[:tr_len], labels[-tst_len:]
xtrain, ytrain = shuffle(xtrain, ytrain)

In [192]:
xtrain.shape

(355200, 205)

In [193]:
xtest.shape

(118400, 205)

Gradient Boosting Classifier

In [194]:
from sklearn.ensemble import BaggingClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV


params_grid = {
    "loss" : ["deviance", "exponential"],
    "n_estimators": [10, 50,100, 150, 200],
              }
gboost = GradientBoostingClassifier()
gs = GridSearchCV(gboost, param_grid = params_grid, cv = 5 )
gs.fit(xtrain, ytrain)
report(gs.cv_results_)
gboost = gs.best_estimator_

In [195]:
gboost = GradientBoostingClassifier()

params_grid = {
    "loss" : ["deviance", "exponential"],
    "n_estimators": [10, 50,100, 150, 200],}

gs = GridSearchCV(gboost, param_grid = params_grid, cv = 5 )

In [196]:
gboost.fit(xtrain, ytrain)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

gboost = gs.best_estimator_

In [None]:
gboost.fit( xtrain, ytrain)

In [197]:
pred = gboost.predict(xtest)

In [199]:
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score 
acc = accuracy_score(ytest,pred)
conf = confusion_matrix(ytest, pred)
rs = recall_score(ytest, pred)
ps = precision_score(ytest, pred)
f1 = f1_score(ytest, pred)
print("-The Accuracy of the the cllasifier:",acc)
print("-The recall Score: ", rs)
print("-The precision score: ", ps)
print("-The F1_score: ", f1)
print("-The confusion matrix:" )
print(conf)

-The Accuracy of the the cllasifier: 0.8774155405405405
-The recall Score:  0.8422031691272217
-The precision score:  0.9057379285259616
-The F1_score:  0.8728158572705446
-The confusion matrix:
[[54084  5183]
 [ 9331 49802]]


In [200]:
false_positive = []
false_positive_pred = []

false_negative = []
false_negative_pred = []

for i in range(len(pred)):
    ### False Positive check
    if ((pred[i]==1) & (ytest[i] ==0)):
        false_positive.append(i)
        false_positive_pred.append(pred[i])
    ### False Negative check
    elif ((pred[i] == 0) &(ytest[i]==1)):
        false_negative.append(i)
        false_negative_pred.append(pred[i])


In [201]:
print(len(false_positive))
print(len(false_negative))

5183
9331


False Positves

In [202]:
false_image_lst1 = list(np.array(paths_dict["test_paths1"])[false_positive])
false_image_lst2 = list(np.array(paths_dict["test_paths2"])[false_positive])

NameError: name 'paths_dict' is not defined

In [203]:
%matplotlib inline
import matplotlib.pyplot as plt 

def showfalse(idx, i):
    
    try:
        imgs = [ plt.imread("data/images/" + false_image_lst1[idx]), 
               plt.imread("data/images/" + false_image_lst2[idx])]
        pathls = ["#: {} data/images/".format(i) + false_image_lst1[idx], 
                  "#: {} data/images/".format(i) + false_image_lst2[idx]]
        for i in range(len(imgs)):
            bb = detect_faces(imgs[i], m)
            ax = show_img(imgs[i])
            
            for b in bb:
                write_txt(ax, (b[0], b[1]),pathls[i], 14)
                draw_bb(ax, b)
            write_txt(ax, (0, 0),"{}".format(i), 20)
        #print(50*"#")
    except FileNotFoundError:
        print(i)

In [204]:
dump(gboost, "demo/PipelineParts/GboostModel.joblib")
dump(scaler, "demo/PipelineParts/scaler.joblib")
dump(f_selector, "demo/PipelineParts/feature_selector.joblib")

['demo/PipelineParts/feature_selector.joblib']

In [91]:
gboost

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              n_iter_no_change=None, presort='auto', random_state=None,
              subsample=1.0, tol=0.0001, validation_fraction=0.1,
              verbose=0, warm_start=False)

In [205]:
gboost = load("demo/PipelineParts/GboostModel.joblib")

In [206]:
pred = gboost.predict(xtest)

In [207]:
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score 
acc = accuracy_score(ytest,pred)
conf = confusion_matrix(ytest, pred)
rs = recall_score(ytest, pred)
ps = precision_score(ytest, pred)
f1 = f1_score(ytest, pred)
print("-The Accuracy of the the cllasifier:",acc)
print("-The recall Score: ", rs)
print("-The precision score: ", ps)
print("-The F1_score: ", f1)
print("-The confusion matrix:" )
print(conf)

-The Accuracy of the the cllasifier: 0.8774155405405405
-The recall Score:  0.8422031691272217
-The precision score:  0.9057379285259616
-The F1_score:  0.8728158572705446
-The confusion matrix:
[[54084  5183]
 [ 9331 49802]]
