In [15]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
#from sklearn.experimental import enable_hist_gradient_boosting
#from sklearn.ensemble import HistGradientBoostingClassifier
from joblib import dump, load
from sklearn.naive_bayes import GaussianNB 
from VisionUtils import *
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2

In [16]:
feat1 = np.load("features/feat1.npy")
feat2 = np.load("features/feat2.npy")
labels = np.load("features/labels.npy")

In [17]:
f1 = [feat1[i1].reshape(-1,1) for i1 in range(len(feat1))]
f2 = [feat2[i2].reshape(-1,1) for i2 in range(len(feat2))]
cos_d = np.array([feat_distance_cosine_scalar(f1[i].T, f2[i]) for i in range(len(feat1))])
cos_d = cos_d.reshape(-1,1)
eucl_d = np.array([ np.linalg.norm(f1[i]- f2[i]) for i in range(len(feat1))]).reshape(-1,1)
sqr_diff = np.power(np.abs(feat1- feat2), 2)

In [18]:
data = np.hstack([cos_d, sqr_diff])
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
f_selector = SelectKBest(chi2, k = int(0.8*(len(data.T))))
data_ = f_selector.fit_transform(scaled_data, labels)

In [19]:

xtrain, xtest, ytrain, ytest = train_test_split(data_, labels, shuffle = True, random_state = 42)

Gradient Boosting Classifier

In [20]:
from sklearn.ensemble import BaggingClassifier, VotingClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier

params_grid = {
    "loss" : ["deviance", "exponential"],
    "n_estimators": [10, 50,100, 150, 200],
              }

In [21]:
gboost = GradientBoostingClassifier()
gs = GridSearchCV(gboost, param_grid = params_grid, cv = 5 )
gs.fit(xtrain, ytrain)
report(gs.cv_results_)


Model with rank: 1
Mean validation score: 0.882 (std: 0.010)
Parameters: {'loss': 'exponential', 'n_estimators': 150}

Model with rank: 2
Mean validation score: 0.881 (std: 0.010)
Parameters: {'loss': 'deviance', 'n_estimators': 200}

Model with rank: 3
Mean validation score: 0.881 (std: 0.011)
Parameters: {'loss': 'deviance', 'n_estimators': 100}

Model with rank: 3
Mean validation score: 0.881 (std: 0.009)
Parameters: {'loss': 'exponential', 'n_estimators': 200}



In [22]:
gboost = gs.best_estimator_

In [23]:
pred = gboost.predict(xtest)

In [24]:
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score 
acc = accuracy_score(ytest,pred)
conf = confusion_matrix(ytest, pred)
rs = recall_score(ytest, pred)
ps = precision_score(ytest, pred)
f1 = f1_score(ytest, pred)
print("-The Accuracy of the the cllasifier:",acc)
print("-The recall Score: ", rs)
print("-The precision score: ", ps)
print("-The F1_score: ", f1)
print("-The confusion matrix:" )
print(conf)

-The Accuracy of the the cllasifier: 0.8796666666666667
-The recall Score:  0.851875808538163
-The precision score:  0.9089026915113871
-The F1_score:  0.879465776293823
-The confusion matrix:
[[1322  132]
 [ 229 1317]]


In [25]:
dump(gs, "demo/PipelineParts/gridsearch.joblib")
dump(gboost, "demo/PipelineParts/GboostModel.joblib")
dump(scaler, "demo/PipelineParts/scaler.joblib")
dump(f_selector, "demo/PipelineParts/feature_selector.joblib")

['demo/PipelineParts/feature_selector.joblib']