In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import pandas as pd
import metrics

from metrics import ClassificationMetrics
from handle_imbalance import Imbalance

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn import linear_model, tree, ensemble, metrics
from skopt import BayesSearchCV
import xgboost as xgb
from xgboost import XGBClassifier
from catboost import CatBoostClassifier, Pool, cv

from skimage.feature import greycomatrix, greycoprops
from tqdm import tqdm
import functools
import time
import warnings
import numpy as np

In [2]:
df = pd.read_csv('features(binary_classify)(RGB).csv')

names = df['name'].values
X = df.iloc[:, 1:5].values
y = df['label'].values

X_resampled, y_resampled = Imbalance()('repeated_edited_nearest_neighbours', X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.25,
                                                    random_state=42)

print(f'Using repeated_edited_nearest_neighbours')

Using repeated_edited_nearest_neighbours


In [3]:
X_resampled.shape, y_resampled.shape

((5025, 4), (5025,))

In [5]:
bayes_cv_tuner = BayesSearchCV(
    estimator = xgb.XGBClassifier(
        n_jobs = 1,
        objective = 'binary:logistic',
        eval_metric = 'auc',
        silent=1,
        tree_method='approx',
        n_estimators=200
    ),
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'max_depth': (1, 50),
        'n_estimators': (50, 100),
    },    
    scoring = 'roc_auc',
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 3,
    n_iter = 20,   
    verbose = 1,
    refit = True,
    random_state = 42
)

def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest ROC-AUC: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")
#

In [6]:
result = bayes_cv_tuner.fit(X_train, y_train, callback=status_print)


Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   3 out of   3 | elapsed:    2.3s finished
Model #1
Best ROC-AUC: 0.921
Best params: OrderedDict([('learning_rate', 0.06610098295419149), ('max_depth', 37), ('n_estimators', 97)])

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   3 out of   3 | elapsed:    0.3s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
Model #2
Best ROC-AUC: 0.921
Best params: OrderedDict([('learning_rate', 0.06610098295419149), ('max_depth', 37), ('n_estimators', 97)])

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=3)]: Done   3 out of   3 | elapsed:    0.3s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
Model #3
Best ROC-AUC: 0.921
Best par

In [8]:
result.score(X_test, y_test)

0.9285341772151898

In [10]:
metric = ClassificationMetrics()
'accuracy: ', metric('accuracy', y_test, result.predict(X_test))
'precision', metric('precision', y_test, result.predict(X_test))


('precision', 0.8733552631578947)