In [1]:
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, KFold, RandomizedSearchCV
from sklearn.metrics import precision_score, recall_score, accuracy_score, cohen_kappa_score, confusion_matrix

import pandas as pd
import numpy as np
import cv2

import os, glob

from joblib import dump, load

In [None]:
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = '\r')
    # Print New Line on Complete
    if iteration == total: 
        print()

In [2]:
class HOG:

    @staticmethod
    def extract(img):
        cell_size = (4, 4)
        block_size = (16, 16)
        nbins = 9
        hog = cv2.HOGDescriptor(_winSize=(img.shape[1] // cell_size[1] * cell_size[1],
                                    img.shape[0] // cell_size[0] * cell_size[0]),
                            _blockSize=(block_size[1] * cell_size[1],
                                        block_size[0] * cell_size[0]),
                            _blockStride=(cell_size[1], cell_size[0]),
                            _cellSize=(cell_size[1], cell_size[0]),
                            _nbins=nbins)
        h = hog.compute(img)
        h = h.T[0]
        return h

In [None]:
def report_best_scores(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")

In [None]:
class XGBoost_Classifier:
    
    def __init__(self, qtd_classes, best_scores):
        self.num_class = qtd_classes
        self.best_scores = best_scores
    
    def search_model(self, X, y, grid, steps):        
        xgb_model = xgb.XGBClassifier(objective='multi:softprob', random_state=42)
        
        search = RandomizedSearchCV(xgb_model, param_distributions=grid, random_state=42, n_iter=200, cv=3, verbose=1, n_jobs=1, return_train_score=True)

        search.fit(X, y)
        
        self.best_scores(search.cv_results_, 1)
    
    def train_single(self, X_train, Y_train, X_test, Y_test, params, steps):
        model = xgb.train(param, D_train, steps)
        preds = model.predict(D_test)
        preds = np.asarray([np.argmax(line) for line in preds])
        acc = accuracy_score(Y_test, preds)
        kpp = cohen_kappa_score(Y_test, preds)
        print("Accuracy = {}".format(acc))
        print("Precision = {}".format(precision_score(Y_test, preds, average='macro')))
        print("Recall = {}".format(recall_score(Y_test, preds, average='macro')))
        print("Kappa = {}".format(kpp))

In [3]:
class SVM_Classifier:
    
    def __init__(self, best_scores):
        self.best_scores = best_scores
    
    def search_model(self, X, y, grid):        
        svc = SVC(probability=True)
        
        search = RandomizedSearchCV(svc, param_distributions=grid, random_state=42, n_iter=2, cv=3, verbose=3, n_jobs=1, return_train_score=True)
        
        search.fit(X, y)
        
        self.best_scores(search.cv_results_, 1)
    
    def train_single(self, X_train, Y_train, X_test, Y_test, params):
        svc = SVC(probability=True)
        svc.set_params(params)
        
        model = svc.fit(X_train, Y_train)
        y_predicted = model.predict(X_test)
        
        acc = sk.metrics.accuracy_score(Y_test, y_predicted)
        prec = sk.metrics.precision_score(Y_test, y_predicted, average=None)[1]
        rec = sk.metrics.recall_score(Y_test, y_predicted, average=None)[1]
        kpp = sk.metrics.cohen_kappa_score(Y_test, y_predicted)
        print("Accuracy: {:.1%}".format(acc))
        print("Precision: {:.1%}".format(prec))
        print("Recall: {:.1%}".format(rec))
        print("Kappa: {:.1%}".format(kpp))
        

In [None]:
class KNN_Classifier:
    
    def __init__(self, best_scores):
        self.best_scores = best_scores
    
    def search_model(self, X, y, grid):        
        knn = KNeighborsClassifier()
        
        search = RandomizedSearchCV(knn, param_distributions=grid, random_state=42, n_iter=200, cv=3, verbose=1, n_jobs=1, return_train_score=True)
        
        search.fit(X, y)
        
        self.best_scores(search.cv_results_, 1)
    
    def train_single(self, X_train, Y_train, X_test, Y_test, params):
        knn = KNeighborsClassifier()
        knn.set_params(params)
        
        model = knn.fit(X_train, Y_train)
        y_predicted = model.predict(X_test)
        
        acc = sk.metrics.accuracy_score(Y_test, y_predicted)
        prec = sk.metrics.precision_score(Y_test, y_predicted, average=None)[1]
        rec = sk.metrics.recall_score(Y_test, y_predicted, average=None)[1]
        kpp = sk.metrics.cohen_kappa_score(Y_test, y_predicted)
        print("Accuracy: {:.1%}".format(acc))
        print("Precision: {:.1%}".format(prec))
        print("Recall: {:.1%}".format(rec))
        print("Kappa: {:.1%}".format(kpp))

In [None]:
class RandomForest_Classifier:
    
    def __init__(self, best_scores):
        self.best_scores = best_scores
    
    def search_model(self, X, y, grid):        
        rfc = RandomForestClassifier()
        
        search = RandomizedSearchCV(rfc, param_distributions=grid, random_state=42, n_iter=200, cv=3, verbose=1, n_jobs=1, return_train_score=True)
        
        search.fit(X, y)
        
        self.best_scores(search.cv_results_, 1)
    
    def train_single(self, X_train, Y_train, X_test, Y_test, params):
        rfc = RandomForestClassifier()
        rfc.set_params(params)
        
        model = rfc.fit(X_train, Y_train)
        y_predicted = model.predict(X_test)
        
        acc = sk.metrics.accuracy_score(Y_test, y_predicted)
        prec = sk.metrics.precision_score(Y_test, y_predicted, average=None)[1]
        rec = sk.metrics.recall_score(Y_test, y_predicted, average=None)[1]
        kpp = sk.metrics.cohen_kappa_score(Y_test, y_predicted)
        print("Accuracy: {:.1%}".format(acc))
        print("Precision: {:.1%}".format(prec))
        print("Recall: {:.1%}".format(rec))
        print("Kappa: {:.1%}".format(kpp))

In [None]:
#TODO: GausianNB

### Base

- apresenta 98 imagens corrompidas

In [4]:
BASE_PATH = "D:\\Carlos-NCA\\Documents\\Faculdade\\Visao\\dataset\\dataset_updated\\training_set\\"
labels = []

folders = os.listdir(BASE_PATH)
labels = folders

features = []
labels = []
error_images = []
max_size = 0
for f in folders:
    images = glob.glob(BASE_PATH + f + "\\*.*")    
    for img in images:
        image = cv2.imread(img)
        
        if image is not None:
            feature = HOG.extract(image)
            
            if feature.shape[0] > max_size:
                max_size = feature.shape[0]
                
#             features.append(feature)
#             labels.append(folders.index(f))
        else:
            error_images.append(img)

error: OpenCV(4.3.0) C:\projects\opencv-python\opencv\modules\core\src\matrix.cpp:235: error: (-215:Assertion failed) s >= 0 in function 'cv::setSize'


In [None]:
print(max_size)

In [None]:
features = np.array(n_features)
labels = np.array(labels)

In [None]:
print(features.shape)

In [None]:
SVM = SVM_Classifier(report_best_scores)

In [None]:
grid = {'C': [1, 2, 5, 10, 100, 1000], 'gamma': [0.5, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['linear', 'rbf', 'sigmoid']}

In [None]:
SVM.search_model(features, labels, grid)