- **Conectando com Google Drive**

In [None]:
# Mount the user's Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive

drive.mount(mountpoint = '/content/drive')

# **1 - Carrega bibliotecas**

In [None]:
# Uncomment the next line only if the installed scikit-learn version needs upgrading.
#!pip install scikit-learn --upgrade
import sklearn
sklearn.__version__

In [None]:
from skimage import io, color                                                    #Módulo para manipulação de imagens
from os import listdir
import os, glob                                                                  #Módulos para manipular estruturas de diretório
from os.path import isfile, join
import numpy as np                                                               #Módulo para a manipulação de arrays
import pandas as pd                                                              #Módulo para a manipulação de dataframes
import matplotlib.pyplot as plt                                                  #Módulo para a manipulação de gráficos
import random
import pickle
from scipy.stats import uniform, randint
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from numpy import arange
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import  StandardScaler                                # Normalização Min-Max e Padronização
from sklearn.preprocessing import LabelEncoder
import cv2                                                                       #Módulo para manipulação de imagens
from sklearn.model_selection import train_test_split                             #Módulo para divisão de dados em treino e teste
from sklearn.svm import SVC                                                      #Módulo para a aplicação do SVM
from sklearn.neural_network import MLPClassifier                                 #Módulo para a aplicação do MLP
from sklearn. model_selection import StratifiedGroupKFold                        #Módulo para a aplicação do K-Fold estratificado
from sklearn.metrics import confusion_matrix, classification_report              #Módulo para a aplicação da matriz de confusão
from sklearn.metrics import ConfusionMatrixDisplay                               #Módulo para a aplicação da matriz de confusão
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score  #Métricas
from sklearn.model_selection import RandomizedSearchCV                           #Módulo para a aplicação do Random Search
from sklearn.metrics import make_scorer                                          #Módulo para a aplicação do Grid Search
import joblib                                                                    #Módulo para salvar o modelo

# **2 - Modelos de Classificação**

* Para a classificação, 4 algoritmos de Machine Learning serão empregados: **Support Vector Machine - SVM**, **Random Forest - RF**, **Artificial Neural Networks - ANN** e **Linear Discriminant Analysis - LDA**. A implementação será realizada por meio da biblioteca Scikit-Learn, que possui módulos específicos para cada algoritmo.

## **2.1 - Importando os conjuntos de dados (Características extraídas)**



In [None]:
# ================================================================================
# Load every feature table - training and test sets
# ================================================================================

def load_features(pattern):
    """Read every CSV matching `pattern` and join them column-wise into one frame.

    The files are read in sorted path order so the resulting column order (and
    which copy of a duplicated column is kept) is the same on every run;
    glob.glob alone returns paths in arbitrary order.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the feature CSV files.

    Returns
    -------
    pandas.DataFrame
        All columns of all files (first occurrence of each duplicated column
        name only), with 'Class' and 'Sample' moved to the front.

    Raises
    ------
    FileNotFoundError
        If no file matches `pattern`.
    """
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f'No feature CSV files match {pattern!r}')

    # axis = 1 places the tables side by side, aligned on the row index.
    # NOTE(review): this assumes every CSV lists the samples in the same row order - confirm.
    merged = pd.concat((pd.read_csv(path, sep=',', encoding='utf-8') for path in paths), axis = 1)
    merged = merged.loc[:, ~merged.columns.duplicated()]          # drop repeated columns, keeping the first
    id_columns = ['Class', 'Sample']
    return merged[id_columns + [col for col in merged.columns if col not in id_columns]]


# Training data
data_train = load_features("./Output/Features_LBP/train/*.csv")
print(data_train.shape)
display(data_train)

# Test data
data_test = load_features("./Output/Features_LBP/test/*.csv")
print(data_test.shape)
#display(data_test)


In [None]:
# ================================================================================
# Summary of the training data set
# ================================================================================
n_rows_train, n_cols_train = data_train.shape
n_species_train = data_train["Class"].nunique()
images_per_class = data_train.groupby("Class").size()

print(f'Número de variáveis dos dados: {n_cols_train}')
print(f'Número de amostras: {n_rows_train}')
print(f'Número de espécies: {n_species_train}')
print('---'*12)
print(f'Número de imagens por: {images_per_class}')

## **2.2 - Divisão de variáveis**



In [None]:
# ================================================================================
# Split the tables into features (X) and labels/groups (y)
# ================================================================================
# Train
X_train = data_train.filter(regex = '_')                        # feature columns: every column whose name contains '_'
# `items` must be a flat list of column names; the previous nested list
# ([['Class', 'Sample']]) did not select the two columns.
y_train = data_train.filter(items = ['Class', 'Sample'])        # class label and sample id

# Test
X_test = data_test.filter(regex = '_')                          # feature columns
y_test = data_test.filter(items = ['Class', 'Sample'])          # class label and sample id

print("x_train shape:", X_train.shape, "y_train shape:", y_train.shape)
print("x_test shape:", X_test.shape, "y_test shape:", y_test.shape)
display(X_test)

## **2.3 - Normalização Z-score**

Deve-se ajustar a normalização/padronização **apenas com os dados de treino** e, em seguida, aplicá-la ao conjunto de teste. Isso evita o **'vazamento de dados'**, pois normalizar todos os dados de uma vez daria ao modelo informações adicionais sobre o conjunto de teste.

In [None]:
# ================================================================================
# Z-score standardisation
# ================================================================================

# Learn the per-column mean and standard deviation from the training rows only,
# then apply that same transformation to both sets.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = pd.DataFrame(scaler.transform(X_train), columns = X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

display(X_test)



## **2.4 - Treinando os classificadores**

### **2.4.1 - Separa as features extraídas por cada operador**

In [None]:
# ================================================================================
# Split the features by LBP operator - training set
# ================================================================================

y_train["Class"]

# Uniform LBP: columns whose name contains the given tag
X_train_u81  = X_train.filter(like = 'uni81')       # P = 8,  R = 1  (10 features)
X_train_u162 = X_train.filter(like = 'uni162')      # P = 16, R = 2  (18 features)
X_train_u243 = X_train.filter(like = 'uni243')      # P = 24, R = 3  (26 features)
X_train_u    = X_train.filter(like = 'uni')         # all three combinations (54 features)

# NRI LBP: columns whose name contains the given tag
X_train_nri81  = X_train.filter(like = 'nri81')     # P = 8,  R = 1  (59 features)
X_train_nri162 = X_train.filter(like = 'nri162')    # P = 16, R = 2  (243 features)
X_train_nri243 = X_train.filter(like = 'nri243')    # P = 24, R = 3  (555 features)
X_train_nri    = X_train.filter(like = 'nri')       # all three combinations (857 features)

# Uniform and NRI together (857 + 54 = 911)
X_train_unri = X_train.filter(regex = r'uni|nri')

In [None]:
# ================================================================================
# Split the features by LBP operator - test set
# ================================================================================

y_test["Class"]

# Uniform LBP: columns whose name contains the given tag
X_test_u81  = X_test.filter(like = 'uni81')       # P = 8,  R = 1  (10 features)
X_test_u162 = X_test.filter(like = 'uni162')      # P = 16, R = 2  (18 features)
X_test_u243 = X_test.filter(like = 'uni243')      # P = 24, R = 3  (26 features)
X_test_u    = X_test.filter(like = 'uni')         # all three combinations (54 features)

# NRI LBP: columns whose name contains the given tag
X_test_nri81  = X_test.filter(like = 'nri81')     # P = 8,  R = 1  (59 features)
X_test_nri162 = X_test.filter(like = 'nri162')    # P = 16, R = 2  (243 features)
X_test_nri243 = X_test.filter(like = 'nri243')    # P = 24, R = 3  (555 features)
X_test_nri    = X_test.filter(like = 'nri')       # all three combinations (857 features)

# Uniform and NRI together (857 + 54 = 911)
X_test_unri = X_test.filter(regex = r'uni|nri')


### **2.4.2 - Configurações de treinamento** (Todos os modelos)

In [None]:
# ================================================================================
# Training configuration
# ================================================================================

# Stratified Group 5-fold cross-validation (the splitter is built with n_splits = 5)
cv = StratifiedGroupKFold(n_splits = 5)

# Scorers evaluated for every candidate: plain accuracy plus the
# 'weighted'-average precision, recall and F1.
metrics = {'accuracy' : make_scorer(accuracy_score)}
metrics.update({
    label : make_scorer(score_fn, average = 'weighted')
    for label, score_fn in (('precision', precision_score),
                            ('recall', recall_score),
                            ('f1_score', f1_score))
})

# Plot and save one confusion matrix per cross-validation fold
def cm_cv(search, X, Y, Z, name = ''):
    """Plot and save the confusion matrix of the best configuration on each CV fold.

    For every split produced by the module-level `cv` splitter, an unfitted
    copy of `search.best_estimator_` is trained on the fold's training rows
    and evaluated on its held-out rows. The fitted estimator stored in
    `search` is never refitted here, so later calls to `search.predict`
    still use the model selected by the search.

    Parameters
    ----------
    search : fitted search object exposing `best_estimator_`
    X : pandas.DataFrame
        Feature table (rows are selected positionally with `.iloc`).
    Y : pandas.Series
        Class label of each row.
    Z : pandas.Series
        Group of each row, passed to `cv.split` as `groups`.
    name : str, optional
        Suffix of the saved figure file names.

    Returns
    -------
    None
        Figures are shown and written to ./Output/CM_LBP/cm-fold_<k>_<name>.
    """
    from sklearn.base import clone    # local import: keeps this cell self-contained

    # Fix the row/column order to all classes so that every fold's matrix has
    # the same shape and matches display_labels even if a fold lacks a class.
    class_labels = np.unique(Y)

    for i, (train_index, test_index) in enumerate(cv.split(X, Y, Z)):
        X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[test_index]
        y_fold_train, y_fold_val = Y.iloc[train_index], Y.iloc[test_index]

        # Train a fresh copy with the best hyperparameters; fitting
        # search.best_estimator_ directly would overwrite the refit model.
        fold_model = clone(search.best_estimator_)
        fold_model.fit(X_fold_train, y_fold_train)

        # Predict the held-out rows of this fold
        y_pred = fold_model.predict(X_fold_val)

        cm = confusion_matrix(y_fold_val, y_pred, labels = class_labels)

        # Fold-level scores shown in the figure title
        ac_w = accuracy_score(y_true = y_fold_val, y_pred = y_pred)
        p_w = precision_score(y_true = y_fold_val, y_pred = y_pred, average='weighted')
        r_w = recall_score(y_true = y_fold_val, y_pred = y_pred, average='weighted')
        f1_w = f1_score(y_true = y_fold_val, y_pred = y_pred, average='weighted')

        disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = class_labels)
        disp.plot(cmap = 'Blues', xticks_rotation = 90, colorbar = False)   # Greys, Purples, Blues, Greens, BuGn, GnBu
        disp.ax_.set_title(f'CM-fold {i+1}\n (Accuracy: {ac_w:.4f}, Precision: {p_w:.4f}, Recall: {r_w:.4f}, F1-score: {f1_w:.4f})',
                           fontsize=10)
        disp.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
        disp.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
        plt.yticks(style='italic')
        plt.xticks(style='italic')
        disp.ax_.xaxis.set_tick_params(labelsize=10)
        disp.ax_.yaxis.set_tick_params(labelsize=10)
        plt.savefig(f'./Output/CM_LBP/cm-fold_{i+1}_{name}', dpi = 800, bbox_inches='tight')
        plt.show()
        plt.close(disp.figure_)    # release the figure before the next fold
    return

### **2.4.3 - Support Vector Classifier - SVC**

In [None]:
# ================================================================================
# SVC estimator definition
# ================================================================================
# SVC().get_params() lists every available parameter.

# Base estimator shared by all the SVC searches.
model_svc = SVC(
    probability = True,
    decision_function_shape = 'ovo',
)

#### **Modelo C1**: Usando features LBP Uniforme (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search. 'C' is a fixed list of 20 seeded draws from a
# uniform distribution on [0.1, 10.1] (scipy's uniform spans [loc, loc + scale]).
param_grid = {
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))
}

# Randomized search over 20 sampled candidates, scored with every entry of
# `metrics` and refit with the candidate of best mean accuracy.
# random_state fixes which candidates are sampled, so a re-run repeats the search.
rs_u81_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                scoring = metrics, cv = cv, refit = 'accuracy',
                                verbose = 3, return_train_score = False, n_iter = 20,
                                random_state = 10)

rs_u81_svc.fit(X_train_u81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search (pickle and joblib copies); the context manager
# guarantees the pickle file is flushed and closed.
with open('./Output/Models_LBP/SVC/rs_u81_svc.pickle', "wb") as model_file:
    pickle.dump(rs_u81_svc, model_file)
joblib.dump(rs_u81_svc, './Output/Models_LBP/SVC/rs_u81_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_u81_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_u81_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u81_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# One row per sampled candidate, best accuracy rank first; only the 'params'
# column and the test-score columns are kept.
res1_rs_u81_svc = (
    pd.DataFrame(rs_u81_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-fold accuracy and F1 of the top-ranked candidate
accf1_rs_u81_svc = res1_rs_u81_svc.iloc[0].filter(regex = r'(accuracy|f1)').filter(like = 'split')
_acc_folds = accf1_rs_u81_svc.filter(like = 'accuracy')
_f1_folds = accf1_rs_u81_svc.filter(like = 'f1')
min_acc_u81_svc, max_acc_u81_svc = _acc_folds.min(), _acc_folds.max()
min_f1_u81_svc, max_f1_u81_svc = _f1_folds.min(), _f1_folds.max()

print(f'Acurácia min: {min_acc_u81_svc:.6f}',
      f'Acurácia max: {max_acc_u81_svc:.6f}',
      f'F1 min: {min_f1_u81_svc:.6f}',
      f'F1 max: {max_f1_u81_svc:.6f}',
      sep = '\n')

# Mean/std summary and parameters of the top-ranked candidate
res2_rs_u81_svc = res1_rs_u81_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_u81_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrices - cross-validation - 5 folds
# ================================================================================
cm_cv(
    search = rs_u81_svc,
    X = X_train_u81,
    Y = y_train['Class'],
    Z = y_train['Sample'],
    name = 'rs_u81_svc',
)

In [None]:
# ================================================================================
# Performance on the test set (n = 566)
# ================================================================================
y_pred_rs_u81_svc = rs_u81_svc.predict(X_test_u81)

# Accuracy and weighted F1 of the predictions against the true test labels
accuracy_rs_u81_svc = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_u81_svc)
f1_rs_u81_svc = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_u81_svc, average = 'weighted')

print(f"Best SVC Accuracy: {accuracy_rs_u81_svc:.6f}",
      f"Best SVC F1: {f1_rs_u81_svc:.6f}",
      sep = '\n')

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

# Fix the row/column order to the classes known to the fitted search, so the
# matrix always has one row/column per display label even if a class is absent
# from both the test labels and the predictions.
cm_rs_u81_svc = confusion_matrix(y_test['Class'], y_pred_rs_u81_svc, labels = rs_u81_svc.classes_)
disp_rs_u81_svc = ConfusionMatrixDisplay(confusion_matrix = cm_rs_u81_svc, display_labels = rs_u81_svc.classes_)
disp_rs_u81_svc.plot(cmap = 'Blues', xticks_rotation = 90)
disp_rs_u81_svc.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u81_svc.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u81_svc.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_u81_svc.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

plt.savefig('./Output/CM_LBP/cm_teste_u81_svc', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# `labels` ties each name in `target_names` to its class, so the report stays
# valid even if a class is absent from both the test labels and the predictions.
print(classification_report(y_test['Class'], y_pred_rs_u81_svc,
                            labels = rs_u81_svc.classes_, target_names = rs_u81_svc.classes_))

#### **Modelo C2**: Usando features LBP Uniforme (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search. 'C' is a fixed list of 20 seeded draws from a
# uniform distribution on [0.1, 10.1] (scipy's uniform spans [loc, loc + scale]).
param_grid = {
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))
}

# Randomized search over 20 sampled candidates, scored with every entry of
# `metrics` and refit with the candidate of best mean accuracy.
# random_state fixes which candidates are sampled, so a re-run repeats the search.
rs_u162_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

rs_u162_svc.fit(X_train_u162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search (pickle and joblib copies); the context manager
# guarantees the pickle file is flushed and closed.
with open('./Output/Models_LBP/SVC/rs_u162_svc.pickle', "wb") as model_file:
    pickle.dump(rs_u162_svc, model_file)
joblib.dump(rs_u162_svc, './Output/Models_LBP/SVC/rs_u162_svc.joblib')


print(f'Os melhores parâmetros foram: {rs_u162_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_u162_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u162_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# One row per sampled candidate, best accuracy rank first; only the 'params'
# column and the test-score columns are kept.
res1_rs_u162_svc = (
    pd.DataFrame(rs_u162_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-fold accuracy and F1 of the top-ranked candidate
accf1_rs_u162_svc = res1_rs_u162_svc.iloc[0].filter(regex = r'(accuracy|f1)').filter(like = 'split')
_acc_folds = accf1_rs_u162_svc.filter(like = 'accuracy')
_f1_folds = accf1_rs_u162_svc.filter(like = 'f1')
min_acc_u162_svc, max_acc_u162_svc = _acc_folds.min(), _acc_folds.max()
min_f1_u162_svc, max_f1_u162_svc = _f1_folds.min(), _f1_folds.max()

print(f'Acurácia min: {min_acc_u162_svc:.6f}',
      f'Acurácia max: {max_acc_u162_svc:.6f}',
      f'F1 min: {min_f1_u162_svc:.6f}',
      f'F1 max: {max_f1_u162_svc:.6f}',
      sep = '\n')

# Mean/std summary and parameters of the top-ranked candidate
res2_rs_u162_svc = res1_rs_u162_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_u162_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrices - cross-validation
# ================================================================================
cm_cv(
    search = rs_u162_svc,
    X = X_train_u162,
    Y = y_train['Class'],
    Z = y_train['Sample'],
    name = 'rs_u162_svc',
)

In [None]:
# ================================================================================
# Performance on the test set (n = 566)
# ================================================================================
y_pred_rs_u162_svc = rs_u162_svc.predict(X_test_u162)

# Accuracy and weighted F1 of the predictions against the true test labels
accuracy_rs_u162_svc = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_u162_svc)
f1_rs_u162_svc = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_u162_svc, average = 'weighted')

print(f"Best SVC Accuracy: {accuracy_rs_u162_svc:.6f}",
      f"Best SVC F1: {f1_rs_u162_svc:.6f}",
      sep = '\n')

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

# Fix the row/column order to the classes known to the fitted search, so the
# matrix always has one row/column per display label even if a class is absent
# from both the test labels and the predictions.
cm_rs_u162_svc = confusion_matrix(y_test['Class'], y_pred_rs_u162_svc, labels = rs_u162_svc.classes_)
disp_rs_u162_svc = ConfusionMatrixDisplay(confusion_matrix = cm_rs_u162_svc, display_labels = rs_u162_svc.classes_)
disp_rs_u162_svc.plot(cmap = 'Blues', xticks_rotation = 90)
disp_rs_u162_svc.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u162_svc.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u162_svc.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_u162_svc.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

plt.savefig('./Output/CM_LBP/cm_teste_u162_svc', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# `labels` ties each name in `target_names` to its class, so the report stays
# valid even if a class is absent from both the test labels and the predictions.
print(classification_report(y_test['Class'], y_pred_rs_u162_svc,
                            labels = rs_u162_svc.classes_, target_names = rs_u162_svc.classes_))

#### **Modelo C3**: Usando features LBP Uniforme (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search. 'C' is a fixed list of 20 seeded draws from a
# uniform distribution on [0.1, 10.1] (scipy's uniform spans [loc, loc + scale]).
param_grid = {
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))
}

# Randomized search over 20 sampled candidates, scored with every entry of
# `metrics` and refit with the candidate of best mean accuracy.
# random_state fixes which candidates are sampled, so a re-run repeats the search.
rs_u243_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

rs_u243_svc.fit(X_train_u243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search (pickle and joblib copies); the context manager
# guarantees the pickle file is flushed and closed.
with open('./Output/Models_LBP/SVC/rs_u243_svc.pickle', "wb") as model_file:
    pickle.dump(rs_u243_svc, model_file)
joblib.dump(rs_u243_svc, './Output/Models_LBP/SVC/rs_u243_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_u243_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_u243_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u243_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# One row per sampled candidate, best accuracy rank first; only the 'params'
# column and the test-score columns are kept.
res1_rs_u243_svc = (
    pd.DataFrame(rs_u243_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-fold accuracy and F1 of the top-ranked candidate
accf1_rs_u243_svc = res1_rs_u243_svc.iloc[0].filter(regex = r'(accuracy|f1)').filter(like = 'split')
_acc_folds = accf1_rs_u243_svc.filter(like = 'accuracy')
_f1_folds = accf1_rs_u243_svc.filter(like = 'f1')
min_acc_u243_svc, max_acc_u243_svc = _acc_folds.min(), _acc_folds.max()
min_f1_u243_svc, max_f1_u243_svc = _f1_folds.min(), _f1_folds.max()

print(f'Acurácia min: {min_acc_u243_svc:.6f}',
      f'Acurácia max: {max_acc_u243_svc:.6f}',
      f'F1 min: {min_f1_u243_svc:.6f}',
      f'F1 max: {max_f1_u243_svc:.6f}',
      sep = '\n')

# Mean/std summary and parameters of the top-ranked candidate
res2_rs_u243_svc = res1_rs_u243_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_u243_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrices - cross-validation
# ================================================================================
cm_cv(
    search = rs_u243_svc,
    X = X_train_u243,
    Y = y_train['Class'],
    Z = y_train['Sample'],
    name = 'rs_u243_svc',
)

In [None]:
# ================================================================================
# Performance on the test set (n = 566)
# ================================================================================
y_pred_rs_u243_svc = rs_u243_svc.predict(X_test_u243)

# Accuracy and weighted F1 of the predictions against the true test labels
accuracy_rs_u243_svc = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_u243_svc)
f1_rs_u243_svc = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_u243_svc, average = 'weighted')

print(f"Best SVC Accuracy: {accuracy_rs_u243_svc:.6f}",
      f"Best SVC F1: {f1_rs_u243_svc:.6f}",
      sep = '\n')

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

# Fix the row/column order to the classes known to the fitted search, so the
# matrix always has one row/column per display label even if a class is absent
# from both the test labels and the predictions.
cm_rs_u243_svc = confusion_matrix(y_test['Class'], y_pred_rs_u243_svc, labels = rs_u243_svc.classes_)
disp_rs_u243_svc = ConfusionMatrixDisplay(confusion_matrix = cm_rs_u243_svc, display_labels = rs_u243_svc.classes_)
disp_rs_u243_svc.plot(cmap = 'Blues', xticks_rotation = 90)
disp_rs_u243_svc.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u243_svc.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u243_svc.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_u243_svc.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

plt.savefig('./Output/CM_LBP/cm_teste_u243_svc', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# `labels` ties each name in `target_names` to its class, so the report stays
# valid even if a class is absent from both the test labels and the predictions.
print(classification_report(y_test['Class'], y_pred_rs_u243_svc,
                            labels = rs_u243_svc.classes_, target_names = rs_u243_svc.classes_))

#### **Modelo C4**: Usando features LBP Uniforme (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search. 'C' is a fixed list of 20 seeded draws from a
# uniform distribution on [0.1, 10.1] (scipy's uniform spans [loc, loc + scale]).
param_grid = {
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))
}

# Randomized search over 20 sampled candidates, scored with every entry of
# `metrics` and refit with the candidate of best mean accuracy.
# random_state fixes which candidates are sampled, so a re-run repeats the search.
rs_uni_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                scoring = metrics, cv = cv, refit = 'accuracy',
                                verbose = 3, return_train_score = False, n_iter = 20,
                                random_state = 10)

rs_uni_svc.fit(X_train_u, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search (pickle and joblib copies); the context manager
# guarantees the pickle file is flushed and closed.
with open('./Output/Models_LBP/SVC/rs_uni_svc.pickle', "wb") as model_file:
    pickle.dump(rs_uni_svc, model_file)
joblib.dump(rs_uni_svc, './Output/Models_LBP/SVC/rs_uni_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_uni_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_uni_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_uni_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# One row per sampled candidate, best accuracy rank first; only the 'params'
# column and the test-score columns are kept.
res1_rs_uni_svc = (
    pd.DataFrame(rs_uni_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-fold accuracy and F1 of the top-ranked candidate
accf1_rs_uni_svc = res1_rs_uni_svc.iloc[0].filter(regex = r'(accuracy|f1)').filter(like = 'split')
_acc_folds = accf1_rs_uni_svc.filter(like = 'accuracy')
_f1_folds = accf1_rs_uni_svc.filter(like = 'f1')
min_acc_uni_svc, max_acc_uni_svc = _acc_folds.min(), _acc_folds.max()
min_f1_uni_svc, max_f1_uni_svc = _f1_folds.min(), _f1_folds.max()

print(f'Acurácia min: {min_acc_uni_svc:.6f}',
      f'Acurácia max: {max_acc_uni_svc:.6f}',
      f'F1 min: {min_f1_uni_svc:.6f}',
      f'F1 max: {max_f1_uni_svc:.6f}',
      sep = '\n')

# Mean/std summary and parameters of the top-ranked candidate
res2_rs_uni_svc = res1_rs_uni_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_uni_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrices - cross-validation
# ================================================================================
cm_cv(
    search = rs_uni_svc,
    X = X_train_u,
    Y = y_train['Class'],
    Z = y_train['Sample'],
    name = 'rs_uni_svc',
)

In [None]:
# ================================================================================
# Performance on the test set (n = 566)
# ================================================================================
y_pred_rs_uni_svc = rs_uni_svc.predict(X_test_u)

# Accuracy and weighted F1 of the predictions against the true test labels
accuracy_rs_uni_svc = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_uni_svc)
f1_rs_uni_svc = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_uni_svc, average = 'weighted')

print(f"Best SVC Accuracy: {accuracy_rs_uni_svc:.6f}",
      f"Best SVC F1: {f1_rs_uni_svc:.6f}",
      sep = '\n')

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

# Fix the row/column order to the classes known to the fitted search, so the
# matrix always has one row/column per display label even if a class is absent
# from both the test labels and the predictions.
cm_rs_uni_svc = confusion_matrix(y_test['Class'], y_pred_rs_uni_svc, labels = rs_uni_svc.classes_)
disp_rs_uni_svc = ConfusionMatrixDisplay(confusion_matrix = cm_rs_uni_svc, display_labels = rs_uni_svc.classes_)
disp_rs_uni_svc.plot(cmap = 'Blues', xticks_rotation = 90)
disp_rs_uni_svc.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_uni_svc.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_uni_svc.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_uni_svc.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

plt.savefig('./Output/CM_LBP/cm_teste_uni_svc', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# `labels` ties each name in `target_names` to its class, so the report stays
# valid even if a class is absent from both the test labels and the predictions.
print(classification_report(y_test['Class'], y_pred_rs_uni_svc,
                            labels = rs_uni_svc.classes_, target_names = rs_uni_svc.classes_))

#### **Modelo C5**: Usando features LBP Uniforme Não Invariante à Rotação - NRI (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search. 'C' is a fixed list of 20 seeded draws from a
# uniform distribution on [0.1, 10.1] (scipy's uniform spans [loc, loc + scale]).
param_grid = {
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))
}

# Randomized search over 20 sampled candidates, scored with every entry of
# `metrics` and refit with the candidate of best mean accuracy.
# random_state fixes which candidates are sampled, so a re-run repeats the search.
rs_nri81_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                  scoring = metrics, cv = cv, refit = 'accuracy',
                                  verbose = 3, return_train_score = False, n_iter = 20,
                                  random_state = 10)

rs_nri81_svc.fit(X_train_nri81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search (pickle and joblib copies); the context manager
# guarantees the pickle file is flushed and closed.
with open('./Output/Models_LBP/SVC/rs_nri81_svc.pickle', "wb") as model_file:
    pickle.dump(rs_nri81_svc, model_file)
joblib.dump(rs_nri81_svc, './Output/Models_LBP/SVC/rs_nri81_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_nri81_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri81_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri81_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# One row per sampled candidate, best accuracy rank first; only the 'params'
# column and the test-score columns are kept.
res1_rs_nri81_svc = (
    pd.DataFrame(rs_nri81_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-fold accuracy and F1 of the top-ranked candidate
accf1_rs_nri81_svc = res1_rs_nri81_svc.iloc[0].filter(regex = r'(accuracy|f1)').filter(like = 'split')
_acc_folds = accf1_rs_nri81_svc.filter(like = 'accuracy')
_f1_folds = accf1_rs_nri81_svc.filter(like = 'f1')
min_acc_nri81_svc, max_acc_nri81_svc = _acc_folds.min(), _acc_folds.max()
min_f1_nri81_svc, max_f1_nri81_svc = _f1_folds.min(), _f1_folds.max()

print(f'Acurácia min: {min_acc_nri81_svc:.6f}',
      f'Acurácia max: {max_acc_nri81_svc:.6f}',
      f'F1 min: {min_f1_nri81_svc:.6f}',
      f'F1 max: {max_f1_nri81_svc:.6f}',
      sep = '\n')

# Mean/std summary and parameters of the top-ranked candidate
res2_rs_nri81_svc = res1_rs_nri81_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_nri81_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_nri81_svc,
    X_train_nri81,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri81_svc',
)

In [None]:
# ================================================================================
# Test-set performance
# NOTE(review): this header originally said n = 561 while the other sections
# say n = 566 - confirm the test-set size.
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_nri81_svc = rs_nri81_svc.predict(X_test_nri81)
accuracy_rs_nri81_svc = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri81_svc,
)
f1_rs_nri81_svc = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri81_svc,
    average = 'weighted',
)
print(f"Best SVC Accuracy: {accuracy_rs_nri81_svc:.6f}")
print(f"Best SVC F1: {f1_rs_nri81_svc:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set
# NOTE(review): this header originally said n = 561 while the other sections
# say n = 566 - confirm the test-set size.
# ================================================================================

cm_rs_nri81_svc = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri81_svc)
disp_rs_nri81_svc = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri81_svc,
    display_labels = rs_nri81_svc.classes_,
)
disp_rs_nri81_svc.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_nri81_svc = disp_rs_nri81_svc.ax_
ax_rs_nri81_svc.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri81_svc.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri81_svc.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri81_svc.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

# NOTE(review): later SVC figures are saved under ./Output/CM_LBP/SVC/ - confirm
# which directory is intended.
plt.savefig(f'./Output/CM_LBP/cm_teste_nri81_svc', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_nri81_svc,
        target_names = rs_nri81_svc.classes_,
    )
)

#### **Modelo C6**: Usando features LBP Uniforme Não Invariante (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.1, 10.1);
    # the search picks among these 20 fixed candidates.
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))               # plus 1e-9 ... 1e3
}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_nri162_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                   scoring = metrics, cv = cv, refit = 'accuracy',
                                   verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_nri162_svc.fit(X_train_nri162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models_LBP/SVC/rs_nri162_svc.pickle', 'wb') as pickle_file:
    pickle.dump(rs_nri162_svc, pickle_file)
joblib.dump(rs_nri162_svc, './Output/Models_LBP/SVC/rs_nri162_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_nri162_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri162_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri162_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models_LBP/SVC/rs_nri162_svc.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_nri162_svc = (
    pd.DataFrame(rs_nri162_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_nri162_svc = (
    res1_rs_nri162_svc.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_nri162_svc = accf1_rs_nri162_svc.filter(regex = 'accuracy')
f1_splits_nri162_svc = accf1_rs_nri162_svc.filter(regex = 'f1')

min_acc_nri162_svc = acc_splits_nri162_svc.min()
max_acc_nri162_svc = acc_splits_nri162_svc.max()
min_f1_nri162_svc = f1_splits_nri162_svc.min()
max_f1_nri162_svc = f1_splits_nri162_svc.max()

print(f'Acurácia min: {min_acc_nri162_svc:.6f}')
print(f'Acurácia max: {max_acc_nri162_svc:.6f}')
print(f'F1 min: {min_f1_nri162_svc:.6f}')
print(f'F1 max: {max_f1_nri162_svc:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_nri162_svc = res1_rs_nri162_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_nri162_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_nri162_svc,
    X_train_nri162,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri162_svc',
)

In [None]:
# ================================================================================
# Test-set performance (n = 566)
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_nri162_svc = rs_nri162_svc.predict(X_test_nri162)
accuracy_rs_nri162_svc = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri162_svc,
)
f1_rs_nri162_svc = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri162_svc,
    average = 'weighted',
)
print(f"Best SVC Accuracy: {accuracy_rs_nri162_svc:.6f}")
print(f"Best SVC F1: {f1_rs_nri162_svc:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

cm_rs_nri162_svc = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri162_svc)
disp_rs_nri162_svc = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri162_svc,
    display_labels = rs_nri162_svc.classes_,
)
disp_rs_nri162_svc.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_nri162_svc = disp_rs_nri162_svc.ax_
ax_rs_nri162_svc.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri162_svc.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri162_svc.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri162_svc.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

# NOTE(review): later SVC figures are saved under ./Output/CM_LBP/SVC/ - confirm
# which directory is intended.
plt.savefig(f'./Output/CM_LBP/cm_teste_nri162_svc', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_nri162_svc,
        target_names = rs_nri162_svc.classes_,
    )
)

#### **Modelo C7**: Usando features LBP Uniforme Não Invariante (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.1, 10.1);
    # the search picks among these 20 fixed candidates.
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))               # plus 1e-9 ... 1e3
}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_nri243_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                   scoring = metrics, cv = cv, refit = 'accuracy',
                                   verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_nri243_svc.fit(X_train_nri243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models_LBP/SVC/rs_nri243_svc.pickle', 'wb') as pickle_file:
    pickle.dump(rs_nri243_svc, pickle_file)
joblib.dump(rs_nri243_svc, './Output/Models_LBP/SVC/rs_nri243_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_nri243_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri243_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri243_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models_LBP/SVC/rs_nri243_svc.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_nri243_svc = (
    pd.DataFrame(rs_nri243_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_nri243_svc = (
    res1_rs_nri243_svc.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_nri243_svc = accf1_rs_nri243_svc.filter(regex = 'accuracy')
f1_splits_nri243_svc = accf1_rs_nri243_svc.filter(regex = 'f1')

min_acc_nri243_svc = acc_splits_nri243_svc.min()
max_acc_nri243_svc = acc_splits_nri243_svc.max()
min_f1_nri243_svc = f1_splits_nri243_svc.min()
max_f1_nri243_svc = f1_splits_nri243_svc.max()

print(f'Acurácia min: {min_acc_nri243_svc:.6f}')
print(f'Acurácia max: {max_acc_nri243_svc:.6f}')
print(f'F1 min: {min_f1_nri243_svc:.6f}')
print(f'F1 max: {max_f1_nri243_svc:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_nri243_svc = res1_rs_nri243_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_nri243_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_nri243_svc,
    X_train_nri243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri243_svc',
)

In [None]:
# ================================================================================
# Test-set performance (n = 566)
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_nri243_svc = rs_nri243_svc.predict(X_test_nri243)
accuracy_rs_nri243_svc = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri243_svc,
)
f1_rs_nri243_svc = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri243_svc,
    average = 'weighted',
)
print(f"Best SVC Accuracy: {accuracy_rs_nri243_svc:.6f}")
print(f"Best SVC F1: {f1_rs_nri243_svc:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

cm_rs_nri243_svc = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri243_svc)
disp_rs_nri243_svc = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri243_svc,
    display_labels = rs_nri243_svc.classes_,
)
disp_rs_nri243_svc.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_nri243_svc = disp_rs_nri243_svc.ax_
ax_rs_nri243_svc.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri243_svc.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri243_svc.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri243_svc.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

# NOTE(review): later SVC figures are saved under ./Output/CM_LBP/SVC/ - confirm
# which directory is intended.
plt.savefig(f'./Output/CM_LBP/cm_teste_nri243_svc', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_nri243_svc,
        target_names = rs_nri243_svc.classes_,
    )
)

#### **Modelo C8**: Usando features LBP Uniforme Não Invariante (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.1, 10.1);
    # the search picks among these 20 fixed candidates.
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))               # plus 1e-9 ... 1e3
}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_nri_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                scoring = metrics, cv = cv, refit = 'accuracy',
                                verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_nri_svc.fit(X_train_nri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models_LBP/SVC/rs_nri_svc.pickle', 'wb') as pickle_file:
    pickle.dump(rs_nri_svc, pickle_file)
joblib.dump(rs_nri_svc, './Output/Models_LBP/SVC/rs_nri_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_nri_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models_LBP/SVC/rs_nri_svc.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_nri_svc = (
    pd.DataFrame(rs_nri_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_nri_svc = (
    res1_rs_nri_svc.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_nri_svc = accf1_rs_nri_svc.filter(regex = 'accuracy')
f1_splits_nri_svc = accf1_rs_nri_svc.filter(regex = 'f1')

min_acc_nri_svc = acc_splits_nri_svc.min()
max_acc_nri_svc = acc_splits_nri_svc.max()
min_f1_nri_svc = f1_splits_nri_svc.min()
max_f1_nri_svc = f1_splits_nri_svc.max()

print(f'Acurácia min: {min_acc_nri_svc:.6f}')
print(f'Acurácia max: {max_acc_nri_svc:.6f}')
print(f'F1 min: {min_f1_nri_svc:.6f}')
print(f'F1 max: {max_f1_nri_svc:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_nri_svc = res1_rs_nri_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_nri_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_nri_svc,
    X_train_nri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri_svc',
)

In [None]:
# ================================================================================
# Test-set performance (n = 566)
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_nri_svc = rs_nri_svc.predict(X_test_nri)
accuracy_rs_nri_svc = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri_svc,
)
f1_rs_nri_svc = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri_svc,
    average = 'weighted',
)
print(f"Best SVC Accuracy: {accuracy_rs_nri_svc:.6f}")
print(f"Best SVC F1: {f1_rs_nri_svc:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

cm_rs_nri_svc = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri_svc)
disp_rs_nri_svc = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri_svc,
    display_labels = rs_nri_svc.classes_,
)
disp_rs_nri_svc.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_nri_svc = disp_rs_nri_svc.ax_
ax_rs_nri_svc.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri_svc.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri_svc.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri_svc.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

# NOTE(review): earlier SVC figures are saved directly under ./Output/CM_LBP/ -
# confirm which directory is intended.
plt.savefig(f'./Output/CM_LBP/SVC/cm_teste_nri_svc', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_nri_svc,
        target_names = rs_nri_svc.classes_,
    )
)

#### **Modelo C9**: Usando features LBP (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.1, 10.1);
    # the search picks among these 20 fixed candidates.
    'C': uniform(loc = 0.1, scale = 10).rvs(size = 20, random_state = 10),
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'degree': [2, 3, 4],
    'gamma': ['scale', 'auto'] + list(np.logspace(-9, 3, 13))               # plus 1e-9 ... 1e3
}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_unri_svc = RandomizedSearchCV(estimator = model_svc, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_unri_svc.fit(X_train_unri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models_LBP/SVC/rs_unri_svc.pickle', 'wb') as pickle_file:
    pickle.dump(rs_unri_svc, pickle_file)
joblib.dump(rs_unri_svc, './Output/Models_LBP/SVC/rs_unri_svc.joblib')

print(f'Os melhores parâmetros foram: {rs_unri_svc.best_params_}')
print(f'A melhor acurácia foi de: {rs_unri_svc.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_unri_svc.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models_LBP/SVC/rs_unri_svc.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_unri_svc = (
    pd.DataFrame(rs_unri_svc.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_unri_svc = (
    res1_rs_unri_svc.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_unri_svc = accf1_rs_unri_svc.filter(regex = 'accuracy')
f1_splits_unri_svc = accf1_rs_unri_svc.filter(regex = 'f1')

min_acc_unri_svc = acc_splits_unri_svc.min()
max_acc_unri_svc = acc_splits_unri_svc.max()
min_f1_unri_svc = f1_splits_unri_svc.min()
max_f1_unri_svc = f1_splits_unri_svc.max()

print(f'Acurácia min: {min_acc_unri_svc:.6f}')
print(f'Acurácia max: {max_acc_unri_svc:.6f}')
print(f'F1 min: {min_f1_unri_svc:.6f}')
print(f'F1 max: {max_f1_unri_svc:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_unri_svc = res1_rs_unri_svc.filter(regex = r'(mean|std|params)')
display(res2_rs_unri_svc.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_unri_svc,
    X_train_unri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_unri_svc',
)

In [None]:
# ================================================================================
# Test-set performance (n = 566)
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_unri_svc = rs_unri_svc.predict(X_test_unri)
accuracy_rs_unri_svc = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_unri_svc,
)
f1_rs_unri_svc = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_unri_svc,
    average = 'weighted',
)
print(f"Best SVC Accuracy: {accuracy_rs_unri_svc:.6f}")
print(f"Best SVC F1: {f1_rs_unri_svc:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================

cm_rs_unri_svc = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_unri_svc)
disp_rs_unri_svc = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_unri_svc,
    display_labels = rs_unri_svc.classes_,
)
disp_rs_unri_svc.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_unri_svc = disp_rs_unri_svc.ax_
ax_rs_unri_svc.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_unri_svc.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_unri_svc.xaxis.set_tick_params(labelsize = 10)
ax_rs_unri_svc.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

# NOTE(review): this figure uses dpi = 800 while the sibling cells use 600 -
# confirm the difference is intentional.
plt.savefig(f'./Output/CM_LBP/SVC/cm_teste_unri_svc', dpi = 800, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_unri_svc,
        target_names = rs_unri_svc.classes_,
    )
)

### **2.4.4 - Artificial Neural Network - ANN**

In [None]:
# ================================================================================
# ANN estimator definition
# ================================================================================
# Tip: MLPClassifier().get_params() lists every tunable parameter.

# Base multilayer perceptron shared by the ANN searches: fixed seed and a
# 2000-iteration cap.
model_ann = MLPClassifier(
    max_iter = 2000,
    random_state = 10,
)

#### **Modelo C1**: Usando features LBP Uniforme (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # One hidden layer, 50 to 500 units in steps of 50.
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # not searched: 'tanh', 'logistic'
    'solver': ['adam', 'lbfgs'],           # not searched: 'sgd'
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.0001, 0.0901).
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    # NOTE(review): scikit-learn documents learning_rate as only used when
    # solver='sgd', which is not searched above - confirm this entry is intended.
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_u81_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                scoring = metrics, cv = cv, refit = 'accuracy',
                                verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_u81_ann.fit(X_train_u81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models/rs_u81_ann.pickle', 'wb') as pickle_file:
    pickle.dump(rs_u81_ann, pickle_file)
# Also written as joblib, matching what the other model sections save.
joblib.dump(rs_u81_ann, './Output/Models/rs_u81_ann.joblib')

print(f'Os melhores parâmetros foram: {rs_u81_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_u81_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u81_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models/rs_u81_ann.pickle instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_u81_ann = (
    pd.DataFrame(rs_u81_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_u81_ann = (
    res1_rs_u81_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_u81_ann = accf1_rs_u81_ann.filter(regex = 'accuracy')
f1_splits_rs_u81_ann = accf1_rs_u81_ann.filter(regex = 'f1')

min_acc_rs_u81_ann = acc_splits_rs_u81_ann.min()
max_acc_rs_u81_ann = acc_splits_rs_u81_ann.max()
min_f1_rs_u81_ann = f1_splits_rs_u81_ann.min()
max_f1_rs_u81_ann = f1_splits_rs_u81_ann.max()

print(f'Acurácia min: {min_acc_rs_u81_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_u81_ann:.6f}')
print(f'F1 min: {min_f1_rs_u81_ann:.6f}')
print(f'F1 max: {max_f1_rs_u81_ann:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_u81_ann = res1_rs_u81_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_u81_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_u81_ann,
    X_train_u81,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u81_ann',
)

In [None]:
# ================================================================================
# Test-set performance
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_u81_ann = rs_u81_ann.predict(X_test_u81)
acc_test_rs_u81_ann = accuracy_score(y_test["Class"], y_pred_rs_u81_ann)
f1_rs_u81_ann = f1_score(y_test["Class"], y_pred_rs_u81_ann, average = 'weighted')
# Accuracy is reported with six decimals, like F1 here and like the SVC sections.
print(f"Best ANN Accuracy: {acc_test_rs_u81_ann:.6f}")
print(f"Best ANN F1: {f1_rs_u81_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_u81_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_u81_ann)
disp_rs_u81_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u81_ann,
    display_labels = rs_u81_ann.classes_,
)
disp_rs_u81_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_u81_ann = disp_rs_u81_ann.ax_
ax_rs_u81_ann.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u81_ann.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u81_ann.xaxis.set_tick_params(labelsize = 10)
ax_rs_u81_ann.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

plt.savefig(f'./Output/CM/cm_teste_u81_ann', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_u81_ann,
        target_names = rs_u81_ann.classes_,
    )
)

#### **Modelo C2**: Usando features LBP Uniforme (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # One hidden layer, 50 to 500 units in steps of 50.
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # not searched: 'tanh', 'logistic'
    'solver': ['adam', 'lbfgs'],           # not searched: 'sgd'
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.0001, 0.0901).
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    # NOTE(review): scikit-learn documents learning_rate as only used when
    # solver='sgd', which is not searched above - confirm this entry is intended.
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_u162_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_u162_ann.fit(X_train_u162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models/rs_u162_ann.pickle', 'wb') as pickle_file:
    pickle.dump(rs_u162_ann, pickle_file)
# Give joblib the path so it opens and closes the file itself.
joblib.dump(rs_u162_ann, './Output/Models/rs_u162_ann.joblib')

print(f'Os melhores parâmetros foram: {rs_u162_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_u162_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u162_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models/rs_u162_ann.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_u162_ann = (
    pd.DataFrame(rs_u162_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_u162_ann = (
    res1_rs_u162_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_u162_ann = accf1_rs_u162_ann.filter(regex = 'accuracy')
f1_splits_rs_u162_ann = accf1_rs_u162_ann.filter(regex = 'f1')

min_acc_rs_u162_ann = acc_splits_rs_u162_ann.min()
max_acc_rs_u162_ann = acc_splits_rs_u162_ann.max()
min_f1_rs_u162_ann = f1_splits_rs_u162_ann.min()
max_f1_rs_u162_ann = f1_splits_rs_u162_ann.max()

print(f'Acurácia min: {min_acc_rs_u162_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_u162_ann:.6f}')
print(f'F1 min: {min_f1_rs_u162_ann:.6f}')
print(f'F1 max: {max_f1_rs_u162_ann:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_u162_ann = res1_rs_u162_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_u162_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# `cm_cv` is a helper defined elsewhere in this notebook.
cm_cv(
    rs_u162_ann,
    X_train_u162,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u162_ann',
)

In [None]:
# ================================================================================
# Test-set performance
# ================================================================================
# Predict with the refitted best estimator, then score against the true classes.
y_pred_rs_u162_ann = rs_u162_ann.predict(X_test_u162)
acc_test_rs_u162_ann = accuracy_score(y_test["Class"], y_pred_rs_u162_ann)
f1_rs_u162_ann = f1_score(y_test["Class"], y_pred_rs_u162_ann, average = 'weighted')
# Accuracy is reported with six decimals, like F1 here and like the SVC sections.
print(f"Best ANN Accuracy: {acc_test_rs_u162_ann:.6f}")
print(f"Best ANN F1: {f1_rs_u162_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_u162_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_u162_ann)
disp_rs_u162_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u162_ann,
    display_labels = rs_u162_ann.classes_,
)
disp_rs_u162_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles, 10 pt tick labels, italic class names on both axes.
ax_rs_u162_ann = disp_rs_u162_ann.ax_
ax_rs_u162_ann.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u162_ann.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u162_ann.xaxis.set_tick_params(labelsize = 10)
ax_rs_u162_ann.yaxis.set_tick_params(labelsize = 10)
plt.xticks(style = 'italic')
plt.yticks(style = 'italic')

plt.savefig(f'./Output/CM/cm_teste_u162_ann', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1, labelled with the classes known to the fitted search.
print(
    classification_report(
        y_true = y_test['Class'],
        y_pred = y_pred_rs_u162_ann,
        target_names = rs_u162_ann.classes_,
    )
)

#### **Modelo C3**: Usando features LBP Uniforme (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values the randomized search samples from.
param_grid = {
    # One hidden layer, 50 to 500 units in steps of 50.
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # not searched: 'tanh', 'logistic'
    'solver': ['adam', 'lbfgs'],           # not searched: 'sgd'
    # 20 values pre-drawn (seed 10) from uniform(loc, loc + scale) = U(0.0001, 0.0901).
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    # NOTE(review): scikit-learn documents learning_rate as only used when
    # solver='sgd', which is not searched above - confirm this entry is intended.
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Create the RandomizedSearchCV object (20 sampled candidates, refit on 'accuracy').
# NOTE(review): no random_state is passed, so this cell does not pin which
# candidates get sampled - confirm whether a seed should be set.
rs_u243_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20)

# Samples sharing the same 'Sample' value are passed as CV groups.
rs_u243_ann.fit(X_train_u243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager guarantees the pickle file is closed.
with open('./Output/Models/rs_u243_ann.pickle', 'wb') as pickle_file:
    pickle.dump(rs_u243_ann, pickle_file)
# Give joblib the path so it opens and closes the file itself.
joblib.dump(rs_u243_ann, './Output/Models/rs_u243_ann.joblib')

print(f'Os melhores parâmetros foram: {rs_u243_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_u243_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u243_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# A previously saved search can be reloaded from
# ./Output/Models/rs_u243_ann.joblib instead of refitting.

# Keep the `params` and test-score columns, best accuracy rank first.
res1_rs_u243_ann = (
    pd.DataFrame(rs_u243_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate.
accf1_rs_u243_ann = (
    res1_rs_u243_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_u243_ann = accf1_rs_u243_ann.filter(regex = 'accuracy')
f1_splits_rs_u243_ann = accf1_rs_u243_ann.filter(regex = 'f1')

min_acc_rs_u243_ann = acc_splits_rs_u243_ann.min()
max_acc_rs_u243_ann = acc_splits_rs_u243_ann.max()
min_f1_rs_u243_ann = f1_splits_rs_u243_ann.min()
max_f1_rs_u243_ann = f1_splits_rs_u243_ann.max()

print(f'Acurácia min: {min_acc_rs_u243_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_u243_ann:.6f}')
print(f'F1 min: {min_f1_rs_u243_ann:.6f}')
print(f'F1 max: {max_f1_rs_u243_ann:.6f}')

# Mean/std scores and parameters of the best candidate.
res2_rs_u243_ann = res1_rs_u243_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_u243_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_u243_ann,
    X_train_u243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u243_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_u243_ann = rs_u243_ann.predict(X_test_u243)
f1_rs_u243_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_u243_ann,
                          average = 'weighted')
acc_test_rs_u243_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_u243_ann)
print(f"Best ANN Accuracy: {acc_test_rs_u243_ann:.2f}")
print(f"Best ANN F1: {f1_rs_u243_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_u243_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_u243_ann)
disp_rs_u243_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u243_ann,
    display_labels = rs_u243_ann.classes_,
)
disp_rs_u243_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_u243_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u243_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_u243_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM/cm_teste_u243_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_u243_ann,
        target_names = rs_u243_ann.classes_,
    )
)

#### **Modelo C4**: Usando features LBP Uniforme (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    'solver': ['adam', 'lbfgs'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_uni_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_uni_ann.fit(X_train_u, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context manager closes the file even if serialization fails
with open('./Output/Models/rs_uni_ann.pickle', "wb") as _fh:
    pickle.dump(rs_uni_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_uni_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_uni_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_uni_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_uni_ann = (
    pd.DataFrame(rs_uni_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_uni_ann = (
    res1_rs_uni_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_uni_ann = accf1_rs_uni_ann.filter(regex = 'f1').max()
min_f1_rs_uni_ann = accf1_rs_uni_ann.filter(regex = 'f1').min()
max_acc_rs_uni_ann = accf1_rs_uni_ann.filter(regex = 'accuracy').max()
min_acc_rs_uni_ann = accf1_rs_uni_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_uni_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_uni_ann:.6f}')
print(f'F1 min: {min_f1_rs_uni_ann:.6f}')
print(f'F1 max: {max_f1_rs_uni_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_uni_ann = res1_rs_uni_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_uni_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_uni_ann,
    X_train_u,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_uni_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_uni_ann = rs_uni_ann.predict(X_test_u)
f1_rs_uni_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_uni_ann,
                         average = 'weighted')
acc_test_rs_uni_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_uni_ann)
print(f"Best ANN Accuracy: {acc_test_rs_uni_ann:.2f}")
print(f"Best ANN F1: {f1_rs_uni_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_uni_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_uni_ann)
disp_rs_uni_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_uni_ann,
    display_labels = rs_uni_ann.classes_,
)
disp_rs_uni_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_uni_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_uni_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_uni_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM/cm_teste_uni_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_uni_ann,
        target_names = rs_uni_ann.classes_,
    )
)

#### **Modelo C5**: Usando features LBP Uniforme Não Invariante (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    'solver': ['adam', 'lbfgs'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_nri81_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_nri81_ann.fit(X_train_nri81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers close each file even if serialization fails
with open('./Output/Models/rs_nri81_ann.pickle', "wb") as _fh:
    pickle.dump(rs_nri81_ann, _fh)
with open('./Output/Models/rs_nri81_ann.joblib', "wb") as _fh:
    joblib.dump(rs_nri81_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_nri81_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri81_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri81_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_nri81_ann = (
    pd.DataFrame(rs_nri81_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_nri81_ann = (
    res1_rs_nri81_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_nri81_ann = accf1_rs_nri81_ann.filter(regex = 'f1').max()
min_f1_rs_nri81_ann = accf1_rs_nri81_ann.filter(regex = 'f1').min()
max_acc_rs_nri81_ann = accf1_rs_nri81_ann.filter(regex = 'accuracy').max()
min_acc_rs_nri81_ann = accf1_rs_nri81_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_nri81_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_nri81_ann:.6f}')
print(f'F1 min: {min_f1_rs_nri81_ann:.6f}')
print(f'F1 max: {max_f1_rs_nri81_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_nri81_ann = res1_rs_nri81_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_nri81_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_nri81_ann,
    X_train_nri81,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri81_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_nri81_ann = rs_nri81_ann.predict(X_test_nri81)
f1_rs_nri81_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri81_ann,
                           average = 'weighted')
acc_test_rs_nri81_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri81_ann)
print(f"Best ANN Accuracy: {acc_test_rs_nri81_ann:.2f}")
print(f"Best ANN F1: {f1_rs_nri81_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri81_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri81_ann)
disp_rs_nri81_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri81_ann,
    display_labels = rs_nri81_ann.classes_,
)
disp_rs_nri81_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_nri81_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri81_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_nri81_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM/cm_teste_nri81_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri81_ann,
        target_names = rs_nri81_ann.classes_,
    )
)

#### **Modelo C6**: Usando features LBP Uniforme Não Invariante (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    'solver': ['adam', 'lbfgs'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_nri162_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_nri162_ann.fit(X_train_nri162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers close each file even if serialization fails.
# The second dump goes through joblib.dump: the pickle module has no `joblib` attribute,
# so the former `pickle.joblib(...)` call raised AttributeError after the long fit.
with open('./Output/Models/rs_nri162_ann.pickle', "wb") as _fh:
    pickle.dump(rs_nri162_ann, _fh)
with open('./Output/Models/rs_nri162_ann.joblib', "wb") as _fh:
    joblib.dump(rs_nri162_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_nri162_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri162_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri162_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_nri162_ann = (
    pd.DataFrame(rs_nri162_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_nri162_ann = (
    res1_rs_nri162_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_nri162_ann = accf1_rs_nri162_ann.filter(regex = 'f1').max()
min_f1_rs_nri162_ann = accf1_rs_nri162_ann.filter(regex = 'f1').min()
max_acc_rs_nri162_ann = accf1_rs_nri162_ann.filter(regex = 'accuracy').max()
min_acc_rs_nri162_ann = accf1_rs_nri162_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_nri162_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_nri162_ann:.6f}')
print(f'F1 min: {min_f1_rs_nri162_ann:.6f}')
print(f'F1 max: {max_f1_rs_nri162_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_nri162_ann = res1_rs_nri162_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_nri162_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_nri162_ann,
    X_train_nri162,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri162_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_nri162_ann = rs_nri162_ann.predict(X_test_nri162)
f1_rs_nri162_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri162_ann,
                            average = 'weighted')
acc_test_rs_nri162_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri162_ann)
print(f"Best ANN Accuracy: {acc_test_rs_nri162_ann:.2f}")
print(f"Best ANN F1: {f1_rs_nri162_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri162_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri162_ann)
disp_rs_nri162_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri162_ann,
    display_labels = rs_nri162_ann.classes_,
)
disp_rs_nri162_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_nri162_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri162_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_nri162_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM/cm_teste_nri162_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri162_ann,
        target_names = rs_nri162_ann.classes_,
    )
)

#### **Modelo C7**: Usando features LBP Uniforme Não Invariante (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    'solver': ['adam', 'lbfgs'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_nri243_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_nri243_ann.fit(X_train_nri243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers close each file even if serialization fails
with open('./Output/Models_LBP/rs_nri243_ann.pickle', "wb") as _fh:
    pickle.dump(rs_nri243_ann, _fh)
with open('./Output/Models_LBP/rs_nri243_ann.joblib', "wb") as _fh:
    joblib.dump(rs_nri243_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_nri243_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri243_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri243_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_nri243_ann = (
    pd.DataFrame(rs_nri243_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_nri243_ann = (
    res1_rs_nri243_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_nri243_ann = accf1_rs_nri243_ann.filter(regex = 'f1').max()
min_f1_rs_nri243_ann = accf1_rs_nri243_ann.filter(regex = 'f1').min()
max_acc_rs_nri243_ann = accf1_rs_nri243_ann.filter(regex = 'accuracy').max()
min_acc_rs_nri243_ann = accf1_rs_nri243_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_nri243_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_nri243_ann:.6f}')
print(f'F1 min: {min_f1_rs_nri243_ann:.6f}')
print(f'F1 max: {max_f1_rs_nri243_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_nri243_ann = res1_rs_nri243_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_nri243_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_nri243_ann,
    X_train_nri243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri243_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_nri243_ann = rs_nri243_ann.predict(X_test_nri243)
f1_rs_nri243_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri243_ann,
                            average = 'weighted')
acc_test_rs_nri243_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri243_ann)
print(f"Best ANN Accuracy: {acc_test_rs_nri243_ann:.2f}")
print(f"Best ANN F1: {f1_rs_nri243_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri243_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri243_ann)
disp_rs_nri243_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri243_ann,
    display_labels = rs_nri243_ann.classes_,
)
disp_rs_nri243_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_nri243_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri243_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_nri243_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM_LBP/cm_teste_nri243_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri243_ann,
        target_names = rs_nri243_ann.classes_,
    )
)

#### **Modelo C8**: Usando features LBP Uniforme Não Invariante (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    # NOTE(review): this is the only ANN search in this part of the notebook that also
    # tries 'sgd'; the sibling searches use ['adam', 'lbfgs'] - confirm this is intended.
    'solver': ['adam', 'lbfgs', 'sgd'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_nri_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_nri_ann.fit(X_train_nri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers close each file even if serialization fails.
# NOTE(review): the model is written under ./Output_patches/ while this model's test
# confusion matrix is saved under ./Output/CM_LBP/ - confirm the target directory.
with open('./Output_patches/Models_LBP/rs_nri_ann.pickle', "wb") as _fh:
    pickle.dump(rs_nri_ann, _fh)
with open('./Output_patches/Models_LBP/rs_nri_ann.joblib', "wb") as _fh:
    joblib.dump(rs_nri_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_nri_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_nri_ann = (
    pd.DataFrame(rs_nri_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_nri_ann = (
    res1_rs_nri_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_nri_ann = accf1_rs_nri_ann.filter(regex = 'f1').max()
min_f1_rs_nri_ann = accf1_rs_nri_ann.filter(regex = 'f1').min()
max_acc_rs_nri_ann = accf1_rs_nri_ann.filter(regex = 'accuracy').max()
min_acc_rs_nri_ann = accf1_rs_nri_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_nri_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_nri_ann:.6f}')
print(f'F1 min: {min_f1_rs_nri_ann:.6f}')
print(f'F1 max: {max_f1_rs_nri_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_nri_ann = res1_rs_nri_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_nri_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_nri_ann,
    X_train_nri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_nri_ann = rs_nri_ann.predict(X_test_nri)
f1_rs_nri_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri_ann,
                         average = 'weighted')
acc_test_rs_nri_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_nri_ann)
print(f"Best ANN Accuracy: {acc_test_rs_nri_ann:.2f}")
print(f"Best ANN F1: {f1_rs_nri_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_nri_ann)
disp_rs_nri_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri_ann,
    display_labels = rs_nri_ann.classes_,
)
disp_rs_nri_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_nri_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_nri_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM_LBP/cm_teste_nri_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri_ann,
        target_names = rs_nri_ann.classes_,
    )
)

#### **Modelo C9**: Usando features LBP (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values explored for each hyperparameter of model_ann
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (150,), (200,), (250,),
                          (300,), (350,), (400,), (450,),(500,)],
    'activation': ['relu', 'identity'],    # ['relu', 'tanh', 'logistic', 'identity']
    'solver': ['adam', 'lbfgs'],           # ['sgd', 'adam', 'lbfgs']
    # 20 alpha values drawn once, with a fixed seed, from U(0.0001, 0.0901)
    'alpha': uniform(loc = 0.0001, scale = 0.09).rvs(size = 20, random_state = 10),
    'learning_rate': ['constant','adaptive', 'invscaling']}

# Randomized search over 20 sampled candidates; the best one by accuracy is refitted.
# random_state fixes which candidates are sampled, so re-running repeats the same search.
rs_unri_ann = RandomizedSearchCV(estimator = model_ann, param_distributions = param_grid,
                                 scoring = metrics, cv = cv, refit = 'accuracy',
                                 verbose = 3, return_train_score = False, n_iter = 20,
                                 random_state = 10)

# The 'Sample' column is passed as the cross-validation groups
rs_unri_ann.fit(X_train_unri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers close each file even if serialization fails
with open('./Output/Models_LBP/rs_unri_ann.pickle', "wb") as _fh:
    pickle.dump(rs_unri_ann, _fh)
with open('./Output/Models_LBP/rs_unri_ann.joblib', "wb") as _fh:
    joblib.dump(rs_unri_ann, _fh)

print(f'Os melhores parâmetros foram: {rs_unri_ann.best_params_}')
print(f'A melhor acurácia foi de: {rs_unri_ann.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_unri_ann.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the parameter and test-score columns, best accuracy rank first
res1_rs_unri_ann = (
    pd.DataFrame(rs_unri_ann.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the top-ranked candidate
accf1_rs_unri_ann = (
    res1_rs_unri_ann.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Lowest and highest fold score for each metric
max_f1_rs_unri_ann = accf1_rs_unri_ann.filter(regex = 'f1').max()
min_f1_rs_unri_ann = accf1_rs_unri_ann.filter(regex = 'f1').min()
max_acc_rs_unri_ann = accf1_rs_unri_ann.filter(regex = 'accuracy').max()
min_acc_rs_unri_ann = accf1_rs_unri_ann.filter(regex = 'accuracy').min()

print(f'Acurácia min: {min_acc_rs_unri_ann:.6f}')
print(f'Acurácia max: {max_acc_rs_unri_ann:.6f}')
print(f'F1 min: {min_f1_rs_unri_ann:.6f}')
print(f'F1 max: {max_f1_rs_unri_ann:.6f}')

# Mean / std columns (plus params) of the top-ranked candidate
res2_rs_unri_ann = res1_rs_unri_ann.filter(regex = r'(mean|std|params)')
display(res2_rs_unri_ann.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in this notebook. It receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
cm_cv(
    rs_unri_ann,
    X_train_unri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_unri_ann',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test features with the fitted search, then score against the true classes
y_pred_rs_unri_ann = rs_unri_ann.predict(X_test_unri)
f1_rs_unri_ann = f1_score(y_true = y_test["Class"], y_pred = y_pred_rs_unri_ann,
                          average = 'weighted')
acc_test_rs_unri_ann = accuracy_score(y_true = y_test["Class"], y_pred = y_pred_rs_unri_ann)
print(f"Best ANN Accuracy: {acc_test_rs_unri_ann:.2f}")
print(f"Best ANN F1: {f1_rs_unri_ann:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_unri_ann = confusion_matrix(y_true = y_test['Class'], y_pred = y_pred_rs_unri_ann)
disp_rs_unri_ann = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_unri_ann,
    display_labels = rs_unri_ann.classes_,
)
disp_rs_unri_ann.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles ("Predito" = predicted, "Observado" = observed)
disp_rs_unri_ann.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_unri_ann.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
# Same tick-label size on both axes
disp_rs_unri_ann.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save the figure at 600 dpi with a tight bounding box, then show it
plt.savefig(f'./Output/CM_LBP/cm_teste_unri_ann', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Text report for the test predictions, labelled with the classes known to the search
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_unri_ann,
        target_names = rs_unri_ann.classes_,
    )
)

### **2.4.5 - Random Forest - RF**

In [None]:
# ================================================================================
# RF estimator definition
# ================================================================================
# Tip: RandomForestClassifier().get_params() lists every available hyperparameter.

# Base random forest with a fixed seed; all other settings stay at their defaults
model_rf = RandomForestClassifier(
    random_state = 10,
)

#### **Modelo C1**: Usando features LBP Uniforme (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_u81 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_u81.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_u81_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_u81_rf.fit(X_train_u81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_u81_rf.pickle', "wb") as model_file:
    pickle.dump(rs_u81_rf, model_file)
with open('./Output/Models_LBP/rs_u81_rf.joblib', "wb") as model_file:
    joblib.dump(rs_u81_rf, model_file)

print('Melhores hiperparâmetros: '+str(rs_u81_rf.best_params_))
print('Melhor Acurácia: '+str(rs_u81_rf.best_score_))
print('Melhor Estimador: '+str(rs_u81_rf.best_estimator_))

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_u81_rf = (
    pd.DataFrame(rs_u81_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_u81_rf = (
    res1_rs_u81_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_u81_rf, max_acc_rs_u81_rf = (
    accf1_rs_u81_rf.filter(regex = 'accuracy').min(),
    accf1_rs_u81_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_u81_rf, max_f1_rs_u81_rf = (
    accf1_rs_u81_rf.filter(regex = 'f1').min(),
    accf1_rs_u81_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_u81_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_u81_rf:.6f}')
print(f'F1 min: {min_f1_rs_u81_rf:.6f}')
print(f'F1 max: {max_f1_rs_u81_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_u81_rf = res1_rs_u81_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_u81_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook. The search was fitted on
# X_train_u81, so that same feature matrix is passed here (not the generic X_train),
# matching how the other RF models call the helper.
cm_cv(rs_u81_rf, X_train_u81, y_train['Class'], y_train['Sample'], name = 'rs_u81_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_u81_rf = rs_u81_rf.predict(X_test_u81)
acc_test_rs_u81_rf, f1_rs_u81_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_u81_rf),
    f1_score(y_test['Class'], y_pred_rs_u81_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_u81_rf:.2f}")
print(f"Best RF F1: {f1_rs_u81_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_u81_rf = confusion_matrix(y_test['Class'], y_pred_rs_u81_rf)
disp_rs_u81_rf = ConfusionMatrixDisplay(confusion_matrix = cm_rs_u81_rf, display_labels = rs_u81_rf.classes_)
disp_rs_u81_rf.plot(cmap = 'Blues', xticks_rotation = 90)
# Bold axis titles, 10 pt tick labels, italic tick text
disp_rs_u81_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u81_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u81_rf.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_u81_rf.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

# Path fixed from '.Output/...' to './Output/...' so the figure lands in the same
# output tree as the other RF confusion matrices.
plt.savefig('./Output/CM_LBP/RF/cm_rs_u81_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_u81_rf,
        target_names = rs_u81_rf.classes_,
    )
)

#### **Modelo C2**: Usando features LBP Uniforme (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_u162 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_u162.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_u162_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_u162_rf.fit(X_train_u162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_u162_rf.pickle', "wb") as model_file:
    pickle.dump(rs_u162_rf, model_file)
with open('./Output/Models_LBP/rs_u162_rf.joblib', "wb") as model_file:
    joblib.dump(rs_u162_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_u162_rf.best_params_}')
print(f'Melhor Acurácia: {rs_u162_rf.best_score_}')
print(f'Melhor Estimador: {rs_u162_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_u162_rf = (
    pd.DataFrame(rs_u162_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_u162_rf = (
    res1_rs_u162_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_u162_rf, max_acc_rs_u162_rf = (
    accf1_rs_u162_rf.filter(regex = 'accuracy').min(),
    accf1_rs_u162_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_u162_rf, max_f1_rs_u162_rf = (
    accf1_rs_u162_rf.filter(regex = 'f1').min(),
    accf1_rs_u162_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_u162_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_u162_rf:.6f}')
print(f'F1 min: {min_f1_rs_u162_rf:.6f}')
print(f'F1 max: {max_f1_rs_u162_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_u162_rf = res1_rs_u162_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_u162_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook; it receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
# NOTE(review): presumably builds the cross-validated confusion matrix and uses `name` for the output - confirm against the helper.
cm_cv(rs_u162_rf, X_train_u162, y_train['Class'], y_train['Sample'], name = 'rs_u162_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_u162_rf = rs_u162_rf.predict(X_test_u162)
acc_test_rs_u162_rf, f1_rs_u162_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_u162_rf),
    f1_score(y_test['Class'], y_pred_rs_u162_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_u162_rf:.2f}")
print(f"Best RF F1: {f1_rs_u162_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_u162_rf = confusion_matrix(y_test['Class'], y_pred_rs_u162_rf)
disp_rs_u162_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u162_rf,
    display_labels = rs_u162_rf.classes_,
)
disp_rs_u162_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles and 10 pt tick labels on both axes
disp_rs_u162_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u162_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u162_rf.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save before show() so the rendered figure is the one written to disk
plt.savefig(f'./Output/CM_LBP/RF/cm_rs_u162_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_u162_rf,
        target_names = rs_u162_rf.classes_,
    )
)

#### **Modelo C3**: Usando features LBP Uniforme (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_u243 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_u243.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_u243_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_u243_rf.fit(X_train_u243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_u243_rf.pickle', "wb") as model_file:
    pickle.dump(rs_u243_rf, model_file)
with open('./Output/Models_LBP/rs_u243_rf.joblib', "wb") as model_file:
    joblib.dump(rs_u243_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_u243_rf.best_params_}')
print(f'Melhor Acurácia: {rs_u243_rf.best_score_}')
print(f'Melhor Estimador: {rs_u243_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_u243_rf = (
    pd.DataFrame(rs_u243_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_u243_rf = (
    res1_rs_u243_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_u243_rf, max_acc_rs_u243_rf = (
    accf1_rs_u243_rf.filter(regex = 'accuracy').min(),
    accf1_rs_u243_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_u243_rf, max_f1_rs_u243_rf = (
    accf1_rs_u243_rf.filter(regex = 'f1').min(),
    accf1_rs_u243_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_u243_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_u243_rf:.6f}')
print(f'F1 min: {min_f1_rs_u243_rf:.6f}')
print(f'F1 max: {max_f1_rs_u243_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_u243_rf = res1_rs_u243_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_u243_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook; it receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
# NOTE(review): presumably builds the cross-validated confusion matrix and uses `name` for the output - confirm against the helper.
cm_cv(rs_u243_rf, X_train_u243, y_train['Class'], y_train['Sample'], name = 'rs_u243_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_u243_rf = rs_u243_rf.predict(X_test_u243)
acc_test_rs_u243_rf, f1_rs_u243_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_u243_rf),
    f1_score(y_test['Class'], y_pred_rs_u243_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_u243_rf:.2f}")
print(f"Best RF F1: {f1_rs_u243_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_u243_rf = confusion_matrix(y_test['Class'], y_pred_rs_u243_rf)
disp_rs_u243_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u243_rf,
    display_labels = rs_u243_rf.classes_,
)
disp_rs_u243_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles and 10 pt tick labels on both axes
disp_rs_u243_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u243_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_u243_rf.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save before show() so the rendered figure is the one written to disk
plt.savefig(f'./Output/CM_LBP/RF/cm_rs_u243_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_u243_rf,
        target_names = rs_u243_rf.classes_,
    )
)

#### **Modelo C4**: Usando features LBP Uniforme (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_u features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_u.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_uni_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_uni_rf.fit(X_train_u, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_uni_rf.pickle', "wb") as model_file:
    pickle.dump(rs_uni_rf, model_file)
with open('./Output/Models_LBP/rs_uni_rf.joblib', "wb") as model_file:
    joblib.dump(rs_uni_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_uni_rf.best_params_}')
print(f'Melhor Acurácia: {rs_uni_rf.best_score_}')
print(f'Melhor Estimador: {rs_uni_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_uni_rf = (
    pd.DataFrame(rs_uni_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_uni_rf = (
    res1_rs_uni_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_uni_rf, max_acc_rs_uni_rf = (
    accf1_rs_uni_rf.filter(regex = 'accuracy').min(),
    accf1_rs_uni_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_uni_rf, max_f1_rs_uni_rf = (
    accf1_rs_uni_rf.filter(regex = 'f1').min(),
    accf1_rs_uni_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_uni_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_uni_rf:.6f}')
print(f'F1 min: {min_f1_rs_uni_rf:.6f}')
print(f'F1 max: {max_f1_rs_uni_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_uni_rf = res1_rs_uni_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_uni_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook; it receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
# NOTE(review): presumably builds the cross-validated confusion matrix and uses `name` for the output - confirm against the helper.
cm_cv(rs_uni_rf, X_train_u, y_train['Class'], y_train['Sample'], name = 'rs_uni_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_uni_rf = rs_uni_rf.predict(X_test_u)
acc_test_rs_uni_rf, f1_rs_uni_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_uni_rf),
    f1_score(y_test['Class'], y_pred_rs_uni_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_uni_rf:.2f}")
print(f"Best RF F1: {f1_rs_uni_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_uni_rf = confusion_matrix(y_test['Class'], y_pred_rs_uni_rf)
disp_rs_uni_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_uni_rf,
    display_labels = rs_uni_rf.classes_,
)
disp_rs_uni_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles and 10 pt tick labels on both axes
disp_rs_uni_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_uni_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_uni_rf.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save before show() so the rendered figure is the one written to disk
plt.savefig(f'./Output/CM_LBP/RF/cm_rs_uni_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_uni_rf,
        target_names = rs_uni_rf.classes_,
    )
)

#### **Modelo C5**: Usando features LBP Uniforme Não Invariante (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_nri81 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_nri81.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_nri81_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_nri81_rf.fit(X_train_nri81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed.
# The joblib copy now goes to ./Output/Models_LBP/ with a .joblib extension like the
# other RF models (it previously pointed at ./nathy/Output/... with a .pickle suffix).
with open('./Output/Models_LBP/rs_nri81_rf.pickle', "wb") as model_file:
    pickle.dump(rs_nri81_rf, model_file)
with open('./Output/Models_LBP/rs_nri81_rf.joblib', "wb") as model_file:
    joblib.dump(rs_nri81_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_nri81_rf.best_params_}')
print(f'Melhor Acurácia: {rs_nri81_rf.best_score_}')
print(f'Melhor Estimador: {rs_nri81_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_nri81_rf = (
    pd.DataFrame(rs_nri81_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_nri81_rf = (
    res1_rs_nri81_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_nri81_rf, max_acc_rs_nri81_rf = (
    accf1_rs_nri81_rf.filter(regex = 'accuracy').min(),
    accf1_rs_nri81_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_nri81_rf, max_f1_rs_nri81_rf = (
    accf1_rs_nri81_rf.filter(regex = 'f1').min(),
    accf1_rs_nri81_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_nri81_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_nri81_rf:.6f}')
print(f'F1 min: {min_f1_rs_nri81_rf:.6f}')
print(f'F1 max: {max_f1_rs_nri81_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_nri81_rf = res1_rs_nri81_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_nri81_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook; it receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
# NOTE(review): presumably builds the cross-validated confusion matrix and uses `name` for the output - confirm against the helper.
cm_cv(rs_nri81_rf, X_train_nri81, y_train['Class'], y_train['Sample'], name = 'rs_nri81_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_nri81_rf = rs_nri81_rf.predict(X_test_nri81)
acc_test_rs_nri81_rf, f1_rs_nri81_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_nri81_rf),
    f1_score(y_test['Class'], y_pred_rs_nri81_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_nri81_rf:.2f}")
print(f"Best RF F1: {f1_rs_nri81_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_nri81_rf = confusion_matrix(y_test['Class'], y_pred_rs_nri81_rf)
disp_rs_nri81_rf = ConfusionMatrixDisplay(confusion_matrix = cm_rs_nri81_rf, display_labels = rs_nri81_rf.classes_)
disp_rs_nri81_rf.plot(cmap = 'Blues', xticks_rotation = 90)
# Bold axis titles, 10 pt tick labels, italic tick text
disp_rs_nri81_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri81_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri81_rf.ax_.xaxis.set_tick_params(labelsize=10)
disp_rs_nri81_rf.ax_.yaxis.set_tick_params(labelsize=10)
plt.yticks(style='italic')
plt.xticks(style='italic')

# Path fixed from '.Output/...' to './Output/...' so the figure lands in the same
# output tree as the other RF confusion matrices.
plt.savefig('./Output/CM_LBP/RF/cm_rs_nri81_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri81_rf,
        target_names = rs_nri81_rf.classes_,
    )
)

#### **Modelo C6**: Usando features LBP Uniforme Não Invariante (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_nri162 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_nri162.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_nri162_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_nri162_rf.fit(X_train_nri162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_nri162_rf.pickle', "wb") as model_file:
    pickle.dump(rs_nri162_rf, model_file)
with open('./Output/Models_LBP/rs_nri162_rf.joblib', "wb") as model_file:
    joblib.dump(rs_nri162_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_nri162_rf.best_params_}')
print(f'Melhor Acurácia: {rs_nri162_rf.best_score_}')
print(f'Melhor Estimador: {rs_nri162_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_nri162_rf = (
    pd.DataFrame(rs_nri162_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_nri162_rf = (
    res1_rs_nri162_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_nri162_rf, max_acc_rs_nri162_rf = (
    accf1_rs_nri162_rf.filter(regex = 'accuracy').min(),
    accf1_rs_nri162_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_nri162_rf, max_f1_rs_nri162_rf = (
    accf1_rs_nri162_rf.filter(regex = 'f1').min(),
    accf1_rs_nri162_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_nri162_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_nri162_rf:.6f}')
print(f'F1 min: {min_f1_rs_nri162_rf:.6f}')
print(f'F1 max: {max_f1_rs_nri162_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_nri162_rf = res1_rs_nri162_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_nri162_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined elsewhere in the notebook; it receives the fitted search,
# the training features, the class labels and the 'Sample' column (the CV groups used in fit).
# NOTE(review): presumably builds the cross-validated confusion matrix and uses `name` for the output - confirm against the helper.
cm_cv(rs_nri162_rf, X_train_nri162, y_train['Class'], y_train['Sample'], name = 'rs_nri162_rf')

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the test set with the tuned search object, then compute accuracy and
# weighted F1 against the true classes.
y_pred_rs_nri162_rf = rs_nri162_rf.predict(X_test_nri162)
acc_test_rs_nri162_rf, f1_rs_nri162_rf = (
    accuracy_score(y_test['Class'], y_pred_rs_nri162_rf),
    f1_score(y_test['Class'], y_pred_rs_nri162_rf, average = 'weighted'),
)
print(f"Best RF Accuracy: {acc_test_rs_nri162_rf:.2f}")
print(f"Best RF F1: {f1_rs_nri162_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# Build the test-set confusion matrix and plot it labelled with the search object's classes_
cm_rs_nri162_rf = confusion_matrix(y_test['Class'], y_pred_rs_nri162_rf)
disp_rs_nri162_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri162_rf,
    display_labels = rs_nri162_rf.classes_,
)
disp_rs_nri162_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Bold axis titles and 10 pt tick labels on both axes
disp_rs_nri162_rf.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri162_rf.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri162_rf.ax_.tick_params(axis = 'both', labelsize = 10)
# Italic tick labels on the current axes
plt.xticks(style='italic')
plt.yticks(style='italic')

# Save before show() so the rendered figure is the one written to disk
plt.savefig(f'./Output/CM_LBP/RF/cm_rs_nri162_rf', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class report on the test set, labelled with the search object's classes_
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri162_rf,
        target_names = rs_nri162_rf.classes_,
    )
)

#### **Modelo C7**: Usando features LBP Uniforme Não Invariante (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================
# Randomized search over Random Forest hyperparameters on the X_train_nri243 features,
# using the `metrics` scorers and `cv` splitter defined elsewhere in the notebook,
# with y_train['Sample'] as the CV groups.
# NOTE(review): no random_state is passed to RandomizedSearchCV, so the 50 sampled
# candidates differ between runs - consider fixing a seed.
# NOTE(review): confirm that the integer max_features candidates (30-55) do not
# exceed the number of columns in X_train_nri243.

param_grid = {'n_estimators': np.arange(40, 320, 20),                        # Number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # Maximum number of levels in each tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # Number of features considered at each split
              'criterion': ['gini','entropy'],                               # Split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # Minimum number of samples required at a leaf (terminal) node
              'min_samples_split': np.arange(2, 10, 2),                      # Minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # Whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object (the best-accuracy candidate is refitted)
rs_nri243_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

rs_nri243_rf.fit(X_train_nri243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search; the context managers make sure both files are flushed and closed
with open('./Output/Models_LBP/rs_nri243_rf.pickle', "wb") as model_file:
    pickle.dump(rs_nri243_rf, model_file)
with open('./Output/Models_LBP/rs_nri243_rf.joblib', "wb") as model_file:
    joblib.dump(rs_nri243_rf, model_file)

print(f'Melhores hiperparâmetros: {rs_nri243_rf.best_params_}')
print(f'Melhor Acurácia: {rs_nri243_rf.best_score_}')
print(f'Melhor Estimador: {rs_nri243_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep only the params/test columns of the search results and order the candidates
# by accuracy rank, so that row 0 is the best-ranked candidate.
res1_rs_nri243_rf = (
    pd.DataFrame(rs_nri243_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of the best-ranked candidate
accf1_rs_nri243_rf = (
    res1_rs_nri243_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
min_acc_rs_nri243_rf, max_acc_rs_nri243_rf = (
    accf1_rs_nri243_rf.filter(regex = 'accuracy').min(),
    accf1_rs_nri243_rf.filter(regex = 'accuracy').max(),
)
min_f1_rs_nri243_rf, max_f1_rs_nri243_rf = (
    accf1_rs_nri243_rf.filter(regex = 'f1').min(),
    accf1_rs_nri243_rf.filter(regex = 'f1').max(),
)

print(f'Acurácia min: {min_acc_rs_nri243_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_nri243_rf:.6f}')
print(f'F1 min: {min_f1_rs_nri243_rf:.6f}')
print(f'F1 max: {max_f1_rs_nri243_rf:.6f}')

# Mean/std columns and parameters of the best-ranked candidate
res2_rs_nri243_rf = res1_rs_nri243_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_nri243_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_nri243_rf,
    X_train_nri243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri243_rf',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_nri243_rf = rs_nri243_rf.predict(X_test_nri243)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_nri243_rf = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri243_rf,
)
f1_rs_nri243_rf = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri243_rf,
    average = 'weighted',
)

print(f"Best RF Accuracy: {acc_test_rs_nri243_rf:.2f}")
print(f"Best RF F1: {f1_rs_nri243_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_nri243_rf = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_nri243_rf,
)
disp_rs_nri243_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri243_rf,
    display_labels = rs_nri243_rf.classes_,
)
disp_rs_nri243_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_nri243_rf = disp_rs_nri243_rf.ax_
ax_rs_nri243_rf.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri243_rf.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri243_rf.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri243_rf.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM_LBP/RF/cm_rs_nri243_rf', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_nri243_rf = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_nri243_rf,
    target_names = rs_nri243_rf.classes_,
)
print(report_rs_nri243_rf)

#### **Modelo C8**: Usando features LBP Uniforme Não Invariante (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# Random forest (model_rf) fitted on X_train_nri.
# ================================================================================

# Candidate values the randomized search samples from.
# NOTE(review): every min_samples_leaf candidate is >= 10, so a node presumably needs
# >= 20 samples to be split anyway; the min_samples_split candidates (2-8) probably
# never bind - confirm.
param_grid = {'n_estimators': np.arange(40, 320, 20),                        # number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # maximum number of levels in a tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # number of features considered at each split
              'criterion': ['gini','entropy'],                               # split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # minimum number of samples required at a leaf node
              'min_samples_split': np.arange(2, 10, 2),                      # minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object: 50 sampled candidates, scored with `metrics`,
# refit on the candidate with the best 'accuracy'.
rs_nri_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_nri_rf.fit(X_train_nri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models_LBP/rs_nri_rf.pickle', "wb") as pickle_file_rs_nri_rf:
    pickle.dump(rs_nri_rf, pickle_file_rs_nri_rf)
with open('./Output/Models_LBP/rs_nri_rf.joblib', "wb") as joblib_file_rs_nri_rf:
    joblib.dump(rs_nri_rf, joblib_file_rs_nri_rf)

print(f'Melhores hiperparâmetros: {rs_nri_rf.best_params_}')
print(f'Melhor Acurácia: {rs_nri_rf.best_score_}')
print(f'Melhor Estimador: {rs_nri_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_nri_rf = (
    pd.DataFrame(rs_nri_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_nri_rf = (
    res1_rs_nri_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_nri_rf = accf1_rs_nri_rf.filter(regex = 'accuracy')
f1_splits_rs_nri_rf = accf1_rs_nri_rf.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_nri_rf = acc_splits_rs_nri_rf.min()
max_acc_rs_nri_rf = acc_splits_rs_nri_rf.max()
min_f1_rs_nri_rf = f1_splits_rs_nri_rf.min()
max_f1_rs_nri_rf = f1_splits_rs_nri_rf.max()

print(f'Acurácia min: {min_acc_rs_nri_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_nri_rf:.6f}')
print(f'F1 min: {min_f1_rs_nri_rf:.6f}')
print(f'F1 max: {max_f1_rs_nri_rf:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_nri_rf = res1_rs_nri_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_nri_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_nri_rf,
    X_train_nri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri_rf',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_nri_rf = rs_nri_rf.predict(X_test_nri)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_nri_rf = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri_rf,
)
f1_rs_nri_rf = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_nri_rf,
    average = 'weighted',
)

print(f"Best RF Accuracy: {acc_test_rs_nri_rf:.2f}")
print(f"Best RF F1: {f1_rs_nri_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_nri_rf = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_nri_rf,
)
disp_rs_nri_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri_rf,
    display_labels = rs_nri_rf.classes_,
)
disp_rs_nri_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_nri_rf = disp_rs_nri_rf.ax_
ax_rs_nri_rf.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri_rf.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_nri_rf.xaxis.set_tick_params(labelsize = 10)
ax_rs_nri_rf.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM_LBP/RF/cm_rs_nri_rf', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_nri_rf = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_nri_rf,
    target_names = rs_nri_rf.classes_,
)
print(report_rs_nri_rf)

#### **Modelo C9**: Usando features LBP (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# Random forest (model_rf) fitted on X_train_unri.
# ================================================================================

# Candidate values the randomized search samples from.
# NOTE(review): every min_samples_leaf candidate is >= 10, so a node presumably needs
# >= 20 samples to be split anyway; the min_samples_split candidates (2-8) probably
# never bind - confirm.
param_grid = {'n_estimators': np.arange(40, 320, 20),                        # number of trees (estimators) in the forest
              'max_depth': list(np.arange(10, 100, step=10)) + [None],       # maximum number of levels in a tree
              'max_features': list(np.arange(30, 60, 5)) + ['sqrt', "log2"], # number of features considered at each split
              'criterion': ['gini','entropy'],                               # split-quality criterion
              'min_samples_leaf': np.arange(10, 110, 10),                    # minimum number of samples required at a leaf node
              'min_samples_split': np.arange(2, 10, 2),                      # minimum number of samples required to split a node
              'bootstrap': [True, False]                                     # whether each tree is trained on a bootstrap sample
              }

# Create the RandomizedSearchCV object: 50 sampled candidates, scored with `metrics`,
# refit on the candidate with the best 'accuracy'.
rs_unri_rf = RandomizedSearchCV(estimator = model_rf, param_distributions = param_grid,
                           scoring = metrics, cv = cv, refit = 'accuracy',
                           verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_unri_rf.fit(X_train_unri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models_LBP/rs_unri_rf.pickle', "wb") as pickle_file_rs_unri_rf:
    pickle.dump(rs_unri_rf, pickle_file_rs_unri_rf)
with open('./Output/Models_LBP/rs_unri_rf.joblib', "wb") as joblib_file_rs_unri_rf:
    joblib.dump(rs_unri_rf, joblib_file_rs_unri_rf)

print(f'Melhores hiperparâmetros: {rs_unri_rf.best_params_}')
print(f'Melhor Acurácia: {rs_unri_rf.best_score_}')
print(f'Melhor Estimador: {rs_unri_rf.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_unri_rf = (
    pd.DataFrame(rs_unri_rf.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_unri_rf = (
    res1_rs_unri_rf.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_unri_rf = accf1_rs_unri_rf.filter(regex = 'accuracy')
f1_splits_rs_unri_rf = accf1_rs_unri_rf.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_unri_rf = acc_splits_rs_unri_rf.min()
max_acc_rs_unri_rf = acc_splits_rs_unri_rf.max()
min_f1_rs_unri_rf = f1_splits_rs_unri_rf.min()
max_f1_rs_unri_rf = f1_splits_rs_unri_rf.max()

print(f'Acurácia min: {min_acc_rs_unri_rf:.6f}')
print(f'Acurácia max: {max_acc_rs_unri_rf:.6f}')
print(f'F1 min: {min_f1_rs_unri_rf:.6f}')
print(f'F1 max: {max_f1_rs_unri_rf:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_unri_rf = res1_rs_unri_rf.filter(regex = r'(mean|std|params)')
display(res2_rs_unri_rf.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_unri_rf,
    X_train_unri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_unri_rf',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_unri_rf = rs_unri_rf.predict(X_test_unri)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_unri_rf = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_unri_rf,
)
f1_rs_unri_rf = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_unri_rf,
    average = 'weighted',
)

print(f"Best RF Accuracy: {acc_test_rs_unri_rf:.2f}")
print(f"Best RF F1: {f1_rs_unri_rf:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_unri_rf = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_unri_rf,
)
disp_rs_unri_rf = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_unri_rf,
    display_labels = rs_unri_rf.classes_,
)
disp_rs_unri_rf.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_unri_rf = disp_rs_unri_rf.ax_
ax_rs_unri_rf.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_unri_rf.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_unri_rf.xaxis.set_tick_params(labelsize = 10)
ax_rs_unri_rf.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM_LBP/RF/cm_rs_unri_rf', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_unri_rf = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_unri_rf,
    target_names = rs_unri_rf.classes_,
)
print(report_rs_unri_rf)

### **2.4.6 - Linear Discriminant Analysis - LDA**

In [None]:
# ================================================================================
# LDA estimator definition
# ================================================================================
# Tip: LinearDiscriminantAnalysis().get_params() lists the available parameters.

# Base model shared by the LDA searches below; `solver` is left to those searches.
# NOTE(review): per the scikit-learn docs, shrinkage is only supported by the
# 'lsqr' and 'eigen' solvers - confirm this model is never fitted with 'svd'.
model_lda = LinearDiscriminantAnalysis(
    shrinkage = 'auto',
    n_components = None,
)

#### **Modelo C1**: Usando features LBP Uniforme (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# LDA (model_lda) fitted on X_train_u81.
# ================================================================================

# Candidate values for the search.
# NOTE(review): the grid holds only 2 x 3 = 6 combinations, so n_iter = 50 cannot be
# reached; scikit-learn is expected to cap the search at 6 candidates (with a warning).
# NOTE(review): per the scikit-learn docs `tol` is only used by the 'svd' solver, so
# it likely has no effect with 'lsqr' / 'eigen' - confirm.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Create the RandomizedSearchCV object, scored with `metrics` and refit on the
# candidate with the best 'accuracy'.
rs_u81_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_u81_lda.fit(X_train_u81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models/rs_u81_lda.pickle', "wb") as pickle_file_rs_u81_lda:
    pickle.dump(rs_u81_lda, pickle_file_rs_u81_lda)
with open('./Output/Models/rs_u81_lda.joblib', "wb") as joblib_file_rs_u81_lda:
    joblib.dump(rs_u81_lda, joblib_file_rs_u81_lda)

print(f'Os melhores parâmetros foram: {rs_u81_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_u81_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u81_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_u81_lda = (
    pd.DataFrame(rs_u81_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_u81_lda = (
    res1_rs_u81_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_u81_lda = accf1_rs_u81_lda.filter(regex = 'accuracy')
f1_splits_rs_u81_lda = accf1_rs_u81_lda.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_u81_lda = acc_splits_rs_u81_lda.min()
max_acc_rs_u81_lda = acc_splits_rs_u81_lda.max()
min_f1_rs_u81_lda = f1_splits_rs_u81_lda.min()
max_f1_rs_u81_lda = f1_splits_rs_u81_lda.max()

print(f'Acurácia min: {min_acc_rs_u81_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_u81_lda:.6f}')
print(f'F1 min: {min_f1_rs_u81_lda:.6f}')
print(f'F1 max: {max_f1_rs_u81_lda:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_u81_lda = res1_rs_u81_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_u81_lda.iloc[0])


In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_u81_lda,
    X_train_u81,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u81_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_u81_lda = rs_u81_lda.predict(X_test_u81)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_u81_lda = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u81_lda,
)
f1_rs_u81_lda = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u81_lda,
    average = 'weighted',
)

print(f"Best LDA Accuracy: {acc_test_rs_u81_lda:.2f}")
print(f"Best LDA F1: {f1_rs_u81_lda:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_u81_lda = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u81_lda,
)
disp_rs_u81_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u81_lda,
    display_labels = rs_u81_lda.classes_,
)
disp_rs_u81_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_u81_lda = disp_rs_u81_lda.ax_
ax_rs_u81_lda.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u81_lda.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u81_lda.xaxis.set_tick_params(labelsize = 10)
ax_rs_u81_lda.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM/cm_teste_u81_lda', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_u81_lda = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u81_lda,
    target_names = rs_u81_lda.classes_,
)
print(report_rs_u81_lda)

#### **Modelo C2**: Usando features LBP Uniforme (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# LDA (model_lda) fitted on X_train_u162.
# ================================================================================

# Candidate values for the search.
# NOTE(review): the grid holds only 2 x 3 = 6 combinations, so n_iter = 50 cannot be
# reached; scikit-learn is expected to cap the search at 6 candidates (with a warning).
# NOTE(review): per the scikit-learn docs `tol` is only used by the 'svd' solver, so
# it likely has no effect with 'lsqr' / 'eigen' - confirm.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Create the RandomizedSearchCV object, scored with `metrics` and refit on the
# candidate with the best 'accuracy'.
rs_u162_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_u162_lda.fit(X_train_u162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models/rs_u162_lda.pickle', "wb") as pickle_file_rs_u162_lda:
    pickle.dump(rs_u162_lda, pickle_file_rs_u162_lda)
with open('./Output/Models/rs_u162_lda.joblib', "wb") as joblib_file_rs_u162_lda:
    joblib.dump(rs_u162_lda, joblib_file_rs_u162_lda)

print(f'Os melhores parâmetros foram: {rs_u162_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_u162_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u162_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_u162_lda = (
    pd.DataFrame(rs_u162_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_u162_lda = (
    res1_rs_u162_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_u162_lda = accf1_rs_u162_lda.filter(regex = 'accuracy')
f1_splits_rs_u162_lda = accf1_rs_u162_lda.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_u162_lda = acc_splits_rs_u162_lda.min()
max_acc_rs_u162_lda = acc_splits_rs_u162_lda.max()
min_f1_rs_u162_lda = f1_splits_rs_u162_lda.min()
max_f1_rs_u162_lda = f1_splits_rs_u162_lda.max()

print(f'Acurácia min: {min_acc_rs_u162_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_u162_lda:.6f}')
print(f'F1 min: {min_f1_rs_u162_lda:.6f}')
print(f'F1 max: {max_f1_rs_u162_lda:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_u162_lda = res1_rs_u162_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_u162_lda.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_u162_lda,
    X_train_u162,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u162_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_u162_lda = rs_u162_lda.predict(X_test_u162)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_u162_lda = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u162_lda,
)
f1_rs_u162_lda = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u162_lda,
    average = 'weighted',
)

print(f"Best LDA Accuracy: {acc_test_rs_u162_lda:.2f}")
print(f"Best LDA F1: {f1_rs_u162_lda:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_u162_lda = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u162_lda,
)
disp_rs_u162_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u162_lda,
    display_labels = rs_u162_lda.classes_,
)
disp_rs_u162_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_u162_lda = disp_rs_u162_lda.ax_
ax_rs_u162_lda.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u162_lda.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u162_lda.xaxis.set_tick_params(labelsize = 10)
ax_rs_u162_lda.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM/cm_teste_u162_lda', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_u162_lda = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u162_lda,
    target_names = rs_u162_lda.classes_,
)
print(report_rs_u162_lda)

#### **Modelo C3**: Usando features LBP Uniforme (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# LDA (model_lda) fitted on X_train_u243.
# ================================================================================

# Candidate values for the search.
# NOTE(review): the grid holds only 2 x 3 = 6 combinations, so n_iter = 50 cannot be
# reached; scikit-learn is expected to cap the search at 6 candidates (with a warning).
# NOTE(review): per the scikit-learn docs `tol` is only used by the 'svd' solver, so
# it likely has no effect with 'lsqr' / 'eigen' - confirm.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Create the RandomizedSearchCV object, scored with `metrics` and refit on the
# candidate with the best 'accuracy'.
rs_u243_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_u243_lda.fit(X_train_u243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models/rs_u243_lda.pickle', "wb") as pickle_file_rs_u243_lda:
    pickle.dump(rs_u243_lda, pickle_file_rs_u243_lda)
with open('./Output/Models/rs_u243_lda.joblib', "wb") as joblib_file_rs_u243_lda:
    joblib.dump(rs_u243_lda, joblib_file_rs_u243_lda)

print(f'Os melhores parâmetros foram: {rs_u243_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_u243_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_u243_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test'; after sorting, row 0 is
# the candidate ranked first on accuracy.
res1_rs_u243_lda = pd.DataFrame(rs_u243_lda.cv_results_).filter(regex = r'(params|test)').sort_values(by=["rank_test_accuracy"])

# Per-split accuracy / F1 entries of the first-ranked candidate.
accf1_rs_u243_lda = res1_rs_u243_lda.iloc[0].filter(regex = r'(accuracy|f1)').filter(regex = 'split')

# The extremes are taken from this model's own per-split scores (accf1_rs_u243_lda);
# reading another model's series here would report that model's numbers instead.
min_acc_rs_u243_lda = accf1_rs_u243_lda.filter(regex = 'accuracy').min()
max_acc_rs_u243_lda = accf1_rs_u243_lda.filter(regex = 'accuracy').max()
min_f1_rs_u243_lda = accf1_rs_u243_lda.filter(regex = 'f1').min()
max_f1_rs_u243_lda = accf1_rs_u243_lda.filter(regex = 'f1').max()

print(f'Acurácia min: {min_acc_rs_u243_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_u243_lda:.6f}')
print(f'F1 min: {min_f1_rs_u243_lda:.6f}')
print(f'F1 max: {max_f1_rs_u243_lda:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_u243_lda = res1_rs_u243_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_u243_lda.iloc[0])


In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_u243_lda,
    X_train_u243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_u243_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_u243_lda = rs_u243_lda.predict(X_test_u243)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_u243_lda = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u243_lda,
)
f1_rs_u243_lda = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_u243_lda,
    average = 'weighted',
)

print(f"Best LDA Accuracy: {acc_test_rs_u243_lda:.2f}")
print(f"Best LDA F1: {f1_rs_u243_lda:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_u243_lda = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u243_lda,
)
disp_rs_u243_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_u243_lda,
    display_labels = rs_u243_lda.classes_,
)
disp_rs_u243_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_u243_lda = disp_rs_u243_lda.ax_
ax_rs_u243_lda.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u243_lda.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_u243_lda.xaxis.set_tick_params(labelsize = 10)
ax_rs_u243_lda.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM/cm_teste_u243_lda', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_u243_lda = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_u243_lda,
    target_names = rs_u243_lda.classes_,
)
print(report_rs_u243_lda)

#### **Modelo C4**: Usando features LBP Uniforme (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# LDA (model_lda) fitted on X_train_u.
# ================================================================================

# Candidate values for the search.
# NOTE(review): the grid holds only 2 x 3 = 6 combinations, so n_iter = 50 cannot be
# reached; scikit-learn is expected to cap the search at 6 candidates (with a warning).
# NOTE(review): per the scikit-learn docs `tol` is only used by the 'svd' solver, so
# it likely has no effect with 'lsqr' / 'eigen' - confirm.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Create the RandomizedSearchCV object, scored with `metrics` and refit on the
# candidate with the best 'accuracy'.
rs_uni_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_uni_lda.fit(X_train_u, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models/rs_uni_lda.pickle', "wb") as pickle_file_rs_uni_lda:
    pickle.dump(rs_uni_lda, pickle_file_rs_uni_lda)
with open('./Output/Models/rs_uni_lda.joblib', "wb") as joblib_file_rs_uni_lda:
    joblib.dump(rs_uni_lda, joblib_file_rs_uni_lda)

print(f'Os melhores parâmetros foram: {rs_uni_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_uni_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_uni_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_uni_lda = (
    pd.DataFrame(rs_uni_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_uni_lda = (
    res1_rs_uni_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_uni_lda = accf1_rs_uni_lda.filter(regex = 'accuracy')
f1_splits_rs_uni_lda = accf1_rs_uni_lda.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_uni_lda = acc_splits_rs_uni_lda.min()
max_acc_rs_uni_lda = acc_splits_rs_uni_lda.max()
min_f1_rs_uni_lda = f1_splits_rs_uni_lda.min()
max_f1_rs_uni_lda = f1_splits_rs_uni_lda.max()

print(f'Acurácia min: {min_acc_rs_uni_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_uni_lda:.6f}')
print(f'F1 min: {min_f1_rs_uni_lda:.6f}')
print(f'F1 max: {max_f1_rs_uni_lda:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_uni_lda = res1_rs_uni_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_uni_lda.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell.
cm_cv(
    rs_uni_lda,
    X_train_u,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_uni_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predictions of the fitted search object on the held-out features.
y_pred_rs_uni_lda = rs_uni_lda.predict(X_test_u)

# Accuracy and weighted F1 against the true test labels.
acc_test_rs_uni_lda = accuracy_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_uni_lda,
)
f1_rs_uni_lda = f1_score(
    y_true = y_test["Class"],
    y_pred = y_pred_rs_uni_lda,
    average = 'weighted',
)

print(f"Best LDA Accuracy: {acc_test_rs_uni_lda:.2f}")
print(f"Best LDA F1: {f1_rs_uni_lda:.6f}")

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
# NOTE(review): no `labels` argument is given, so the matrix order presumably follows
# the labels found in y_test / y_pred; it matches `classes_` only if every class
# occurs there - confirm.
cm_rs_uni_lda = confusion_matrix(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_uni_lda,
)
disp_rs_uni_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_uni_lda,
    display_labels = rs_uni_lda.classes_,
)
disp_rs_uni_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles in bold, tick labels at size 10 and in italic.
ax_rs_uni_lda = disp_rs_uni_lda.ax_
ax_rs_uni_lda.set_xlabel('Predito', fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_uni_lda.set_ylabel("Observado", fontsize = 10, style = 'normal', fontweight = 'bold')
ax_rs_uni_lda.xaxis.set_tick_params(labelsize = 10)
ax_rs_uni_lda.yaxis.set_tick_params(labelsize = 10)
plt.yticks(style = 'italic')
plt.xticks(style = 'italic')

# Write the figure to disk before showing it.
plt.savefig('./Output/CM/cm_teste_uni_lda', dpi = 600, bbox_inches = 'tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
report_rs_uni_lda = classification_report(
    y_true = y_test['Class'],
    y_pred = y_pred_rs_uni_lda,
    target_names = rs_uni_lda.classes_,
)
print(report_rs_uni_lda)

#### **Modelo C5**: Usando features LBP Uniforme Não Invariante (P = 8, R = 1)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# LDA (model_lda) fitted on X_train_nri81.
# ================================================================================

# Candidate values for the search.
# NOTE(review): the grid holds only 2 x 3 = 6 combinations, so n_iter = 50 cannot be
# reached; scikit-learn is expected to cap the search at 6 candidates (with a warning).
# NOTE(review): per the scikit-learn docs `tol` is only used by the 'svd' solver, so
# it likely has no effect with 'lsqr' / 'eigen' - confirm.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Create the RandomizedSearchCV object, scored with `metrics` and refit on the
# candidate with the best 'accuracy'.
rs_nri81_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# y_train['Sample'] is passed as the grouping variable for the CV splitter.
rs_nri81_lda.fit(X_train_nri81, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The `with` blocks make sure both files are flushed and
# closed even if serialization fails (the bare open() calls left the handles open).
with open('./Output/Models/rs_nri81_lda.pickle', "wb") as pickle_file_rs_nri81_lda:
    pickle.dump(rs_nri81_lda, pickle_file_rs_nri81_lda)
with open('./Output/Models/rs_nri81_lda.joblib', "wb") as joblib_file_rs_nri81_lda:
    joblib.dump(rs_nri81_lda, joblib_file_rs_nri81_lda)

print(f'Os melhores parâmetros foram: {rs_nri81_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri81_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri81_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Keep the columns whose name contains 'params' or 'test', ordered so that the
# candidate ranked first on accuracy sits in row 0.
res1_rs_nri81_lda = (
    pd.DataFrame(rs_nri81_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by = ["rank_test_accuracy"])
)

# Per-split accuracy / F1 entries of that first-ranked candidate.
accf1_rs_nri81_lda = (
    res1_rs_nri81_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)
acc_splits_rs_nri81_lda = accf1_rs_nri81_lda.filter(regex = 'accuracy')
f1_splits_rs_nri81_lda = accf1_rs_nri81_lda.filter(regex = 'f1')

# Lowest and highest score across the splits.
min_acc_rs_nri81_lda = acc_splits_rs_nri81_lda.min()
max_acc_rs_nri81_lda = acc_splits_rs_nri81_lda.max()
min_f1_rs_nri81_lda = f1_splits_rs_nri81_lda.min()
max_f1_rs_nri81_lda = f1_splits_rs_nri81_lda.max()

print(f'Acurácia min: {min_acc_rs_nri81_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_nri81_lda:.6f}')
print(f'F1 min: {min_f1_rs_nri81_lda:.6f}')
print(f'F1 max: {max_f1_rs_nri81_lda:.6f}')

# Columns whose name contains 'mean', 'std' or 'params', shown for the first-ranked candidate.
res2_rs_nri81_lda = res1_rs_nri81_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_nri81_lda.iloc[0])


In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell; it receives the fitted search, the
# training features, the class labels, the sample ids and a name for this model.
cm_cv(
    rs_nri81_lda,
    X_train_nri81,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri81_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the held-out features with the fitted search, then score the predictions
# against the true classes (plain accuracy and weighted-average F1).
y_pred_rs_nri81_lda = rs_nri81_lda.predict(X_test_nri81)
acc_test_rs_nri81_lda = accuracy_score(
    y_test["Class"],
    y_pred_rs_nri81_lda,
)
f1_rs_nri81_lda = f1_score(
    y_test["Class"],
    y_pred_rs_nri81_lda,
    average = 'weighted',
)
print(
    f"Best LDA Accuracy: {acc_test_rs_nri81_lda:.2f}",
    f"Best LDA F1: {f1_rs_nri81_lda:.6f}",
    sep = '\n',
)

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri81_lda = confusion_matrix(y_test['Class'], y_pred_rs_nri81_lda)
disp_rs_nri81_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri81_lda,
    display_labels = rs_nri81_lda.classes_,
)
disp_rs_nri81_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles ("Predito" = predicted, "Observado" = observed): bold, size 10.
disp_rs_nri81_lda.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri81_lda.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')

# Size-10 tick labels on both axes, then switch the class names to italics.
disp_rs_nri81_lda.ax_.tick_params(axis = 'both', labelsize = 10)
plt.xticks(style='italic')
plt.yticks(style='italic')

# Write the figure to disk before showing it.
plt.savefig(f'./Output/CM/cm_teste_nri81_lda', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1 of the test predictions, labelled with the
# classes known to the fitted search.
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri81_lda,
        target_names = rs_nri81_lda.classes_,
    )
)

#### **Modelo C6**: Usando features LBP Uniforme Não Invariante (P = 16, R = 2)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search: 2 solvers x 3 tolerances = 6 combinations.
# NOTE(review): with only 6 combinations, n_iter = 50 below cannot draw 50 distinct
# candidates - confirm whether an exhaustive GridSearchCV was intended.
# NOTE(review): scikit-learn documents LDA's `tol` as used only by the 'svd' solver;
# presumably model_lda is an LDA estimator - confirm that varying `tol` matters here.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Randomized search over `model_lda`; refit = 'accuracy' makes accuracy the metric
# used to pick (and refit) the best candidate.
rs_nri162_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# The 'Sample' column is passed as `groups` - presumably so the splitter keeps rows
# of one sample in the same fold (verify against the definition of `cv`).
rs_nri162_lda.fit(X_train_nri162, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The files are opened in `with` blocks so each handle is
# flushed and closed as soon as its dump finishes (the bare open() calls leaked them).
with open('./Output/Models/rs_nri162_lda.pickle', "wb") as model_fh:
    pickle.dump(rs_nri162_lda, model_fh)
with open('./Output/Models/rs_nri162_lda.joblib', "wb") as model_fh:
    joblib.dump(rs_nri162_lda, model_fh)

print(f'Os melhores parâmetros foram: {rs_nri162_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri162_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri162_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# (A search saved earlier can be reloaded with pickle.load instead of refitting.)

# Keep the params and test-score columns, ordered by accuracy rank (rank 1 first).
res1_rs_nri162_lda = (
    pd.DataFrame(rs_nri162_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by=["rank_test_accuracy"])
)

# Per-split accuracy and F1 entries of the top-ranked candidate.
accf1_rs_nri162_lda = (
    res1_rs_nri162_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Spread of each metric across the splits.
max_acc_rs_nri162_lda = accf1_rs_nri162_lda.filter(regex = 'accuracy').max()
min_acc_rs_nri162_lda = accf1_rs_nri162_lda.filter(regex = 'accuracy').min()
max_f1_rs_nri162_lda = accf1_rs_nri162_lda.filter(regex = 'f1').max()
min_f1_rs_nri162_lda = accf1_rs_nri162_lda.filter(regex = 'f1').min()

print(
    f'Acurácia min: {min_acc_rs_nri162_lda:.6f}',
    f'Acurácia max: {max_acc_rs_nri162_lda:.6f}',
    f'F1 min: {min_f1_rs_nri162_lda:.6f}',
    f'F1 max: {max_f1_rs_nri162_lda:.6f}',
    sep = '\n',
)

# Mean/std columns (plus params) for the top-ranked candidate.
res2_rs_nri162_lda = res1_rs_nri162_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_nri162_lda.iloc[0])


In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell; it receives the fitted search, the
# training features, the class labels, the sample ids and a name for this model.
cm_cv(
    rs_nri162_lda,
    X_train_nri162,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri162_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the held-out features with the fitted search, then score the predictions
# against the true classes (plain accuracy and weighted-average F1).
y_pred_rs_nri162_lda = rs_nri162_lda.predict(X_test_nri162)
acc_test_rs_nri162_lda = accuracy_score(
    y_test["Class"],
    y_pred_rs_nri162_lda,
)
f1_rs_nri162_lda = f1_score(
    y_test["Class"],
    y_pred_rs_nri162_lda,
    average = 'weighted',
)
print(
    f"Best LDA Accuracy: {acc_test_rs_nri162_lda:.2f}",
    f"Best LDA F1: {f1_rs_nri162_lda:.6f}",
    sep = '\n',
)

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri162_lda = confusion_matrix(y_test['Class'], y_pred_rs_nri162_lda)
disp_rs_nri162_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri162_lda,
    display_labels = rs_nri162_lda.classes_,
)
disp_rs_nri162_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles ("Predito" = predicted, "Observado" = observed): bold, size 10.
disp_rs_nri162_lda.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri162_lda.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')

# Size-10 tick labels on both axes, then switch the class names to italics.
disp_rs_nri162_lda.ax_.tick_params(axis = 'both', labelsize = 10)
plt.xticks(style='italic')
plt.yticks(style='italic')

# Write the figure to disk before showing it.
plt.savefig(f'./Output/CM/cm_teste_nri162_lda', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1 of the test predictions, labelled with the
# classes known to the fitted search.
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri162_lda,
        target_names = rs_nri162_lda.classes_,
    )
)

#### **Modelo C7**: Usando features LBP Uniforme Não Invariante (P = 24, R = 3)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search: 2 solvers x 3 tolerances = 6 combinations.
# NOTE(review): with only 6 combinations, n_iter = 50 below cannot draw 50 distinct
# candidates - confirm whether an exhaustive GridSearchCV was intended.
# NOTE(review): scikit-learn documents LDA's `tol` as used only by the 'svd' solver;
# presumably model_lda is an LDA estimator - confirm that varying `tol` matters here.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Randomized search over `model_lda`; refit = 'accuracy' makes accuracy the metric
# used to pick (and refit) the best candidate.
rs_nri243_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# The 'Sample' column is passed as `groups` - presumably so the splitter keeps rows
# of one sample in the same fold (verify against the definition of `cv`).
rs_nri243_lda.fit(X_train_nri243, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The files are opened in `with` blocks so each handle is
# flushed and closed as soon as its dump finishes (the bare open() calls leaked them).
with open('./Output/Models/rs_nri243_lda.pickle', "wb") as model_fh:
    pickle.dump(rs_nri243_lda, model_fh)
with open('./Output/Models/rs_nri243_lda.joblib', "wb") as model_fh:
    joblib.dump(rs_nri243_lda, model_fh)

print(f'Os melhores parâmetros foram: {rs_nri243_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri243_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri243_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# (A search saved earlier can be reloaded with pickle.load instead of refitting.)

# Keep the params and test-score columns, ordered by accuracy rank (rank 1 first).
res1_rs_nri243_lda = (
    pd.DataFrame(rs_nri243_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by=["rank_test_accuracy"])
)

# Per-split accuracy and F1 entries of the top-ranked candidate.
accf1_rs_nri243_lda = (
    res1_rs_nri243_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Spread of each metric across the splits.
max_acc_rs_nri243_lda = accf1_rs_nri243_lda.filter(regex = 'accuracy').max()
min_acc_rs_nri243_lda = accf1_rs_nri243_lda.filter(regex = 'accuracy').min()
max_f1_rs_nri243_lda = accf1_rs_nri243_lda.filter(regex = 'f1').max()
min_f1_rs_nri243_lda = accf1_rs_nri243_lda.filter(regex = 'f1').min()

print(
    f'Acurácia min: {min_acc_rs_nri243_lda:.6f}',
    f'Acurácia max: {max_acc_rs_nri243_lda:.6f}',
    f'F1 min: {min_f1_rs_nri243_lda:.6f}',
    f'F1 max: {max_f1_rs_nri243_lda:.6f}',
    sep = '\n',
)

# Mean/std columns (plus params) for the top-ranked candidate.
res2_rs_nri243_lda = res1_rs_nri243_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_nri243_lda.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell; it receives the fitted search, the
# training features, the class labels, the sample ids and a name for this model.
cm_cv(
    rs_nri243_lda,
    X_train_nri243,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri243_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the held-out features with the fitted search, then score the predictions
# against the true classes (plain accuracy and weighted-average F1).
y_pred_rs_nri243_lda = rs_nri243_lda.predict(X_test_nri243)
acc_test_rs_nri243_lda = accuracy_score(
    y_test["Class"],
    y_pred_rs_nri243_lda,
)
f1_rs_nri243_lda = f1_score(
    y_test["Class"],
    y_pred_rs_nri243_lda,
    average = 'weighted',
)
print(
    f"Best LDA Accuracy: {acc_test_rs_nri243_lda:.2f}",
    f"Best LDA F1: {f1_rs_nri243_lda:.6f}",
    sep = '\n',
)

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri243_lda = confusion_matrix(y_test['Class'], y_pred_rs_nri243_lda)
disp_rs_nri243_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri243_lda,
    display_labels = rs_nri243_lda.classes_,
)
disp_rs_nri243_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles ("Predito" = predicted, "Observado" = observed): bold, size 10.
disp_rs_nri243_lda.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri243_lda.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')

# Size-10 tick labels on both axes, then switch the class names to italics.
disp_rs_nri243_lda.ax_.tick_params(axis = 'both', labelsize = 10)
plt.xticks(style='italic')
plt.yticks(style='italic')

# Write the figure to disk before showing it.
plt.savefig(f'./Output/CM/cm_teste_nri243_lda', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1 of the test predictions, labelled with the
# classes known to the fitted search.
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri243_lda,
        target_names = rs_nri243_lda.classes_,
    )
)

#### **Modelo C8**: Usando features LBP Uniforme Não Invariante (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search: 2 solvers x 3 tolerances = 6 combinations.
# NOTE(review): with only 6 combinations, n_iter = 50 below cannot draw 50 distinct
# candidates - confirm whether an exhaustive GridSearchCV was intended.
# NOTE(review): scikit-learn documents LDA's `tol` as used only by the 'svd' solver;
# presumably model_lda is an LDA estimator - confirm that varying `tol` matters here.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Randomized search over `model_lda`; refit = 'accuracy' makes accuracy the metric
# used to pick (and refit) the best candidate.
rs_nri_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# The 'Sample' column is passed as `groups` - presumably so the splitter keeps rows
# of one sample in the same fold (verify against the definition of `cv`).
rs_nri_lda.fit(X_train_nri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The files are opened in `with` blocks so each handle is
# flushed and closed as soon as its dump finishes (the bare open() calls leaked them).
# NOTE(review): this model writes under ./Output_patches/ while the other LDA models
# in this section write under ./Output/ - confirm the directory is intentional.
with open('./Output_patches/Models/rs_nri_lda.pickle', "wb") as model_fh:
    pickle.dump(rs_nri_lda, model_fh)
with open('./Output_patches/Models/rs_nri_lda.joblib', "wb") as model_fh:
    joblib.dump(rs_nri_lda, model_fh)

print(f'Os melhores parâmetros foram: {rs_nri_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_nri_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_nri_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# Reload the saved search from disk, replacing any `rs_nri_lda` already in memory.
# The file is opened in a `with` block so the handle is closed after loading (the
# bare open() call leaked it).
# pickle.load can execute code stored in the file - only load files this project wrote.
# NOTE(review): the path is under ./Output_patches/ whereas the other LDA models in
# this section use ./Output/ - confirm the directory is intentional.
with open('./Output_patches/Models/rs_nri_lda.pickle', "rb") as model_fh:
    rs_nri_lda = pickle.load(model_fh)

# Keep the params and test-score columns, ordered by accuracy rank (rank 1 first).
res1_rs_nri_lda = (
    pd.DataFrame(rs_nri_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by=["rank_test_accuracy"])
)

# Per-split accuracy and F1 entries of the top-ranked candidate.
accf1_rs_nri_lda = res1_rs_nri_lda.iloc[0].filter(regex = r'(accuracy|f1)').filter(regex = 'split')

# Spread of each metric across the splits.
min_acc_rs_nri_lda = accf1_rs_nri_lda.filter(regex = 'accuracy').min()
max_acc_rs_nri_lda = accf1_rs_nri_lda.filter(regex = 'accuracy').max()
min_f1_rs_nri_lda = accf1_rs_nri_lda.filter(regex = 'f1').min()
max_f1_rs_nri_lda = accf1_rs_nri_lda.filter(regex = 'f1').max()

print(f'Acurácia min: {min_acc_rs_nri_lda:.6f}')
print(f'Acurácia max: {max_acc_rs_nri_lda:.6f}')
print(f'F1 min: {min_f1_rs_nri_lda:.6f}')
print(f'F1 max: {max_f1_rs_nri_lda:.6f}')

# Mean/std columns (plus params) for the top-ranked candidate.
res2_rs_nri_lda = res1_rs_nri_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_nri_lda.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell; it receives the fitted search, the
# training features, the class labels, the sample ids and a name for this model.
cm_cv(
    rs_nri_lda,
    X_train_nri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_nri_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the held-out features with the fitted search, then score the predictions
# against the true classes (plain accuracy and weighted-average F1).
y_pred_rs_nri_lda = rs_nri_lda.predict(X_test_nri)
acc_test_rs_nri_lda = accuracy_score(
    y_test["Class"],
    y_pred_rs_nri_lda,
)
f1_rs_nri_lda = f1_score(
    y_test["Class"],
    y_pred_rs_nri_lda,
    average = 'weighted',
)
print(
    f"Best LDA Accuracy: {acc_test_rs_nri_lda:.2f}",
    f"Best LDA F1: {f1_rs_nri_lda:.6f}",
    sep = '\n',
)

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_nri_lda = confusion_matrix(y_test['Class'], y_pred_rs_nri_lda)
disp_rs_nri_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_nri_lda,
    display_labels = rs_nri_lda.classes_,
)
disp_rs_nri_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles ("Predito" = predicted, "Observado" = observed): bold, size 10.
disp_rs_nri_lda.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_nri_lda.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')

# Size-10 tick labels on both axes, then switch the class names to italics.
disp_rs_nri_lda.ax_.tick_params(axis = 'both', labelsize = 10)
plt.xticks(style='italic')
plt.yticks(style='italic')

# Write the figure to disk before showing it.
# NOTE(review): saved under ./Output_patches/ whereas the other LDA models in this
# section save under ./Output/ - confirm the directory is intentional.
plt.savefig(f'./Output_patches/CM/cm_teste_nri_lda', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1 of the test predictions, labelled with the
# classes known to the fitted search.
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_nri_lda,
        target_names = rs_nri_lda.classes_,
    )
)

#### **Modelo C9**: Usando features LBP (Todos)

In [None]:
# ================================================================================
# Hyperparameter tuning - RandomizedSearchCV()
# ================================================================================

# Candidate values for the search: 2 solvers x 3 tolerances = 6 combinations.
# NOTE(review): with only 6 combinations, n_iter = 50 below cannot draw 50 distinct
# candidates - confirm whether an exhaustive GridSearchCV was intended.
# NOTE(review): scikit-learn documents LDA's `tol` as used only by the 'svd' solver;
# presumably model_lda is an LDA estimator - confirm that varying `tol` matters here.
param_grid = {'solver': ['lsqr', 'eigen'],
              'tol' : [0.0001, 0.0002, 0.0003]}

# Randomized search over `model_lda`; refit = 'accuracy' makes accuracy the metric
# used to pick (and refit) the best candidate.
rs_unri_lda = RandomizedSearchCV(estimator = model_lda, param_distributions = param_grid,
                            scoring = metrics, cv = cv, refit = 'accuracy',
                            verbose = 3, return_train_score = False, n_iter = 50)

# The 'Sample' column is passed as `groups` - presumably so the splitter keeps rows
# of one sample in the same fold (verify against the definition of `cv`).
rs_unri_lda.fit(X_train_unri, y_train['Class'], groups = y_train['Sample'])

# Save the fitted search. The files are opened in `with` blocks so each handle is
# flushed and closed as soon as its dump finishes (the bare open() calls leaked them).
with open('./Output/Models/rs_unri_lda.pickle', "wb") as model_fh:
    pickle.dump(rs_unri_lda, model_fh)
with open('./Output/Models/rs_unri_lda.joblib', "wb") as model_fh:
    joblib.dump(rs_unri_lda, model_fh)

print(f'Os melhores parâmetros foram: {rs_unri_lda.best_params_}')
print(f'A melhor acurácia foi de: {rs_unri_lda.best_score_:.4f}')
print(f'O melhor modelo foi: {rs_unri_lda.best_estimator_}')

In [None]:
# ================================================================================
# Cross-validation performance (Stratified Group 5-fold cross-validation)
# ================================================================================
# (A search saved earlier can be reloaded with pickle.load instead of refitting.)

# Keep the params and test-score columns, ordered by accuracy rank (rank 1 first).
res1_rs_unri_lda = (
    pd.DataFrame(rs_unri_lda.cv_results_)
    .filter(regex = r'(params|test)')
    .sort_values(by=["rank_test_accuracy"])
)

# Per-split accuracy and F1 entries of the top-ranked candidate.
accf1_rs_unri_lda = (
    res1_rs_unri_lda.iloc[0]
    .filter(regex = r'(accuracy|f1)')
    .filter(regex = 'split')
)

# Spread of each metric across the splits.
max_acc_rs_unri_lda = accf1_rs_unri_lda.filter(regex = 'accuracy').max()
min_acc_rs_unri_lda = accf1_rs_unri_lda.filter(regex = 'accuracy').min()
max_f1_rs_unri_lda = accf1_rs_unri_lda.filter(regex = 'f1').max()
min_f1_rs_unri_lda = accf1_rs_unri_lda.filter(regex = 'f1').min()

print(
    f'Acurácia min: {min_acc_rs_unri_lda:.6f}',
    f'Acurácia max: {max_acc_rs_unri_lda:.6f}',
    f'F1 min: {min_f1_rs_unri_lda:.6f}',
    f'F1 max: {max_f1_rs_unri_lda:.6f}',
    sep = '\n',
)

# Mean/std columns (plus params) for the top-ranked candidate.
res2_rs_unri_lda = res1_rs_unri_lda.filter(regex = r'(mean|std|params)')
display(res2_rs_unri_lda.iloc[0])

In [None]:
# ================================================================================
# Confusion matrix - cross-validation
# ================================================================================
# cm_cv is a helper defined outside this cell; it receives the fitted search, the
# training features, the class labels, the sample ids and a name for this model.
cm_cv(
    rs_unri_lda,
    X_train_unri,
    y_train['Class'],
    y_train['Sample'],
    name = 'rs_unri_lda',
)

In [None]:
# ================================================================================
# Performance on the test set
# ================================================================================
# Predict the held-out features with the fitted search, then score the predictions
# against the true classes (plain accuracy and weighted-average F1).
y_pred_rs_unri_lda = rs_unri_lda.predict(X_test_unri)
acc_test_rs_unri_lda = accuracy_score(
    y_test["Class"],
    y_pred_rs_unri_lda,
)
f1_rs_unri_lda = f1_score(
    y_test["Class"],
    y_pred_rs_unri_lda,
    average = 'weighted',
)
print(
    f"Best LDA Accuracy: {acc_test_rs_unri_lda:.2f}",
    f"Best LDA F1: {f1_rs_unri_lda:.6f}",
    sep = '\n',
)

In [None]:
# ================================================================================
# Confusion matrix - test set (n = 566)
# ================================================================================
cm_rs_unri_lda = confusion_matrix(y_test['Class'], y_pred_rs_unri_lda)
disp_rs_unri_lda = ConfusionMatrixDisplay(
    confusion_matrix = cm_rs_unri_lda,
    display_labels = rs_unri_lda.classes_,
)
disp_rs_unri_lda.plot(cmap = 'Blues', xticks_rotation = 90)

# Axis titles ("Predito" = predicted, "Observado" = observed): bold, size 10.
disp_rs_unri_lda.ax_.set_ylabel("Observado", fontsize=10, style='normal', fontweight = 'bold')
disp_rs_unri_lda.ax_.set_xlabel('Predito', fontsize=10, style='normal', fontweight = 'bold')

# Size-10 tick labels on both axes, then switch the class names to italics.
disp_rs_unri_lda.ax_.tick_params(axis = 'both', labelsize = 10)
plt.xticks(style='italic')
plt.yticks(style='italic')

# Write the figure to disk before showing it.
plt.savefig(f'./Output/CM/cm_teste_unri_lda', dpi = 600, bbox_inches='tight')
plt.show()

In [None]:
# ================================================================================
# Classification report (test set)
# ================================================================================
# Per-class precision/recall/F1 of the test predictions, labelled with the
# classes known to the fitted search.
print(
    classification_report(
        y_test['Class'],
        y_pred_rs_unri_lda,
        target_names = rs_unri_lda.classes_,
    )
)