In [3]:
from google.colab import drive 
drive.mount('/content/gdrive')
data_dir = "gdrive/MyDrive/PROJETO/NovoDATASET2"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Carregar dados


In [7]:
import cv2
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from skimage.morphology import thin
#from sklearn.model_selection import cross_val_score
import os

# Defina a classe CustomTransform para o pré-processamento das imagens
class CustomTransform(object):
    def __call__(self, img):
        img = np.array(img, dtype=np.uint8)
        img = cv2.resize(img, (300, 300))
        bilateral = cv2.bilateralFilter(img, 9, 75, 75)
        gray = cv2.cvtColor(bilateral, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        edges_inv = cv2.bitwise_not(edges)
        kernel = np.ones((3, 3), np.uint8)
        opening = cv2.morphologyEx(edges_inv, cv2.MORPH_OPEN, kernel)
        gradient = cv2.morphologyEx(opening, cv2.MORPH_GRADIENT, kernel)
        skel = thin(gradient, max_num_iter=1)
        result = skel.astype('uint8')
        return result.flatten()

# Criar listas para armazenar as amostras e os rótulos
samples = []
labels = []

# Percorrer os conjuntos de dados (TRAIN, VAL, TEST)
for dataset_folder in ['TRAIN', 'VAL', 'TEST']:
    dataset_path = os.path.join(data_dir, dataset_folder)
    class_folders = os.listdir(dataset_path)
    for label, class_folder in enumerate(class_folders):
        class_path = os.path.join(dataset_path, class_folder)
        image_files = os.listdir(class_path)
        for image_file in image_files:
            image_path = os.path.join(class_path, image_file)
            img = cv2.imread(image_path)
            transformed_img = CustomTransform()(img)
            samples.append(transformed_img)
            labels.append(label)

# Converter as listas para arrays numpy
samples = np.array(samples)
labels = np.array(labels)

n_train_samples = 736
n_val_samples = 174

# Dividir o conjunto de dados em treinamento, validação e teste
train_samples = samples[:n_train_samples]
train_labels = labels[:n_train_samples]
val_samples = samples[n_train_samples:n_train_samples + n_val_samples]
val_labels = labels[n_train_samples:n_train_samples + n_val_samples]
test_samples = samples[n_train_samples + n_val_samples:]
test_labels = labels[n_train_samples + n_val_samples:]

print(train_samples[0])

[0 0 0 ... 0 0 0]


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


KNN

In [11]:
# Criar e treinar o modelo KNN
knn = KNeighborsClassifier(n_neighbors=5)

# Realizar a validação cruzada durante o treinamento
#cv_scores = cross_val_score(knn, train_samples, train_labels, cv=5)
#print('Cross-Validation Scores:', cv_scores)
#print('Mean Cross-Validation Score:', np.mean(cv_scores))

# Treinar o modelo KNN no conjunto de treinamento completo
knn.fit(train_samples, train_labels)

# Realizar previsões no conjunto de validação
val_predictions = knn.predict(val_samples)

# Calcular a acurácia das previsões no conjunto de validação
#val_accuracy = np.mean(val_predictions == val_labels)
val_accuracy = accuracy_score(val_predictions, val_labels)
val_f1 = f1_score(val_predictions, val_labels, average='weighted')

print('Validation Accuracy: {:.4f}'.format(val_accuracy))
print('Validation F1-Score: {:.4f}'.format(val_f1))

#Matriz de confusão - Treinamento:
print('\nConfusion Matrix:')
print(confusion_matrix(val_predictions, val_labels))


# Realizar previsões no conjunto de teste
#test_predictions = knn.predict(test_samples)

# Calcular a acurácia das previsões no conjunto de teste
#test_accuracy = np.mean(test_predictions == test_labels)
#print('Test Accuracy: {:.4f}'.format(test_accuracy))

Validation Accuracy: 0.2241
Validation F1-Score: 0.3151

Confusion Matrix:
[[ 2  0  0  0  0]
 [32 15 42  4 18]
 [ 0  0  0  0  0]
 [ 6 14  6 22 13]
 [ 0  0  0  0  0]]
