In [None]:
import os
import numpy as np
import cv2
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, log_loss


In [None]:
# Define paths
# Raw string literal: the Windows path contains backslashes followed by
# letters ('\P', '\C') that are not valid escape sequences. A plain string
# only works by accident and triggers a SyntaxWarning on recent Python.
base_dir = r'D:\PKG - C-NMC 2019\C-NMC_training_data'
folds = ['fold_0', 'fold_1', 'fold_2']  # sub-directories of base_dir to read
categories = ['all', 'hem']  # class sub-directories inside every fold

# Parameters
# Every image is resized to img_width x img_height before feature extraction.
img_width, img_height = 150, 150


In [None]:
def load_images_and_labels(base_dir, folds, categories, img_width, img_height):
    """Load and resize every readable image under ``base_dir/<fold>/<category>``.

    Args:
        base_dir: Root directory that contains the fold directories.
        folds: Iterable of fold directory names located inside ``base_dir``.
        categories: Iterable of category directory names inside each fold.
            The category ``'all'`` is labelled 0; any other category is
            labelled 1.
        img_width: Target width passed to ``cv2.resize``.
        img_height: Target height passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays holding the resized
        images and their integer labels, in matching order.

    Raises:
        OSError: Propagated from ``os.listdir`` when a fold/category
            directory does not exist or cannot be read.
    """
    target_size = (img_width, img_height)  # cv2.resize takes (width, height)
    images = []
    labels = []
    for fold in folds:
        for category in categories:
            category_dir = os.path.join(base_dir, fold, category)
            label = 0 if category == 'all' else 1
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order (and any seeded split made from it) reproducible.
            for filename in sorted(os.listdir(category_dir)):
                img = cv2.imread(os.path.join(category_dir, filename))
                if img is None:
                    # cv2.imread returns None for files it cannot decode;
                    # such entries are skipped rather than aborting the load.
                    continue
                images.append(cv2.resize(img, target_size))
                labels.append(label)
    return np.array(images), np.array(labels)

# Read the dataset into memory: resized images plus their matching 0/1 labels
images, labels = load_images_and_labels(
    base_dir=base_dir,
    folds=folds,
    categories=categories,
    img_width=img_width,
    img_height=img_height,
)


In [None]:
def extract_hog_features(images):
    """Compute one HOG descriptor per image.

    Args:
        images: Iterable of multichannel images whose channels are on the
            last axis.

    Returns:
        A NumPy array built from the per-image HOG feature vectors, in the
        same order as ``images``.
    """
    hog_features = []
    for image in images:
        # channel_axis=-1 replaces the `multichannel=True` flag, which was
        # deprecated in scikit-image 0.19 and is rejected by later releases.
        feature = hog(
            image,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            channel_axis=-1,
        )
        hog_features.append(feature)
    return np.array(hog_features)

# Compute one HOG descriptor for every loaded image
features = extract_hog_features(images=images)


In [None]:
# Split data into training and validation sets (80% / 20%).
# stratify=labels keeps the class ratio the same in both splits, so the
# validation metrics are not skewed by an unrepresentative class mix.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)


In [None]:
# K-Nearest Neighbors (KNN) classifier configured with 5 neighbours
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier on the training split
knn.fit(X=X_train, y=y_train)


In [None]:
# Validate the model: hard predictions and per-class probabilities
y_pred = knn.predict(X_val)
y_pred_proba = knn.predict_proba(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
# pos_label=1 is the default, spelled out here. With the loader's encoding
# (0 for 'all', 1 otherwise) this is recall for the non-'all' class.
# NOTE(review): confirm that is the intended positive class.
recall = recall_score(y_val, y_pred, pos_label=1)
# predict_proba columns follow knn.classes_; passing them as `labels` makes
# the alignment explicit and keeps log_loss working even if the validation
# split happens to contain a single class.
loss = log_loss(y_val, y_pred_proba, labels=knn.classes_)

print(f'Validation Accuracy: {accuracy:.2f}')
print(f'Validation Recall: {recall:.2f}')
print(f'Validation Log Loss: {loss:.2f}')
