In [12]:
import os
import numpy as np
import cv2
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, log_loss


In [13]:
# Define paths
base_dir = 'D:\PKG - C-NMC 2019\C-NMC_training_data'
folds = ['fold_0', 'fold_1', 'fold_2']
categories = ['all', 'hem']

# Parameters
img_width, img_height = 150, 150  # Image size 128x128 pixels


In [14]:
def load_images_and_labels(base_dir, folds, categories, img_width, img_height):
    images = []
    labels = []
    for fold in folds:
        for category in categories:
            category_dir = os.path.join(base_dir, fold, category)
            label = 0 if category == 'all' else 1
            for filename in os.listdir(category_dir):
                img_path = os.path.join(category_dir, filename)
                img = cv2.imread(img_path)
                if img is not None:
                    img = cv2.resize(img, (img_width, img_height))
                    images.append(img)
                    labels.append(label)
    return np.array(images), np.array(labels)


In [15]:
def extract_hog_features(images):
    hog_features = []
    for image in images:
        # Extract HOG features for each color channel (R, G, B)
        hog_r = hog(image[:, :, 0], pixels_per_cell=(8, 8), cells_per_block=(2, 2))
        hog_g = hog(image[:, :, 1], pixels_per_cell=(8, 8), cells_per_block=(2, 2))
        hog_b = hog(image[:, :, 2], pixels_per_cell=(8, 8), cells_per_block=(2, 2))
        
        # Concatenate HOG features from all three channels
        hog_features.append(np.hstack([hog_r, hog_g, hog_b]))
    
    return np.array(hog_features)



In [16]:
# Load images and labels
images, labels = load_images_and_labels(base_dir, folds, categories, img_width, img_height)


In [17]:
# Extract HOG features
features = extract_hog_features(images)


In [18]:
# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)


In [19]:
# Define base models
knn = KNeighborsClassifier(n_neighbors=5)
svm = SVC(probability=True, kernel='linear', random_state=42)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)


In [20]:
# Define meta-model
meta_model = LogisticRegression()


In [21]:
# Define stacking classifier (combination of SVM, KNN, and Random Forest)
stacking_classifier = StackingClassifier(
    estimators=[
        ('svm', svm),
        ('knn', knn),
        ('rf', random_forest)
    ],
    final_estimator=meta_model,
    cv=5
)

# Train the stacking classifier
stacking_classifier.fit(X_train, y_train)


In [22]:
# Validate the model
y_pred = stacking_classifier.predict(X_val)
y_pred_proba = stacking_classifier.predict_proba(X_val)

# Calculate metrics
accuracy = accuracy_score(y_val, y_pred)
recall = recall_score(y_val, y_pred)
loss = log_loss(y_val, y_pred_proba)

print(f'Validation Accuracy: {accuracy:.2f}')
print(f'Validation Recall: {recall:.2f}')
print(f'Validation Log Loss: {loss:.2f}')


Validation Accuracy: 0.84
Validation Recall: 0.64
Validation Log Loss: 0.40
