In [7]:
import cv2
import os
import numpy as np
from skimage.feature import hog
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.model_selection import StratifiedKFold, GridSearchCV

# Define the path to the preprocessed data folder
disease = 'C:/Users/samue/Downloads/Disease'

# Define the HOG parameters
hog_params = dict(orientations=9,
                  pixels_per_cell=(8, 8),
                  cells_per_block=(3, 3),
                  block_norm='L2-Hys')

# Initialize empty feature and label lists
features, labels = [], []

# Loop through each class subdirectory and extract HOG features for each image.
# The subdirectory name doubles as the class label.
for label in ['PNEUMONIA', 'TURBERCULOSIS', 'COVID19']:
    path = os.path.join(disease, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any order-dependent split downstream) reproducible.
    for image_file in sorted(os.listdir(path)):
        # cv2.imread signals failure by returning None instead of raising, so
        # stray non-image files (thumbnails caches, hidden files, ...) must be
        # filtered out here or cv2.resize fails with an opaque assertion.
        img = cv2.imread(os.path.join(path, image_file))
        if img is None:
            print(f"Skipping unreadable file: {os.path.join(path, image_file)}")
            continue

        # Reduce the image to a fixed size so every HOG vector has equal length
        img = cv2.resize(img, (244, 244))

        # Extract HOG features from the grayscale image and record its label
        features.append(hog(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), **hog_params))
        labels.append(label)

# Convert features and labels to numpy arrays
features, labels = np.array(features), np.array(labels)

# Candidate settings explored exhaustively by the grid search
# (4 layer layouts x 2 iteration caps x 2 activations x 2 solvers).
hyperparameters = dict(
    hidden_layer_sizes=[(50,), (100,), (50, 50), (100, 50)],
    max_iter=[500, 1000],
    activation=['relu', 'tanh'],
    solver=['adam', 'sgd'],
)

# Base FCNN classifier; the seed is fixed so repeated runs are comparable
clf = MLPClassifier(random_state=42)

# Search every combination with 10-fold cross-validation on all CPU cores,
# fitting on the full feature matrix.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=hyperparameters,
    cv=10,
    n_jobs=-1,
)
grid_search.fit(features, labels)

# Report the winning combination and its cross-validated score
print("Best hyperparameters:", grid_search.best_params_)
print("Best accuracy:", grid_search.best_score_)

# Per-fold accuracies are collected here
accuracy_scores = []

# Shuffled, seeded 10-fold stratified splitter
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Train a fresh classifier with the best grid-search settings on each fold
for fold_idx, (train_idx, test_idx) in enumerate(skf.split(features, labels)):
    # Carve out this fold's training and held-out portions
    X_train, y_train = features[train_idx], labels[train_idx]
    X_test, y_test = features[test_idx], labels[test_idx]

    # Fit on the training portion, then score on the held-out portion
    clf = MLPClassifier(**grid_search.best_params_, random_state=42)
    clf.fit(X_train, y_train)
    fold_accuracy = metrics.accuracy_score(y_test, clf.predict(X_test))
    accuracy_scores.append(fold_accuracy)

    # Report this fold (numbered from 1 for display)
    print(f"Fold {fold_idx+1}: Accuracy = {fold_accuracy}")

# Summarise with the mean accuracy over all folds
mean_accuracy = np.mean(accuracy_scores)
print(f"\nAverage accuracy = {mean_accuracy}")

# Evaluate the classifier using the entire dataset.
# NOTE(review): the model is fitted and scored on the same samples, so every
# figure printed below is a training-set (resubstitution) metric, not an
# estimate of generalization; use the cross-validated accuracy for that.
clf = MLPClassifier(**grid_search.best_params_, random_state=42)
clf.fit(features, labels)
y_pred = clf.predict(features)

# Calculate and print the evaluation metrics.
# The scorers are reached through the imported `metrics` module; the bare
# names (accuracy_score, f1_score, ...) are never imported in this file.
evaluation_metrics = {
    "Accuracy": metrics.accuracy_score(labels, y_pred),
    "F1 score": metrics.f1_score(labels, y_pred, average='weighted'),
    "Cohen's Kappa score": metrics.cohen_kappa_score(labels, y_pred),
}
for metric_name, metric_value in evaluation_metrics.items():
    print(f"{metric_name}: {metric_value}")

# Print the confusion matrix for the classifier.
# The result is stored under its own name so the sklearn function of the same
# name is never shadowed and this cell can be re-run safely.
conf_matrix = metrics.confusion_matrix(labels, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Calculate and print the per-class specificity and sensitivity (one-vs-rest).
# sklearn puts true classes on rows and predicted classes on columns.
TP = np.diag(conf_matrix)             # correctly predicted samples per class
FP = conf_matrix.sum(axis=0) - TP     # predicted as the class but actually another
FN = conf_matrix.sum(axis=1) - TP     # belonging to the class but predicted as another
TN = conf_matrix.sum() - (TP + FP + FN)

specificity = TN / (TN + FP)
sensitivity = TP / (TP + FN)

print("Specificity:", specificity)
print("Sensitivity:", sensitivity)


Best hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (100, 50), 'max_iter': 500, 'solver': 'adam'}
Best accuracy: 0.9776529338327091
Fold 1: Accuracy = 0.9555555555555556
Fold 2: Accuracy = 0.9555555555555556
Fold 3: Accuracy = 1.0
Fold 4: Accuracy = 0.9777777777777777
Fold 5: Accuracy = 1.0
Fold 6: Accuracy = 0.9777777777777777
Fold 7: Accuracy = 0.9777777777777777
Fold 8: Accuracy = 1.0
Fold 9: Accuracy = 1.0
Fold 10: Accuracy = 0.9772727272727273

Average accuracy = 0.9821717171717171
Accuracy: 1.0
F1 score: 1.0
Cohen's Kappa score: 1.0

Confusion Matrix:
[[149   0   0]
 [  0 149   0]
 [  0   0 150]]
Specificity: [1. 1. 1.]
Sensitivity: [1. 1. 1.]
