# In [None]:
# IMPORTS #
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from skimage import feature, transform
from skimage.transform import resize
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, roc_curve, auc, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder


# FEATURE EXTRACTION & PREPROCESSING #
# FEATURE EXTRACTION FUNCTION
def extract_features(image):
    """Build a combined HOG + LBP descriptor for one grayscale image.

    The image is histogram-equalized, scaled to [0, 1], resized to 64x64 and
    then described by its HOG features followed by the flattened uniform-LBP
    code map of the resized image.

    Args:
        image: Single-channel 8-bit image as a NumPy array (the input that
            ``cv2.equalizeHist`` is called with).

    Returns:
        1-D NumPy array holding the HOG vector followed by the 64 * 64 LBP
        codes.

    Raises:
        ValueError: If ``image`` is ``None`` (for example a failed image read).
    """
    if image is None:
        raise ValueError("extract_features received None instead of an image")

    # Contrast enhancement, then scale to [0, 1] and bring to a fixed size
    image = cv2.equalizeHist(image)
    image = image / 255.0
    image = resize(image, (64, 64), mode='constant', anti_aliasing=True)

    # Only the descriptor is used, so the HOG visualization is not rendered
    hog_features = feature.hog(image, orientations=9, pixels_per_cell=(8, 8),
                               cells_per_block=(2, 2), block_norm='L2-Hys')

    # LBP thresholds each neighbour against the centre pixel; on a float image
    # tiny numerical differences flip those comparisons, so quantize to 8-bit
    # integers first (scikit-image recommends integer input for this function)
    lbp_input = np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)
    lbp_features = feature.local_binary_pattern(lbp_input, P=8, R=1, method='uniform')

    # Concatenate HOG and LBP features into a single vector
    return np.concatenate([hog_features, lbp_features.flatten()])


# DATA READING & LIST CREATION #
# DATASET LOADING AND READING
# Raw string so the Windows backslashes are never parsed as escape sequences
dataset_path = r'C:\programing\Ml project\Dataset3'
# Sorted because os.listdir returns entries in arbitrary order
class_folders = sorted(os.listdir(dataset_path))

# Lists to store features and labels (one entry per successfully read image)
features = []
labels = []

max_feature_length = 0  # Longest feature vector seen so far

# Each sub-folder of dataset_path is one class; its name is used as the label
for class_folder in class_folders:
    class_path = os.path.join(dataset_path, class_folder)

    # Skip stray files sitting next to the class folders
    if not os.path.isdir(class_path):
        continue

    for image_name in sorted(os.listdir(class_path)):
        image_path = os.path.join(class_path, image_name)

        # cv2.imread returns None instead of raising when a file cannot be
        # read or decoded, so check before using the result
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f'Skipping unreadable image: {image_path}')
            continue
        image = image.astype(np.uint8)

        # Extract the combined feature vector for this image
        extracted_features = extract_features(image)

        # Track the longest feature vector for the later fixed-size step
        max_feature_length = max(max_feature_length, len(extracted_features))

        features.append(extracted_features)
        labels.append(class_folder)


# FEATURE RESIZING #
# Bring every feature vector to a common length and stack them into a matrix
if not features:
    raise ValueError('No features were extracted; check dataset_path and its contents')

# Zero-pad shorter vectors up to max_feature_length. Image-style resampling is
# avoided on purpose: interpolating a descriptor vector would blend unrelated
# feature entries together. Vectors that already have the full length are
# copied unchanged.
resized_features = np.zeros((len(features), max_feature_length), dtype=np.float64)
for row, vector in zip(resized_features, features):
    row[:len(vector)] = vector
features = resized_features

# Convert the label list to a numpy array
labels = np.array(labels)


# USING ENCODERS #
# Encode the class labels as integers
label_encoder = LabelEncoder()
labels_numeric = label_encoder.fit_transform(labels)

# Data augmentation using Keras ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Initialize StratifiedKFold for cross-validation.
# Seeded so the shuffled folds, and therefore the reported metrics, are the
# same on every run, in line with the fixed random_state used for KMeans.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Create KMeans clustering model
kmeans = KMeans(n_clusters=5, random_state=0)

# Initialize lists to store evaluation metrics for each fold
conf_matrices = []
tpr_list = []
fpr_list = []
roc_auc_list = []
accuracies = []


# DATA SPLITTING & CROSS VALIDATION #
# REPORT #
# Perform cross-validation: per fold, fit one KMeans model per class and
# classify each test sample by the class whose nearest centroid is closest
n_classes = len(np.unique(labels_numeric))

for fold, (train_index, test_index) in enumerate(kf.split(features, labels_numeric), start=1):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels_numeric[train_index], labels_numeric[test_index]

    # Fit one KMeans model on the training samples of each class
    kmeans_per_class = []
    for class_idx in range(n_classes):
        class_samples = X_train[y_train == class_idx]
        # KMeans cannot fit more clusters than it has samples
        n_clusters = min(5, len(class_samples))
        class_kmeans = KMeans(n_clusters=n_clusters, random_state=0)
        class_kmeans.fit(class_samples)
        kmeans_per_class.append(class_kmeans)

    # PREDICTION #
    # Score every test sample against every class. KMeans.transform gives the
    # distance to each centroid; the negated distance to the nearest centroid
    # is used as the class score, so a higher score means a closer match.
    # (Cluster ids from predict() are arbitrary labels and carry no ranking.)
    y_pred = np.column_stack([
        -class_kmeans.transform(X_test).min(axis=1)
        for class_kmeans in kmeans_per_class
    ])
    predicted_classes = np.argmax(y_pred, axis=1)

    # Create confusion matrix; labels= keeps the shape identical in every fold
    # so the matrices can be averaged later
    conf_matrix = confusion_matrix(y_test, predicted_classes, labels=np.arange(n_classes))
    conf_matrices.append(conf_matrix)

    # One-vs-rest ROC curve for each class
    tpr_fold_list = []
    fpr_fold_list = []
    roc_auc_fold_list = []
    for class_idx in range(n_classes):
        # Convert labels to binary format (current class vs. all others)
        binary_labels = (y_test == class_idx).astype(int)

        # Use the class score column as the ranking for the positive class
        fpr, tpr, _ = roc_curve(binary_labels, y_pred[:, class_idx])
        tpr_fold_list.append(tpr)
        fpr_fold_list.append(fpr)

        # Calculate AUC
        roc_auc = auc(fpr, tpr)
        roc_auc_fold_list.append(roc_auc)
        print(f'Fold {fold}, Class {class_idx}, AUC: {roc_auc}')

    tpr_list.append(tpr_fold_list)
    fpr_list.append(fpr_fold_list)
    roc_auc_list.append(roc_auc_fold_list)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predicted_classes)
    accuracies.append(accuracy)
    print(f'Fold {fold} Accuracy: {accuracy}')

# One figure per class: every fold's ROC curve plus their mean
for class_idx, class_name in enumerate(np.unique(labels)):
    # (fpr, tpr) of this class, one pair per fold
    fold_curves = [
        (fold_fprs[class_idx], fold_tprs[class_idx])
        for fold_fprs, fold_tprs in zip(fpr_list, tpr_list)
    ]

    plt.figure()

    # Thin line for each individual fold
    for fold, (curve_fpr, curve_tpr) in enumerate(fold_curves, start=1):
        plt.plot(curve_fpr, curve_tpr, lw=1, label=f'Fold {fold}')

    # Mean curve: interpolate every fold onto a common FPR grid, then average
    mean_fpr = np.linspace(0, 1, 100)
    interpolated_tprs = [
        np.interp(mean_fpr, curve_fpr, curve_tpr)
        for curve_fpr, curve_tpr in fold_curves
    ]
    mean_tpr = np.mean(interpolated_tprs, axis=0)
    plt.plot(mean_fpr, mean_tpr, color='b', label='Mean ROC', lw=2)

    # Average of the per-fold AUC values goes into the title
    average_roc_auc = np.mean([auc(curve_fpr, curve_tpr) for curve_fpr, curve_tpr in fold_curves])
    plt.title(f'ROC Curve for Class: {class_name}\nAverage AUC: {average_roc_auc:.3f}')

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    plt.show()

# Single figure with the fold-averaged ROC curve of every class
plt.figure()

mean_fpr = np.linspace(0, 1, 100)  # common FPR grid shared by all curves
fold_count = len(tpr_list)

for class_idx, class_name in enumerate(np.unique(labels)):
    # Sum the interpolated TPR curves of all folds, then divide by the fold count
    interpolated_tprs = (
        np.interp(mean_fpr, fold_fprs[class_idx], fold_tprs[class_idx])
        for fold_fprs, fold_tprs in zip(fpr_list, tpr_list)
    )
    mean_tpr = sum(interpolated_tprs, np.zeros_like(mean_fpr)) / fold_count

    plt.plot(mean_fpr, mean_tpr, lw=1, label=f'Mean ROC - {class_name}')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Mean ROC Curve Across All Classes and Folds')
plt.legend()
plt.show()


# Draw one annotated heat map per fold's confusion matrix
class_names = np.unique(labels)
tick_marks = np.arange(len(class_names))

for fold, conf_matrix in enumerate(conf_matrices, start=1):
    plt.figure()

    # Render the matrix as an image with a colour scale
    plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(f'Confusion Matrix - Fold {fold}')
    plt.colorbar()

    # Write the count into every cell (row i = true label, column j = predicted)
    for i, j in np.ndindex(len(class_names), len(class_names)):
        plt.text(j, i, str(conf_matrix[i, j]), ha='center', va='center', color='red')

    # Label both axes with the class names
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

# Calculate and plot the average confusion matrix (element-wise mean over folds)
average_conf_matrix = np.mean(conf_matrices, axis=0)
class_names = np.unique(labels)

plt.figure()
plt.imshow(average_conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Average Confusion Matrix')
plt.colorbar()

# Add numerical values in each square. The cells are means over folds and are
# usually fractional, so show one decimal instead of truncating to an integer.
for i in range(len(class_names)):
    for j in range(len(class_names)):
        plt.text(j, i, f'{average_conf_matrix[i, j]:.1f}', ha='center', va='center', color='red')

tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names, rotation=45)
plt.yticks(tick_marks, class_names)
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Print average confusion matrix
print("Average Confusion Matrix:")
print(average_conf_matrix)

# Report the AUC of every class in every fold (grouped by class)
auc_fold_numbers = range(1, len(roc_auc_list) + 1)
for class_idx, class_name in enumerate(np.unique(labels)):
    for fold, roc_auc_fold in zip(auc_fold_numbers, roc_auc_list):
        print(f'Fold {fold}, Class {class_name}, AUC: {roc_auc_fold[class_idx]}')

# Report the accuracy of each fold
for fold, accuracy in zip(range(1, len(accuracies) + 1), accuracies):
    print(f"Fold {fold} Accuracy: {accuracy}")

# Report the mean accuracy over all folds
average_accuracy = np.array(accuracies).mean()
print(f"\nAverage Accuracy: {average_accuracy}")
