In [None]:
# Import necessary libraries
import torch
import numpy as np
import xgboost as xgb
from PIL import Image
import torchvision.transforms as transforms
import os
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import cv2

SEED = 123


def set_seed(seed):
    """Seed every random number generator this notebook can touch.

    Seeds Python's ``random`` module, NumPy's legacy global RNG, and PyTorch
    (CPU plus every visible CUDA device), then switches cuDNN to its
    deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import keeps this cell self-contained; `random` is not imported
    # by the notebook's import cell.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not only the current one; this call is a no-op when CUDA
    # is unavailable.
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(SEED)


In [None]:
# Configuration cell: fill in the empty path strings before running.

# Model artefacts
checkpoint_path = r''  # Path to AlexNet model weights
xgb_model_path = r''

# Preprocessing constants: target image size for Resize and the per-channel
# statistics passed to Normalize.
INPUT_SIZE = (227, 227)
MEAN = (0.5960, 0.4489, 0.4046)
STD = (0.2102, 0.1782, 0.1719)

# Dataset layout: <main_data_dir>/val and <main_data_dir>/test
main_data_dir = r""
val_dir, test_dir = (
    os.path.join(main_data_dir, split_name) for split_name in ("val", "test")
)

# Transform-free ImageFolder over the validation split, used here only to read
# the class list and the class -> index mapping.
val_dataset = datasets.ImageFolder(root=val_dir)
class_names = [*val_dataset.classes]
print("Class to label mapping:", val_dataset.class_to_idx)

In [None]:
class CLAHETransform:
    """Callable transform that applies OpenCV CLAHE to an image's L channel.

    The input is converted RGB -> LAB, CLAHE is applied to the L channel only,
    and the result is converted back LAB -> RGB and returned as a PIL image.

    Args:
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.
    """

    def __init__(self, clip_limit=2.0, tile_grid_size=(8, 8)):
        self.clip_limit, self.tile_grid_size = clip_limit, tile_grid_size

    def __call__(self, img):
        """Return a CLAHE-enhanced copy of ``img`` as a ``PIL.Image``.

        NOTE(review): the RGB2LAB conversion presumes ``img`` is a 3-channel
        8-bit RGB image (e.g. what ImageFolder's default loader yields) -
        confirm for other callers.
        """
        lab_pixels = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2LAB)
        lightness, chan_a, chan_b = cv2.split(lab_pixels)

        # The CLAHE object is created per call rather than stored on self.
        equalizer = cv2.createCLAHE(
            clipLimit=self.clip_limit,
            tileGridSize=self.tile_grid_size,
        )
        enhanced_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))

        return Image.fromarray(cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB))

In [None]:
# Deterministic preprocessing shared by the validation and test splits:
# resize -> CLAHE contrast enhancement -> tensor -> channel-wise normalisation.
_eval_steps = [
    transforms.Resize(INPUT_SIZE),
    CLAHETransform(clip_limit=2.0, tile_grid_size=(8, 8)),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
]
transform_val_test = transforms.Compose(_eval_steps)

# Validation split. batch_size=1 is DataLoader's default, spelled out here;
# shuffle=False keeps the iteration order equal to the dataset order.
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform_val_test)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

# Test split, configured identically.
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform_val_test)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)



In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Build a torchvision AlexNet without pretrained weights; the trained weights
# come from `checkpoint_path` below.
model = models.alexnet(weights=None)

# Keep only the first three classifier layers so the network returns the
# activations of that truncated head, which are later fed to XGBoost.
# NOTE(review): in torchvision's AlexNet these are presumably
# Dropout -> Linear -> ReLU (4096-d output) - confirm against print(model).
model.classifier = nn.Sequential(*list(model.classifier.children())[:3])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only machine instead of failing during deserialisation.
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)

# Checkpoint keys may carry a leading "model." (wrapper module). Strip only
# that leading prefix - str.replace would also remove later occurrences of the
# substring (e.g. inside "...submodel.") - and keep just the parameters that
# exist in the truncated network.
_wrapper_prefix = "model."
_expected_keys = set(model.state_dict().keys())  # computed once, not per key

new_state_dict = {}
for k, v in state_dict.items():
    new_key = k[len(_wrapper_prefix):] if k.startswith(_wrapper_prefix) else k
    if new_key in _expected_keys:
        new_state_dict[new_key] = v

if not new_state_dict:
    # With strict=False an empty match would load nothing and silently
    # evaluate a randomly initialised network.
    raise RuntimeError(
        f"No parameters in checkpoint {checkpoint_path!r} match the model; "
        "refusing to continue with randomly initialised weights."
    )

model.to(device)

_load_result = model.load_state_dict(new_state_dict, strict=False)
if _load_result.missing_keys:
    # Surface partially loaded models instead of hiding them behind strict=False.
    print(
        "WARNING: parameters missing from checkpoint (left at random init): "
        f"{_load_result.missing_keys}"
    )

# The network is used purely as a frozen feature extractor.
for param in model.parameters():
    param.requires_grad = False

model.eval()  # disable dropout so extracted features are deterministic
print(f"Loaded model from {checkpoint_path}")
print(model)

# from torchsummary import summary
# input_size = (3, 227, 227)
# summary(model, input_size=input_size, device=str(device))


In [None]:
# Create an empty XGBClassifier and populate it from the model file saved at
# `xgb_model_path`.
# NOTE(review): presumably this classifier was trained on the features produced
# by the truncated AlexNet above - confirm the feature dimension matches.
xgb_model = xgb.XGBClassifier()
xgb_model.load_model(xgb_model_path)
print(f"Loaded XGBoost model from {xgb_model_path}")

In [None]:
def evaluate_model(data_loader, set_name, export_txt=False):
    """Evaluate the AlexNet-features + XGBoost pipeline on one data split.

    Each batch is passed through the global ``model`` to obtain features, which
    the global ``xgb_model`` turns into class probabilities. The function
    prints accuracy and a classification report, shows a confusion-matrix
    heatmap, and optionally writes per-image predictions to a text file.

    Args:
        data_loader: DataLoader yielding ``(images, labels)`` batches. Its
            ``dataset`` must expose ``samples`` as ``(path, label)`` pairs and
            must be iterated in dataset order (``shuffle=False``), because
            file paths are matched to predictions by position.
        set_name: Human-readable split name used in progress text, printed
            headings, the plot title and the export file name.
        export_txt: When True, write ``"<set_name>_with_xgb_predictions.txt"``
            to the current working directory.

    Returns:
        None. Results are printed, plotted and optionally written to disk.
    """
    true_labels = []
    predicted_labels = []
    image_paths = []  # file path of every evaluated image, in iteration order
    confidences = []  # probability (in percent) of the predicted class

    # The underlying dataset gives access to the image file paths.
    dataset = data_loader.dataset
    # Fixing the label set keeps the report and confusion matrix aligned with
    # class_names even when a class is absent from this split's labels and
    # predictions.
    label_ids = list(range(len(class_names)))

    for images, labels in tqdm(data_loader, desc=f"Evaluating {set_name}"):
        with torch.no_grad():
            features = model(images.to(device)).cpu().numpy()

        probabilities = xgb_model.predict_proba(features)  # one row per image
        predictions = np.argmax(probabilities, axis=1)
        confidence_scores = np.max(probabilities, axis=1) * 100

        # Use the running sample offset, not the batch index, so paths stay
        # aligned with predictions for any batch size.
        start = len(image_paths)
        image_paths.extend(
            path for path, _ in dataset.samples[start:start + len(images)]
        )

        true_labels.extend(labels.cpu().numpy().tolist())
        predicted_labels.extend(predictions.tolist())
        confidences.extend(confidence_scores.tolist())

    # Calculate and print accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)
    print(f"\n{set_name} Accuracy: {accuracy * 100:.2f}%")

    # Print classification report
    print(f"\n{set_name} Classification Report:\n")
    print(
        classification_report(
            true_labels,
            predicted_labels,
            labels=label_ids,
            target_names=class_names,
        )
    )

    # Confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels, labels=label_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title(f'{set_name} Confusion Matrix on AlexNet With XGBoost')
    plt.show()

    if export_txt:
        output_file = f"{set_name}_with_xgb_predictions.txt"
        # Explicit encoding so non-ASCII file paths are written consistently
        # regardless of the platform's default locale.
        with open(output_file, "w", encoding="utf-8") as file:
            file.write("Image File, True Label, Predicted Label, Confidence\n")
            for img_path, true_label, pred_label, confidence in zip(image_paths, true_labels, predicted_labels, confidences):
                true_class_name = class_names[true_label]
                pred_class_name = class_names[pred_label]
                file.write(f"{img_path}, {true_class_name}, {pred_class_name}, {confidence:.2f}%\n")
        print(f"Predictions saved to {output_file}")


In [None]:

# Evaluate on the validation split; export_txt keeps its default (False), so
# no predictions file is written.
evaluate_model(val_loader, "Validation Set")

In [None]:
# Evaluate on the test split and also write the per-image predictions to a
# text file.
evaluate_model(test_loader, "Test Set", export_txt=True)