In [None]:
from google.colab import drive
# Mount Google Drive; the checkpoint and exported images used below are read
# from paths under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
!pip install torch torchvision medmnist
!pip install git+https://github.com/MedMNIST/MedMNIST.git

In [None]:
!pip install scikit-learn
from sklearn.metrics import roc_auc_score

**Code to load the saved images and labels and classify them with the saved model.
Final version with good results!**

In [None]:
# much needed definitions
import matplotlib.pyplot as plt
import medmnist
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from tqdm import tqdm

# Preprocessing applied to every image before it reaches the network:
# convert to a tensor, then normalise each channel with the statistics the
# pretrained model expects. No resizing is performed here.
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
transform = transforms.Compose([transforms.ToTensor(), normalize_step])

# Define the Model with ResNet18 pretrained
class PathMNISTClassifier(nn.Module):
    """ResNet-18 whose final fully connected layer is resized for PathMNIST.

    Args:
        num_classes: Number of output logits (PathMNIST has 9 classes).
        pretrained: When True (the default, matching the previous behaviour)
            the backbone is initialised from the ImageNet weights. Pass False
            when a checkpoint is loaded immediately afterwards, to skip the
            unnecessary weight download.
    """

    def __init__(self, num_classes=9, pretrained=True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the `weights` enum is
        # its replacement. IMAGENET1K_V1 is the weight set that
        # `pretrained=True` used to resolve to.
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        self.model = models.resnet18(weights=weights)

        # Replace the final fully connected layer so it emits one logit per class.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the class logits produced by the wrapped ResNet-18 for `x`."""
        return self.model(x)

class CustomDataset(Dataset):
    """Map-style dataset over an in-memory sequence of (image, label) pairs.

    Args:
        data: Indexable collection whose items unpack to ``(image, label)``.
        transform: Optional callable applied to the image on every access.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image and its label at `idx`."""
        image, label = self.data[idx]
        # Compare against None explicitly: a callable that happens to be falsy
        # (e.g. an empty nn.Sequential, whose len() is 0) must still be applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Function to unnormalize an image
def unnormalize(tensor, mean, std):
    """Undo a per-channel normalisation on an image tensor, in place.

    Every slice along the first dimension of `tensor` is multiplied by its
    entry in `std` and then shifted by its entry in `mean`. The input tensor
    itself is modified and is also the return value.
    """
    per_channel_stats = zip(mean, std)
    for channel, (channel_mean, channel_std) in zip(tensor, per_channel_stats):
        channel.mul_(channel_std)
        channel.add_(channel_mean)
    return tensor

In [None]:
# Load the saved images/labels and the saved model, then classify each image.
# Log true vs. predicted labels to confirm classification quality.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import medmnist
from medmnist import INFO, PathMNIST
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix
import numpy as np
import seaborn as sns
import os
from PIL import Image

save_dir = "/content/drive/MyDrive/ColabNotebooks/PathMnistTestData/"  # Directory to load images from
labels_file_path = os.path.join(save_dir, "labels.txt")  # One "<image_name>,<label>" record per line

# --- Load saved images and create testsaved_loader ---
testsaved_data = []
with open(labels_file_path, "r") as labels_file:
    for line_number, line in enumerate(labels_file, start=1):
        record = line.strip()
        if not record:
            continue  # Tolerate blank lines (e.g. a trailing newline at end of file)

        # Split on the LAST comma so an image name that itself contains a
        # comma is still parsed correctly.
        image_name, separator, label = record.rpartition(",")
        if not separator:
            raise ValueError(
                f"{labels_file_path}, line {line_number}: expected '<image_name>,<label>', got {record!r}"
            )

        image_path = os.path.join(save_dir, image_name)
        # convert() returns an independent in-memory image, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(image_path) as image_file:
            image = image_file.convert('RGB')
        testsaved_data.append((image, int(label)))

# batch_size=1: images are classified and displayed one at a time below.
testsaved_loader = DataLoader(CustomDataset(testsaved_data, transform=transform), batch_size=1)

# --- Test with saved model and compare results ---
# Load the best model
model_path = '/content/drive/MyDrive/ColabNotebooks/pathmnist_224_best_auc_model.pth'
model = PathMNISTClassifier(num_classes=9)
# map_location="cpu": the model is never moved off the CPU in this cell, and
# without it a checkpoint that was saved from a GPU cannot be loaded on a
# CPU-only runtime. weights_only=True restricts what torch.load will unpickle.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # Set the model to evaluation mode

# Get the label names from the PathMNIST info
info = INFO['pathmnist']
label_names = list(info['label'].values())

print("\nTesting saved images:")

# Normalisation statistics (same values as in `transform`); needed to undo the
# normalisation before an image is displayed. Defined once, outside the loop.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    for i, (saved_image, saved_label) in enumerate(testsaved_loader):
        # Make predictions
        saved_output = model(saved_image)
        _, saved_predicted = torch.max(saved_output, 1)

        # Get text labels for true and predicted labels
        true_label_name = label_names[saved_label.item()]
        predicted_label_name = label_names[saved_predicted.item()]

        print(f"Image {i + 1}:")
        print(f"  -- True Label: {true_label_name} ({saved_label.item()})")
        print(f"  -- Predicted Label: {predicted_label_name} ({saved_predicted.item()})")

        # Unnormalize a clone: unnormalize() works in place, so the clone keeps
        # the tensor that was fed to the model untouched. squeeze() drops the
        # batch dimension of size 1.
        unnormalized_image = unnormalize(saved_image.clone().squeeze(), mean, std)

        # Display the unnormalized image (channels moved last, values clipped to [0, 1])
        plt.figure(figsize=(2, 2))
        plt.imshow(unnormalized_image.permute(1, 2, 0).clip(0, 1))
        plt.axis('off')
        plt.show()

**Backup Code:
Code to load Test-Dataset**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, models
import medmnist
from medmnist import INFO, PathMNIST
from tqdm import tqdm
import matplotlib.pyplot as plt

# 1. Preprocessing for the dataset images: tensor conversion followed by the
#    per-channel normalisation the pretrained model expects (no resizing here).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

info = INFO['pathmnist']
DataClass = getattr(medmnist, info['python_class'])

# Only the test split is loaded here, at 224x224 resolution. The train and
# validation splits were previously loaded the same way (split='train' /
# split='val', batch_size=32, shuffle=True for train only).
test_split_options = dict(split='test', download=True, transform=transform, as_rgb=True, size=224)
test_dataset = PathMNIST(**test_split_options)

# Keep the original sample order so predictions line up with dataset indices.
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

**Code to display all labels**

In [None]:
import medmnist

# Look up the PathMNIST metadata entry.
info = medmnist.INFO['pathmnist']
label_mapping = info['label']

# Show the complete label mapping first ...
print("Text labels for PathMNIST:", label_mapping)

# ... then only the label texts, in the mapping's own order.
label_text_values = [label_text for label_text in label_mapping.values()]
print(label_text_values)

**Code to classify the test dataset**

In [None]:
# code to check classification on test-dataset
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, models
import medmnist
from medmnist import INFO, PathMNIST
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import roc_auc_score
import numpy as np


# Load the best model
model_path = '/content/drive/MyDrive/ColabNotebooks/pathmnist_224_best_auc_model.pth'
model = PathMNISTClassifier(num_classes=9)
# Same loading options as the saved-image cell: weights_only=True restricts
# what torch.load will unpickle, and map_location="cpu" lets a checkpoint that
# was saved from a GPU load on a CPU-only runtime (the model stays on the CPU).
model.load_state_dict(torch.load(model_path, map_location="cpu", weights_only=True))
model.eval()  # Set the model to evaluation mode

# Test and get predictions
all_true_labels = []
all_predicted_labels = []
all_predicted_probs = []  # One row of class probabilities per sample (for AUC calculation)

with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Testing"):
        outputs = model(images)  # Raw, unnormalised logits
        _, predicted_labels = torch.max(outputs, 1)

        # Flatten before converting so each sample contributes one plain int.
        # NOTE(review): presumably labels arrive with a trailing dimension of
        # size 1 per sample -- confirm against the dataset.
        all_true_labels.extend(labels.reshape(-1).tolist())
        all_predicted_labels.extend(predicted_labels.reshape(-1).tolist())

        # Store real probabilities rather than logits: softmax makes every row
        # sum to 1, which multiclass AUC scoring requires.
        probabilities = torch.softmax(outputs, dim=1)
        all_predicted_probs.extend(probabilities.cpu().numpy())

# Report the true and the predicted class of every test sample, as both the
# numeric label and its text name.
label_names = list(info['label'].values())

for i, (true_label_num, predicted_label_num) in enumerate(zip(all_true_labels, all_predicted_labels)):
    true_label_text = label_names[true_label_num]
    predicted_label_text = label_names[predicted_label_num]

    print(f"Sample {i+1}:")
    print(f"  True: {true_label_num} ({true_label_text})")
    print(f"  Predicted: {predicted_label_num} ({predicted_label_text})")

In [None]:
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix

**Code to display the results from classification of Test-Dataset**

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix
import numpy as np
import seaborn as sns # Import seaborn for better visualization

# Requires `info`, `all_true_labels` and `all_predicted_labels` from the cells above.

# Calculate and plot the overall confusion matrix
label_names = list(info['label'].values())
num_classes = len(label_names)

# Pass the full class list explicitly: otherwise a class that appears in
# neither the true nor the predicted labels is dropped from the matrix, and the
# tick labels below would no longer line up with its rows and columns.
class_indices = list(range(num_classes))
cm = confusion_matrix(all_true_labels, all_predicted_labels, labels=class_indices)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=label_names, yticklabels=label_names)
plt.title("Overall Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# One 2x2 (one-vs-rest) confusion matrix per class.
mcm = multilabel_confusion_matrix(all_true_labels, all_predicted_labels, labels=range(num_classes))
binary_ticks = ["Negative", "Positive"]

print("\nConfusion Matrices for Each Label:")
for per_class_cm, label_name in zip(mcm, label_names):
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(
        per_class_cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=binary_ticks,
        yticklabels=binary_ticks,
        ax=ax,
    )
    ax.set_title(f"Confusion Matrix for {label_name}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()