## Install LymphoMNIST
This Jupyter notebook demonstrates how to install LymphoMNIST, preprocess the dataset, load a trained model, evaluate its accuracy, and measure its inference speed. Run the cells in order, from top to bottom, for a step-by-step walkthrough.


### Install Required Packages
Begin by installing LymphoMNIST and other required libraries.

In [None]:
!pip install LymphoMNIST torch torchvision numpy torchsummary

### Load Required Packages

In [1]:
import torch
from torchvision import models
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from LymphoMNIST.LymphoMNIST import LymphoMNIST
from torchvision import transforms
from torchsummary import summary

### Define Dataset and Helper Functions

In [10]:

BATCH_SIZE = 64

class FilteredLymphoMNIST(Dataset):
    """View over a dataset that exposes only the samples with a wanted label.

    Args:
        original_dataset: Iterable, indexable dataset yielding ``(sample, label)`` pairs.
        labels_to_keep: Container of labels; a sample is kept when
            ``label in labels_to_keep``.
    """

    def __init__(self, original_dataset, labels_to_keep):
        self.original_dataset = original_dataset
        # One full pass over the wrapped dataset records the positions that pass the filter.
        kept_positions = []
        for position, (_sample, label) in enumerate(original_dataset):
            if label in labels_to_keep:
                kept_positions.append(position)
        self.indices = kept_positions

    def __len__(self):
        """Number of samples that passed the label filter."""
        return len(self.indices)

    def __getitem__(self, index):
        """Return the ``index``-th kept sample, fetched from the wrapped dataset."""
        source_position = self.indices[index]
        return self.original_dataset[source_position]

def get_dataloaders(train_ds, val_ds, batch_size=BATCH_SIZE, seed=None, **kwargs):
    """Build train/validation/test loaders, carving validation and test out of ``val_ds``.

    ``val_ds`` is randomly split into two disjoint parts: the first
    ``len(val_ds) // 2`` samples form the validation set and the remainder the
    test set.

    Args:
        train_ds: Dataset for the (shuffled) training loader.
        val_ds: Dataset that is split into the validation and test subsets.
        batch_size: Batch size shared by all three loaders.
        seed: Optional integer seed for the val/test split. When given, the
            split is reproducible across runs; when ``None`` (default) the
            global torch RNG is used, as before.
        **kwargs: Extra keyword arguments forwarded to every ``DataLoader``
            (e.g. ``num_workers``).

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    val_size = len(val_ds) // 2
    test_size = len(val_ds) - val_size

    # Only pass a generator when a seed was requested, so the default call
    # keeps using torch's global RNG.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    val_ds, test_ds = random_split(val_ds, [val_size, test_size], **split_kwargs)

    return (
        DataLoader(train_ds, batch_size=batch_size, shuffle=True, **kwargs),
        DataLoader(val_ds, batch_size=batch_size, shuffle=False, **kwargs),
        DataLoader(test_ds, batch_size=batch_size, shuffle=False, **kwargs),
    )
    

# Top-1 accuracy helper
def calculate_accuracy(loader, model, device):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    The model is switched to evaluation mode and gradients are disabled while
    iterating. A prediction is the index of the largest output along dim 1.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        model: Callable module mapping an image batch to per-class scores.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predicted = torch.max(model(batch_images), 1)[1]
            seen += batch_labels.size(0)
            hits += (predicted == batch_labels).sum().item()

    return 100 * hits / seen
    

# Hyperparameters
params = {
    'batch_size': 16,
    'im_size': 48,  # Resize dimension used during training
    'model_checkpoint': "../checkpoint/KD_21 October 14:13_resnet50_qt-1channel-no_normalize.pt"  # Path to the saved model
}

# Single source of truth for the input side length: derived from params so the
# resize transform and the model's input_size cannot drift from 'im_size'.
BIGGER = params['im_size']

import torchvision.transforms as T
# Preprocessing for the student model: resize to BIGGER x BIGGER, then convert to a tensor.
transform_student = T.Compose([
    T.Resize((BIGGER, BIGGER)),
    T.ToTensor(),
    # Normalization is disabled here (the checkpoint file name contains "no_normalize").
    # T.Normalize([0.4819], [0.1484]),
])

### Initialize the Dataset

In [4]:
# Initialize datasets
# Both splits are loaded with num_classes=3 and then narrowed to labels 0 and 1 below.
labels_to_keep = [0, 1]
original_train_ds = LymphoMNIST(root='../dataset', train=True, download=True, transform=transform_student, num_classes=3)
original_test_ds = LymphoMNIST(root='../dataset', train=False, download=True, transform=transform_student, num_classes=3)

# Filter datasets
# FilteredLymphoMNIST iterates each dataset once at construction to find the kept indices.
train_ds = FilteredLymphoMNIST(original_train_ds, labels_to_keep)
test_ds = FilteredLymphoMNIST(original_test_ds, labels_to_keep)

# Get dataloaders
# NOTE: get_dataloaders randomly splits its second argument in half, so val_dl and
# test_dl are two disjoint halves of the filtered test split.
train_dl, val_dl, test_dl = get_dataloaders(train_ds, test_ds, batch_size=params['batch_size'], num_workers=4)

Dataset already exists. Skipping download.
Dataset already exists. Skipping download.


### Define the Model Architecture

In [5]:
# Select the compute device: CUDA when available, otherwise CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class QuantizedCNN(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and a linear softmax head.

    The width of the linear layer is inferred by pushing a dummy batch through
    the convolutional stack, so any ``input_size`` large enough to survive the
    two unpadded 3x3 convolutions and 2x2 poolings is supported.

    Args:
        num_classes: Number of output classes.
        input_size: ``(channels, height, width)`` of a single input sample. The
            channel count also sets the first convolution's input channels
            (1 for the default).
    """

    def __init__(self, num_classes=2, input_size=(1, 28, 28)):
        super().__init__()
        self.num_classes = num_classes
        # Take the channel count from input_size instead of hard-coding 1, so the
        # dummy forward pass below cannot fail for multi-channel inputs.
        in_channels = input_size[0]
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        )
        # Probe the feature extractor once to learn the flattened feature count.
        with torch.no_grad():
            dummy_input = torch.zeros(1, *input_size)
            dummy_output = self.features(dummy_input)
            num_ftrs = dummy_output.numel() // dummy_output.size(0)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(num_ftrs, num_classes),
            # The head emits probabilities (rows sum to 1), not raw logits.
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Map a ``(batch, channels, height, width)`` tensor to class probabilities."""
        x = self.features(x)
        x = self.classifier(x)
        return x
    
    
# Build the network for single-channel BIGGER x BIGGER inputs and place it on `device`.
model = QuantizedCNN(num_classes=2, input_size=(1, BIGGER, BIGGER)).to(device)

# Load saved weights
# NOTE(review): torch.load unpickles the checkpoint, which can execute arbitrary
# code; only load files from a trusted source (consider weights_only=True where
# the installed PyTorch version supports it).
model.load_state_dict(torch.load(params['model_checkpoint'], map_location=device))


In [6]:
def evaluate_model(model, dataloader, device):
    """Collect ground-truth and predicted labels for every sample in ``dataloader``.

    Args:
        model: Module mapping an image batch to per-class scores.
        dataloader: Iterable yielding ``(image, target)`` tensor batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(y_true, y_pred)``: two lists of per-sample numpy integer labels, in
        the order the loader produced them.
    """
    model.eval()  # evaluation mode for inference

    truths, predictions = [], []
    with torch.no_grad():
        for batch, labels in dataloader:
            batch = batch.to(device)
            labels = labels.to(device)
            scores = model(batch)
            # The index of the largest score along dim 1 is the predicted class.
            batch_pred = torch.max(scores, 1)[1].detach().cpu().numpy()
            batch_true = labels.detach().cpu().numpy()
            predictions.extend(batch_pred)
            truths.extend(batch_true)

    return truths, predictions

In [None]:
from torchsummary import summary
# torchsummary builds its dummy input on `device`, which defaults to "cuda";
# pass the selected device so the summary also works on CPU-only machines.
summary(model, (1, BIGGER, BIGGER), device=device)

### Calculate Model Accuracy

In [12]:
# Calculate and print the accuracy on the training loader
train_acc = calculate_accuracy(train_dl, model, device)
print(f'Train Accuracy: {train_acc:.2f}%')


Train Accuracy: 88.07%


In [11]:

# Accuracy on val_dl (the validation half that get_dataloaders split off the test set)
val_acc = calculate_accuracy(val_dl, model, device)
print(f'Validation Accuracy: {val_acc:.2f}%')

Validation Accuracy: 87.45%


In [13]:
# Accuracy on test_dl (the remaining half of the split test set)
test_acc = calculate_accuracy(test_dl, model, device)
print(f'Test Accuracy: {test_acc:.2f}%')

Test Accuracy: 87.94%


## Inference time

In [12]:
import time

# Benchmark settings (this rebinds `params`, replacing the earlier dict)
params = dict(
    batch_size=16,         # batch size the data loaders were built with
    im_size=48,            # side length the images are resized to
    num_warmup_batches=5,  # untimed batches run before measuring
    num_batches=100,       # max timed batches per run (up to 100 * 16 = 1,600 images)
)

In [None]:

import numpy as np
# Function to calculate inference speed
def calculate_inference_speed(loader, model, device, num_batches, warmup_batches):
    """Measure the average forward-pass time per image and the resulting throughput.

    Only the ``model(images)`` call is timed; data loading and the copy of the
    batch to ``device`` are excluded. CUDA kernels launch asynchronously, so on
    a CUDA device the stream is synchronized right before the timer starts and
    right after the forward pass; otherwise only the launch overhead would be
    measured.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches; labels are ignored.
        model: Module to benchmark; it is switched to evaluation mode.
        device: Device (string or ``torch.device``) the batches are moved to.
        num_batches: Maximum number of batches to time.
        warmup_batches: Number of untimed batches run first.

    Returns:
        ``(avg_inference_time_per_image, images_per_second)``; the first value is
        in seconds.

    Raises:
        ValueError: If no batch was timed (empty loader or ``num_batches <= 0``).
    """
    model.eval()
    target = torch.device(device)
    on_cuda = target.type == 'cuda'

    def _wait_for_device():
        # Block until all queued CUDA work has finished; no-op on CPU.
        if on_cuda:
            torch.cuda.synchronize(target)

    total_time = 0.0
    images_processed = 0

    with torch.no_grad():
        # Warm-up loop (untimed)
        for i, (images, _) in enumerate(loader):
            if i >= warmup_batches:
                break
            _ = model(images.to(device))

        # Timed inference loop
        for i, (images, _) in enumerate(loader):
            if i >= num_batches:
                break
            images = images.to(device)

            _wait_for_device()  # make sure the copy and earlier work are done
            start_time = time.perf_counter()
            _ = model(images)
            _wait_for_device()  # include the full kernel execution in the timing
            total_time += time.perf_counter() - start_time

            images_processed += images.size(0)

    if images_processed == 0:
        raise ValueError("No batches were timed: the loader is empty or num_batches <= 0.")

    avg_inference_time_per_image = total_time / images_processed
    # Guard against a zero reading from the timer rather than dividing by zero.
    if avg_inference_time_per_image > 0:
        images_per_second = 1.0 / avg_inference_time_per_image
    else:
        images_per_second = float('inf')
    return avg_inference_time_per_image, images_per_second

# Run the inference test for 50 iterations and collect the results
# (each iteration re-runs the warm-up, then times up to params['num_batches'] batches of test_dl)
inference_times = []
inference_speeds = []

for _ in range(50):
    avg_time, throughput = calculate_inference_speed(test_dl, model, device, params['num_batches'], params['num_warmup_batches'])
    inference_times.append(avg_time)
    inference_speeds.append(throughput)

# Calculate the average and standard deviation of inference times and speeds
# (np.std uses its default ddof=0, i.e. the population standard deviation of the 50 runs)
mean_time = np.mean(inference_times)
std_time = np.std(inference_times)
mean_speed = np.mean(inference_speeds)
std_speed = np.std(inference_speeds)

print(f'Average Inference Time per Image (over 50 runs): {mean_time:.6f} seconds ± {std_time:.6f}')
print(f'Average Inference Speed (over 50 runs): {mean_speed:.2f} images/second ± {std_speed:.2f}')

Average Inference Time per Image (over 50 runs): 0.000015 seconds ± 0.000001
Average Inference Speed (over 50 runs): 66891.07 images/second ± 5215.63

## Keras inference

In [None]:

# %%
import torch
from torchvision import models
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
from LymphoMNIST.LymphoMNIST import LymphoMNIST
from torchvision import transforms
from torchsummary import summary

# %% [markdown]
# ### Define Dataset and Helper Functions

# %%

BATCH_SIZE = 64

class FilteredLymphoMNIST(Dataset):
    """View over a dataset that exposes only the samples with a wanted label.

    Args:
        original_dataset: Iterable, indexable dataset yielding ``(sample, label)`` pairs.
        labels_to_keep: Container of labels; a sample is kept when
            ``label in labels_to_keep``.
    """

    def __init__(self, original_dataset, labels_to_keep):
        self.original_dataset = original_dataset
        # One full pass over the wrapped dataset records the positions that pass the filter.
        kept_positions = []
        for position, (_sample, label) in enumerate(original_dataset):
            if label in labels_to_keep:
                kept_positions.append(position)
        self.indices = kept_positions

    def __len__(self):
        """Number of samples that passed the label filter."""
        return len(self.indices)

    def __getitem__(self, index):
        """Return the ``index``-th kept sample, fetched from the wrapped dataset."""
        source_position = self.indices[index]
        return self.original_dataset[source_position]

def get_dataloaders(train_ds, val_ds, batch_size=BATCH_SIZE, seed=None, **kwargs):
    """Build train/validation/test loaders, carving validation and test out of ``val_ds``.

    ``val_ds`` is randomly split into two disjoint parts: the first
    ``len(val_ds) // 2`` samples form the validation set and the remainder the
    test set.

    Args:
        train_ds: Dataset for the (shuffled) training loader.
        val_ds: Dataset that is split into the validation and test subsets.
        batch_size: Batch size shared by all three loaders.
        seed: Optional integer seed for the val/test split. When given, the
            split is reproducible across runs; when ``None`` (default) the
            global torch RNG is used, as before.
        **kwargs: Extra keyword arguments forwarded to every ``DataLoader``
            (e.g. ``num_workers``).

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    val_size = len(val_ds) // 2
    test_size = len(val_ds) - val_size

    # Only pass a generator when a seed was requested, so the default call
    # keeps using torch's global RNG.
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    val_ds, test_ds = random_split(val_ds, [val_size, test_size], **split_kwargs)

    return (
        DataLoader(train_ds, batch_size=batch_size, shuffle=True, **kwargs),
        DataLoader(val_ds, batch_size=batch_size, shuffle=False, **kwargs),
        DataLoader(test_ds, batch_size=batch_size, shuffle=False, **kwargs),
    )
    

# Top-1 accuracy helper
def calculate_accuracy(loader, model, device):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    The model is switched to evaluation mode and gradients are disabled while
    iterating. A prediction is the index of the largest output along dim 1.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        model: Callable module mapping an image batch to per-class scores.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predicted = torch.max(model(batch_images), 1)[1]
            seen += batch_labels.size(0)
            hits += (predicted == batch_labels).sum().item()

    return 100 * hits / seen
    

# Hyperparameters
params = {
    'batch_size': 16,
    'im_size': 48,  # Resize dimension used during training
}


import torchvision.transforms as T
# Preprocessing: resize to im_size x im_size, convert to a tensor, then normalize.
# NOTE(review): T.Normalize is active here, whereas the transform in the PyTorch
# section of this notebook has it commented out. Confirm which preprocessing the
# converted Keras weights expect.
transform_student = T.Compose([
    T.Resize((params['im_size'], params['im_size'])),
    T.ToTensor(),
    T.Normalize([0.4819], [0.1484]),
])

# %% [markdown]
# ### Initialize the Dataset

# %%
# Initialize datasets
# Both splits are loaded with num_classes=3 and then narrowed to labels 0 and 1 below.
labels_to_keep = [0, 1]
original_train_ds = LymphoMNIST(root='../dataset', train=True, download=True, transform=transform_student, num_classes=3)
original_test_ds = LymphoMNIST(root='../dataset', train=False, download=True, transform=transform_student, num_classes=3)

# Filter datasets
# FilteredLymphoMNIST iterates each dataset once at construction to find the kept indices.
train_ds = FilteredLymphoMNIST(original_train_ds, labels_to_keep)
test_ds = FilteredLymphoMNIST(original_test_ds, labels_to_keep)



# Get dataloaders
# NOTE: get_dataloaders randomly splits its second argument in half, so val_dl and
# test_dl are two disjoint halves of the filtered test split.
train_dl, val_dl, test_dl = get_dataloaders(train_ds, test_ds, batch_size=params['batch_size'], num_workers=4)

In [3]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, ReLU, Softmax
from tensorflow.keras.models import Model
import torch
import torch.nn as nn

# --- Create and Load Keras Model ---
def create_keras_model():
    """Build the Keras functional model used for inference in this section.

    Architecture, in order: a 48x48x1 input, two stages of 3x3 'valid'
    convolution (16 filters) + ReLU + 2x2 max-pooling, then Flatten,
    Dropout(0.5), Dense(2) and Softmax.

    Returns:
        An uncompiled ``Model`` mapping ``(batch, 48, 48, 1)`` inputs to
        ``(batch, 2)`` softmax outputs.
    """
    # The input has the channel axis last (height, width, channels).
    input_layer = Input(shape=(48, 48, 1), name='input_layer')
    x = Conv2D(16, (3, 3), padding='valid')(input_layer)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=2, strides=2, padding='valid')(x)
    x = Conv2D(16, (3, 3), padding='valid')(x)
    x = ReLU()(x)  # ReLU after the second convolution as well
    x = MaxPooling2D(pool_size=2, strides=2, padding='valid')(x)
    # NOTE(review): with the Permute below disabled, Flatten sees the features with the
    # channel axis last; presumably the converted Dense weights were reordered to
    # match. TODO confirm against the conversion script.
    # x = Permute((3, 1, 2))(x)  # Add Permute layer here
    x = Flatten(name='flatten')(x)
    x = Dropout(0.5)(x)
    x = Dense(2)(x)
    output_layer = Softmax()(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    return model


# Instantiate the architecture, then load the weights stored in the .h5 file
keras_model = create_keras_model()
keras_model.load_weights('../converted_keras_model_final.h5')


In [None]:

# Function to calculate accuracy using Keras model
def calculate_accuracy_keras(loader, model):
    """Return the top-1 accuracy (in percent) of a Keras model over a torch loader.

    Args:
        loader: Iterable yielding ``(images, labels)`` torch tensors, with images
            shaped ``(batch, channels, height, width)`` and labels ``(batch,)``.
        model: Object exposing a Keras-style ``predict(x, verbose=...)`` that
            returns per-class scores of shape ``(batch, num_classes)``.

    Returns:
        ``100 * correct / total`` as a Python float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    correct = 0
    total = 0
    for images, labels in loader:
        # Reorder from (N, C, H, W) to the (N, H, W, C) layout the model is fed.
        images_np = images.numpy().transpose(0, 2, 3, 1)

        # verbose=0: predict is called once per batch, and the default setting
        # would print a progress bar for every single batch.
        predictions = model.predict(images_np, verbose=0)

        # The predicted class is the index of the largest score.
        predicted_labels = np.argmax(predictions, axis=1)
        labels_np = labels.numpy()

        total += labels_np.shape[0]
        # int() keeps the running count a plain Python integer.
        correct += int((predicted_labels == labels_np).sum())

    accuracy = 100 * correct / total
    return accuracy

# Calculate and print accuracy on the test dataset
# (test_dl yields torch tensors; calculate_accuracy_keras converts each batch to numpy)
test_accuracy = calculate_accuracy_keras(test_dl, keras_model)
print(f'Test Accuracy with Keras model: {test_accuracy:.2f}%')