In [None]:
# from teacher_train import val_transform, FilteredLymphoMNIST, get_dataloaders
# from teacher_train import WeightedRandomSampler, balanced_weights
from LymphoMNIST.LymphoMNIST import LymphoMNIST
from torchvision import transforms
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
import torch


In [None]:
# Dataset class to filter by labels
class FilteredLymphoMNIST(Dataset):
    """View of a labelled dataset restricted to a subset of its classes.

    Only samples whose label appears in ``labels_to_keep`` are exposed, and the
    kept labels are re-indexed to ``0..len(labels_to_keep) - 1`` following the
    order of ``labels_to_keep``.

    Args:
        original_dataset: Indexable dataset yielding ``(image, label)`` pairs.
            Labels may be plain integers or objects exposing ``.item()``
            (for example 0-dim tensors).
        labels_to_keep: Sequence of original label values to retain.
    """

    def __init__(self, original_dataset, labels_to_keep):
        self.original_dataset = original_dataset
        self.labels_to_keep = labels_to_keep
        # Keys are normalized to int so tensor and integer labels hit the same entry.
        self.label_map = {self._as_int(label): i for i, label in enumerate(labels_to_keep)}
        # One full pass over the dataset; note that this runs the dataset's
        # own __getitem__ (and therefore any transform it applies) per sample.
        self.indices = [
            i
            for i, (_, label) in enumerate(original_dataset)
            if self._as_int(label) in self.label_map
        ]

    @staticmethod
    def _as_int(label):
        """Return ``label`` as a Python int, unwrapping tensor-like scalars."""
        return int(label.item()) if hasattr(label, "item") else int(label)

    def __getitem__(self, index):
        """Return ``(image, remapped_label)`` for the ``index``-th kept sample."""
        original_index = self.indices[index]
        image, label = self.original_dataset[original_index]
        return image, self.label_map[self._as_int(label)]

    def __len__(self):
        """Number of samples whose label is in ``labels_to_keep``."""
        return len(self.indices)
    
# Function to get dataloaders
def get_dataloaders(train_ds, val_ds, split=(0.5, 0.5), batch_size=64, sampler=None, *args, **kwargs):
    """Build train/validation/test DataLoaders.

    ``val_ds`` is randomly partitioned into a validation part and a test part
    according to ``split``; ``train_ds`` is used as-is.

    Args:
        train_ds: Training dataset.
        val_ds: Held-out dataset that is split into validation and test subsets.
        split: Fractions of ``val_ds`` for (validation, test). Any rounding
            remainder is added to the second (test) subset.
        batch_size: Batch size used by all three loaders.
        sampler: Optional sampler for the training loader. When given, shuffling
            is disabled because DataLoader does not accept both.
        *args, **kwargs: Forwarded to every ``DataLoader``.
            NOTE(review): ``batch_size`` is already passed by keyword, so extra
            positional arguments look likely to collide with it — prefer
            keyword arguments (e.g. ``num_workers=4``); confirm before relying
            on ``*args``.

    Returns:
        Tuple ``(train_dl, val_dl, test_dl)``.
    """
    lengths = [int(len(val_ds) * frac) for frac in split]
    lengths[1] += len(val_ds) - sum(lengths)  # Correct split length sum
    # The split uses the global torch RNG, so it changes between calls unless
    # the caller seeds torch beforehand.
    val_ds, test_ds = torch.utils.data.random_split(val_ds, lengths)

    # Test identity, not truthiness: a sampler defining __len__ is falsy when it
    # is empty, which would wrongly request shuffle=True alongside the sampler.
    shuffle = sampler is None
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle, sampler=sampler, *args, **kwargs)
    val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, *args, **kwargs)
    test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False, *args, **kwargs)

    return train_dl, val_dl, test_dl

# Dataset and data augmentation classes
class ConvertToRGB:
    """Transform that expands a single-channel image tensor to three channels.

    The check is made on the size of the first dimension; tensors whose first
    dimension is not 1 are returned unchanged.
    """

    def __call__(self, tensor):
        # Anything that is not single-channel passes straight through.
        if tensor.shape[0] != 1:
            return tensor
        # Copy the lone channel three times along the first dimension.
        return tensor.repeat(3, 1, 1)
    

# # Balanced weights function for weighted sampling
# def balanced_weights(dataset, nclasses):
#     count = [0] * nclasses
#     for _, label in dataset:
#         count[label] += 1
#     N = float(sum(count))
#     weight_per_class = [N / float(count[i]) for i in range(nclasses)]
#     return [weight_per_class[label] for _, label in dataset]


In [None]:
# our hyperparameters
# Run configuration shared by the cells below; `im_size` is the square edge
# length (in pixels) that images are resized to by the transform pipeline.
params = dict(
    lr=1e-5,
    batch_size=16,
    epochs=10000,
    model="Teacher_final-3c",
    im_size=120,
)



In [None]:
# Define transforms
im_size = params['im_size']
# Evaluation preprocessing: resize to a square, convert to a tensor, normalize
# the single channel, then replicate it to three channels for the network.
# NOTE(review): 0.4819 / 0.1484 are presumably the dataset mean / std — confirm.
val_transform = transforms.Compose(
    [
        transforms.Resize(size=(im_size, im_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4819], std=[0.1484]),
        ConvertToRGB(),
    ]
)

In [None]:

# Initialize dataset
# Build the train split first, then the test split, with identical settings.
original_train_ds, original_test_ds = (
    LymphoMNIST(root='../dataset', train=is_train, download=True, transform=val_transform, num_classes=3)
    for is_train in (True, False)
)


# Classes retained for this experiment
labels_to_keep = [0, 1] # 0: B, 1: T4, 2: T8

# Wrap each split so that only the retained classes are exposed
train_ds, test_ds = (
    FilteredLymphoMNIST(split_ds, labels_to_keep)
    for split_ds in (original_train_ds, original_test_ds)
)

# weights = balanced_weights(train_ds, len(labels_to_keep))
# sampler = WeightedRandomSampler(weights, len(weights))
# Create the dataloaders
# train_dl, val_dl, test_dl = get_dataloaders(train_ds,
#                                             test_ds,
#                                             split=(0.5, 0.5),
#                                             batch_size=params['batch_size'],
#                                             # sampler=sampler,
#                                             num_workers=4
#                                            )

In [None]:
import torch
from torchvision import models
from torch import nn

def calculate_accuracy(loader, model, device):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    The model is switched to evaluation mode and gradients are disabled while
    iterating.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches of tensors.
        model: Module whose output has one score per class along dimension 1.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy in the range 0-100. Returns ``0.0`` when the loader yields no
        samples, instead of dividing by zero.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Index of the highest score per sample is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; accuracy is undefined, report 0.0.
        return 0.0

    accuracy = 100 * correct / total
    return accuracy



# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


# Load the model
# A ResNet-50 is built without requesting pretrained weights, and its final
# fully connected layer is replaced so it outputs one score per kept label.
model = models.resnet50()
model.fc = nn.Linear(model.fc.in_features, len(labels_to_keep))

# Load the saved weights and map them to the correct device
# NOTE(review): torch.load relies on pickle in many PyTorch versions, so only
# load checkpoints from a trusted source; consider weights_only=True where the
# installed version supports it — confirm.
model.load_state_dict(torch.load("../checkpoint/Final_models/Teacher_imsize-120_30 September 22_37.pt", map_location=device))

model = model.to(device)


In [None]:
from torchsummary import summary
# Summarize the network for the configured input resolution rather than a
# hard-coded 120, so the summary follows `params['im_size']` if it is changed.
summary(model, (3, params['im_size'], params['im_size']))

## Latency calculation

In [None]:
from typing import Iterator, Tuple, Any
from collections import defaultdict
import csv
import os
import numpy as np

# Rebinds the notebook-level `device` (previously a plain string) to a
# torch.device object; CUDA is used when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def calculate_true_latency(
    loader: Iterator[Tuple[torch.Tensor, Any]],
    model: torch.nn.Module,
    device: torch.device,
    num_batches: int = 20,
    warmup_batches: int = 500,
    percentiles: tuple = (50, 90, 95, 99)
) -> dict:
    """
    Measure forward-pass latency of ``model`` on batches drawn from ``loader``.

    On a CUDA device the forward pass is timed with CUDA events after an
    explicit synchronization. On any other device the function falls back to
    wall-clock timing with ``time.perf_counter`` instead of failing on the
    CUDA-only calls. Only the forward pass is timed; moving the batch to the
    device happens before the timer starts.

    The loader is iterated twice (warm-up, then measurement), so it must be
    re-iterable, e.g. a ``DataLoader``; a one-shot iterator would be consumed
    by the warm-up pass.

    Args:
        loader: Iterable of batches whose first element is the image tensor
        model: PyTorch model (switched to eval mode here)
        device: Device to run inference on (``torch.device`` or device string)
        num_batches: Maximum number of batches to measure
        warmup_batches: Maximum number of untimed warm-up batches
        percentiles: Tuple of percentiles to calculate

    Returns:
        Dictionary with mean/std latency in milliseconds per batch and per
        image, mean throughput in images per second, and one per-batch and one
        per-image latency entry for every requested percentile.
    """
    import time  # local import: only needed by the non-CUDA timing fallback

    model.eval()
    latencies = []
    batch_sizes = []

    # CUDA events only exist on CUDA devices; everything else is timed on the host.
    use_cuda = torch.device(device).type == "cuda"
    if use_cuda:
        start_event = torch.cuda.Event(enable_timing=True)
        end_event = torch.cuda.Event(enable_timing=True)

    # Warmup phase: untimed forward passes
    with torch.no_grad():
        for i, (images, *_) in enumerate(loader):
            if i >= warmup_batches:
                break
            images = images.to(device, non_blocking=True)
            _ = model(images)

    # Measurement phase
    with torch.no_grad():
        for i, (images, *_) in enumerate(loader):
            if i >= num_batches:
                break

            # Move data to device asynchronously
            images = images.to(device, non_blocking=True)

            if use_cuda:
                # Drain pending GPU work (including the copy above) so that it
                # is not attributed to the forward pass.
                torch.cuda.synchronize()
                start_event.record()

                # Run inference
                _ = model(images)

                end_event.record()
                # Wait for the end event before reading the elapsed time.
                end_event.synchronize()
                latency = start_event.elapsed_time(end_event)  # milliseconds
            else:
                start_time = time.perf_counter()

                # Run inference
                _ = model(images)

                latency = (time.perf_counter() - start_time) * 1000.0  # s -> ms

            latencies.append(latency)
            batch_sizes.append(images.size(0))

    # Convert to numpy for statistics
    latencies = np.array(latencies)
    batch_sizes = np.array(batch_sizes)
    # Uses the actual size of each batch, so a short final batch is handled.
    per_image_latencies = latencies / batch_sizes

    # Calculate statistics
    stats = {
        'mean_latency_ms_per_batch': float(np.mean(latencies)),
        'std_latency_ms_per_batch': float(np.std(latencies)),
        'mean_latency_ms_per_image': float(np.mean(per_image_latencies)),
        'std_latency_ms_per_image': float(np.std(per_image_latencies)),
        # Mean of the per-batch throughputs (latencies are ms, hence / 1000).
        'throughput_imgs_per_sec': float(np.mean(batch_sizes / (latencies / 1000))),
    }

    # Add percentiles
    for p in percentiles:
        stats[f'p{p}_latency_ms_per_batch'] = float(np.percentile(latencies, p))
        stats[f'p{p}_latency_ms_per_image'] = float(np.percentile(per_image_latencies, p))

    return stats

def run_latency_test(model, test_dl, device, num_batches=20, runs=5, output_csv="latency_results.csv"):
    """Repeat the latency measurement, summarize it, and append a row to a CSV.

    ``calculate_true_latency`` is called ``runs`` times on ``test_dl``. For
    every statistic it reports, the mean and standard deviation across runs are
    computed. A summary is printed, and one row (keyed by ``test_dl.batch_size``)
    is appended to ``output_csv``; the header row is written only when the file
    did not exist beforehand.

    Returns:
        Dict mapping each statistic name to ``{'mean': ..., 'std': ...}``.
    """
    per_run_values = defaultdict(list)

    for run in range(runs):
        print(f"\nRun {run + 1}/{runs}")
        stats = calculate_true_latency(test_dl, model, device, num_batches=num_batches)

        # Collect this run's value under each statistic name.
        for metric in stats:
            per_run_values[metric].append(stats[metric])

    # Mean / standard deviation of every statistic across the runs
    aggregate_stats = {}
    for metric, samples in per_run_values.items():
        aggregate_stats[metric] = {
            'mean': float(np.mean(samples)),
            'std': float(np.std(samples)),
        }

    print("\n=== Aggregate Results ===")
    print(f"Runs: {runs}")
    print(f"Mean per-image latency: {aggregate_stats['mean_latency_ms_per_image']['mean']:.4f} ms ± {aggregate_stats['mean_latency_ms_per_image']['std']:.4f} ms")
    print(f"Mean per-batch latency: {aggregate_stats['mean_latency_ms_per_batch']['mean']:.4f} ms ± {aggregate_stats['mean_latency_ms_per_batch']['std']:.4f} ms")
    print(f"Mean throughput: {aggregate_stats['throughput_imgs_per_sec']['mean']:.2f} imgs/sec ± {aggregate_stats['throughput_imgs_per_sec']['std']:.2f}")

    header = [
        "Batch Size",
        "Mean Latency per Image (ms)", "Std Latency per Image (ms)",
        "Mean Latency per Batch (ms)", "Std Latency per Batch (ms)",
        "Mean Throughput (imgs/sec)", "Std Throughput (imgs/sec)"
    ]
    # Statistics written to the CSV, in column order (mean then std for each).
    reported_metrics = (
        'mean_latency_ms_per_image',
        'mean_latency_ms_per_batch',
        'throughput_imgs_per_sec',
    )

    # Checked before opening: append mode would create the file.
    needs_header = not os.path.exists(output_csv)
    with open(output_csv, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        if needs_header:
            csv_writer.writerow(header)

        row = [test_dl.batch_size]
        for metric in reported_metrics:
            row.append(aggregate_stats[metric]['mean'])
            row.append(aggregate_stats[metric]['std'])
        csv_writer.writerow(row)

    return aggregate_stats


# Example Usage
# The exponent range now matches the documented sweep (1 .. 1024). The previous
# range(0, 20) went up to 2**19 = 524288, far beyond the stated upper bound.
batch_sizes = [2**i for i in range(0, 11)]  # Powers of 2 from 1 to 1024
output_csv = "latency_results_teacher_final.csv"

# Remove the CSV file if it already exists (optional, for fresh runs)
if os.path.exists(output_csv):
    os.remove(output_csv)

print("\n=== Batch Size Latency and Throughput Testing ===")

# Loop through each batch size and test latency
for batch_size in batch_sizes:
    print(f"\nTesting with batch size: {batch_size}")
    # Only the test loader is benchmarked. get_dataloaders re-splits test_ds
    # randomly on every call, so each batch size sees a different random subset.
    train_dl, val_dl, test_dl = get_dataloaders(train_ds, test_ds, batch_size=batch_size, num_workers=4)
    
    run_latency_test(model, test_dl, device, num_batches=100, runs=5, output_csv=output_csv)

print(f"\nResults saved to {output_csv}")

## Plot

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the benchmark results written by the latency sweep
csv_file = "latency_results_teacher_final.csv"  # Replace with your CSV file name
data = pd.read_csv(csv_file)

# Plot mean latency per image (error bars: std across runs)
plt.figure(figsize=(10, 6))

# Plot mean latency per image
plt.errorbar(
    data["Batch Size"],
    data["Mean Latency per Image (ms)"],
    yerr=data["Std Latency per Image (ms)"],
    fmt='o-',
    label="Mean Latency per Image (ms)",
    capsize=5
)

# Per-batch latency is currently disabled; if it is re-enabled, the title below
# should mention it again.
# plt.errorbar(
#     data["Batch Size"],
#     data["Mean Latency per Batch (ms)"],
#     yerr=data["Std Latency per Batch (ms)"],
#     fmt='s-',
#     label="Mean Latency per Batch (ms)",
#     capsize=5
# )

# Set log scale for x-axis
plt.xscale("log", base=2)

# Add labels, title, and legend
plt.xlabel("Batch Size (log scale)")
plt.ylabel("Latency (ms)")
# The title names only the series that is actually drawn.
plt.title("Mean Latency Per Image Across Batch Sizes")
plt.legend()
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()




# Throughput versus batch size, with the std across runs as error bars
throughput_fig, throughput_ax = plt.subplots(figsize=(10, 6))
throughput_ax.errorbar(
    data["Batch Size"],
    data["Mean Throughput (imgs/sec)"],
    yerr=data["Std Throughput (imgs/sec)"],
    fmt='^-',
    label="Throughput",
    capsize=5,
    color='green',
)
throughput_ax.set_xscale("log", base=2)  # Batch sizes are powers of two
throughput_ax.set_xlabel("Batch Size (log scale)")
throughput_ax.set_ylabel("Throughput (images/sec)")
throughput_ax.set_title("Model Throughput Across Batch Sizes")
throughput_ax.legend()
throughput_ax.grid(True)
throughput_fig.tight_layout()
plt.show()
