In [9]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm

# 1. Define paths for ImageNet validation images and annotations
IMAGE_DIR = "/home/kajm20/mnist/ILSVRC/Data/CLS-LOC/val"  # Path to validation images
ANNOTATION_DIR = "/home/kajm20/mnist/ILSVRC/Annotations/CLS-LOC/val"  # Path to XML annotations

# 2. Define transformations for EfficientNet input (resize, crop, normalize)
imagenet_transform = transforms.Compose([
    transforms.Resize(256),  # Resize the image to 256x256
    transforms.CenterCrop(224),  # Crop the image to 224x224
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean/std
])

# 3. Load the synset mapping
synset_mapping_path = "/home/kajm20/mnist/ILSVRC/LOC_synset_mapping.txt"
wordnet_to_imagenet = {}

# Load synset mapping from file
with open(synset_mapping_path) as f:
    for idx, line in enumerate(f.readlines()):
        wordnet_id, _ = line.split(' ', 1)  # Get WordNet ID from the line (skip class name)
        wordnet_to_imagenet[wordnet_id] = idx  # Map WordNet ID to class index

# 4. Define the custom dataset class
class ImageNetValDataset(Dataset):
    def __init__(self, image_dir, annotation_dir, transform=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform

        # Get all annotation file names
        self.annotation_files = sorted(os.listdir(annotation_dir))

    def __len__(self):
        return len(self.annotation_files)

    def __getitem__(self, idx):
        # Get annotation file path
        annotation_path = os.path.join(self.annotation_dir, self.annotation_files[idx])
        
        # Parse XML to extract class label
        tree = ET.parse(annotation_path)
        root = tree.getroot()
        wordnet_id = root.find("object").find("name").text  # WordNet ID, e.g., 'n01751748'

        # Use the synset mapping to convert WordNet ID to ImageNet class index
        class_idx = wordnet_to_imagenet.get(wordnet_id, -1)  # Default to -1 if not found (shouldn't happen)

        # Get image filename from XML and construct image path
        image_filename = root.find("filename").text + ".JPEG"
        image_path = os.path.join(self.image_dir, image_filename)

        # Load and transform image
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, class_idx

# 5. Initialize the dataset and dataloader
imagenet_val_dataset = ImageNetValDataset(IMAGE_DIR, ANNOTATION_DIR, transform=imagenet_transform)
imagenet_val_loader = DataLoader(imagenet_val_dataset, batch_size=32, shuffle=False, num_workers=4)

# 6. Define the model (EfficientNet-B0 with pre-trained weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.efficientnet_b0(weights='DEFAULT')  # Load pre-trained EfficientNet-B0 model
model.to(device)
model.eval()  # Set the model to evaluation mode




EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [7]:
# 7. Define the evaluation function
def evaluate_model(model, dataloader):
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient calculation for inference
        for images, labels in tqdm(dataloader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images) 
            _, predicted = torch.max(outputs, 1)  # Get highest probability class
            correct += (predicted == labels).sum().item()  # Count correct predictions
            total += labels.size(0)

    accuracy = (correct / total) * 100
    return accuracy

# 8. Evaluate the model on ImageNet validation set
accuracy = evaluate_model(model, imagenet_val_loader)
print(f"EfficientNet-B0 Top-1 Accuracy on ImageNet: {accuracy:.2f}%")

  0%|          | 0/25000 [00:00<?, ?it/s]

torch.Size([2, 3, 224, 224])
torch.Size([2, 1000])





ZeroDivisionError: division by zero

In [13]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm

# 1. Define paths for ImageNet validation images and annotations
IMAGE_DIR = "/home/kajm20/mnist/ILSVRC/Data/CLS-LOC/val"  # Path to validation images
ANNOTATION_DIR = "/home/kajm20/mnist/ILSVRC/Annotations/CLS-LOC/val"  # Path to XML annotations

# 2. Define transformations for EfficientNet input (resize, crop, normalize)
imagenet_transform = transforms.Compose([
    transforms.Resize(256),  # Resize the image to 256x256
    transforms.CenterCrop(224),  # Crop the image to 224x224
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean/std
])

# 3. Load the synset mapping
synset_mapping_path = "/home/kajm20/mnist/ILSVRC/LOC_synset_mapping.txt"
wordnet_to_imagenet = {}

# Load synset mapping from file
with open(synset_mapping_path) as f:
    for idx, line in enumerate(f.readlines()):
        wordnet_id, _ = line.split(' ', 1)  # Get WordNet ID from the line (skip class name)
        wordnet_to_imagenet[wordnet_id] = idx  # Map WordNet ID to class index

# 4. Define the custom dataset class
class ImageNetValDataset(Dataset):
    def __init__(self, image_dir, annotation_dir, transform=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform

        # Get all annotation file names
        self.annotation_files = sorted(os.listdir(annotation_dir))

    def __len__(self):
        return len(self.annotation_files)

    def __getitem__(self, idx):
        # Get annotation file path
        annotation_path = os.path.join(self.annotation_dir, self.annotation_files[idx])
        
        # Parse XML to extract class label
        tree = ET.parse(annotation_path)
        root = tree.getroot()
        wordnet_id = root.find("object").find("name").text  # WordNet ID, e.g., 'n01751748'

        # Use the synset mapping to convert WordNet ID to ImageNet class index
        class_idx = wordnet_to_imagenet.get(wordnet_id, -1)  # Default to -1 if not found (shouldn't happen)

        # Get image filename from XML and construct image path
        image_filename = root.find("filename").text + ".JPEG"
        image_path = os.path.join(self.image_dir, image_filename)

        # Load image without transformation (for raw tensor shape printing)
        raw_image = Image.open(image_path).convert("RGB")
        raw_tensor = transforms.ToTensor()(raw_image)  # Convert to tensor before normalization

        # Apply transformations
        if self.transform:
            image = self.transform(raw_image)
        else:
            image = raw_tensor  # Just in case transform is None

        return raw_tensor, image, class_idx  # Return raw image tensor, transformed image tensor, and label

# 5. Initialize the dataset and dataloader
imagenet_val_dataset = ImageNetValDataset(IMAGE_DIR, ANNOTATION_DIR, transform=imagenet_transform)
imagenet_val_loader = DataLoader(imagenet_val_dataset, batch_size=1, shuffle=False, num_workers=4)

# 6. Define the model (EfficientNet-B0 with pre-trained weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.efficientnet_b0(weights='DEFAULT')  # Load pre-trained EfficientNet-B0 model
model.to(device)
model.eval()  # Set the model to evaluation mode

# Dictionary to store layer activations
activations = {}

# Hook function to store the output of each layer
def hook_fn(module, input, output):
    activations[module] = output.detach().cpu()

# Register hooks on all layers
for name, layer in model.named_modules():
    if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear) or isinstance(layer, nn.BatchNorm2d):
        layer.register_forward_hook(hook_fn)

# 7. Define the evaluation function
def evaluate_model(model, dataloader):
    correct = 0
    total = 0
    first_layer_input = None  # Store input image before first layer

    with torch.no_grad():  # Disable gradient calculation for inference
        for raw_images, transformed_images, labels in tqdm(dataloader):
            raw_images, transformed_images, labels = raw_images.to(device), transformed_images.to(device), labels.to(device)

            # Store the transformed input image before it goes into the first layer
            first_layer_input = transformed_images.cpu().detach().clone()

            # Print the shapes instead of the tensors
            print("\n--- Shapes Before and After Transformation ---")
            print(f"Raw Image Tensor Shape (Before Normalization): {raw_images.shape}")  # Should be (1, 3, H, W)
            print(f"Transformed Image Tensor Shape (After Normalization): {transformed_images.shape}")  # Should be (1, 3, 224, 224)

            # Forward pass
            outputs = model(transformed_images) 
            _, predicted = torch.max(outputs, 1)  # Get highest probability class
            correct += (predicted == labels).sum().item()  # Count correct predictions
            total += labels.size(0)

            # Print stored activations for debugging
            print("\n--- Layer Output Shapes ---")
            for layer, activation in activations.items():
                print(f"{layer}: {activation.shape}")

            # Break after 1st batch for visualization
            break  

    accuracy = (correct / total) * 100
    return accuracy, first_layer_input, activations

# 8. Evaluate the model on ImageNet validation set
accuracy, first_layer_input, activations = evaluate_model(model, imagenet_val_loader)
print(f"EfficientNet-B0 Top-1 Accuracy on ImageNet: {accuracy:.2f}%")

# Save the first layer input for later analysis
torch.save(first_layer_input, "first_layer_input.pt")
torch.save(activations, "activations.pt")

print("Saved input image and activations.")


  0%|          | 0/50000 [00:00<?, ?it/s]



--- Shapes Before and After Transformation ---
Raw Image Tensor Shape (Before Normalization): torch.Size([1, 3, 375, 500])
Transformed Image Tensor Shape (After Normalization): torch.Size([1, 3, 224, 224])

--- Layer Output Shapes ---
Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False): torch.Size([1, 32, 112, 112])
BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): torch.Size([1, 32, 112, 112])
Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False): torch.Size([1, 32, 112, 112])
BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): torch.Size([1, 32, 112, 112])
Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1)): torch.Size([1, 8, 1, 1])
Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1)): torch.Size([1, 32, 1, 1])
Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False): torch.Size([1, 16, 112, 112])
BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_st

In [15]:
import os
import torch
import torch.nn as nn
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm

# 1. Define paths for ImageNet validation images and annotations
IMAGE_DIR = "/home/kajm20/mnist/ILSVRC/Data/CLS-LOC/val"  # Path to validation images
ANNOTATION_DIR = "/home/kajm20/mnist/ILSVRC/Annotations/CLS-LOC/val"  # Path to XML annotations

# 2. Define transformations for EfficientNet input (resize, crop, normalize)
imagenet_transform = transforms.Compose([
    transforms.Resize(256),  
    transforms.CenterCrop(224),  
    transforms.ToTensor(),  
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])

# 3. Load the synset mapping
synset_mapping_path = "/home/kajm20/mnist/ILSVRC/LOC_synset_mapping.txt"
wordnet_to_imagenet = {}

with open(synset_mapping_path) as f:
    for idx, line in enumerate(f.readlines()):
        wordnet_id, _ = line.split(' ', 1)
        wordnet_to_imagenet[wordnet_id] = idx  

# 4. Define the custom dataset class
class ImageNetValDataset(Dataset):
    def __init__(self, image_dir, annotation_dir, transform=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform
        self.annotation_files = sorted(os.listdir(annotation_dir))

    def __len__(self):
        return len(self.annotation_files)

    def __getitem__(self, idx):
        annotation_path = os.path.join(self.annotation_dir, self.annotation_files[idx])
        tree = ET.parse(annotation_path)
        root = tree.getroot()
        wordnet_id = root.find("object").find("name").text  

        class_idx = wordnet_to_imagenet.get(wordnet_id, -1)  
        image_filename = root.find("filename").text + ".JPEG"
        image_path = os.path.join(self.image_dir, image_filename)

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, class_idx

# 5. Initialize the dataset and dataloader
imagenet_val_dataset = ImageNetValDataset(IMAGE_DIR, ANNOTATION_DIR, transform=imagenet_transform)
imagenet_val_loader = DataLoader(imagenet_val_dataset, batch_size=1, shuffle=False, num_workers=4)

# 6. Define the model (EfficientNet-B0 with pre-trained weights)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.efficientnet_b0(weights='DEFAULT')  
model.to(device)
model.eval()  

# 7. Dictionary to store activations
activations = {}

# 8. Hook function to store activations for **ALL** layers
def hook_fn(module, input, output):
    layer_name = f"{module.__class__.__name__} ({id(module)})"
    if isinstance(output, torch.Tensor):  
        activations[layer_name] = output.shape  

# 9. Recursively register hooks for **all** layers
for name, layer in model.named_modules():
    if isinstance(layer, (nn.Conv2d, nn.BatchNorm2d, nn.SiLU, nn.Linear)):
        layer.register_forward_hook(hook_fn)

# 10. Define the evaluation function
def evaluate_model(model, dataloader):
    correct = 0
    total = 0

    with torch.no_grad():  
        for images, labels in tqdm(dataloader):
            images, labels = images.to(device), labels.to(device)
            activations.clear()  
            
            print(f"\nRaw Image Tensor Shape (Before Normalization): {images.shape}")  
            
            outputs = model(images)  

            _, predicted = torch.max(outputs, 1)  
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Print activations for all layers
            print("\n--- Layer Activations ---")
            for layer, shape in activations.items():
                print(f"{layer}: {shape}")
            break  

    accuracy = (correct / total) * 100
    return accuracy

# 11. Evaluate the model on ImageNet validation set
accuracy = evaluate_model(model, imagenet_val_loader)
print(f"EfficientNet-B0 Top-1 Accuracy on ImageNet: {accuracy:.2f}%")


  0%|          | 0/50000 [00:00<?, ?it/s]


Raw Image Tensor Shape (Before Normalization): torch.Size([1, 3, 224, 224])

--- Layer Activations ---
Conv2d (139326491122640): torch.Size([1, 32, 112, 112])
BatchNorm2d (139326499503328): torch.Size([1, 32, 112, 112])
SiLU (139326473888512): torch.Size([1, 32, 112, 112])
Conv2d (139326491123280): torch.Size([1, 32, 112, 112])
BatchNorm2d (139326491538528): torch.Size([1, 32, 112, 112])
SiLU (139326473886592): torch.Size([1, 32, 112, 112])
Conv2d (139326491122960): torch.Size([1, 8, 1, 1])
SiLU (139326473885392): torch.Size([1, 8, 1, 1])
Conv2d (139326491120400): torch.Size([1, 32, 1, 1])
Conv2d (139326491119440): torch.Size([1, 16, 112, 112])
BatchNorm2d (139326491539344): torch.Size([1, 16, 112, 112])
Conv2d (139326491118800): torch.Size([1, 96, 112, 112])
BatchNorm2d (139326491537712): torch.Size([1, 96, 112, 112])
SiLU (139326473884672): torch.Size([1, 96, 112, 112])
Conv2d (139326491119120): torch.Size([1, 96, 56, 56])
BatchNorm2d (139326491541248): torch.Size([1, 96, 56, 56])
S


