In [None]:
import numpy as np
import torchvision.models as tvm
# from torchsummary import summary

import torch.nn as nn
import torch
import random
from tqdm import tqdm
from torch.utils.data import DataLoader

import os

import torch.optim as to
import matplotlib.pyplot as plt
# from ray import tune
# from ray.train import Checkpoint
# from ray.air import session
# from ray.tune.schedulers import


from datetime import datetime
# from vit_pytorch import ViT
from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset

%run model.ipynb
#%run backbone.ipynb


cuda


In [None]:




def replace_context_modules(model, MyModule, dilation):
    """Swap the ``context_module`` of the fourth-stage blocks for ``MyModule``.

    Args:
        model: Network exposing ``model.stages[3].blocks``. Blocks 1-6 must each
            carry a ``context_module`` whose ``main.qkv.conv`` has an
            ``in_channels`` attribute.
        MyModule: Callable that builds the replacement; it is invoked as
            ``MyModule(in_channels, nn.ReLU, dilation=dilation)``.
        dilation: Forwarded unchanged to ``MyModule``.

    The model is modified in place and nothing is returned.
    """
    # Fourth stage (index 3).
    stage = model.stages[3]

    # Block 0 is left untouched; blocks 1..6 (inclusive) are rewritten.
    for i in range(1, 7):
        block = stage.blocks[i]

        # Only the input width is needed to build the replacement. The former
        # lookup of main.proj.conv.out_channels was never used and could raise
        # on context modules that have no ``proj`` branch.
        in_channels = block.context_module.main.qkv.conv.in_channels

        # Keep the nn.Sequential wrapper so parameter names stay
        # ``context_module.0.*``.
        block.context_module = nn.Sequential(
            MyModule(in_channels, nn.ReLU, dilation=dilation)
        )




In [None]:
import numpy as np
class CAGA(nn.Module):
    """Multi-head attention block with dilated q/k/v convolutions and cascaded heads.

    The input is projected to ``heads * head_dim`` channels and split into one
    ``head_dim``-channel group per head. For every head, each entry of
    ``dilation`` produces its own q/k/v through an un-padded 3x3 depthwise
    convolution followed by a 1x1 convolution, so the attention for that entry
    runs on a spatially smaller map. Within a head, the attention output of one
    dilation is resized, passed through ``mix`` and added to the q/k/v of the
    next dilation. Across heads, the concatenated output of a head is passed
    through ``convert_to_headdim`` and added to the input of the next head.
    All head outputs are concatenated, projected back to ``in_channels``,
    added to the block input and batch-normalised.

    NOTE(review): earlier revisions built the per-head storage with
    ``[[]] * heads`` (one list aliased ``heads`` times) and applied ``+=``
    between a list and a tensor. As a result every head attended over head 0's
    features and ``convert_to_headdim`` never influenced the output. The
    parameter names are unchanged, so old checkpoints still load, but they were
    trained with that forward pass and should be re-validated or fine-tuned.

    Args:
        in_channels: Channel count of the input and of the output.
        activation: Accepted for interface compatibility; not used.
        heads: Number of attention heads.
        dim: Channels of each of q, k and v.
        expand_ratio: Accepted for interface compatibility; not used.
        head_dim: Channels fed to each head.
        dilation: Iterable of dilation rates, one attention pass per entry.
        random_seed: Seed applied to the *global* torch / numpy / random
            generators before the layers are created. Pass ``None`` to leave
            global RNG state and cuDNN flags untouched.
    """

    def __init__(self,
            in_channels,
            activation,
            heads = 3,
            dim = 8,
            expand_ratio = 4,
            head_dim = 16,
            dilation = (1,2),
            random_seed = 82
            ):
        super().__init__()

        # Seeding is global and happens on every instantiation, so modules
        # built with the same seed start from identical weights.
        if random_seed is not None:
            self._set_global_seeds(random_seed)

        self.heads = heads
        self.dim = dim
        self.scale = dim ** -0.5
        self.head_dim = head_dim
        self.dilation = dilation

        # Not read anywhere in this class; kept for code that may inspect it.
        self.total_layer = 4

        # Input projection that produces the per-head feature groups.
        self.get_begin = self._init_depthwise_separable_conv(
            DepthWiseSeperableConvLayer(in_channels, self.heads * self.head_dim)
        )

        # One q/k/v generator per dilation rate, shared by all heads.
        self.get_qkv = nn.ModuleList([
            nn.Sequential(
                self._init_conv(nn.Conv2d(
                    self.head_dim,
                    self.head_dim,
                    3,
                    groups=self.head_dim,
                    dilation=di
                )),
                self._init_conv(nn.Conv2d(self.head_dim, 3 * self.dim, 1, groups=1))
            )
            for di in dilation
        ])

        # Maps a head's concatenated output back to head_dim channels so it can
        # be added to the next head's input.
        self.convert_to_headdim = self._init_conv(
            nn.Conv2d(len(dilation) * self.dim, self.head_dim, 1)
        )

        # Lifts an attention output (dim channels) to q/k/v width (3 * dim).
        self.mix = nn.Sequential(
            self._init_conv(nn.Conv2d(
                self.dim,
                self.dim * 3,
                1
            )),
            nn.ReLU()
        )

        self.proj = self._init_conv(
            nn.Conv2d(self.heads * self.dim * len(self.dilation), in_channels, 1)
        )

        self.norm = nn.BatchNorm2d(num_features=in_channels, affine=True)
        nn.init.constant_(self.norm.weight, 1)
        nn.init.constant_(self.norm.bias, 0)

    def _set_global_seeds(self, seed):
        """Seed torch, CUDA, numpy and ``random`` and force deterministic cuDNN.

        This mutates process-wide state, not just this module.
        """
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(seed)
        random.seed(seed)

    def _init_conv(self, conv_layer):
        """Xavier-uniform the weight, zero the bias (if any) and return the layer."""
        nn.init.xavier_uniform_(conv_layer.weight)
        if conv_layer.bias is not None:
            nn.init.constant_(conv_layer.bias, 0)
        return conv_layer

    def _init_depthwise_separable_conv(self, conv_layer):
        """Apply the same initialisation to every ``nn.Conv2d`` inside ``conv_layer``."""
        for m in conv_layer.modules():
            if isinstance(m, nn.Conv2d):
                self._init_conv(m)
        return conv_layer

    def attention(self, q, k, v, shape):
        """Scaled dot-product attention over flattened spatial positions.

        Args:
            q, k, v: Tensors of shape ``(B, channels, N)``.
            shape: ``(B, C, H, W)`` of the un-flattened maps, with ``H * W == N``.

        Returns:
            Float tensor of shape ``(B, channels_of_v, H, W)``.
        """
        B, _, H, W = shape

        q, k, v = q.float(), k.float(), v.float()

        # (B, N, N): similarity of every query position to every key position.
        att_map = (q * self.scale).transpose(-2, -1) @ k
        att_map = att_map.softmax(dim=-1)

        out = v @ att_map.transpose(-2, -1)
        return out.view(B, -1, H, W)

    def forward(self, x):
        """Run the cascaded attention; the output has the same shape as ``x``."""
        B, C, H, W = x.shape
        residual = x

        head_inputs = self.get_begin(x).split([self.head_dim] * self.heads, dim=1)
        n_scales = len(self.dilation)

        head_outputs = []
        cascade = None  # contribution of the previous head, if any

        for head_idx in range(self.heads):
            head_in = head_inputs[head_idx]
            if cascade is not None:
                if cascade.shape[-2:] != head_in.shape[-2:]:
                    # Only needed if get_begin changes the spatial size.
                    cascade = nn.functional.interpolate(
                        cascade, size=head_in.shape[-2:],
                        mode='bilinear', align_corners=False,
                    )
                head_in = head_in + cascade

            # A fresh list per head, so no state is shared between heads.
            qkv_per_scale = [op(head_in) for op in self.get_qkv]

            scale_outputs = []
            for j in range(n_scales):
                q, k, v = qkv_per_scale[j].split([self.dim, self.dim, self.dim], dim=1)
                shape = q.shape
                out = self.attention(q.flatten(2), k.flatten(2), v.flatten(2), shape)

                # Feed this result into the next dilation's q/k/v. The lookup
                # is guarded so the last dilation does not index past the list.
                if j + 1 != n_scales:
                    nxt = qkv_per_scale[j + 1]
                    carried = nn.functional.interpolate(
                        out, size=nxt.shape[-2:],
                        mode='bilinear', align_corners=False,
                    )
                    qkv_per_scale[j + 1] = nxt + self.mix(carried)

                # Un-padded dilated convs shrink the map; restore input size.
                out = nn.functional.interpolate(
                    out, size=(H, W), mode='bilinear', align_corners=False,
                )
                scale_outputs.append(out)

            head_out = torch.cat(scale_outputs, dim=1)
            head_outputs.append(head_out)

            if head_idx + 1 != self.heads:
                cascade = self.convert_to_headdim(head_out)

        x_final = self.proj(torch.cat(head_outputs, dim=1)) + residual

        return self.norm(x_final)

In [None]:

def change_classifier(model, model_name, num_classes=2, dropout=0.5,
                     neurons1=4096, neurons2=1024, neurons3=256, neurons4=512, n_layers=2):
    """
    Build an MLP classification head and install it on ``model``.

    The head is ``Flatten`` followed by ``Linear -> ReLU -> Dropout`` once per
    hidden layer, then a final ``Linear`` to ``num_classes`` and an output
    activation (``Sigmoid`` when ``num_classes == 1``, otherwise
    ``Softmax(dim=1)``).

    Args:
        model: Model to modify in place.
        model_name: Key used to look up the head's input width; unknown names
            fall back to 3072.
        num_classes: Width of the final linear layer.
        dropout: Probability used by every ``Dropout`` layer.
        neurons1-4: Widths of hidden layers 1 to 4, used in that order.
        n_layers: Number of hidden layers. 1, 2 and 3 are honoured; any other
            value builds the four-layer head.

    Returns:
        The same ``model`` object, with the new head attached to the first
        slot found among ``head.classifier``, ``head.fc``, ``head``, ``fc``
        and ``classifier``.

    Raises:
        AttributeError: If the model has none of those attributes.
    """
    # Feature width entering the head for each supported backbone.
    width_by_model = {
        'resnet101.a1_in1k': 2048,
        'deit3_medium_patch16_224': 512,
        'coatnet_1_rw_224.sw_in1k': 768,
        'mobilenetv3_large_100.ra_in1k': 1280,
        'vit_base_patch16_224' : 768,
        'efficientvit_l1.r224_in1k': 3072
    }
    in_features = width_by_model.get(model_name, 3072)

    # Anything that is not 1, 2 or 3 falls through to the four-layer head.
    depth = next((d for d in (1, 2, 3) if n_layers == d), 4)
    hidden_widths = [neurons1, neurons2, neurons3, neurons4][:depth]

    # The leading ReLUs are in-place: none for depth 1, one for depth 2,
    # two for depth 3 and 4. The remaining ReLUs use the default.
    n_inplace = min(2, depth - 1)

    layers = [nn.Flatten()]
    fan_in = in_features
    for position, fan_out in enumerate(hidden_widths):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU(inplace=True) if position < n_inplace else nn.ReLU())
        layers.append(nn.Dropout(dropout))
        fan_in = fan_out
    layers.append(nn.Linear(fan_in, num_classes))
    layers.append(nn.Sigmoid() if num_classes == 1 else nn.Softmax(dim=1))
    classifier = nn.Sequential(*layers)

    # Attach to the most specific slot available; a ``head`` wins over
    # top-level ``fc`` / ``classifier``.
    if hasattr(model, 'head'):
        head = model.head
        for slot in ('classifier', 'fc'):
            if hasattr(head, slot):
                setattr(head, slot, classifier)
                return model
        model.head = classifier
        return model

    for slot in ('fc', 'classifier'):
        if hasattr(model, slot):
            setattr(model, slot, classifier)
            return model

    raise AttributeError("Model structure not supported. Cannot find classifier or head attribute.")




In [None]:
# Hyper-parameters and model construction.
# `timm` and `MyModule` are not imported or defined in the visible cells;
# presumably they come from `%run model.ipynb` — TODO confirm.
HYPERPARAMS = {
    # Training parameters
    'lr': 0.0000648047274,
    'weight_decay': 0.01,
    # 'dropout', 'neurons3' and 'neurons4' are read below but were missing,
    # which raised KeyError on a fresh kernel. The values mirror the defaults
    # of change_classifier(); adjust them if the checkpoint used other values.
    'dropout': 0.5,

    # Model architecture
    'neurons1': 1024,
    'neurons2': 2048,
    'neurons3': 256,
    'neurons4': 512,
    'n_layers': 2,
    'dilation': (1, 2),  # Dilations for CAGA module
    # NOTE(review): 'FCE' is not one of the options listed here — confirm.
    'loss': 'FCE',  # Options: 'FocalCE', 'CE', 'BCE'
}

model = timm.create_model('efficientvit_l1.r224_in1k', pretrained=True)

# NOTE(review): 82 is passed as `dilation` while HYPERPARAMS['dilation'] is
# (1, 2). Left unchanged because MyModule's definition is not visible here —
# confirm which value MyModule expects.
replace_context_modules(model, MyModule , 82)

# Assign the result (the same model object) so the cell does not dump the
# entire model repr as its output.
model = change_classifier(
    model,
    'efficientvit_l1.r224_in1k',
    dropout=HYPERPARAMS['dropout'],
    neurons1=HYPERPARAMS['neurons1'],
    neurons2=HYPERPARAMS['neurons2'],
    neurons3=HYPERPARAMS['neurons3'],
    neurons4=HYPERPARAMS['neurons4'],
    n_layers=HYPERPARAMS['n_layers'],
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/211M [00:00<?, ?B/s]

EfficientVitLarge(
  (stem): Stem(
    (in_conv): ConvNormAct(
      (dropout): Dropout(p=0.0, inplace=False)
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True)
      (act): GELUTanh()
    )
    (res0): ResidualBlock(
      (pre_norm): Identity()
      (main): ConvBlock(
        (conv1): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True)
          (act): GELUTanh()
        )
        (conv2): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (norm): BatchNorm2d(32, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True)
      

In [None]:
# Restore the fine-tuned weights on CPU. weights_only=True restricts
# unpickling to tensors and plain containers.
model.load_state_dict(
    torch.load(
        '/MSLD/GradCAM_CAGA_msld_0.996875_modelweights',
        map_location=torch.device('cpu'),
        weights_only=True,
    )
)

<All keys matched successfully>

In [None]:
# Print the last stage for inspection (presumably the stage whose
# context modules were replaced above — TODO confirm it is stages[3]).
print(model.stages[-1])

EfficientVitLargeStage(
  (blocks): Sequential(
    (0): ResidualBlock(
      (pre_norm): Identity()
      (main): MBConv(
        (inverted_conv): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(256, 6144, kernel_size=(1, 1), stride=(1, 1))
          (norm): Identity()
          (act): GELUTanh()
        )
        (depth_conv): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(6144, 6144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=6144)
          (norm): Identity()
          (act): GELUTanh()
        )
        (point_conv): ConvNormAct(
          (dropout): Dropout(p=0.0, inplace=False)
          (conv): Conv2d(6144, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(512, eps=1e-07, momentum=0.1, affine=True, track_running_stats=True)
          (act): Identity()
        )
      )
    )
    (1): EfficientVitBlock(
      (context_module): Sequential(
        (0):

0.07654052439721065


In [None]:
import torch
from pytorch_grad_cam import GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, FullGrad
import matplotlib.pyplot as plt
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
import torchvision.transforms as transforms
from pytorch_grad_cam.utils.image import show_cam_on_image
from PIL import Image
import numpy as np
import cv2
# 0 for Mpox
#1 is for other

def visualize_grad_cam(model, image_path, target_layers, mpox_dir=None,
                       cam_targets=None, output_path='/M31_02_Gradcam.jpg'):
    """
    Compute a Grad-CAM heat-map with ``pytorch_grad_cam`` and save the overlay.

    Args:
        model: PyTorch model to explain.
        image_path: Path to the input image.
        target_layers: Layers handed to ``GradCAM`` as ``target_layers``.
        mpox_dir: Deprecated alias for ``image_path``. When given it takes
            precedence, which matches the behaviour existing callers rely on.
        cam_targets: Targets handed to the CAM call. When ``None`` the
            module-level ``targets`` list is used, as before.
        output_path: Where the overlay image is written.

    Returns:
        The model outputs captured by the CAM object during its forward pass.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
    """
    import warnings

    # Earlier versions always overwrote image_path with mpox_dir.
    if mpox_dir is not None:
        image_path = mpox_dir
    if cam_targets is None:
        cam_targets = targets

    input_size = (224, 224)

    # Image preprocessing
    transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Base image for the overlay: RGB float32 in [0, 1], at the same
    # resolution as the CAM so the two can be blended.
    bgr_img = cv2.imread(image_path)
    if bgr_img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    rgb_img = cv2.resize(rgb_img, input_size)
    rgb_img = np.float32(rgb_img) / 255.0

    # Load and preprocess image; force three channels so Normalize's
    # three-element mean/std always match.
    with Image.open(image_path) as image:
        input_tensor = transform(image.convert('RGB')).unsqueeze(0)

    with GradCAM(model=model, target_layers=target_layers) as cam:
        grayscale_cam = cam(input_tensor=input_tensor, targets=cam_targets)
        grayscale_cam = grayscale_cam[0, :]
        visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
        # The CAM object keeps the model outputs, so no second inference is needed.
        model_outputs = cam.outputs

    # The overlay is RGB but cv2.imwrite expects BGR; convert so the saved
    # file does not have red and blue swapped.
    written = cv2.imwrite(output_path, cv2.cvtColor(visualization, cv2.COLOR_RGB2BGR))
    if not written:
        warnings.warn(f"Grad-CAM overlay could not be written to {output_path}")

    return model_outputs

# Explain class index 0 (labelled "Mpox" in the note next to the imports).
# visualize_grad_cam reads this module-level list.
targets = [ClassifierOutputTarget(0)]
# Images tested so far
#   Mpox:
#     M31_02_00
#     M38_01_00
#     M49_03_00

#   Others:
#     NM42_01_00
#     NM98_02_00
#     NM99_02_00
# Required for citation in academic work
# The whole `stages` container is passed as the Grad-CAM target layers.
target_layer = model.stages
mpox_dir = '/data/Augmented Images MSLD/Augmented Images/Monkeypox_augmented/M31_02_00.jpg'
# The same path is passed for both image_path and mpox_dir.
# The variable name (sic) is reused by the next cell.
model_ouputs = visualize_grad_cam(model, mpox_dir, target_layer, mpox_dir)

In [None]:
# Display the model outputs returned by visualize_grad_cam above.
model_ouputs

tensor([[5.7615e-11, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)