<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Attention_Mechanisms_in_Computer_Vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionLayer(nn.Module):
    """Spatial attention that re-weights every position of a feature map.

    For each channel, the flattened spatial vector is passed through a linear
    layer, the result is softmax-normalised across spatial positions, and the
    input is multiplied element-wise by those weights.

    Args:
        input_dim: Number of spatial positions (``height * width``) of the
            feature maps this layer will receive.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.fc = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        """Apply attention to a ``(batch, channels, height, width)`` tensor.

        ``height * width`` must equal the ``input_dim`` given at construction
        because the linear layer acts on the flattened spatial axis.

        Returns:
            A tensor with the same shape as ``x``.
        """
        batch_size, num_channels, height, width = x.size()
        # reshape (not view) so non-contiguous inputs such as transposed
        # tensors work; the explicit height * width (instead of -1) keeps the
        # target shape unambiguous for an empty batch.
        flat = x.reshape(batch_size, num_channels, height * width)
        attention_scores = F.softmax(self.fc(flat), dim=-1)
        weighted = flat * attention_scores
        return weighted.reshape(batch_size, num_channels, height, width)

class CNNWithAttention(nn.Module):
    """Small image classifier: 3x3 convolution, spatial attention, linear head.

    Args:
        input_dim: Number of channels in the input images.
        hidden_dim: Number of output channels of the convolution.
        num_classes: Number of outputs of the final linear layer.
        image_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 32, the
            size that used to be hard-coded.
    """

    def __init__(self, input_dim, hidden_dim, num_classes, image_size=32):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size
        num_positions = height * width

        # kernel_size=3 with padding=1 leaves height and width unchanged, so
        # the attention layer and the head can be sized from the input size.
        self.conv = nn.Conv2d(input_dim, hidden_dim, kernel_size=3, padding=1)
        self.attention = AttentionLayer(num_positions)
        self.fc = nn.Linear(hidden_dim * num_positions, num_classes)

    def forward(self, x):
        """Map a ``(batch, input_dim, height, width)`` batch to class outputs.

        Returns:
            The output of the final linear layer, shape
            ``(batch, num_classes)``; no normalisation is applied to it.
        """
        x = F.relu(self.conv(x))
        x = self.attention(x)
        # Collapse channels and spatial positions into one feature vector.
        x = x.flatten(start_dim=1)
        return self.fc(x)

# Demo: push one random batch through the model and report the output shape.
model = CNNWithAttention(3, 64, 10)  # input_dim=3 (RGB), hidden_dim=64, num_classes=10
img = torch.randn((32, 3, 32, 32))  # 32 images, 3 channels, 32x32 pixels each
output = model(img)
print("CNN with Attention output shape:", output.shape)