# **Normal Conformer Model**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Choose the compute device: the CUDA GPU when PyTorch can see one, else the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Feed Forward Network
class FeedForward(nn.Module):
    """Two-layer MLP applied to the last dimension of the input.

    Layer order: Linear(dim -> hidden_dim), GELU, Dropout,
    Linear(hidden_dim -> dim), Dropout. The output has the same trailing
    size (`dim`) as the input.

    Args:
        dim: size of the input and output feature dimension.
        hidden_dim: size of the intermediate (expanded) feature dimension.
        dropout: dropout probability used after the activation and after
            the final projection.
    """

    def __init__(self, dim, hidden_dim, dropout=0.1):
        super().__init__()
        # Built as a list first; positions (and therefore state-dict keys
        # net.0 / net.3) are the same as passing the layers directly.
        layers = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the MLP and return the result."""
        projected = self.net(x)
        return projected

# Convolution Module
class ConvModule(nn.Module):
    """Conformer-style convolution module over the token axis.

    Pipeline: pointwise conv (dim -> 2*dim), GLU gate (back to dim),
    depthwise conv, BatchNorm, SiLU, pointwise conv (dim -> dim).

    The 2x channel expansion exists to feed the GLU: the first half of the
    channels is the value and the second half is the sigmoid gate. Slicing
    off the first half instead (as this module previously did) leaves half
    of `pointwise_conv1`'s parameters unused and untrained.

    Args:
        dim: number of feature channels of the input and output.
        kernel_size: depthwise kernel width. Must be odd so that
            `padding = kernel_size // 2` keeps the sequence length
            unchanged (needed for the residual connection around
            this module).

    Raises:
        ValueError: if `kernel_size` is even.
    """

    def __init__(self, dim, kernel_size=3):
        super().__init__()
        if kernel_size % 2 == 0:
            raise ValueError(
                f"kernel_size must be odd to preserve sequence length, got {kernel_size}"
            )
        self.pointwise_conv1 = nn.Conv1d(dim, 2 * dim, kernel_size=1)
        self.depthwise_conv = nn.Conv1d(
            dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim
        )
        self.bn = nn.BatchNorm1d(dim)
        self.pointwise_conv2 = nn.Conv1d(dim, dim, kernel_size=1)
        self.activation = nn.SiLU()

    def forward(self, x):
        """Apply the module to `x` laid out as [B, N, C]; returns [B, N, C]."""
        x = x.transpose(1, 2)  # [B, C, N] for 1D conv
        x = self.pointwise_conv1(x)  # [B, 2C, N]
        x = F.glu(x, dim=1)  # value * sigmoid(gate) -> [B, C, N]
        x = self.depthwise_conv(x)
        x = self.bn(x)
        x = self.activation(x)
        x = self.pointwise_conv2(x)
        x = x.transpose(1, 2)  # back to [B, N, C]
        return x

# Multi-Head Attention with LayerNorm
class MultiHeadAttention(nn.Module):
    """Pre-norm multi-head self-attention.

    The input is layer-normalised and then used as query, key and value of
    an `nn.MultiheadAttention` layer (batch-first layout). Only the attended
    output is returned; the residual connection is left to the caller.

    Args:
        dim: embedding size of each token.
        heads: number of attention heads (`dim` must be divisible by it,
            as required by `nn.MultiheadAttention`).
        dropout: dropout probability applied inside the attention layer.
    """

    def __init__(self, dim, heads=8, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=heads, dropout=dropout, batch_first=True)
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """Return the self-attention output for `x` ([B, N, dim] -> [B, N, dim])."""
        x_norm = self.norm(x)
        # The attention-weight matrix is never used, so skip computing and
        # averaging it; the attended output is unaffected.
        attn_out, _ = self.attn(x_norm, x_norm, x_norm, need_weights=False)
        return attn_out

# Conformer Block
class ConformerBlock(nn.Module):
    """One encoder block: FFN, self-attention, convolution, FFN, LayerNorm.

    Each sub-module is wrapped in a residual connection. The two
    feed-forward modules contribute with a weight of 0.5, the attention
    and convolution modules with a weight of 1. A final LayerNorm is
    applied to the result.

    Args:
        dim: token embedding size.
        ff_dim: hidden size of both feed-forward modules.
        heads: number of attention heads.
        dropout: dropout probability passed to the feed-forward and
            attention modules.
    """

    def __init__(self, dim, ff_dim, heads=8, dropout=0.1):
        super().__init__()
        self.ffn1 = FeedForward(dim=dim, hidden_dim=ff_dim, dropout=dropout)
        self.attn = MultiHeadAttention(dim=dim, heads=heads, dropout=dropout)
        self.conv = ConvModule(dim=dim)
        self.ffn2 = FeedForward(dim=dim, hidden_dim=ff_dim, dropout=dropout)
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """Apply the block to `x`; the output has the same shape as `x`."""
        after_ffn1 = x + 0.5 * self.ffn1(x)
        after_attn = after_ffn1 + self.attn(after_ffn1)
        after_conv = after_attn + self.conv(after_attn)
        after_ffn2 = after_conv + 0.5 * self.ffn2(after_conv)
        return self.norm(after_ffn2)

# Vision Conformer Model
class VisionConformer(nn.Module):
    """Image classifier: patch embedding -> Conformer blocks -> mean pool -> linear head.

    Args:
        img_size: side length of the (square) input images, used to size
            the positional embedding.
        patch_size: side length of each square patch; also the stride of
            the patch-embedding convolution.
        in_ch: number of input image channels.
        num_classes: number of output logits.
        dim: token embedding size.
        depth: number of stacked ConformerBlock layers.
        ff_dim: hidden size of the feed-forward modules in each block.
        heads: number of attention heads in each block.
    """

    def __init__(self, img_size=32, patch_size=4, in_ch=3, num_classes=10, dim=128, depth=6, ff_dim=256, heads=8):
        super().__init__()
        # Non-overlapping patches: kernel and stride are both patch_size.
        self.patch_embed = nn.Conv2d(
            in_channels=in_ch, out_channels=dim, kernel_size=patch_size, stride=patch_size
        )
        patches_per_side = img_size // patch_size
        num_patches = patches_per_side * patches_per_side
        # Learned positional embedding, one vector per patch position.
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches, dim))
        encoder_layers = [ConformerBlock(dim, ff_dim, heads) for _ in range(depth)]
        self.blocks = nn.Sequential(*encoder_layers)
        self.classifier = nn.Linear(dim, num_classes)

    def forward(self, x):
        """Map a batch of images to class logits of shape [B, num_classes]."""
        feature_map = self.patch_embed(x)  # [B, C, H, W]
        tokens = torch.flatten(feature_map, start_dim=2).permute(0, 2, 1)  # [B, N, C]
        tokens = tokens + self.pos_embed
        encoded = self.blocks(tokens)
        pooled = torch.mean(encoded, dim=1)  # Global average pooling
        return self.classifier(pooled)


Using device: cpu


**Loading Dataset**

In [None]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Root folder holding the Train/, Val/ and Test/ splits (one sub-folder per class).
data_dir = "/content/drive/MyDrive/B_Cancer"  # Change this

# ImageFolder's default loader converts every image to 3-channel RGB, which
# matches the model's in_ch=3. The per-channel statistics are therefore
# spelled out for all three channels (numerically the same as broadcasting a
# single 0.5) and map pixel values from [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Adjust size as needed
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'Train'), transform=transform)
val_dataset   = datasets.ImageFolder(os.path.join(data_dir, 'Val'), transform=transform)
test_dataset  = datasets.ImageFolder(os.path.join(data_dir, 'Test'), transform=transform)

# Only the training split is shuffled; evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

**Instantiating the model**

In [None]:
# 128x128 inputs split into 16x16 patches give an 8x8 grid (64 tokens)
model = VisionConformer(
    img_size=128,
    patch_size=16,
    in_ch=3,  # Use 1 if grayscale
    num_classes=2,  # cancer or not
    dim=128,
    depth=6,
    ff_dim=256,
    heads=8,
)
# Move parameters and buffers to the selected device
model = model.to(device)

**Start the Training**

In [None]:
import torch.nn as nn
import torch

NUM_EPOCHS = 5  # Try more epochs if needed

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct, total = 0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample average that does not depend on
        # the number of batches or on a smaller final batch.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        predicted = outputs.detach().argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / total
    acc = 100 * correct / total
    print(f"Epoch {epoch+1} - Loss: {epoch_loss:.4f} - Train Acc: {acc:.2f}%")

Epoch 1 - Loss: 314.6966 - Train Acc: 78.70%
Epoch 2 - Loss: 282.8201 - Train Acc: 81.05%
Epoch 3 - Loss: 271.3221 - Train Acc: 81.92%
Epoch 4 - Loss: 264.0399 - Train Acc: 82.56%
Epoch 5 - Loss: 254.9180 - Train Acc: 83.22%


**Evaluating the model on the validation and test sets**

In [None]:
def evaluate(model, loader, device=None):
    """Return the top-1 accuracy of `model` over `loader`, in percent.

    Args:
        model: an `nn.Module` that maps a batch of inputs to class logits.
        loader: iterable yielding `(images, labels)` batches.
        device: device to move each batch to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global.

    Returns:
        Accuracy as a float in the range [0, 100].

    Raises:
        ZeroDivisionError: if `loader` yields no samples.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Score the trained model on the held-out splits: validation first, then test
val_acc, test_acc = [
    evaluate(model, split_loader) for split_loader in (val_loader, test_loader)
]
print(f"Validation Accuracy: {val_acc:.2f}%")
print(f"Test Accuracy: {test_acc:.2f}%")

Validation Accuracy: 81.25%
Test Accuracy: 81.83%
