In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import AdamW
from transformers import ViTForImageClassification, ViTFeatureExtractor
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import numpy as np


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
import torch
import torch.nn as nn
from torchvision.transforms import ToTensor, Resize, Normalize, Compose
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


In [3]:
#Patch embedding layer
class PatchEmbedding(nn.Module):
    def __init__(self, img_size, patch_size, in_channels, embed_dim):
        super().__init__()
        self.patch_size = patch_size
        self.projection = nn.Conv2d(
            in_channels, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        # Input: (batch_size, in_channels, img_size, img_size)
        x = self.projection(x)  # Output: (batch_size, embed_dim, n_patches, n_patches)
        x = x.flatten(2)  # Flatten patches
        x = x.transpose(1, 2)  # Rearrange for transformer input
        return x


In [4]:
#Multi head self attention
class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads)

    def forward(self, x):
        # x: (n_patches, batch_size, embed_dim)
        attn_output, _ = self.attention(x, x, x)
        return attn_output


In [5]:
#Transformer Encoder
class TransformerEncoder(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super().__init__()
        self.attention = MultiHeadSelfAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.feedforward = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.GELU(),
            nn.Linear(ff_dim, embed_dim)
        )
        self.norm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Self-attention
        attn_output = self.attention(x)
        x = self.norm1(x + self.dropout(attn_output))

        # Feedforward
        ff_output = self.feedforward(x)
        x = self.norm2(x + self.dropout(ff_output))
        return x


In [6]:
#Vision Transformer

In [7]:
class VisionTransformer(nn.Module):
    def __init__(self, img_size, patch_size, in_channels, num_classes, embed_dim, num_heads, ff_dim, num_layers):
        super().__init__()
        self.patch_embedding = PatchEmbedding(img_size, patch_size, in_channels, embed_dim)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.positional_encoding = nn.Parameter(torch.zeros(1, (img_size // patch_size) ** 2 + 1, embed_dim))

        # Stack transformer encoders
        self.encoder_layers = nn.ModuleList([
            TransformerEncoder(embed_dim, num_heads, ff_dim)
            for _ in range(num_layers)
        ])

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(embed_dim),
            nn.Linear(embed_dim, num_classes)
        )

    def forward(self, x):
        batch_size = x.size(0)
        x = self.patch_embedding(x)

        # Add CLS token
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)

        # Add positional encoding
        x += self.positional_encoding

        # Pass through transformer encoders
        x = x.permute(1, 0, 2)  # Rearrange for PyTorch multi-head attention
        for layer in self.encoder_layers:
            x = layer(x)
        x = x.permute(1, 0, 2)  # Rearrange back

        # Classification head
        cls_output = x[:, 0]
        return self.mlp_head(cls_output)


In [8]:
transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = ImageFolder('C:/Users/pc/Pictures/Brain_tumor/Training/', transform=transform)
test_dataset = ImageFolder('C:/Users/pc/Pictures/Brain_tumor/Testing', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [9]:
model = VisionTransformer(
    img_size=224, patch_size=16, in_channels=3, num_classes=4,
    embed_dim=768, num_heads=12, ff_dim=3072, num_layers=12
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)


In [11]:
epochs = 5

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}')


Epoch 1, Loss: 1.1931
Epoch 2, Loss: 1.1341
Epoch 3, Loss: 1.0895
Epoch 4, Loss: 1.0874
Epoch 5, Loss: 1.0670


In [12]:
model.eval()
correct, total = 0, 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')


Accuracy: 48.21%


In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report


In [16]:
model.eval()  # Set the model to evaluation mode
all_preds = []
all_labels = []

with torch.no_grad():  # Disable gradient calculation during testing
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        # Store predictions and true labels
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate metrics
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')
accuracy = sum(np.array(all_preds) == np.array(all_labels)) / len(all_labels)

# Display results
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

# Classification report for detailed metrics per class
print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=train_dataset.classes))


Accuracy: 0.4821
Precision: 0.5524
Recall: 0.4821
F1 Score: 0.4352

Classification Report:
              precision    recall  f1-score   support

      glioma       0.79      0.08      0.14       300
  meningioma       0.27      0.27      0.27       306
     notumor       0.66      0.67      0.67       405
   pituitary       0.45      0.86      0.59       300

    accuracy                           0.48      1311
   macro avg       0.54      0.47      0.42      1311
weighted avg       0.55      0.48      0.44      1311

