In [55]:
import sys
sys.path.append("/kaggle/input/going-modular/going_modular")  

In [56]:
import engine


> > **fine tuned**

# 1 **FTVT_B 16**

In [4]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torchinfo import summary
import os
from torchvision import datasets
from torch.utils.data import DataLoader

# Set the device
device = "cuda" if torch.cuda.is_available() else "cpu"

def set_seeds(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# 1. Get pretrained weights for ViT-Base
pretrained_vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT 

# 2. Setup a ViT model instance with pretrained weights
pretrained_vit = torchvision.models.vit_b_16(weights=pretrained_vit_weights).to(device)

# 3. Freeze the base parameters
for parameter in pretrained_vit.parameters():
    parameter.requires_grad = False

# 4. Modify the classifier head with BatchNormalization and Dense layers
class_names = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']

set_seeds()
# Modify the classifier head
pretrained_vit.heads = nn.Sequential(
    nn.BatchNorm1d(768),
    nn.Linear(in_features=768, out_features=512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=512, out_features=256),  # Experiment with different values
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(class_names))
).to(device)


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 243MB/s]  


In [5]:
# 5. Check the modified model summary
summary(model=pretrained_vit, 
        input_size=(32, 3, 224, 224),  # Adjust the input size according to your data
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
)




Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 4]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 14, 14]    (590,592)            False
├─Encoder (encoder)                                          [32, 197, 768]       [32, 197, 768]       151,296              False
│    └─Dropout (dropout)                                     [32, 197, 768]       [32, 197, 768]       --                   --
│    └─Sequential (layers)                                   [32, 197, 768]       [32, 197, 768]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 768]       [32, 197, 768]       (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 768]       [32, 

In [6]:
# Setup directory paths to train and test images
train_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'
test_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'

# 6. Get automatic transforms from pretrained ViT weights
pretrained_vit_transforms = pretrained_vit_weights.transforms()

# 7. Create dataloaders with modified ViT model
NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str, 
    test_dir: str, 
    transform: transforms.Compose, 
    batch_size: int, 
    num_workers: int=NUM_WORKERS
):

  # Use ImageFolder to create dataset(s)
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Turn images into data loaders
  train_dataloader = DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True,
  )
  test_dataloader = DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return train_dataloader, test_dataloader, class_names

# 8. Setup dataloaders with modified ViT model
train_dataloader_pretrained, test_dataloader_pretrained, class_names = create_dataloaders(train_dir=train_dir,
                                                                                                     test_dir=test_dir,
                                                                                                     transform=pretrained_vit_transforms,
                                                                                                     batch_size=16)

# The rest of the code remains the same for training the modified model

In [7]:


# Create optimizer and loss function
optimizer = torch.optim.Adam(params=pretrained_vit.parameters(), 
                             lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()


# Train the classifier head of the pretrained ViT feature extractor model
set_seeds()
pretrained_vit_results = engine.train(model=pretrained_vit,
                                      train_dataloader=train_dataloader_pretrained,
                                      test_dataloader=test_dataloader_pretrained,
                                      optimizer=optimizer,
                                      loss_fn=loss_fn,
                                      epochs=10,
                                      device=device)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.1573 | train_acc: 0.5387 | test_loss: 0.8272 | test_acc: 0.7545
Epoch: 2 | train_loss: 0.7526 | train_acc: 0.7301 | test_loss: 0.5241 | test_acc: 0.8398
Epoch: 3 | train_loss: 0.5569 | train_acc: 0.8093 | test_loss: 0.3765 | test_acc: 0.8795
Epoch: 4 | train_loss: 0.4249 | train_acc: 0.8506 | test_loss: 0.2877 | test_acc: 0.9138
Epoch: 5 | train_loss: 0.3631 | train_acc: 0.8718 | test_loss: 0.2288 | test_acc: 0.9298
Epoch: 6 | train_loss: 0.2938 | train_acc: 0.9024 | test_loss: 0.1846 | test_acc: 0.9404
Epoch: 7 | train_loss: 0.2642 | train_acc: 0.9070 | test_loss: 0.1490 | test_acc: 0.9595
Epoch: 8 | train_loss: 0.2425 | train_acc: 0.9152 | test_loss: 0.1229 | test_acc: 0.9665
Epoch: 9 | train_loss: 0.2020 | train_acc: 0.9276 | test_loss: 0.1019 | test_acc: 0.9756
Epoch: 10 | train_loss: 0.1963 | train_acc: 0.9397 | test_loss: 0.0820 | test_acc: 0.9809


In [8]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# Evaluate the model on the testing dataset
def evaluate_model(model, test_dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

# Evaluate the model and get the true and predicted labels
y_true, y_pred = evaluate_model(pretrained_vit, test_dataloader_pretrained)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Print metrics and confusion matrix
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)



Accuracy: 0.9809305873379099
F1 Score: 0.9809502294078271
Precision: 0.9810757276315637
Recall: 0.9809305873379099
Confusion Matrix:
 [[289   9   1   1]
 [  3 298   1   4]
 [  0   2 403   0]
 [  1   3   0 296]]


In [9]:
from sklearn.metrics import classification_report

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())

# Calculate classification report
report = classification_report(targets, predictions, target_names=class_names, digits=4)
print(report)


              precision    recall  f1-score   support

      glioma     0.9863    0.9633    0.9747       300
  meningioma     0.9551    0.9739    0.9644       306
     notumor     0.9951    0.9951    0.9951       405
   pituitary     0.9834    0.9867    0.9850       300

    accuracy                         0.9809      1311
   macro avg     0.9800    0.9797    0.9798      1311
weighted avg     0.9811    0.9809    0.9810      1311



In [10]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

# Store misclassified images
misclassified_images = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())
        
        # Check for misclassified images
        misclassified_indices = (predicted != labels).nonzero()
        misclassified_images.extend([images[idx].cpu() for idx in misclassified_indices])

# Convert targets to integers
targets_int = [int(target) for target in targets]

# Calculate accuracy
accuracy = accuracy_score(targets_int, predictions)
print("Overall Accuracy:", accuracy)

# Calculate test size for each class
test_size_per_class = [sum(1 for target in targets_int if target == i) for i in range(len(class_names))]

# Print misclassification information for each class
for i, class_name in enumerate(class_names):
    num_misclassified_class = sum(1 for label, prediction in zip(targets_int, predictions) if label == i and label != prediction)
    print(f"Accuracy for class '{class_name}': {accuracy_score([1 if target == i else 0 for target in targets_int], [1 if prediction == i else 0 for prediction in predictions])}")
    print(f"Number of Misclassified Images in class '{class_name}': {num_misclassified_class}/{test_size_per_class[i]}")


Overall Accuracy: 0.9809305873379099
Accuracy for class 'glioma': 0.988558352402746
Number of Misclassified Images in class 'glioma': 11/300
Accuracy for class 'meningioma': 0.9832189168573608
Number of Misclassified Images in class 'meningioma': 8/306
Accuracy for class 'notumor': 0.9969488939740656
Number of Misclassified Images in class 'notumor': 2/405
Accuracy for class 'pituitary': 0.9931350114416476
Number of Misclassified Images in class 'pituitary': 4/300


# 2 **FTVT_B_32**

In [11]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torchinfo import summary
import os
from torchvision import datasets
from torch.utils.data import DataLoader

# Set the device
device = "cuda" if torch.cuda.is_available() else "cpu"

def set_seeds(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# 1. Get pretrained weights for ViT-Base
pretrained_vit_weights = torchvision.models.ViT_B_32_Weights.DEFAULT 

# 2. Setup a ViT model instance with pretrained weights
pretrained_vit = torchvision.models.vit_b_32(weights=pretrained_vit_weights).to(device)

# 3. Freeze the base parameters
for parameter in pretrained_vit.parameters():
    parameter.requires_grad = False

# 4. Modify the classifier head with BatchNormalization and Dense layers
class_names = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']

set_seeds()
# Modify the classifier head with more layers or nodes
pretrained_vit.heads = nn.Sequential(
    nn.BatchNorm1d(768),
    nn.Linear(in_features=768, out_features=512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=512, out_features=256),  # Experiment with different values
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(class_names))
).to(device)




Downloading: "https://download.pytorch.org/models/vit_b_32-d86f8d99.pth" to /root/.cache/torch/hub/checkpoints/vit_b_32-d86f8d99.pth
100%|██████████| 337M/337M [00:01<00:00, 245MB/s]  


In [12]:
# Assuming input size is (3, 224, 224), adjust it based on your actual input size
#print(pretrained_vit)
# 5. Check the modified model summary
summary(model=pretrained_vit, 
        input_size=(32, 3, 224, 224),  # Adjust the input size according to your data
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
)


Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 4]              768                  Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 768, 7, 7]      (2,360,064)          False
├─Encoder (encoder)                                          [32, 50, 768]        [32, 50, 768]        38,400               False
│    └─Dropout (dropout)                                     [32, 50, 768]        [32, 50, 768]        --                   --
│    └─Sequential (layers)                                   [32, 50, 768]        [32, 50, 768]        --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 50, 768]        [32, 50, 768]        (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 50, 768]        [32, 

In [13]:
# Setup directory paths to train and test images
train_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'
test_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'

# 6. Get automatic transforms from pretrained ViT weights
pretrained_vit_transforms = pretrained_vit_weights.transforms()

# 7. Create dataloaders with modified ViT model
NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str, 
    test_dir: str, 
    transform: transforms.Compose, 
    batch_size: int, 
    num_workers: int=NUM_WORKERS
):

  # Use ImageFolder to create dataset(s)
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Turn images into data loaders
  train_dataloader = DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True,
  )
  test_dataloader = DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return train_dataloader, test_dataloader, class_names

# 8. Setup dataloaders with modified ViT model
train_dataloader_pretrained, test_dataloader_pretrained, class_names = create_dataloaders(train_dir=train_dir,
                                                                                                     test_dir=test_dir,
                                                                                                     transform=pretrained_vit_transforms,
                                                                                                     batch_size=16)

# The rest of the code remains the same for training the modified model

In [14]:

# Create optimizer and loss function
optimizer = torch.optim.Adam(params=pretrained_vit.parameters(), 
                             lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()


# Train the classifier head of the pretrained ViT feature extractor model
set_seeds()
pretrained_vit_results = engine.train(model=pretrained_vit,
                                      train_dataloader=train_dataloader_pretrained,
                                      test_dataloader=test_dataloader_pretrained,
                                      optimizer=optimizer,
                                      loss_fn=loss_fn,
                                      epochs=10,
                                      device=device)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.1332 | train_acc: 0.5684 | test_loss: 0.8166 | test_acc: 0.7667
Epoch: 2 | train_loss: 0.7624 | train_acc: 0.7301 | test_loss: 0.5450 | test_acc: 0.8269
Epoch: 3 | train_loss: 0.5879 | train_acc: 0.7979 | test_loss: 0.4127 | test_acc: 0.8528
Epoch: 4 | train_loss: 0.4625 | train_acc: 0.8330 | test_loss: 0.3266 | test_acc: 0.8947
Epoch: 5 | train_loss: 0.4242 | train_acc: 0.8458 | test_loss: 0.2730 | test_acc: 0.9115
Epoch: 6 | train_loss: 0.3444 | train_acc: 0.8780 | test_loss: 0.2295 | test_acc: 0.9275
Epoch: 7 | train_loss: 0.3141 | train_acc: 0.8810 | test_loss: 0.1889 | test_acc: 0.9435
Epoch: 8 | train_loss: 0.2903 | train_acc: 0.8955 | test_loss: 0.1602 | test_acc: 0.9557
Epoch: 9 | train_loss: 0.2487 | train_acc: 0.9116 | test_loss: 0.1360 | test_acc: 0.9649
Epoch: 10 | train_loss: 0.2446 | train_acc: 0.9138 | test_loss: 0.1181 | test_acc: 0.9687


In [15]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# Evaluate the model on the testing dataset
def evaluate_model(model, test_dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

# Evaluate the model and get the true and predicted labels
y_true, y_pred = evaluate_model(pretrained_vit, test_dataloader_pretrained)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Print metrics and confusion matrix
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9687261632341724
F1 Score: 0.9688457096046929
Precision: 0.9697962923425875
Recall: 0.9687261632341724
Confusion Matrix:
 [[278  22   0   0]
 [  2 295   3   6]
 [  0   1 404   0]
 [  1   6   0 293]]


In [16]:
from sklearn.metrics import classification_report
from sklearn.metrics import classification_report

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())

# Calculate classification report
report = classification_report(targets, predictions, target_names=class_names, digits=4)
print(report)


              precision    recall  f1-score   support

      glioma     0.9893    0.9267    0.9570       300
  meningioma     0.9105    0.9641    0.9365       306
     notumor     0.9926    0.9975    0.9951       405
   pituitary     0.9799    0.9767    0.9783       300

    accuracy                         0.9687      1311
   macro avg     0.9681    0.9662    0.9667      1311
weighted avg     0.9698    0.9687    0.9688      1311



In [17]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

# Store misclassified images
misclassified_images = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())
        
        # Check for misclassified images
        misclassified_indices = (predicted != labels).nonzero()
        misclassified_images.extend([images[idx].cpu() for idx in misclassified_indices])

# Convert targets to integers
targets_int = [int(target) for target in targets]

# Calculate accuracy
accuracy = accuracy_score(targets_int, predictions)
print("Overall Accuracy:", accuracy)

# Calculate test size for each class
test_size_per_class = [sum(1 for target in targets_int if target == i) for i in range(len(class_names))]

# Print misclassification information for each class
for i, class_name in enumerate(class_names):
    num_misclassified_class = sum(1 for label, prediction in zip(targets_int, predictions) if label == i and label != prediction)
    print(f"Accuracy for class '{class_name}': {accuracy_score([1 if target == i else 0 for target in targets_int], [1 if prediction == i else 0 for prediction in predictions])}")
    print(f"Number of Misclassified Images in class '{class_name}': {num_misclassified_class}/{test_size_per_class[i]}")

Overall Accuracy: 0.9687261632341724
Accuracy for class 'glioma': 0.9809305873379099
Number of Misclassified Images in class 'glioma': 22/300
Accuracy for class 'meningioma': 0.969488939740656
Number of Misclassified Images in class 'meningioma': 11/306
Accuracy for class 'notumor': 0.9969488939740656
Number of Misclassified Images in class 'notumor': 1/405
Accuracy for class 'pituitary': 0.9900839054157132
Number of Misclassified Images in class 'pituitary': 7/300


# 3 FTVT_L_32

In [18]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch import nn
from torchvision import transforms
from torchinfo import summary
import os
from torchvision import datasets
from torch.utils.data import DataLoader

# Set the device
device = "cuda" if torch.cuda.is_available() else "cpu"

def set_seeds(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# 1. Get pretrained weights for ViT-Base
pretrained_vit_weights = torchvision.models.ViT_L_32_Weights.DEFAULT 

# 2. Setup a ViT model instance with pretrained weights
pretrained_vit = torchvision.models.vit_l_32(weights=pretrained_vit_weights).to(device)

# 3. Freeze the base parameters
for parameter in pretrained_vit.parameters():
    parameter.requires_grad = False

# 4. Modify the classifier head with BatchNormalization and Dense layers
class_names = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']

set_seeds()
# Modify the classifier head with more layers or nodes
pretrained_vit.heads = nn.Sequential(
    nn.BatchNorm1d(1024),
    nn.Linear(in_features=1024, out_features=512),  # Adjust input size to match the previous layer's output size
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=512, out_features=256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(class_names))
).to(device)


Downloading: "https://download.pytorch.org/models/vit_l_32-c7638314.pth" to /root/.cache/torch/hub/checkpoints/vit_l_32-c7638314.pth
100%|██████████| 1.14G/1.14G [00:05<00:00, 221MB/s] 


In [19]:
# Assuming input size is (3, 224, 224), adjust it based on your actual input size
#print(pretrained_vit)
# 5. Check the modified model summary
summary(model=pretrained_vit, 
        input_size=(32, 3, 224, 224),  # Adjust the input size according to your data
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
)


Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 4]              1,024                Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 1024, 7, 7]     (3,146,752)          False
├─Encoder (encoder)                                          [32, 50, 1024]       [32, 50, 1024]       51,200               False
│    └─Dropout (dropout)                                     [32, 50, 1024]       [32, 50, 1024]       --                   --
│    └─Sequential (layers)                                   [32, 50, 1024]       [32, 50, 1024]       --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 50, 1024]       [32, 50, 1024]       (12,596,224)         False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 50, 1024]       [32, 

In [20]:
# Setup directory paths to train and test images
train_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'
test_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'

# 6. Get automatic transforms from pretrained ViT weights
pretrained_vit_transforms = pretrained_vit_weights.transforms()

# 7. Create dataloaders with modified ViT model
NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str, 
    test_dir: str, 
    transform: transforms.Compose, 
    batch_size: int, 
    num_workers: int=NUM_WORKERS
):

  # Use ImageFolder to create dataset(s)
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Turn images into data loaders
  train_dataloader = DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True,
  )
  test_dataloader = DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return train_dataloader, test_dataloader, class_names

# 8. Setup dataloaders with modified ViT model
train_dataloader_pretrained, test_dataloader_pretrained, class_names = create_dataloaders(train_dir=train_dir,
                                                                                                     test_dir=test_dir,
                                                                                                     transform=pretrained_vit_transforms,
                                                                                                     batch_size=16)

# The rest of the code remains the same for training the modified model

In [21]:

# Create optimizer and loss function
optimizer = torch.optim.Adam(params=pretrained_vit.parameters(), 
                             lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()


# Train the classifier head of the pretrained ViT feature extractor model
set_seeds()
pretrained_vit_results = engine.train(model=pretrained_vit,
                                      train_dataloader=train_dataloader_pretrained,
                                      test_dataloader=test_dataloader_pretrained,
                                      optimizer=optimizer,
                                      loss_fn=loss_fn,
                                      epochs=10,
                                      device=device)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.1240 | train_acc: 0.5578 | test_loss: 0.8084 | test_acc: 0.7718
Epoch: 2 | train_loss: 0.7541 | train_acc: 0.7323 | test_loss: 0.5376 | test_acc: 0.8391
Epoch: 3 | train_loss: 0.5640 | train_acc: 0.8101 | test_loss: 0.3881 | test_acc: 0.8680
Epoch: 4 | train_loss: 0.4394 | train_acc: 0.8360 | test_loss: 0.3003 | test_acc: 0.8993
Epoch: 5 | train_loss: 0.3803 | train_acc: 0.8588 | test_loss: 0.2360 | test_acc: 0.9306
Epoch: 6 | train_loss: 0.3067 | train_acc: 0.9024 | test_loss: 0.1921 | test_acc: 0.9436
Epoch: 7 | train_loss: 0.2604 | train_acc: 0.9092 | test_loss: 0.1479 | test_acc: 0.9588
Epoch: 8 | train_loss: 0.2418 | train_acc: 0.9169 | test_loss: 0.1183 | test_acc: 0.9710
Epoch: 9 | train_loss: 0.2111 | train_acc: 0.9314 | test_loss: 0.0940 | test_acc: 0.9809
Epoch: 10 | train_loss: 0.1935 | train_acc: 0.9344 | test_loss: 0.0750 | test_acc: 0.9863


In [22]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# Evaluate the model on the testing dataset
def evaluate_model(model, test_dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

# Evaluate the model and get the true and predicted labels
y_true, y_pred = evaluate_model(pretrained_vit, test_dataloader_pretrained)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Print metrics and confusion matrix
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.9862700228832952
F1 Score: 0.986287309838429
Precision: 0.9863661338039442
Recall: 0.9862700228832952
Confusion Matrix:
 [[292   8   0   0]
 [  2 300   1   3]
 [  0   1 404   0]
 [  1   2   0 297]]


In [23]:
from sklearn.metrics import classification_report

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())

# Calculate classification report
report = classification_report(targets, predictions, target_names=class_names, digits=4)
print(report)


              precision    recall  f1-score   support

      glioma     0.9898    0.9733    0.9815       300
  meningioma     0.9646    0.9804    0.9724       306
     notumor     0.9975    0.9975    0.9975       405
   pituitary     0.9900    0.9900    0.9900       300

    accuracy                         0.9863      1311
   macro avg     0.9855    0.9853    0.9854      1311
weighted avg     0.9864    0.9863    0.9863      1311



In [24]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

# Store misclassified images
misclassified_images = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())
        
        # Check for misclassified images
        misclassified_indices = (predicted != labels).nonzero()
        misclassified_images.extend([images[idx].cpu() for idx in misclassified_indices])

# Convert targets to integers
targets_int = [int(target) for target in targets]

# Calculate accuracy
accuracy = accuracy_score(targets_int, predictions)
print("Overall Accuracy:", accuracy)

# Calculate test size for each class
test_size_per_class = [sum(1 for target in targets_int if target == i) for i in range(len(class_names))]

# Print misclassification information for each class
for i, class_name in enumerate(class_names):
    num_misclassified_class = sum(1 for label, prediction in zip(targets_int, predictions) if label == i and label != prediction)
    print(f"Accuracy for class '{class_name}': {accuracy_score([1 if target == i else 0 for target in targets_int], [1 if prediction == i else 0 for prediction in predictions])}")
    print(f"Number of Misclassified Images in class '{class_name}': {num_misclassified_class}/{test_size_per_class[i]}")

Overall Accuracy: 0.9862700228832952
Accuracy for class 'glioma': 0.9916094584286804
Number of Misclassified Images in class 'glioma': 8/300
Accuracy for class 'meningioma': 0.9870327993897788
Number of Misclassified Images in class 'meningioma': 6/306
Accuracy for class 'notumor': 0.9984744469870328
Number of Misclassified Images in class 'notumor': 1/405
Accuracy for class 'pituitary': 0.9954233409610984
Number of Misclassified Images in class 'pituitary': 3/300


# 4 **FTVT_L_16**

In [57]:
import matplotlib.pyplot as plt
import torch

import torchvision
from torch import nn
from torchvision import transforms
from torchinfo import summary
import os
from torchvision import datasets
from torch.utils.data import DataLoader

# Set the device
device = "cuda" if torch.cuda.is_available() else "cpu"

def set_seeds(seed: int = 42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# 1. Get pretrained weights for ViT-Base
pretrained_vit_weights = torchvision.models.ViT_L_16_Weights.DEFAULT 

# 2. Setup a ViT model instance with pretrained weights
pretrained_vit = torchvision.models.vit_l_16(weights=pretrained_vit_weights).to(device)

# 3. Freeze the base parameters
for parameter in pretrained_vit.parameters():
    parameter.requires_grad = False

# 4. Modify the classifier head with BatchNormalization and Dense layers
class_names = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']

set_seeds()
# Modify the classifier head with more layers or nodes
pretrained_vit.heads = nn.Sequential(
    nn.BatchNorm1d(1024),
    nn.Linear(in_features=1024, out_features=512),  # Adjust input size to match the previous layer's output size
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=512, out_features=256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=256, out_features=len(class_names))
).to(device)


Downloading: "https://download.pytorch.org/models/vit_l_16-852ce7e3.pth" to /root/.cache/torch/hub/checkpoints/vit_l_16-852ce7e3.pth
100%|██████████| 1.13G/1.13G [00:05<00:00, 220MB/s] 


In [58]:
# Assuming input size is (3, 224, 224), adjust it based on your actual input size
#print(pretrained_vit)
# 5. Check the modified model summary
summary(model=pretrained_vit, 
        input_size=(32, 3, 224, 224),  # Adjust the input size according to your data
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
)


Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [32, 3, 224, 224]    [32, 4]              1,024                Partial
├─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 1024, 14, 14]   (787,456)            False
├─Encoder (encoder)                                          [32, 197, 1024]      [32, 197, 1024]      201,728              False
│    └─Dropout (dropout)                                     [32, 197, 1024]      [32, 197, 1024]      --                   --
│    └─Sequential (layers)                                   [32, 197, 1024]      [32, 197, 1024]      --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [32, 197, 1024]      [32, 197, 1024]      (12,596,224)         False
│    │    └─EncoderBlock (encoder_layer_1)                   [32, 197, 1024]      [32, 

In [59]:
# Setup directory paths to train and test images
train_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'
test_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'

# 6. Get automatic transforms from pretrained ViT weights
pretrained_vit_transforms = pretrained_vit_weights.transforms()

# 7. Create dataloaders with modified ViT model
NUM_WORKERS = os.cpu_count()

def create_dataloaders(
    train_dir: str, 
    test_dir: str, 
    transform: transforms.Compose, 
    batch_size: int, 
    num_workers: int=NUM_WORKERS
):

  # Use ImageFolder to create dataset(s)
  train_data = datasets.ImageFolder(train_dir, transform=transform)
  test_data = datasets.ImageFolder(test_dir, transform=transform)

  # Get class names
  class_names = train_data.classes

  # Turn images into data loaders
  train_dataloader = DataLoader(
      train_data,
      batch_size=batch_size,
      shuffle=True,
      num_workers=num_workers,
      pin_memory=True,
  )
  test_dataloader = DataLoader(
      test_data,
      batch_size=batch_size,
      shuffle=False,
      num_workers=num_workers,
      pin_memory=True,
  )

  return train_dataloader, test_dataloader, class_names

# 8. Setup dataloaders with modified ViT model
train_dataloader_pretrained, test_dataloader_pretrained, class_names = create_dataloaders(train_dir=train_dir,
                                                                                                     test_dir=test_dir,
                                                                                                     transform=pretrained_vit_transforms,
                                                                                                     batch_size=16)

# The rest of the code remains the same for training the modified model


In [60]:

# Create optimizer and loss function
optimizer = torch.optim.Adam(params=pretrained_vit.parameters(), 
                             lr=1e-4)
loss_fn = torch.nn.CrossEntropyLoss()


# Train the classifier head of the pretrained ViT feature extractor model
set_seeds()
pretrained_vit_results = engine.train(model=pretrained_vit,
                                      train_dataloader=train_dataloader_pretrained,
                                      test_dataloader=test_dataloader_pretrained,
                                      optimizer=optimizer,
                                 
                                      loss_fn=loss_fn,
                                      epochs=10,
                                      device=device)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0826 | train_acc: 0.6111 | test_loss: 0.7311 | test_acc: 0.8034
Epoch: 2 | train_loss: 0.6639 | train_acc: 0.7659 | test_loss: 0.4291 | test_acc: 0.8825
Epoch: 3 | train_loss: 0.4629 | train_acc: 0.8514 | test_loss: 0.2944 | test_acc: 0.9138
Epoch: 4 | train_loss: 0.3564 | train_acc: 0.8871 | test_loss: 0.2173 | test_acc: 0.9351
Epoch: 5 | train_loss: 0.2985 | train_acc: 0.9062 | test_loss: 0.1678 | test_acc: 0.9526
Epoch: 6 | train_loss: 0.2337 | train_acc: 0.9215 | test_loss: 0.1335 | test_acc: 0.9657
Epoch: 7 | train_loss: 0.2131 | train_acc: 0.9199 | test_loss: 0.1074 | test_acc: 0.9710
Epoch: 8 | train_loss: 0.1870 | train_acc: 0.9352 | test_loss: 0.0873 | test_acc: 0.9763
Epoch: 9 | train_loss: 0.1688 | train_acc: 0.9512 | test_loss: 0.0646 | test_acc: 0.9847
Epoch: 10 | train_loss: 0.1563 | train_acc: 0.9550 | test_loss: 0.0532 | test_acc: 0.9870


In [61]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# Evaluate the model on the testing dataset
def evaluate_model(model, test_dataloader):
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    return y_true, y_pred

# Evaluate the model and get the true and predicted labels
y_true, y_pred = evaluate_model(pretrained_vit, test_dataloader_pretrained)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Print metrics and confusion matrix
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:\n", conf_matrix)



Accuracy: 0.9870327993897788
F1 Score: 0.9870232372572859
Precision: 0.9870554527448101
Recall: 0.9870327993897788
Confusion Matrix:
 [[295   4   0   1]
 [  3 297   1   5]
 [  0   2 403   0]
 [  0   1   0 299]]


In [30]:
from sklearn.metrics import classification_report

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())

# Calculate classification report
report = classification_report(targets, predictions, target_names=class_names, digits=4)
print(report)


              precision    recall  f1-score   support

      glioma     0.9899    0.9833    0.9866       300
  meningioma     0.9770    0.9706    0.9738       306
     notumor     0.9975    0.9951    0.9963       405
   pituitary     0.9803    0.9967    0.9884       300

    accuracy                         0.9870      1311
   macro avg     0.9862    0.9864    0.9863      1311
weighted avg     0.9871    0.9870    0.9870      1311



In [31]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

# After training the model, make predictions on the test dataset
pretrained_vit.eval()
predictions = []
targets = []

# Store misclassified images
misclassified_images = []

with torch.no_grad():
    for images, labels in test_dataloader_pretrained:
        images, labels = images.to(device), labels.to(device)
        outputs = pretrained_vit(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())
        targets.extend(labels.cpu().numpy())
        
        # Check for misclassified images
        misclassified_indices = (predicted != labels).nonzero()
        misclassified_images.extend([images[idx].cpu() for idx in misclassified_indices])

# Convert targets to integers
targets_int = [int(target) for target in targets]

# Calculate accuracy
accuracy = accuracy_score(targets_int, predictions)
print("Overall Accuracy:", accuracy)

# Calculate test size for each class
test_size_per_class = [sum(1 for target in targets_int if target == i) for i in range(len(class_names))]

# Print misclassification information for each class
for i, class_name in enumerate(class_names):
    num_misclassified_class = sum(1 for label, prediction in zip(targets_int, predictions) if label == i and label != prediction)
    print(f"Accuracy for class '{class_name}': {accuracy_score([1 if target == i else 0 for target in targets_int], [1 if prediction == i else 0 for prediction in predictions])}")
    print(f"Number of Misclassified Images in class '{class_name}': {num_misclassified_class}/{test_size_per_class[i]}")

Overall Accuracy: 0.9870327993897788
Accuracy for class 'glioma': 0.9938977879481312
Number of Misclassified Images in class 'glioma': 5/300
Accuracy for class 'meningioma': 0.9877955758962624
Number of Misclassified Images in class 'meningioma': 9/306
Accuracy for class 'notumor': 0.9977116704805492
Number of Misclassified Images in class 'notumor': 2/405
Accuracy for class 'pituitary': 0.9946605644546148
Number of Misclassified Images in class 'pituitary': 1/300
