<h1> 1. Imports </h1>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
from torchvision import models
from torchvision.io import read_image

import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
def read_labels(root_dir: str, le: LabelEncoder):
    """Index every file under ``root_dir`` and encode its class label.

    The label of a file is the name of the directory that directly contains
    it, e.g. ``data/Training/glioma_tumor/x.jpg`` -> ``glioma_tumor``.

    Args:
        root_dir: Directory that is walked recursively with ``os.walk``.
        le: Encoder object; its ``fit_transform`` is called once on the column
            of directory names and the result replaces that column.

    Returns:
        A ``pandas.DataFrame`` with one row per file and two columns:
        ``filepath`` (walked directory joined with the file name) and
        ``labels`` (whatever ``le.fit_transform`` returned for that row).
    """
    records = {"filepath": [], "labels": []}

    for root, dirs, files in os.walk(root_dir):
        # os.walk yields entries in filesystem-dependent order; sorting
        # ``dirs`` in place (and the files below) makes the row order
        # reproducible from one machine to the next.
        dirs.sort()

        # basename() splits on the platform's own separator instead of a
        # hard-coded backslash; normpath() drops a trailing separator first.
        label = os.path.basename(os.path.normpath(root))

        for file in sorted(files):
            records["filepath"].append(os.path.join(root, file))
            records["labels"].append(label)

    labels_df = pd.DataFrame(records)
    labels_df["labels"] = le.fit_transform(labels_df["labels"])
    return labels_df

In [3]:
# Build the file/label table for everything under "data". The encoder is
# created here (not inside read_labels) so `le` stays available afterwards.
le = LabelEncoder()
labels_df = read_labels(root_dir="data", le=le)

In [4]:
# Show the label table as the cell output (the recorded output below is the
# truncated head/tail view of the frame).
labels_df

Unnamed: 0,filepath,labels
0,data\Testing\glioma_tumor\image(1).jpg,0
1,data\Testing\glioma_tumor\image(10).jpg,0
2,data\Testing\glioma_tumor\image(100).jpg,0
3,data\Testing\glioma_tumor\image(11).jpg,0
4,data\Testing\glioma_tumor\image(12).jpg,0
...,...,...
3259,data\Training\pituitary_tumor\p (95).jpg,3
3260,data\Training\pituitary_tumor\p (96).jpg,3
3261,data\Training\pituitary_tumor\p (97).jpg,3
3262,data\Training\pituitary_tumor\p (98).jpg,3


In [5]:
class BrainTumorDataset(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs from a label table.

    Args:
        root_dir: Directory the samples belong to. It is only stored on the
            instance; image paths are read from ``labels_df`` unchanged.
        labels_df: Table whose first column holds the image file path and
            whose second column holds the label (the column layout that
            ``read_labels`` returns).
        transform: Optional callable applied to the decoded image before it
            is returned.
    """

    def __init__(self, root_dir, labels_df, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        self.labels_df = labels_df

    def __len__(self):
        """Return the number of rows (samples) in the label table."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Args:
            idx: Row position; a tensor index is converted with ``tolist()``.

        Returns:
            ``(image, label)`` where ``image`` is the result of ``read_image``
            (after ``transform`` when one was given) and ``label`` is the
            value stored in the second column of that row.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = read_image(self.labels_df.iloc[idx, 0])
        # Positional lookup: a bare ``labels_df[idx, 1]`` is treated as a
        # column key by pandas and raises KeyError.
        label = self.labels_df.iloc[idx, 1]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [6]:
# `labels_df` indexes every file under "data" (both Training and Testing), so
# each dataset must receive only the rows of its own split; passing the full
# table to both would make the train and test sets identical.
_normalized_paths = labels_df["filepath"].map(os.path.normpath)


def _rows_under(split_dir):
    """Return the rows of `labels_df` whose file lives below `split_dir`."""
    prefix = os.path.normpath(split_dir) + os.sep
    return labels_df[_normalized_paths.str.startswith(prefix)].reset_index(drop=True)


dataset_train = BrainTumorDataset("data/Training", _rows_under("data/Training"))

dataset_test = BrainTumorDataset("data/Testing", _rows_under("data/Testing"))

In [None]:
class VGG_BrainTumorNet(nn.Module):
    """VGG-16-style convolutional classifier with batch normalisation.

    The network is a stack of 3x3 convolutions (each followed by BatchNorm and
    ReLU) interleaved with 2x2 max-pools, then a three-layer dense head.

    Args:
        image_channels: Number of channels of the input images.
        output_classes: Number of classes produced by the last linear layer.
        image_size: Side length of the (square) input images. It is used to
            size the first dense layer; defaults to 224.
    """

    # Ints are conv output channels, "pool" is a 2x2 max-pool that halves the
    # spatial size.
    DEFAULT_CONV_LAYERS = (
        64, 64, "pool",
        128, 128, "pool",
        256, 256, 256, "pool",
        512, 512, 512, "pool",
        512, 512, 512, "pool",
    )

    def __init__(self, image_channels, output_classes, image_size=224):

        super(VGG_BrainTumorNet, self).__init__()

        self.image_channels = image_channels
        self.output_classes = output_classes
        self.image_size = image_size

        self.conv_layers = list(self.DEFAULT_CONV_LAYERS)

        self.model = self._build_architecture()

    def _build_architecture(self):
        """Assemble the conv stack and dense head described by ``self.conv_layers``.

        Returns:
            An ``nn.Sequential`` holding every layer in execution order.

        Raises:
            ValueError: If ``image_size`` is too small to survive all pooling
                steps (the feature map would have side length 0).
        """
        layers = []
        num_max_pool = 0

        input_channels = self.image_channels

        # conv layers
        for x in self.conv_layers:
            if isinstance(x, int):
                output_channels = x

                layers += [
                    nn.Conv2d(input_channels, output_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                    nn.BatchNorm2d(output_channels),
                    nn.ReLU()
                ]

                # The next conv consumes what this one produced.
                input_channels = output_channels

            else:
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
                num_max_pool += 1

        # The padded 3x3 convs keep the spatial size; every pool halves it.
        feature_map_side = self.image_size // (2 ** num_max_pool)
        if feature_map_side < 1:
            raise ValueError(
                f"image_size={self.image_size} is too small for {num_max_pool} pooling layers"
            )

        # After the loop ``input_channels`` is the channel count of the last conv.
        flattened_features = input_channels * feature_map_side ** 2

        # dense
        layers += [
            nn.Flatten(),
            nn.Linear(flattened_features, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, self.output_classes)
        ]

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run a batch of images through the network and return the class scores."""
        return self.model(x)

In [7]:
# Load torchvision's ViT-B/32 with pretrained weights so its layer structure
# can be inspected below. NOTE(review): the ViT_BrainTumor class further down
# builds on vit_b_16, not this vit_b_32 — confirm which variant is intended.
m = models.vit_b_32(pretrained=True)

Downloading: "https://download.pytorch.org/models/vit_b_32-d86f8d99.pth" to C:\Users\6trze/.cache\torch\hub\checkpoints\vit_b_32-d86f8d99.pth


  0%|          | 0.00/337M [00:00<?, ?B/s]

In [18]:
# Print the module tree of the loaded model (layer names, types and sizes).
print(m)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (linear_1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (dropout_1): Dropout(p=0.0, inplace=False)
          (linear_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout_2): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
 

In [None]:
class ViT_BrainTumor(nn.Module):
    """Pretrained torchvision ViT-B/16 backbone with a new dense classification head.

    Args:
        image_channels: Number of input channels. It is only stored; the patch
            projection is taken from the pretrained model as-is.
            NOTE(review): the pretrained projection presumably expects 3-channel
            input — confirm before feeding single-channel images.
        output_classes: Number of classes produced by the last linear layer.
    """

    def __init__(self, image_channels, output_classes):
        super(ViT_BrainTumor, self).__init__()

        self.image_channels = image_channels
        self.output_classes = output_classes

        model = models.vit_b_16(pretrained=True)

        # Reuse the pretrained patch projection, class token and encoder.
        self.conv_proj = model.conv_proj
        self.class_token = model.class_token
        self.encoder = model.encoder

        # Width of one token embedding, i.e. what the head receives.
        hidden_dim = model.hidden_dim

        self.heads = nn.Sequential(
            nn.Linear(hidden_dim, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, self.output_classes)
        )

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: Image batch with the batch dimension first.

        Returns:
            Tensor with one row of ``output_classes`` scores per image.
        """
        batch_size = x.shape[0]

        # conv_proj returns a 4-D feature map; the encoder needs a 3-D token
        # sequence, so flatten the spatial grid and move it before the channels.
        patches = self.conv_proj(x)
        tokens = patches.flatten(2).transpose(1, 2)

        # Prepend the class token to every sequence in the batch.
        class_tokens = self.class_token.expand(batch_size, -1, -1)
        tokens = torch.cat([class_tokens, tokens], dim=1)

        encoded = self.encoder(tokens)

        # Classify from the class-token position only (index 0 of the sequence).
        return self.heads(encoded[:, 0])