In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
!pip install medmnist

Collecting medmnist
  Downloading medmnist-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting fire (from medmnist)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading medmnist-3.0.2-py3-none-any.whl (25 kB)
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=26f4cfcf76c992e72f6b63c9a90e457359e7b9123ff0548f629fb4ca66dc7fe2
  Stored in directory: /root/.cache/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.7.0 medmnist-3.0.2


In [11]:
import  medmnist 
from medmnist import BreastMNIST
import numpy as np
import torch
import matplotlib.pyplot as plt

In [16]:
dataset = BreastMNIST(split='train', download=True)

Using downloaded and verified file: /root/.medmnist/breastmnist.npz


In [17]:
print(dataset)

Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 546
    Root location: /root/.medmnist
    Split: train
    Task: binary-class
    Number of channels: 1
    Meaning of labels: {'0': 'malignant', '1': 'normal, benign'}
    Number of samples: {'train': 546, 'val': 78, 'test': 156}
    Description: The BreastMNIST is based on a dataset of 780 breast ultrasound images. It is categorized into 3 classes: normal, benign, and malignant. As we use low-resolution images, we simplify the task into binary classification by combining normal and benign as positive and classifying them against malignant as negative. We split the source dataset with a ratio of 7:1:2 into training, validation and test set. The source images of 1×500×500 are resized into 1×28×28.
    License: CC BY 4.0


In [23]:
import numpy as np
import torch
import cv2
import torch.nn as nn
from transformers import ViTModel, ViTConfig
from torchvision import transforms
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import tqdm

#Pretrained model checkpoint
model_checkpoint = 'google/vit-base-patch16-224-in21k'
     

class ImageDataset(torch.utils.data.Dataset):

  def __init__(self, input_data):
        
      self.input_data = input_data
      # Transform input data
      self.transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((224, 224), antialias=True),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], 
                             std=[0.5, 0.5, 0.5])
        ])

  def __len__(self):
      return len(self.input_data)
    
  def get_images(self, idx):
      return self.transform(self.input_data[idx]['image'])
  
  def get_labels(self, idx):
      return self.input_data[idx]['label']
  
  def __getitem__(self, idx):
      # Get input data in a batch
      train_images = self.get_images(idx)
      train_labels = self.get_labels(idx)

      return train_images, train_labels
     

class ViT(nn.Module):

  def __init__(self, config=ViTConfig(), num_labels=20, 
               model_checkpoint='google/vit-base-patch16-224-in21k'):

        super(ViT, self).__init__()

        self.vit = ViTModel.from_pretrained(model_checkpoint, add_pooling_layer=False)
        self.classifier = (
            nn.Linear(config.hidden_size, num_labels) 
        )

  def forward(self, x):

    x = self.vit(x)['last_hidden_state']
    # Use the embedding of [CLS] token
    output = self.classifier(x[:, 0, :])

    return output

In [24]:
def model_train(dataset, epochs, learning_rate, bs):

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load nodel, loss function, and optimizer
    model = ViT().to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = Adam(model.parameters(), lr=learning_rate)

    # Load batch image
    train_dataset = ImageDataset(dataset)
    train_dataloader = DataLoader(train_dataset, num_workers=1, batch_size=bs, shuffle=True)

    # Fine tuning loop
    for i in range(epochs):
        total_acc_train = 0
        total_loss_train = 0.0

        for train_image, train_label in tqdm(train_dataloader):
            output = model(train_image.to(device))
            loss = criterion(output, train_label.to(device))
            acc = (output.argmax(dim=1) == train_label.to(device)).sum().item()
            total_acc_train += acc
            total_loss_train += loss.item()

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        print(f'Epochs: {i + 1} | Loss: {total_loss_train / len(train_dataset): .3f} | Accuracy: {total_acc_train / len(train_dataset): .3f}')

    return model

# Hyperparameters
EPOCHS = 10
LEARNING_RATE = 1e-4
BATCH_SIZE = 8

# Train the model
trained_model = model_train(dataset['train'], EPOCHS, LEARNING_RATE, BATCH_SIZE)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [30]:
import numpy as np
import torch
import torch.nn as nn
from transformers import ViTModel
from torchvision import transforms
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import medmnist
from medmnist import BreastMNIST

# Pretrained model checkpoint
MODEL_CHECKPOINT = 'google/vit-base-patch16-224-in21k'

# Define constants
NUM_CLASSES = 2  # BreastMNIST is a binary classification task
BATCH_SIZE = 64
EPOCHS = 10

class BreastMNISTDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize for ViT
            transforms.Grayscale(3),  # Convert grayscale to 3-channel RGB
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        img, label = self.dataset[idx]
        img = self.transform(img)  # Apply transformations
        label = torch.tensor(label, dtype=torch.long).squeeze()  # Ensure label shape
        return img, label


# Load BreastMNIST dataset
train_data = BreastMNIST(split='train', download=True)
test_data = BreastMNIST(split='test', download=True)

# Create dataset instances
train_dataset = BreastMNISTDataset(train_data)
test_dataset = BreastMNISTDataset(test_data)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define ViT model for classification
class ViTClassifier(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(ViTClassifier, self).__init__()
        self.vit = ViTModel.from_pretrained(MODEL_CHECKPOINT, add_pooling_layer=False)
        self.classifier = nn.Linear(self.vit.config.hidden_size, num_classes)

    def forward(self, x):
        x = self.vit(x)['last_hidden_state']
        output = self.classifier(x[:, 0, :])  # Use CLS token embedding
        return output

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ViTClassifier(NUM_CLASSES).to(device)
optimizer = Adam(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# Training function
def train(model, dataloader, optimizer, criterion, epochs=EPOCHS):
    model.train()
    for epoch in range(epochs):
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        total_loss = 0
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=total_loss / len(dataloader))
    print("Training complete.")

# Train the model
train(model, train_loader, optimizer, criterion, epochs=EPOCHS)

# Evaluate model
def evaluate(model, dataloader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predictions = torch.argmax(outputs, dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Evaluate on test set
evaluate(model, test_loader)


Using downloaded and verified file: /root/.medmnist/breastmnist.npz
Using downloaded and verified file: /root/.medmnist/breastmnist.npz


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTModel: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch 1/10: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it, loss=0.597] 
Epoch 2/10: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it, loss=0.508] 
Epoch 3/10: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it, loss=0.454] 
Epoch 4/10: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it, loss=0.375] 
Epoch 5/10: 100%|██████████| 9/9 [00:10<00:00,  1.18s/it, loss=0.312] 
Epoch 6/10: 100%|██████████| 9/9 

Training complete.
Test Accuracy: 85.26%


In [38]:
import numpy as np
import torch
import torch.nn as nn
from transformers import ViTModel, ViTConfig
from torchvision import transforms
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import medmnist
from medmnist import BreastMNIST

# Define constants
NUM_CLASSES = 2  # BreastMNIST is a binary classification task
BATCH_SIZE = 32
EPOCHS = 10

# Define dataset class
class BreastMNISTDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize for ViT
            transforms.Grayscale(3),  # Convert grayscale to 3-channel RGB
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        img, label = self.dataset[idx]
        img = self.transform(img)  # Apply transformations
        label = torch.tensor(label, dtype=torch.long).squeeze()  # Ensure label shape
        return img, label

# Load BreastMNIST dataset
train_data = BreastMNIST(split='train', download=True)
test_data = BreastMNIST(split='test', download=True)

# Create dataset instances
train_dataset = BreastMNISTDataset(train_data)
test_dataset = BreastMNISTDataset(test_data)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define ViT model for classification from scratch (no pre-trained weights)
class ViTClassifier(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(ViTClassifier, self).__init__()
        # Use a custom configuration for the model (e.g., 224x224 image input size)
        config = ViTConfig(
            hidden_size=768,
            num_attention_heads=12,
            num_hidden_layers=12,
            intermediate_size=3072,
            image_size=224,
            patch_size=16,
            num_channels=3,  # RGB
            num_labels=num_classes  # Set for binary classification
        )
        self.vit = ViTModel(config)  # Initialize without pre-trained weights
        self.classifier = nn.Linear(config.hidden_size, num_classes)

    def forward(self, x):
        x = self.vit(x)['last_hidden_state']
        output = self.classifier(x[:, 0, :])  # Use CLS token embedding
        return output

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ViTClassifier(NUM_CLASSES).to(device)
optimizer = AdamW(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

# Training function
def train(model, dataloader, optimizer, criterion, epochs=EPOCHS):
    model.train()
    for epoch in range(epochs):
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        total_loss = 0
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=total_loss / len(dataloader))
    print("Training complete.")

# Train the model
train(model, train_loader, optimizer, criterion, epochs=EPOCHS)

# Evaluate model
def evaluate(model, dataloader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predictions = torch.argmax(outputs, dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    print(f"Test Accuracy: {100 * correct / total:.2f}%")

# Evaluate on test set
evaluate(model, test_loader)


Using downloaded and verified file: /root/.medmnist/breastmnist.npz
Using downloaded and verified file: /root/.medmnist/breastmnist.npz


Epoch 1/10: 100%|██████████| 18/18 [00:10<00:00,  1.67it/s, loss=1.1]  
Epoch 2/10: 100%|██████████| 18/18 [00:10<00:00,  1.67it/s, loss=0.642]
Epoch 3/10: 100%|██████████| 18/18 [00:10<00:00,  1.67it/s, loss=0.572]
Epoch 4/10: 100%|██████████| 18/18 [00:10<00:00,  1.66it/s, loss=0.586]
Epoch 5/10: 100%|██████████| 18/18 [00:10<00:00,  1.66it/s, loss=0.605]
Epoch 6/10: 100%|██████████| 18/18 [00:10<00:00,  1.65it/s, loss=0.56] 
Epoch 7/10: 100%|██████████| 18/18 [00:10<00:00,  1.65it/s, loss=0.589]
Epoch 8/10: 100%|██████████| 18/18 [00:10<00:00,  1.66it/s, loss=0.585]
Epoch 9/10: 100%|██████████| 18/18 [00:10<00:00,  1.66it/s, loss=0.546]
Epoch 10/10: 100%|██████████| 18/18 [00:10<00:00,  1.65it/s, loss=0.55] 


Training complete.
Test Accuracy: 73.08%
