In [1]:
!pip install medmnist
!pip install wandb
!pip install transformers
!pip install --upgrade transformers

!pip install pillow
!pip install evaluate
!pip install torch



In [2]:
!wandb login


[34m[1mwandb[0m: Currently logged in as: [33mmlabrie0208[0m ([33mmlabrie0208-polytechnique-montr-al[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import numpy as np
import torch
print(torch.cuda.is_available())
from torch.utils.data import Dataset, random_split, DataLoader
from transformers import (
    ViTHybridImageProcessor,
    ViTHybridForImageClassification,
    TrainingArguments,
    Trainer,
)
from medmnist import INFO
from medmnist.dataset import MedMNIST
import medmnist.dataset as mds
from PIL import Image
import evaluate
from medmnist import PathMNIST, DermaMNIST, BloodMNIST, RetinaMNIST
from data_augmentation import *
from evaluate import load



True


# Dataset

In [4]:
class CustomPathMNIST(PathMNIST):
  def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    #self.resize= Resize(size=224, method=ResizeMethod.Squish)


  def __getitem__(self, idx):
    image, label = super().__getitem__(idx)
    #resized_image = self.resize(image)
    item = {'pixel_values' : image, 'labels' : label}
    return item

## Loading Dataset

In [5]:
# Download and load datasets
print("Downloading datasets...")
path_dataset = PathMNIST(split="train", download=True, as_rgb=True)
derma_dataset = DermaMNIST(split="train", download=True, as_rgb=True)
blood_dataset = BloodMNIST(split="train", download=True, as_rgb=True)
retina_dataset = RetinaMNIST(split="train", download=True, as_rgb=True)

val_path_dataset = PathMNIST(split="val", download=True)
val_derma_dataset = DermaMNIST(split="val", download=True)
val_blood_dataset = BloodMNIST(split="val", download=True)
val_retina_dataset = RetinaMNIST(split="val", download=True)

test_path_dataset = PathMNIST(split="test", download=True)
test_derma_dataset = DermaMNIST(split="test", download=True)
test_blood_dataset = BloodMNIST(split="test", download=True)
test_retina_dataset = RetinaMNIST(split="test", download=True)

# Create a unified label set across the datasets
pathmnist_info = INFO["pathmnist"]
dermamnist_info = INFO["dermamnist"]
bloodmnist_info = INFO["bloodmnist"]
retinamnist_info = INFO["retinamnist"]

labels = set(pathmnist_info["label"].values())
labels.update(dermamnist_info["label"].values())
labels.update(bloodmnist_info["label"].values())
labels.update(retinamnist_info["label"].values())
labels = sorted(list(labels))
num_labels = len(labels)
print("Number of unique labels:", num_labels)


Downloading datasets...
Number of unique labels: 29


In [6]:
import torchvision.transforms as T
class CustomMNIST(Dataset):
  def __init__(self, dataset, *args, **kwargs):

    super(CustomMNIST, self).__init__()

    self.data_transform = transforms.Compose([
      transforms.Resize((384, 384)),
      transforms.ToTensor(),
      transforms.Normalize(mean=[.5], std=[.5])
      ])

    self.dataset = dataset
    self.transform = T.ToPILImage()

  def __len__(self):
    return len(self.dataset)


  def __getitem__(self, idx):
    image, label = self.dataset[idx]

    if isinstance(image, torch.Tensor):
          image = self.transform(image)

    image = self.data_transform(image)

    item = {'pixel_values' : image, 'labels' : label}
    return item

In [7]:
train_subset = 1080
concat_dataset = ConcatDataset(path_dataset, derma_dataset, blood_dataset, retina_dataset, train_subset)
augmented_dataset = DatasetAugmentation(concat_dataset)
train_dataset = CustomMNIST(augmented_dataset)

validation_subset = 120
concat_val_dataset = ConcatDataset(val_path_dataset, val_derma_dataset, val_blood_dataset, val_retina_dataset, validation_subset)
val_dataset = CustomMNIST(concat_val_dataset)

test_subset = 400
concat_test_dataset = ConcatDataset(test_path_dataset, test_derma_dataset, test_blood_dataset, test_retina_dataset, test_subset)
test_dataset = CustomMNIST(concat_test_dataset)

Concatenating datasets
Concatenating datasets
Concatenating datasets


In [8]:
print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))

34560
480
1600


# Training

In [9]:
feature_extractor = ViTHybridImageProcessor.from_pretrained('google/vit-hybrid-base-bit-384')
baseline_model = ViTHybridForImageClassification.from_pretrained('google/vit-hybrid-base-bit-384')

baseline_model.config.num_labels = num_labels


model = ViTHybridForImageClassification.from_pretrained('google/vit-hybrid-base-bit-384')
model.config.num_labels = num_labels

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [10]:
# Adjusted training arguments to use a smaller batch size and gradient accumulation.
training_args = TrainingArguments(
    output_dir="./vit-hybrid-medmnist",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=128,
    gradient_accumulation_steps=4,
    num_train_epochs=10,
    save_strategy="epoch",
    logging_dir='./logs',
    learning_rate=5e-5,
    report_to=["wandb"],
    run_name="vit-hybrid-medmnist-2e",
)

# Load the accuracy metric using the evaluate library.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)


In [11]:
torch.cuda.empty_cache()

baseline_trainer = Trainer(
    model=baseline_model,
    args=training_args,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

# Evaluate the baseline model on the validation set.
baseline_metrics = baseline_trainer.evaluate()
print("Baseline (pre-fine-tuning) metrics:", baseline_metrics)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmlabrie0208[0m ([33mmlabrie0208-polytechnique-montr-al[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Baseline (pre-fine-tuning) metrics: {'eval_loss': 10.060395240783691, 'eval_model_preparation_time': 0.006, 'eval_accuracy': 0.0, 'eval_runtime': 12.1852, 'eval_samples_per_second': 39.392, 'eval_steps_per_second': 0.328}


In [12]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


# Create a Trainer for fine-tuning our model.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

torch.cuda.empty_cache()

# Start the fine-tuning process.
train_results = trainer.train()

save_directory = "./vit-hybrid-medmnist-finetuned"
trainer.save_model(save_directory)
print(f"Model saved at {save_directory}")

Step,Training Loss
500,0.6797
1000,0.1368
1500,0.0273
2000,0.0067
2500,0.0007


Model saved at ./vit-hybrid-medmnist-finetuned


In [13]:
# After training, evaluate the fine-tuned model on the validation set.
finetuned_metrics = trainer.evaluate()
print("Fine-tuned metrics:", finetuned_metrics)


Fine-tuned metrics: {'eval_loss': 1.3628571033477783, 'eval_accuracy': 0.7916666666666666, 'eval_runtime': 11.9169, 'eval_samples_per_second': 40.279, 'eval_steps_per_second': 0.336, 'epoch': 10.0}


In [14]:
save_directory = "./vit-hybrid-medmnist-finetuned"
trainer.save_model(save_directory)
print(f"Model saved at {save_directory}")

trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()


Model saved at ./vit-hybrid-medmnist-finetuned
***** train metrics *****
  epoch                    =          10.0
  total_flos               = 84533489071GF
  train_loss               =        0.1577
  train_runtime            =    5:58:57.83
  train_samples_per_second =        16.046
  train_steps_per_second   =         0.125


In [15]:
metrics = trainer.evaluate(val_dataset)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)


***** eval metrics *****
  epoch                   =       10.0
  eval_accuracy           =     0.7917
  eval_loss               =     1.3629
  eval_runtime            = 0:00:11.77
  eval_samples_per_second =      40.77
  eval_steps_per_second   =       0.34


In [16]:
def TopKAccuracy(test_dataset, k):
  correct_predictions = 0

  for idx in range(len(test_dataset)):
    image = test_dataset[idx]['pixel_values'].to(model.device)
    outputs = model(image.unsqueeze(0))
    logits = outputs.logits

    values, indices = torch.topk(logits, k)
    indices = indices.tolist()

    if test_dataset[idx]['labels'].item() in indices[0]:
      correct_predictions += 1

  print(f"Fine-tuning Top-{k} Accuracy on test dataset: {(correct_predictions / len(test_dataset))*100}%")


TopKAccuracy(test_dataset, 1)
TopKAccuracy(test_dataset, 5)

Fine-tuning Top-1 Accuracy on test dataset: 76.6875%
Fine-tuning Top-5 Accuracy on test dataset: 96.1875%


In [17]:
def predict(idx):
  image = test_dataset[idx]['pixel_values'].to(model.device)

  outputs = model(image.unsqueeze(0))
  logits = outputs.logits

  predicted_class_idx = logits.argmax(-1).item()
  real_class_idx = test_dataset[idx]['labels'].item()
  print("Predicted class:", model.config.id2label[f'{predicted_class_idx}'])
  print("Real Class: ", model.config.id2label[f'{real_class_idx}'])

In [18]:
model = ViTHybridForImageClassification.from_pretrained(save_directory)

correct_predictions = 0

for idx in range(len(test_dataset)):
  image = test_dataset[idx]['pixel_values'].to(model.device)
  outputs = model(image.unsqueeze(0))
  logits = outputs.logits

  predicted_class_idx = logits.argmax(-1).item()

  if predicted_class_idx == test_dataset[idx]['labels'].item():
    correct_predictions += 1

print(f"Fine-tuning accuracy on test dataset: {(correct_predictions / len(test_dataset))*100}%")

RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([29]).