In [None]:
!pip install transformers datasets evaluate accelerate

In [2]:
import torch
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer
from datasets import load_dataset
import evaluate
from matplotlib import pyplot as plt

In [3]:
import kagglehub

# Fetch the latest version of the Kaggle dataset and report where it landed.
KAGGLE_DATASET = "masoudnickparvar/brain-tumor-mri-dataset"
path = kagglehub.dataset_download(KAGGLE_DATASET)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'brain-tumor-mri-dataset' dataset.
Path to dataset files: /kaggle/input/brain-tumor-mri-dataset


In [None]:
# Read the Training folder from wherever kagglehub stored the download.
# Using `path` instead of a hard-coded /kaggle/input/... location keeps this
# cell working outside Kaggle/Colab, where the cache lives elsewhere.
dataset = load_dataset("imagefolder", data_dir=f"{path}/Training")

In [5]:
# Sanity check: look at the first training row (a PIL image plus an integer label).
dataset["train"][0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x512>,
 'label': 0}

In [6]:
# Hold out 10% of the training images for validation.
# - `seed` makes the split (and therefore the reported accuracy) reproducible.
# - `stratify_by_column` keeps the class proportions equal in both parts.
# - The guard keeps the cell idempotent: re-running it must not carve another
#   10% out of the already-reduced training split.
if "validation" not in dataset:
    split_dataset = dataset["train"].train_test_split(
        test_size=0.1,
        seed=42,
        stratify_by_column="label",
    )

    dataset["train"] = split_dataset["train"]
    dataset["validation"] = split_dataset["test"]

In [7]:
# Image processor that ships with the pretrained ViT checkpoint.
MODEL_CHECKPOINT = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(MODEL_CHECKPOINT)

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

In [None]:
from PIL import Image
import torch

def transform(example):
    """Turn a batch of PIL images into model inputs.

    The function is written for batched input: ``example["image"]`` and
    ``example["label"]`` are lists of equal length, even when they hold a
    single item.

    Args:
        example: Batch dict with an ``"image"`` list of PIL images and a
            ``"label"`` list of class ids.

    Returns:
        dict: Everything the processor returned (batch dimension intact) plus
        a ``"labels"`` entry copied from ``example["label"]``.
    """
    # Convert anything that is not already RGB so every image reaches the
    # processor with three channels.
    images = [
        img if img.mode == "RGB" else img.convert("RGB")
        for img in example["image"]
    ]

    inputs = processor(images=images, return_tensors="pt")

    # Keep the leading batch dimension. Squeezing axis 0 is a no-op for larger
    # batches but removes the dimension for a batch of exactly one image, which
    # leaves the tensors out of step with the one-element ``labels`` list.
    outputs = dict(inputs.items())
    outputs["labels"] = example["label"]
    return outputs

In [9]:
# Attach `transform` to the dataset splits. Per the `datasets` documentation,
# `with_transform` applies it on the fly when rows are accessed.
prepared_ds = dataset.with_transform(transform)

In [10]:
# Size the classification head from the dataset's own label set and store the
# class names in the model config, so predictions and saved checkpoints report
# real class names instead of generic LABEL_<n> placeholders.
label_names = dataset["train"].features["label"].names
num_labels = dataset["train"].features["label"].num_classes
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=num_labels,
    id2label=dict(enumerate(label_names)),
    label2id={name: idx for idx, name in enumerate(label_names)},
)
print(num_labels)

config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


4


In [None]:
# Accuracy metric from the `evaluate` library; `compute_metrics` below uses it.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score a set of predictions with the module-level ``metric``.

    Args:
        eval_pred: A two-item sequence ``(logits, labels)``.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class of each row
        of ``logits`` compared against ``labels``.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    return metric.compute(predictions=scores.argmax(-1), references=references)

In [None]:
# Fine-tuning configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./vit-finetuned",
    logging_dir="./logs",
    logging_steps=10,
    # Optimisation schedule.
    num_train_epochs=10,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch, then reload the checkpoint with
    # the best validation accuracy when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Presumably needed so the Trainer keeps the raw "image" column that
    # `transform` reads -- confirm against the Trainer docs before changing.
    remove_unused_columns=False,
    # No experiment trackers and no upload to the Hub.
    report_to=[],
    push_to_hub=False,
)

In [None]:
# Bundle the model, hyper-parameters, both data splits and the metric callback
# into a Trainer.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": prepared_ds["train"],
    "eval_dataset": prepared_ds["validation"],
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

In [14]:
# Run fine-tuning. With the arguments configured above, the validation split
# is evaluated after every epoch (see the table in the output).
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.1209,0.10925,0.97028
2,0.0543,0.055983,0.987762
3,0.0196,0.112927,0.975524
4,0.005,0.061312,0.984266
5,0.0036,0.026231,0.994755
6,0.0027,0.034605,0.993007
7,0.0022,0.03149,0.994755
8,0.0019,0.032379,0.994755
9,0.0017,0.032577,0.994755
10,0.0016,0.032726,0.994755


TrainOutput(global_step=3220, training_loss=0.04207233460215123, metrics={'train_runtime': 2207.4377, 'train_samples_per_second': 23.285, 'train_steps_per_second': 1.459, 'total_flos': 3.9831596645105664e+18, 'train_loss': 0.04207233460215123, 'epoch': 10.0})

In [None]:
# Load the held-out Testing folder from the kagglehub download location
# (`path`) instead of a hard-coded /kaggle/input/... directory, then attach the
# same on-the-fly preprocessing used for training.
test_ds = load_dataset("imagefolder", data_dir=f"{path}/Testing")
prepared_test_ds = test_ds.with_transform(transform)
prepared_test_ds

In [None]:
# Preview the first four test images in a 2x2 grid, titled with their class.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
label_feature = test_ds["train"].features["label"]

for i, ax in enumerate(axes.flat):
    sample = test_ds["train"][i]  # decode each example only once
    # Plot the RGB-converted image itself, so single-channel scans are not
    # rendered through matplotlib's default colormap.
    ax.imshow(sample["image"].convert("RGB"))
    ax.set_title(label_feature.int2str(sample["label"]))



In [None]:
# Run inference on the test images. The Testing folder was loaded with
# `imagefolder`, which exposes it under the split name "train".
result=trainer.predict(test_dataset=prepared_test_ds["train"])

In [None]:
# Show only the evaluation metrics; printing the whole prediction output would
# also dump the raw logits and label arrays.
print(result.metrics)

In [19]:
# Test accuracy reported by `trainer.predict` on the Testing split: 0.9947.