# TumorVision





The script below fine-tunes CLIP on a simple brain tumor MRI dataset available [here](https://huggingface.co/datasets/tanzuhuggingface/brainmri) for the task of brain tumor detection. CLIP takes both images and text as input and maps them into a shared embedding space. This space is learned with contrastive learning, so that semantically related image-text pairs end up close together while unrelated pairs end up far apart. The approach is inspired by the notebook presented in class.

### Install the necessary libraries


In [None]:

!pip install --upgrade transformers
!pip install accelerate
!pip install datasets torch pillow matplotlib scikit-learn bitsandbytes

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

from datasets import load_dataset
from transformers import CLIPModel, CLIPProcessor


### Log in to Hugging Face

In [None]:
# Log in to the Hugging Face Hub from inside the notebook session.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Print the PyTorch version, CUDA availability / GPU count, the name of the
# first GPU (when one is present) and the Python version, so the runtime the
# notebook was executed on is recorded alongside its results.
import platform

import torch

cuda_ready = torch.cuda.is_available()
print('Torch:', torch.__version__, '| CUDA:', cuda_ready, '| GPUs:', torch.cuda.device_count())
if cuda_ready:
    print('GPU:', torch.cuda.get_device_name(0))
print('Python:', platform.python_version())

### Import dataset

In [None]:
# Load the brain MRI dataset by its Hugging Face repository id.
ds = load_dataset("tanzuhuggingface/brainmri")

In [None]:
# Notebook display of the loaded dataset object (the code below reads its "train" split).
ds

### Split the dataset into train and test sets

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Materialise the image and label columns of the "train" split as plain lists
# so they can be handed to scikit-learn.
X = list(ds['train']['image'])
y = list(ds['train']['label'])

# Hold out 20% of the samples for evaluation. The split is stratified on the
# label so the train and test sets keep the same class proportions; an
# unstratified split can skew the classes, and a test set containing a single
# class would make roc_auc_score raise during evaluation.
# random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

### Set hyperparameters and load the model

In [None]:

# --- Hyperparameters -------------------------------------------------------
MODEL_ID = "openai/clip-vit-base-patch32"  # pretrained checkpoint to fine-tune
BATCH_SIZE = 16
EPOCHS = 2
LR = 1e-5

# --- Device ----------------------------------------------------------------
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# --- Model, preprocessing, optimisation -------------------------------------
clip_model = CLIPModel.from_pretrained(MODEL_ID)
clip_model = clip_model.to(device)
clip_processor = CLIPProcessor.from_pretrained(MODEL_ID)

# Every CLIP parameter is handed to the optimizer, so the whole model is
# fine-tuned.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(clip_model.parameters(), lr=LR)

### Add text prompts

In [None]:
# One caption per class. The position of a prompt in this list is used as the
# class id: the sample preview indexes this list with the dataset label, and
# the training loop feeds the per-prompt logits and the dataset labels straight
# into cross-entropy. The order must therefore match the dataset's label ids.
# NOTE(review): assumes label 0 = no tumor and label 1 = tumor — confirm
# against the dataset card.
text_prompts = [
    "a brain MRI with no tumor",
    "a brain MRI showing a tumor"
]

### Load dataset via dataloader

In [None]:
class BrainMRIDataset(Dataset):
    """Map-style dataset that pairs each image with its class label.

    Items are returned untouched (no preprocessing happens here) as
    ``{"image": images[idx], "label": labels[idx]}``.

    Args:
        images: Indexable, sized collection of images.
        labels: Indexable, sized collection of labels aligned with ``images``.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images, labels):
        # Fail fast on misaligned inputs. Because __len__ follows ``labels``,
        # a mismatch would otherwise surface as an IndexError in the middle of
        # an epoch or as silently ignored trailing images.
        if len(images) != len(labels):
            raise ValueError(
                "images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as an ``image``/``label`` dict."""
        return {
            "image": self.images[idx],
            "label": self.labels[idx],
        }

def collate_fn(batch):
    """Merge a list of ``{"image", "label"}`` samples into one batch dict.

    Images are kept as a plain Python list (they are not stacked here), while
    the labels are packed into a single ``torch.long`` tensor.

    Returns:
        ``{"images": [...], "labels": LongTensor}`` in the order of ``batch``.
    """
    batch_images = []
    batch_targets = []
    for sample in batch:
        batch_images.append(sample["image"])
        batch_targets.append(sample["label"])

    return {
        "images": batch_images,
        "labels": torch.tensor(batch_targets, dtype=torch.long),
    }

# Wrap both splits in the dataset class defined above.
train_dataset = BrainMRIDataset(X_train, y_train)
test_dataset = BrainMRIDataset(X_test, y_test)

# Both loaders share the batch size and the custom collate function; only the
# training loader reshuffles its samples every epoch.
loader_kwargs = {"batch_size": BATCH_SIZE, "collate_fn": collate_fn}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

### Let's look at some images from the dataset

In [None]:
# Preview the first few training images side by side, each titled with the
# text prompt that corresponds to its label.
num_images = 2
plt.figure(figsize=(16, 14))

# Pair images with labels through slices, so asking for more images than the
# training set holds shows what is available instead of raising IndexError.
preview = zip(X_train[:num_images], y_train[:num_images])
for i, (image, label) in enumerate(preview):
    text = text_prompts[label]

    plt.subplot(1, num_images, i + 1)
    # A gray colormap keeps single-channel scans from being drawn in
    # matplotlib's default false-colour map; cmap is ignored for RGB(A) images.
    plt.imshow(image, cmap="gray")
    plt.title(text, fontsize=14)
    plt.axis('off')

# Lay the figure out once, after every subplot exists, rather than per image.
plt.tight_layout()
plt.show()

### Training and evaluation loop

In [None]:
def _score_batch(batch):
    """Run one collated batch through CLIP.

    Returns ``(logits, labels)``: ``logits`` is the model's
    ``logits_per_image`` output (one row per image, one column per entry of
    ``text_prompts``) and ``labels`` is the batch's label tensor moved to
    ``device``.
    """
    targets = batch["labels"].to(device)

    # The processor prepares the prompts and the images in a single call.
    encoded = clip_processor(
        text=text_prompts,
        images=batch["images"],
        return_tensors="pt",
        padding=True,
    )
    encoded = encoded.to(device)

    return clip_model(**encoded).logits_per_image, targets


# Validation AUC of every epoch, in order.
val_auc_history = []

for epoch in range(EPOCHS):
    # ---------------- training pass ----------------
    clip_model.train()
    train_loss = 0

    for batch in train_loader:
        optimizer.zero_grad()

        logits, targets = _score_batch(batch)
        # The prompt index serves as the class index for cross-entropy.
        loss = loss_fn(logits, targets)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean loss per batch.
    train_loss /= len(train_loader)

    # ---------------- validation pass (on the held-out test split) ----------
    clip_model.eval()
    val_loss = 0
    all_preds, all_labels, all_probs = [], [], []

    with torch.no_grad():
        for batch in test_loader:
            logits, targets = _score_batch(batch)
            val_loss += loss_fn(logits, targets).item()

            # Column 1 of the softmax is the probability of the second prompt
            # (the tumor prompt); argmax gives the hard prediction.
            tumor_prob = torch.softmax(logits, dim=1)[:, 1]
            hard_pred = torch.argmax(logits, dim=1)

            all_preds.extend(hard_pred.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
            all_probs.extend(tumor_prob.cpu().numpy())

    val_loss /= len(test_loader)
    val_acc = accuracy_score(all_labels, all_preds)
    val_auc = roc_auc_score(all_labels, all_probs)
    val_auc_history.append(val_auc)

    print(f"Epoch {epoch+1}/{EPOCHS} "
          f"- Train Loss: {train_loss} "
          f"- Val Loss: {val_loss} "
          f"- Val Acc: {val_acc} "
          f"- Val AUC: {val_auc}")

# Plot validation AUC per epoch.
# The x-axis is numbered from 1 so it matches the epoch numbers printed during
# training, and point markers keep a one-epoch run visible (a single point has
# no line segment to draw).
epoch_numbers = list(range(1, len(val_auc_history) + 1))

plt.plot(epoch_numbers, val_auc_history, marker="o")
plt.xticks(epoch_numbers)
plt.title("Validation AUC over Epochs")
plt.xlabel("Epoch")
plt.ylabel("AUC")
plt.grid(True)
plt.show()