In [1]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from transformers import ViTForImageClassification, ViTFeatureExtractor, TrainingArguments, Trainer
from PIL import Image
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# -------- Step 1: Load Dataset --------
class BreastCancerDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Read all images and assign labels based on filenames
        for img_name in os.listdir(root_dir):
            if img_name.endswith(".png"):
                self.image_paths.append(os.path.join(root_dir, img_name))
                self.labels.append(0 if "SOB_B" in img_name else 1)  # 0=Benign, 1=Malignant

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return {"pixel_values": image, "labels": torch.tensor(label)}

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load datasets
train_dataset = BreastCancerDataset(r"D:\01 STUDY MATERIAL\ai project\mkfold\combined fold 1\train_400x", transform=transform)
test_dataset = BreastCancerDataset(r"D:\01 STUDY MATERIAL\ai project\mkfold\combined fold 1\test_400x", transform=transform)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Load ViT feature extractor
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# -------- Step 2: Define ViT Model --------
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=2,  # Binary classification
    id2label={0: "Benign", 1: "Malignant"},
    label2id={"Benign": 0, "Malignant": 1},
)
model.to(device)

# -------- Step 3: Define Evaluation Metrics --------
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions)
    recall = recall_score(labels, predictions)
    f1 = f1_score(labels, predictions)
    
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# -------- Step 4: Define Training Arguments --------
training_args = TrainingArguments(
    output_dir="./vit_cancer_detection",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    save_total_limit=2,
    logging_dir="./logs",
    report_to="none",
    load_best_model_at_end=True,
    push_to_hub=False,
    fp16=True,  # Mixed precision training
)

# -------- Step 5: Train Model --------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

trainer.train()

# -------- Step 6: Save Model --------
trainer.save_model("vit_binary_model_400x")
print("Model saved successfully!")

# -------- Step 7: Load Best Model and Evaluate --------
model = ViTForImageClassification.from_pretrained("vit_binary_model_400X")
model.to(device)

trainer.model = model
metrics = trainer.evaluate()
print("Final Evaluation Metrics:", metrics)

# -------- Step 8: Make Predictions on Random Image --------
import random
import os

def predict_random_image(test_dir):
    random_class = random.choice(['benign', 'malignant'])
    random_image = random.choice(os.listdir(os.path.join(test_dir, random_class)))
    image_path = os.path.join(test_dir, random_class, random_image)
    image = Image.open(image_path).convert("RGB")
    inputs = feature_extractor(image, return_tensors="pt").to(device)

    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class = torch.argmax(logits, dim=-1).item()
    return "Benign" if predicted_class == 0 else "Malignant"

# Example prediction
test_dir = 'D:/01 STUDY MATERIAL/ai project/Breast-Splitted/test'
print("Prediction for random test image:", predict_random_image(test_dir))


  from .autonotebook import tqdm as notebook_tqdm


Using device: cuda


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.356116,0.876336,0.90799,0.897129,0.902527
2,No log,0.437544,0.871756,0.907317,0.889952,0.898551
3,No log,0.644361,0.836641,0.830149,0.935407,0.87964
4,No log,0.515774,0.883969,0.919118,0.897129,0.90799
5,No log,0.523825,0.883969,0.911058,0.906699,0.908873


Model saved successfully!


Final Evaluation Metrics: {'eval_loss': 0.3560798466205597, 'eval_accuracy': 0.8763358778625954, 'eval_precision': 0.9079903147699758, 'eval_recall': 0.8971291866028708, 'eval_f1': 0.9025270758122743, 'eval_runtime': 19.2367, 'eval_samples_per_second': 34.049, 'eval_steps_per_second': 2.131, 'epoch': 5.0}
Prediction for random test image: Benign
