<a href="https://colab.research.google.com/github/NighatShaheen/Leaf-angle-classification/blob/main/Copy_of_Leaf_Angle_Classification_VIT_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read and write files under it.
drive.mount('/content/drive')


Mounted at /content/drive


In [14]:
from pathlib import Path

# Peek into the Drive notebooks folder: show the names of at most the first
# 50 entries, in whatever order the filesystem yields them.
root = Path("/content/drive/MyDrive/Colab Notebooks")
[entry.name for entry in list(root.iterdir())[:50]]


['leafclassification11.ipynb',
 'Copy of vision_transformer_leaf_angle.ipynb',
 'Copy of Vision Transformer (ViT) on CIFAR-10 from scratch using PyTorch.ipynb',
 'Leaf_Angle_Classidifcation_VIT.ipynb',
 'Untitled8.ipynb',
 'Untitled1.ipynb',
 'Vision Transformer (ViT) on CIFAR-10 from scratch using PyTorch.ipynb',
 'Leaf_Angle_Classification_VIT_Model.ipynb',
 'Copy_of_Leaf_Angle_Classification_VIT_Model.ipynb']

In [17]:
import json
from pathlib import Path

# Notebook to repair.
# NOTE(review): this path uses "Colab_Notebooks" (underscore) while the listing
# cell above reads "Colab Notebooks" (space) — confirm which folder is intended.
path = Path("/content/drive/MyDrive/Colab_Notebooks/Copy_of_Leaf_Angle_Classification_VIT_Model.ipynb")

# A notebook file is JSON; load it as a plain dict.
nb = json.loads(path.read_text(encoding="utf-8"))

# Drop the "widgets" metadata entry, first at notebook level and then from
# every cell. Objects without the entry are left unchanged.
for holder in (nb, *nb.get("cells", [])):
    holder.get("metadata", {}).pop("widgets", None)

# Write the cleaned copy next to the original as "<stem>_fixed.ipynb".
fixed_path = path.with_name(f"{path.stem}_fixed.ipynb")
serialized = json.dumps(nb, ensure_ascii=False, indent=1)
fixed_path.write_text(serialized, encoding="utf-8")

print("Fixed notebook saved to:", fixed_path)


Fixed notebook saved to: /content/drive/MyDrive/Colab_Notebooks/Copy_of_Leaf_Angle_Classification_VIT_Model_fixed.ipynb


In [None]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from transformers import ViTImageProcessor, ViTForImageClassification
from torch.optim import AdamW
from google.colab import drive
# Mount Google Drive so the dataset folder below is reachable from this cell.
drive.mount('/content/drive')

# Root folder of the image dataset; it is later read with ImageFolder and split
# into train/validation subsets, so no separate TRAIN_DIR / VAL_DIR is needed.
output_folder = '/content/drive/MyDrive/Final_Pisek_dataset'

# -----------------------------
# Config
# -----------------------------
MODEL_ID = "google/vit-base-patch16-224"  # ViT-B/16 checkpoint
NUM_CLASSES = 5                           # <-- set this to the number of class folders
BATCH_SIZE = 16
EPOCHS = 5
LR = 1e-3                                 # a higher LR is acceptable when only the head is trained

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# -----------------------------
# Model + Processor
# -----------------------------
processor = ViTImageProcessor.from_pretrained(MODEL_ID)

# ignore_mismatched_sizes=True allows the checkpoint's classifier to be replaced
# by a freshly initialised one sized for NUM_CLASSES labels.
model = ViTForImageClassification.from_pretrained(
    MODEL_ID,
    num_labels=NUM_CLASSES,
    ignore_mismatched_sizes=True,
)
model = model.to(device)

# Class labels (id2label / label2id) are attached to the config once the
# dataset has been created; ImageFolder orders classes alphabetically by
# folder name.

# -----------------------------
# Freeze backbone (train head only)
# -----------------------------
model.vit.requires_grad_(False)

# The classifier is trainable by default; stating it explicitly documents intent.
model.classifier.requires_grad_(True)

# -----------------------------
# Data preparation using existing output_folder
# -----------------------------
# Fail early with an actionable message when the dataset folder is not reachable
# (typically because Google Drive is not mounted in this session).
if not os.path.isdir(output_folder):
    raise FileNotFoundError(
        f"Directory not found: {output_folder}. "
        "Please ensure Google Drive is mounted and the path is correct. "
        "You may need to re-run the `drive.mount('/content/drive')` cell if it was not executed recently."
    )

# Raw ImageFolder dataset: no transform is passed, so each item is
# (PIL image, class index). Preprocessing is applied later by a wrapper dataset.
full_raw_dataset = ImageFolder(root=output_folder)

# 80/20 train/validation split. The generator is seeded so the same images land
# in the same subset on every run, which keeps reported accuracies comparable.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_raw_dataset))
val_size = len(full_raw_dataset) - train_size
hf_train_subset, hf_val_subset = torch.utils.data.random_split(
    full_raw_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Dataset wrapper that runs a HuggingFace image processor on every item
class HFTransformedDataset(torch.utils.data.Dataset):
    """Apply a HuggingFace-style processor to the images of another dataset.

    Args:
        subset: indexable, sized collection whose items are ``(image, label)``;
            ``image`` must provide ``convert("RGB")`` (e.g. a PIL image).
        processor_transform: callable invoked as
            ``processor_transform(images=..., return_tensors="pt")`` that
            returns a mapping containing ``"pixel_values"``.
    """

    def __init__(self, subset, processor_transform):
        self.subset = subset
        self.processor_transform = processor_transform

    def __len__(self):
        return len(self.subset)

    def __getitem__(self, idx):
        """Return ``(pixel_values, label)`` for the item at ``idx``."""
        pil_image, target = self.subset[idx]
        rgb_image = pil_image.convert("RGB")
        encoded = self.processor_transform(images=rgb_image, return_tensors="pt")
        # The processor returns a batch of one; take its single entry.
        return encoded["pixel_values"][0], target

# Wrap each split so items come out as (pixel_values, label) instead of
# (PIL image, label).
train_ds = HFTransformedDataset(hf_train_subset, processor_transform=processor)
val_ds   = HFTransformedDataset(hf_val_subset,   processor_transform=processor)

# Label maps taken from the ImageFolder class list:
#   id2label: class index -> class (folder) name
#   label2id: class (folder) name -> class index (the inverse of id2label)
# id2label.items() yields (index, name) pairs, so the inverse must unpack them
# in that order; unpacking them the other way round only copies id2label.
id2label = dict(enumerate(full_raw_dataset.classes))
label2id = {name: idx for idx, name in id2label.items()}
model.config.id2label = id2label
model.config.label2id = label2id

# Only the training loader is shuffled; validation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

# -----------------------------
# Optimizer (ONLY head params)
# -----------------------------
# Only the classifier's parameters are handed to the optimizer, so no other
# part of the model is updated by optimizer.step().
optimizer = AdamW(params=model.classifier.parameters(), lr=LR)

# -----------------------------
# Train / Eval
# -----------------------------
def accuracy_on_loader(loader):
    """Return the fraction of correctly classified samples in ``loader``.

    Uses the module-level ``model`` and ``device``. The model is switched to
    eval mode and is NOT switched back, so callers that keep training must call
    ``model.train()`` again.

    Args:
        loader: iterable of ``(pixel_values, labels)`` batches.

    Returns:
        ``correct / total`` as a float; ``0.0`` when ``loader`` yields nothing.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for pixel_batch, target_batch in loader:
            pixel_batch = pixel_batch.to(device)
            target_batch = target_batch.to(device)
            # Predicted class = index of the largest logit along the last axis.
            predicted = model(pixel_values=pixel_batch).logits.argmax(dim=-1)
            n_correct += predicted.eq(target_batch).sum().item()
            n_seen += target_batch.numel()
    # max(..., 1) avoids dividing by zero on an empty loader.
    return n_correct / max(n_seen, 1)

# One pass over train_loader per epoch, followed by an accuracy measurement on
# both the training and the validation loader.
for epoch in range(1, EPOCHS + 1):
    model.train()  # accuracy_on_loader leaves the model in eval mode
    total_loss = 0.0

    for pixel_batch, target_batch in train_loader:
        pixel_batch = pixel_batch.to(device)
        target_batch = target_batch.to(device)

        # Clear gradients left from the previous step before computing new ones.
        optimizer.zero_grad(set_to_none=True)

        # Passing labels makes the model return the loss alongside the logits.
        loss = model(pixel_values=pixel_batch, labels=target_batch).loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    train_acc = accuracy_on_loader(train_loader)
    val_acc = accuracy_on_loader(val_loader)

    # Reported loss is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch}/{EPOCHS} | loss={total_loss/len(train_loader):.4f} | train_acc={train_acc:.4f} | val_acc={val_acc:.4f}")

# -----------------------------
# Save
# -----------------------------
# Model weights/config and the processor config are written to the same folder.
# The path is relative to the current working directory.
# NOTE(review): the folder name says "vitb32" while MODEL_ID above is the
# patch16 checkpoint — confirm nothing loads from this path before renaming it.
SAVE_DIR = "vitb32_head_finetuned"
os.makedirs(SAVE_DIR, exist_ok=True)
for artifact in (model, processor):
    artifact.save_pretrained(SAVE_DIR)
print("Saved to vitb32_head_finetuned/")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


KeyboardInterrupt: 