<a href="https://colab.research.google.com/github/Afra17/BootCamp_Sdaia_DL_W4/blob/main/Project_Wee4_SDAIA_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



# **Medina Landmark Classification: ViT Fine-Tuning & LoRA**


In [None]:
import torch
import os
import json
from PIL import Image
from torchvision import transforms
from datasets import load_dataset
from transformers import ViTForImageClassification, ViTImageProcessor, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

In [None]:
class_names = [
    "Uhud-Martyrs-Square",
    "Uhud-mounten",
    "Ghars-Well",
    "Quba-Mosque",
    "The-Prophet's-Mosque",
    "Urwah-Ibn-Az-Zubayr-Palace"
]
label2id = {name: i for i, name in enumerate(class_names)}
id2label = {i: name for i, name in enumerate(class_names)}


In [None]:
print(label2id)

In [None]:
data_url = "https://github.com/Afra17/BootCamp_Sdaia_DL_W4/raw/7a803578a580ef2a14b4e2d44da51ae54f29e5fe/dataset_small%20(1).zip"
!wget "{data_url}" -O data.zip
!unzip -q data.zip -d /content/dataset_files

print("DONE")

In [None]:
image_dir = "/content/dataset_files"
dataset = load_dataset("imagefolder", data_dir=image_dir, split="train")

In [None]:
def add_labels(example):
    path = example['image'].filename if hasattr(example['image'], 'filename') else ""
    example['labels'] = 0
    for i, name in enumerate(class_names):
        if name in path:
            example['labels'] = i
            break
    return example

dataset = dataset.map(add_labels)

In [None]:
dataset

# **Split Datasets to two (small-large)**

In [None]:
main_split = dataset.train_test_split(test_size=0.3, seed=42)
large_raw_data = main_split["train"]
small_raw_data = main_split["test"]

In [None]:
large_splits = large_raw_data.train_test_split(test_size=0.2, seed=42)
large_train_raw = large_splits["train"]
large_test_raw  = large_splits["test"]

small_splits = small_raw_data.train_test_split(test_size=0.2, seed=42)
small_train_raw = small_splits["train"]
small_test_raw  = small_splits["test"]

In [None]:
large_splits

In [None]:
def transform_fn(examples):
    examples["pixel_values"] = [
        augmentation_transforms(img.convert("RGB")) for img in examples["image"]
    ]

    if "label" in examples:
        examples["labels"] = examples["label"]
    output_columns = ["pixel_values", "labels"]
    return {k: examples[k] for k in output_columns if k in examples}

large_train = large_train_raw.with_transform(transform_fn)
large_test  = large_test_raw.with_transform(transform_fn)


small_train = small_train_raw.with_transform(transform_fn)
small_test  = small_test_raw.with_transform(transform_fn)

In [None]:
import matplotlib.pyplot as plt

unique_labels = sorted(list(set(main_split["train"]["labels"])))
num_classes = len(unique_labels)

cols = 3
rows = (num_classes // cols) + (1 if num_classes % cols != 0 else 0)
plt.figure(figsize=(15, rows * 5))

for i, label_id in enumerate(unique_labels):
    idx = main_split["train"]["labels"].index(label_id)

    raw_sample = main_split["train"][idx]

    img = raw_sample["image"]
    class_name = id2label[label_id]

    plt.subplot(rows, cols, i + 1)
    plt.imshow(img)
    plt.title(f"Class: {class_name}\n(Original Image)", fontsize=12, fontweight='bold')
    plt.axis("off")

plt.tight_layout()
plt.show()

# **Do Augmentation**

In [None]:
augmentation_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [None]:
import matplotlib.pyplot as plt

example = large_train[5]

image_data = example["pixel_values"]

# 3. Display it
plt.imshow(image_data.permute(1, 2, 0) if hasattr(image_data, 'permute') else image_data)
plt.title("Augmented Image")
plt.axis("off")
plt.show()

# **MODEL "Vision Transformer"**




In [None]:
model_id = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_id)

model = ViTForImageClassification.from_pretrained(
    model_id,
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id
)

# **Full Fine-Tuning (Samll- large dataset)**

# **Large Datasets**

In [None]:
!pip install evaluate

In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"

training_args = TrainingArguments(
    output_dir="./vit-full-finetune",
    per_device_train_batch_size=8,
    learning_rate=2e-5,
    num_train_epochs=15,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    remove_unused_columns=False,
    logging_steps=1,
    report_to="none"
)


In [None]:
import numpy as np
import evaluate
metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):

    logits, labels = eval_pred

    predictions = np.argmax(logits, axis=-1)

    return metric.compute(predictions=predictions, references=labels)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=large_train,
    eval_dataset=large_test,
    compute_metrics=compute_metrics
)

trainer.train()

# train_loss: 0.6378657621996743
# Accuracy=0.769231
-----
# train_runtime': 244.4977s




# **Small Datasets**

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train,
    eval_dataset=small_test,
    compute_metrics=compute_metrics
)

trainer.train()

# training_loss=0.8075989776187473,
# Accuracy=0.833333

------
# train_runtime': 153.6173s

 # **Perform Parameter Efficient Fine-Tuning (PEFT) by LORA**

In [None]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    modules_to_save=["classifier"]
)

model = get_peft_model(model, lora_config)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.print_trainable_parameters()

In [None]:
training_args = TrainingArguments(
    output_dir="./medina-vit-lora-final",
    remove_unused_columns=False,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    learning_rate=1e-3,
    num_train_epochs=30,
    logging_steps=5,
    save_strategy="epoch",
    fp16=True if device == "cuda" else False,
    report_to="none"
)

# **Large Datasets**

In [None]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=large_train,
    eval_dataset=large_test,
    compute_metrics=compute_metrics
)

trainer.train()

# training_loss=0.02864039983504858
-----
# train_runtime: 110.2336s


# **Small Datasets**

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train,
    eval_dataset=small_test,
    compute_metrics=compute_metrics
)

trainer.train()

# training_loss=0.0009836134015737722
-----
# train_runtime: 15.287s