<a href="https://colab.research.google.com/github/Nilufayeasmin299/Vision-Transformer/blob/main/Vision_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the paths used for the archives and
# extracted data all live under this mount point.
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install dependencies
!pip install timm

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->timm)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->timm)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->timm)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->timm)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->tim

In [None]:
#  Extract dataset and repo
import zipfile
import os

In [None]:
# Define paths
# Repository archive on Drive and the folder it is unpacked into.
repo_zip_path, repo_extract_path = (
    "/content/drive/MyDrive/ViT-pytorch-main.zip",
    "/content/drive/MyDrive/ViT-pytorch-main",
)
# Dataset archive on Drive and the folder it is unpacked into.
dataset_zip_path, dataset_extract_path = (
    "/content/drive/MyDrive/hymenoptera_data.zip",
    "/content/drive/MyDrive/hymenoptera_data",
)

In [None]:
# Extract function
def extract_zip(zip_path, extract_path):
    """Unpack every member of a zip archive into a directory.

    Args:
        zip_path: Path of the zip file to read.
        extract_path: Directory that receives the archive contents.

    Returns:
        None. The archive handle is always closed, even if extraction fails.
    """
    archive = zipfile.ZipFile(zip_path, mode='r')
    try:
        archive.extractall(path=extract_path)
    finally:
        archive.close()

In [None]:
# Extract both zip files
# Each pair is (archive on Drive, directory to unpack it into); the repo is
# unpacked first, then the dataset.
for archive_path, target_dir in (
    (repo_zip_path, repo_extract_path),
    (dataset_zip_path, dataset_extract_path),
):
    extract_zip(archive_path, target_dir)

In [None]:
# Import model, data-loading and metric libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from timm import create_model
from sklearn.metrics import precision_score, recall_score, f1_score

In [None]:
# Define transformations
# Training pipeline: random resized crop to 224 and random horizontal flip for
# augmentation, then tensor conversion and per-channel normalisation with
# mean 0.5 / std 0.5.
transform_train = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Evaluation pipeline: fixed 224x224 resize with no random augmentation,
# followed by the same tensor conversion and normalisation.
transform_test = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


In [None]:
# Load dataset
# Build the split directories from dataset_extract_path instead of repeating
# the literal Drive path, so the loaders always read from the folder the
# archive was extracted into.
trainset = datasets.ImageFolder(
    root=os.path.join(dataset_extract_path, "train"),
    transform=transform_train,  # augmented training pipeline
)
# The "val" split of the archive is used as the held-out test set.
testset = datasets.ImageFolder(
    root=os.path.join(dataset_extract_path, "val"),
    transform=transform_test,  # deterministic evaluation pipeline
)

In [None]:
# Wrap both datasets in batched loaders; only the training loader shuffles.
train_loader = DataLoader(
    dataset=trainset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    dataset=testset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Modify Vision Transformer for Binary Classification
# Pick the compute device first: CUDA when a GPU is available, otherwise CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load pre-trained ViT model from TIMM
# Requests the pretrained "vit_base_patch16_224" weights with two output
# classes and moves the resulting model onto the selected device.
model = create_model(
    model_name="vit_base_patch16_224",
    pretrained=True,
    num_classes=2,
).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [None]:
# Define Loss and Optimizer
# Cross-entropy over the model's class scores.
criterion = nn.CrossEntropyLoss()
# AdamW over every model parameter with a learning rate of 3e-5.
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=3e-5,
)

In [None]:
#  Train the Model
num_epochs = 5  # Keep small for quick execution
for epoch in range(num_epochs):
    # Put the model in training mode and reset the per-epoch accumulators.
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # One optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Track the summed batch losses and the number of correct predictions.
        loss_sum += batch_loss.item()
        predicted_classes = torch.max(logits, 1).indices
        n_correct += (predicted_classes == batch_labels).sum().item()
        n_seen += batch_labels.size(0)

    # Accuracy is a percentage over all samples seen this epoch; the loss is
    # the average of the per-batch loss values (not weighted by batch size).
    train_acc = 100 * n_correct / n_seen
    train_loss = loss_sum / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")


Epoch [1/5], Loss: 0.0178, Train Accuracy: 99.59%
Epoch [2/5], Loss: 0.0282, Train Accuracy: 99.59%
Epoch [3/5], Loss: 0.0048, Train Accuracy: 100.00%
Epoch [4/5], Loss: 0.0106, Train Accuracy: 100.00%
Epoch [5/5], Loss: 0.0215, Train Accuracy: 99.59%


In [None]:
# Evaluate Model & Compute Metrics
# Switch to evaluation mode and reset the accumulators read by later cells.
model.eval()
test_loss = 0.0
correct = 0
total = 0
all_preds = []
all_labels = []

# Gradients are not needed for evaluation.
with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_images)
        test_loss += criterion(logits, batch_labels).item()

        batch_preds = torch.max(logits, 1).indices
        correct += (batch_preds == batch_labels).sum().item()
        total += batch_labels.size(0)

        # Keep predictions and ground truth on the CPU for the metric functions.
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

In [None]:
# Calculate performance metrics
# NOTE(review): test_loss is overwritten with its own average, so running this
# cell twice without re-running the evaluation loop divides it a second time.
test_acc = 100 * correct / total
test_loss /= len(test_loader)

# Macro-averaged scores, expressed as percentages.
precision = precision_score(
    y_true=all_labels, y_pred=all_preds, average='macro'
) * 100
recall = recall_score(
    y_true=all_labels, y_pred=all_preds, average='macro'
) * 100
f1 = f1_score(
    y_true=all_labels, y_pred=all_preds, average='macro'
) * 100

In [None]:
# Final Report
# One print call; sep="\n" puts each entry on its own line, giving the same
# text as printing the lines one by one.
print(
    "\n Final Model Performance:",
    f" Training Loss: {train_loss:.4f}",
    f" Test Loss: {test_loss:.4f}",
    f" Training Accuracy: {train_acc:.2f}%",
    f" Test Accuracy: {test_acc:.2f}%",
    f" Precision: {precision:.2f}%",
    f" Recall: {recall:.2f}%",
    f" F1-Score: {f1:.2f}%",
    sep="\n",
)



 Final Model Performance:
 Training Loss: 0.0215
 Test Loss: 0.1556
 Training Accuracy: 99.59%
 Test Accuracy: 96.08%
 Precision: 96.05%
 Recall: 96.05%
 F1-Score: 96.05%
