<a href="https://colab.research.google.com/github/Shahid-03/Project-I---Image-Processing/blob/main/VisionTransferModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from google.colab import files
import zipfile
import os

# Upload the ZIP file (dataset.zip)
uploaded = files.upload()

# Import zipfile module
import zipfile

# Extract the ZIP file
with zipfile.ZipFile('dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

# Verify the contents of the extracted folders
!ls /content/
!ls /content/Train/
!ls /content/Test/

Saving dataset.zip to dataset.zip
dataset.zip  __MACOSX  sample_data  Test  Train
anger  fear  joy  Natural  sadness  surprise
anger  fear  joy  Natural  sadness  surprise


In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification
from sklearn.metrics import accuracy_score, classification_report

# Define paths (updated for Colab)
train_dir = '/content/Train'
test_dir = '/content/Test'

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match ViT input size
    transforms.ToTensor(),          # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with ImageNet stats
])

# Load datasets
train_dataset = ImageFolder(root=train_dir, transform=transform)
test_dataset = ImageFolder(root=test_dir, transform=transform)

# Define data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels=len(train_dataset.classes),
    ignore_mismatched_sizes=True  # Handle classifier size mismatch
)

# Move model to appropriate device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Training setup
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Checkpoint path to save the model
checkpoint_path = "/content/checkpoint.pth"

# Training loop with checkpoint saving
try:
    for epoch in range(5):  # Adjust epochs as needed
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Save the model after each epoch
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")
        print(f"Model checkpoint saved at {checkpoint_path}")

except KeyboardInterrupt:
    # Save the model on interrupt
    print("Training interrupted. Saving model...")
    torch.save(model.state_dict(), checkpoint_path)
    print(f"Model checkpoint saved at {checkpoint_path}")

# Evaluation
model.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs).logits
        _, preds = torch.max(outputs, 1)
        y_pred.extend(preds.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# Classification Report
accuracy = accuracy_score(y_true, y_pred)
report = classification_report(y_true, y_pred, target_names=train_dataset.classes)

print(f"ViT Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", report)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 1.3410313799977303
Model checkpoint saved at /content/checkpoint.pth
Epoch 2, Loss: 0.8552926902969679
Model checkpoint saved at /content/checkpoint.pth
Epoch 3, Loss: 0.5807977020740509
Model checkpoint saved at /content/checkpoint.pth
Epoch 4, Loss: 0.4470250817636649
Model checkpoint saved at /content/checkpoint.pth
Epoch 5, Loss: 0.3736654929816723
Model checkpoint saved at /content/checkpoint.pth
ViT Accuracy: 72.00%
Classification Report:
               precision    recall  f1-score   support

     Natural       0.00      0.00      0.00         7
       anger       0.00      0.00      0.00         3
        fear       0.00      0.00      0.00         3
         joy       0.86      1.00      0.92        42
     sadness       0.48      0.71      0.57        14
    surprise       0.50      0.33      0.40         6

    accuracy                           0.72        75
   macro avg       0.31      0.34      0.32        75
weighted avg       0.61      0.72      0.66    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([6]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([6, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 1.3783101414640744
Model checkpoint saved at /content/checkpoint.pth
Epoch 2, Loss: 0.8664451142152151
Model checkpoint saved at /content/checkpoint.pth
Epoch 3, Loss: 0.632463155935208
Model checkpoint saved at /content/checkpoint.pth
Epoch 4, Loss: 0.4798868199189504
Model checkpoint saved at /content/checkpoint.pth
Epoch 5, Loss: 0.3809358514845371
Model checkpoint saved at /content/checkpoint.pth
Epoch 6, Loss: 0.341133293385307
Model checkpoint saved at /content/checkpoint.pth
Epoch 7, Loss: 0.32442326719562214
Model checkpoint saved at /content/checkpoint.pth
Epoch 8, Loss: 0.30825180125733215
Model checkpoint saved at /content/checkpoint.pth
Epoch 9, Loss: 0.2750085735072692
Model checkpoint saved at /content/checkpoint.pth
Epoch 10, Loss: 0.2794377946605285
Model checkpoint saved at /content/checkpoint.pth
ViT Accuracy: 70.67%
Classification Report:
               precision    recall  f1-score   support

     Natural       0.33      0.14      0.20         7
     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
