# Vision transformers

## Connect to wandb

In [1]:
# Import the necessary libraries
import wandb
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable
api_key = os.getenv("WANDB_API_KEY")

# Login to Weights & Biases using the API key
try:
    wandb.login(key=api_key)
    print("Logged in successfully.")
except Exception as e:
    print(f"Error during login: {e}")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: ravikumarchavva (ravikumarchavva-org). Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\chavv\_netrc


Logged in successfully.


## Configurations

In [2]:
# Updated Configuration
CONFIGURATION = {
    'BATCH_SIZE': 32,
    'IM_SIZE': 224,
    'N_EPOCHS': 10,
    'LEARNING_RATE': 1e-5,
    'NUM_CLASSES': 3,
}

run = wandb.init(
    # Set the project where this run will be logged
    project="transformers-human-emotion-estimation-pytorch",

    # Set the experiment name
    name="human-emotion-estimation-2",
    # Track hyperparameters and run metadata
    config={
        "learning_rate": CONFIGURATION['LEARNING_RATE'],
        "epochs": CONFIGURATION['N_EPOCHS'],
        "batch_size": CONFIGURATION['BATCH_SIZE'],
        "image_size": CONFIGURATION['IM_SIZE'],
        "num_classes": CONFIGURATION['NUM_CLASSES'],
    },
)

## Load Dataset

In [3]:
import torch
from torchvision import datasets, transforms

TRAIN_DIR = '../../EmotionsDataset/train/'
TEST_DIR = '../../EmotionsDataset/test/'
CLASS_NAMES = ['angry','happy','sad']

# Define the transformations for the training and testing datasets
transform = transforms.Compose([
    transforms.Resize((CONFIGURATION['IM_SIZE'], CONFIGURATION['IM_SIZE'])),
    transforms.ToTensor(),
])

# Load the training and testing datasets
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=transform)
test_dataset = datasets.ImageFolder(root=TEST_DIR, transform=transform)

# Print the number of samples in each dataset
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of testing samples: {len(test_dataset)}")

# Create the dataloaders for the training and testing datasets
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=CONFIGURATION['BATCH_SIZE'], shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=CONFIGURATION['BATCH_SIZE'], shuffle=False)

Number of training samples: 6799
Number of testing samples: 2280


## Load transformer

In [4]:
from transformers import AutoImageProcessor, ViTForImageClassification
from torchinfo import summary
import torch

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the image processor and model
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k", use_fast=True)
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=len(CLASS_NAMES))
model.to(device)  # Move model to the appropriate device (GPU/CPU)

# View model summary
summary(model, input_size=(1, 3, 224, 224))  # Adjust input size based on your model

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  context_layer = torch.nn.functional.scaled_dot_product_attention(


Layer (type:depth-idx)                                       Output Shape              Param #
ViTForImageClassification                                    [1, 3]                    --
├─ViTModel: 1-1                                              [1, 197, 768]             --
│    └─ViTEmbeddings: 2-1                                    [1, 197, 768]             152,064
│    │    └─ViTPatchEmbeddings: 3-1                          [1, 196, 768]             590,592
│    │    └─Dropout: 3-2                                     [1, 197, 768]             --
│    └─ViTEncoder: 2-2                                       [1, 197, 768]             --
│    │    └─ModuleList: 3-3                                  --                        85,054,464
│    └─LayerNorm: 2-3                                        [1, 197, 768]             1,536
├─Linear: 1-2                                                [1, 3]                    2,307
Total params: 85,800,963
Trainable params: 85,800,963
Non-trainable par

## Model hyperparameters

In [5]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss

# Define optimizer and loss function
optimizer = AdamW(model.parameters(), lr=CONFIGURATION['LEARNING_RATE'])
criterion = torch.nn.CrossEntropyLoss()  # Use Cross Entropy Loss for multi-class classification

## Training

In [6]:
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Fine-tuned learning rate scheduler
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,  # Reduce learning rate by half instead of 10x
    patience=3,  # Wait for 3 epochs without improvement before reducing
    verbose=True,  # Print messages when learning rate is updated
    min_lr=1e-7  # Ensure learning rate doesn't go below this value
)

# Training loop (add softmax, accuracy, and top-k logging)
for epoch in range(CONFIGURATION['N_EPOCHS']):
    print(f"Epoch {epoch + 1}/{CONFIGURATION['N_EPOCHS']}")
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    topk_correct = 0

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        inputs = image_processor(images, return_tensors="pt").to(device)
        outputs = model(**inputs)

        probs = F.softmax(outputs.logits, dim=1)
        loss = criterion(outputs.logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(probs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        # Top-K accuracy (k=2)
        topk_probs, topk_preds = torch.topk(probs, k=2, dim=1)
        topk_correct += torch.sum(topk_preds.eq(labels.view(-1, 1)).sum(1)).item()

        if batch_idx % 10 == 0:
            print(f"Batch {batch_idx}/{len(train_loader)} - Loss: {loss.item():.4f}")

    avg_train_loss = running_loss / len(train_loader)
    accuracy = correct / total
    topk_accuracy = topk_correct / total
    print(f"Training loss: {avg_train_loss:.4f}, Accuracy: {accuracy:.4f}, Top-2 Accuracy: {topk_accuracy:.4f}")

    # Log training metrics to WandB
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": avg_train_loss,
        "train_accuracy": accuracy,
        "train_topk_accuracy": topk_accuracy,
    })

    # Validation loop
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    topk_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            inputs = image_processor(images, return_tensors="pt").to(device)
            outputs = model(**inputs)

            probs = F.softmax(outputs.logits, dim=1)
            loss = criterion(outputs.logits, labels)
            val_loss += loss.item()

            _, predicted = torch.max(probs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Top-K accuracy (k=2)
            topk_probs, topk_preds = torch.topk(probs, k=2, dim=1)
            topk_correct += torch.sum(topk_preds.eq(labels.view(-1, 1)).sum(1)).item()

    avg_val_loss = val_loss / len(test_loader)
    val_accuracy = correct / total
    val_topk_accuracy = topk_correct / total
    print(f"Validation loss: {avg_val_loss:.4f}, Accuracy: {val_accuracy:.4f}, Top-2 Accuracy: {val_topk_accuracy:.4f}")

    # Log validation metrics to WandB
    wandb.log({
        "epoch": epoch + 1,
        "val_loss": avg_val_loss,
        "val_accuracy": val_accuracy,
        "val_topk_accuracy": val_topk_accuracy,
    })

    # Learning rate scheduler step (based on validation loss)
    scheduler.step(avg_val_loss)



Epoch 1/10
Batch 0/213 - Loss: 1.0983
Batch 10/213 - Loss: 1.1985
Batch 20/213 - Loss: 1.0730
Batch 30/213 - Loss: 1.0980
Batch 40/213 - Loss: 1.0182
Batch 50/213 - Loss: 1.0372
Batch 60/213 - Loss: 1.0118
Batch 70/213 - Loss: 1.1327
Batch 80/213 - Loss: 1.1110
Batch 90/213 - Loss: 1.0617
Batch 100/213 - Loss: 0.9982
Batch 110/213 - Loss: 1.0464
Batch 120/213 - Loss: 1.0903
Batch 130/213 - Loss: 1.1714
Batch 140/213 - Loss: 1.1241
Batch 150/213 - Loss: 1.0805
Batch 160/213 - Loss: 1.0901
Batch 170/213 - Loss: 1.0590
Batch 180/213 - Loss: 1.0396
Batch 190/213 - Loss: 1.0253
Batch 200/213 - Loss: 1.0186
Batch 210/213 - Loss: 1.0515
Training loss: 1.0646, Accuracy: 0.4423, Top-2 Accuracy: 0.7738
Validation loss: 1.0641, Accuracy: 0.4412, Top-2 Accuracy: 0.7732
Epoch 2/10
Batch 0/213 - Loss: 1.0082
Batch 10/213 - Loss: 1.0564
Batch 20/213 - Loss: 0.9887
Batch 30/213 - Loss: 0.9141
Batch 40/213 - Loss: 1.1538
Batch 50/213 - Loss: 1.0571
Batch 60/213 - Loss: 1.1332
Batch 70/213 - Loss: 1.040

KeyboardInterrupt: 

In [9]:
import torch
from sklearn.metrics import classification_report, accuracy_score

# Load the pre-trained model
model_path = 'vit-emotion-classification'
model = ViTForImageClassification.from_pretrained(model_path, num_labels=len(CLASS_NAMES))

# Move the model to the appropriate device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Set the model to evaluation mode
model.eval()

# Initialize lists to store true labels and predictions
true_labels = []
predictions = []

# Disable gradient calculation for inference
with torch.no_grad():
    for images, labels in test_loader:
        # Move images and labels to the device
        images, labels = images.to(device), labels.to(device)

        # Preprocess images using the image processor
        inputs = image_processor(images, return_tensors="pt").to(device)

        # Forward pass
        outputs = model(**inputs)
        _, preds = torch.max(outputs.logits, 1)

        # Store true labels and predictions
        true_labels.extend(labels.cpu().numpy())
        predictions.extend(preds.cpu().numpy())

# Calculate accuracy
accuracy = accuracy_score(true_labels, predictions)
print(f"Accuracy: {accuracy:.4f}")

# Generate classification report
report = classification_report(true_labels, predictions, target_names=CLASS_NAMES)
print("Classification Report:\n", report)

Accuracy: 0.4412


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report:
               precision    recall  f1-score   support

       angry       0.00      0.00      0.00       517
       happy       0.44      1.00      0.61      1006
         sad       0.00      0.00      0.00       757

    accuracy                           0.44      2280
   macro avg       0.15      0.33      0.20      2280
weighted avg       0.19      0.44      0.27      2280



In [11]:
!pip install torchmetrics


Collecting torchmetrics
  Downloading torchmetrics-1.4.3-py3-none-any.whl.metadata (19 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)


Downloading torchmetrics-1.4.3-py3-none-any.whl (869 kB)
   ---------------------------------------- 0.0/869.5 kB ? eta -:--:--
   ------------------------ --------------- 524.3/869.5 kB 4.2 MB/s eta 0:00:01
   ---------------------------------------- 869.5/869.5 kB 2.6 MB/s eta 0:00:00
Downloading lightning_utilities-0.11.7-py3-none-any.whl (26 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.7 torchmetrics-1.4.3
