In [1]:
# Import the necessary libraries
import wandb
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable (None when it is not defined)
api_key = os.getenv("WANDB_API_KEY")
if not api_key:
    # Surface the missing key early instead of letting the login call decide
    # silently what to do. NOTE(review): wandb may still find credentials
    # elsewhere (e.g. a cached login) - confirm against the wandb docs.
    print("Warning: WANDB_API_KEY was not found in the environment or the .env file.")

# Login to Weights & Biases using the API key.
# Best-effort by design: a failure is reported but does not stop the notebook.
try:
    logged_in = wandb.login(key=api_key)
except Exception as e:
    print(f"Error during login: {e}")
else:
    # wandb.login() returns a bool, so only claim success when it says so.
    if logged_in:
        print("Logged in successfully.")
    else:
        print("Login did not complete: no Weights & Biases API key is configured.")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: ravikumarchavva (ravikumarchavva-org). Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\chavv\_netrc


Logged in successfully.


In [2]:
# Hyperparameters shared by the data pipeline, the model and the training loop
CONFIGURATION = dict(
    BATCH_SIZE=16,
    IM_SIZE=224,
    N_EPOCHS=5,
    LEARNING_RATE=1e-5,
    NUM_CLASSES=3,
)

# Start a Weights & Biases run. The tracked hyperparameters are copied from
# CONFIGURATION under the lower-case names used on the W&B side.
run = wandb.init(
    project="transformers-human-pose-estimation",  # project the run is logged to
    name="human-emotion-estimation-1",             # experiment name
    config={
        tracked_name: CONFIGURATION[config_key]
        for tracked_name, config_key in (
            ("learning_rate", 'LEARNING_RATE'),
            ("epochs", 'N_EPOCHS'),
            ("batch_size", 'BATCH_SIZE'),
            ("image_size", 'IM_SIZE'),
            ("num_classes", 'NUM_CLASSES'),
        )
    },
)

In [3]:
import torch
from torchvision import datasets, transforms

# Dataset locations (relative to the notebook) and the emotion class names
TRAIN_DIR = '../../EmotionsDataset/train/'
TEST_DIR = '../../EmotionsDataset/test/'
CLASS_NAMES = ['angry','happy','sad']

# One preprocessing pipeline for both splits: resize to a square of
# IM_SIZE x IM_SIZE, then convert the image to a tensor.
square_size = (CONFIGURATION['IM_SIZE'],) * 2
transform = transforms.Compose([transforms.Resize(square_size), transforms.ToTensor()])

# Build the two image-folder datasets with the shared pipeline
train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=transform)
test_dataset = datasets.ImageFolder(TEST_DIR, transform=transform)

# Report how many samples each split contains
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of testing samples: {len(test_dataset)}")

# Batch the datasets; only the training split is shuffled
batch_size = CONFIGURATION['BATCH_SIZE']
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Number of training samples: 6799
Number of testing samples: 2280


In [4]:
from transformers import AutoImageProcessor, ViTForImageClassification

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Name the checkpoint once so the image processor and the model are always
# loaded from the same place.
CHECKPOINT = "google/vit-base-patch16-224-in21k"

# Human-readable label mappings stored in the model config, so the model
# written by save_pretrained() reports emotion names instead of LABEL_0..N.
# NOTE(review): assumes CLASS_NAMES is listed in the same order as the class
# indices produced by the dataset (train_dataset.classes) - verify.
id2label = dict(enumerate(CLASS_NAMES))
label2id = {name: index for index, name in id2label.items()}

# Load the image processor and model
image_processor = AutoImageProcessor.from_pretrained(CHECKPOINT, use_fast=True)
model = ViTForImageClassification.from_pretrained(
    CHECKPOINT,
    num_labels=len(CLASS_NAMES),
    id2label=id2label,
    label2id=label2id,
)
model.to(device)  # Move model to the appropriate device (GPU/CPU)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [5]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss

# Loss and optimiser for fine-tuning:
# - cross-entropy, for the multi-class classification objective
# - AdamW over every model parameter, at the configured learning rate
criterion = CrossEntropyLoss()
optimizer = AdamW(params=model.parameters(), lr=CONFIGURATION['LEARNING_RATE'])

: 

In [None]:

# Training loop: fine-tune the model for N_EPOCHS passes over train_loader,
# report the mean training loss of each epoch, then save the model to disk.
for epoch in range(CONFIGURATION['N_EPOCHS']):
    print(f"Epoch {epoch + 1}/{CONFIGURATION['N_EPOCHS']}")
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Move images and labels to the device
        images, labels = images.to(device), labels.to(device)

        # Preprocess images using the image processor.
        # The dataset transform already applied ToTensor(), which scales pixel
        # values to [0, 1]. The processor's default rescaling (x * 1/255) must
        # therefore be disabled, otherwise the inputs are scaled twice and
        # collapse to near-constant values. Normalisation is still applied.
        inputs = image_processor(images, return_tensors="pt", do_rescale=False).to(device)

        # Forward pass
        outputs = model(**inputs)
        loss = criterion(outputs.logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f"Training loss: {epoch_loss:.4f}")

    # Send the epoch metric to the active W&B run so the run records more than
    # its hyperparameters. Skipped when no run is active.
    if wandb.run is not None:
        wandb.log({"epoch": epoch + 1, "train_loss": epoch_loss})

# Save the trained model
model.save_pretrained("vit-emotion-classification")

Epoch 1/5


  context_layer = torch.nn.functional.scaled_dot_product_attention(
