In [1]:
!git clone https://github.com/google-research/vision_transformer.git

Cloning into 'vision_transformer'...
remote: Enumerating objects: 1097, done.[K
remote: Counting objects: 100% (308/308), done.[K
remote: Compressing objects: 100% (160/160), done.[K
remote: Total 1097 (delta 192), reused 199 (delta 140), pack-reused 789 (from 1)[K
Receiving objects: 100% (1097/1097), 141.93 MiB | 23.53 MiB/s, done.
Resolving deltas: 100% (594/594), done.


In [2]:
!pip uninstall tf-keras
!pip install tf-keras==2.18.0

Found existing installation: tf_keras 2.17.0
Uninstalling tf_keras-2.17.0:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/tf_keras-2.17.0.dist-info/*
    /usr/local/lib/python3.10/dist-packages/tf_keras/*
Proceed (Y/n)? Y
  Successfully uninstalled tf_keras-2.17.0
Collecting tf-keras==2.18.0
  Downloading tf_keras-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Collecting tensorflow<2.19,>=2.18 (from tf-keras==2.18.0)
  Downloading tensorflow-2.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting tensorboard<2.19,>=2.18 (from tensorflow<2.19,>=2.18->tf-keras==2.18.0)
  Downloading tensorboard-2.18.0-py3-none-any.whl.metadata (1.6 kB)
Downloading tf_keras-2.18.0-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorflow-2.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (615.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

In [3]:
!cd vision_transformer && pip install .

Processing /content/vision_transformer
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting flaxformer@ git+https://github.com/google/flaxformer (from vit_jax==0.0.8)
  Cloning https://github.com/google/flaxformer to /tmp/pip-install-q20uhq8k/flaxformer_3302e5e516fd4b6ab1676666dc6f496f
  Running command git clone --filter=blob:none --quiet https://github.com/google/flaxformer /tmp/pip-install-q20uhq8k/flaxformer_3302e5e516fd4b6ab1676666dc6f496f
  Resolved https://github.com/google/flaxformer to commit 399ea3a85e9807ada653fd0de1a9de627eb0acde
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting aqtp!=0.1.1 (from vit_jax==0.0.8)
  Downloading aqtp-0.8.2-py3-none-any.whl.metadata (18 kB)
Collecting clu (from vit_jax==0.0.8)
  Downloading clu-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ml-collections (from vit_jax==0.0.8)
  Downloading ml_collections-1.0.0-py3-none-any.whl.metadata (22 kB)
Collecting tensorflow_text (from vit_jax==0.0.8)
  Downloading tensorf

In [4]:
import kagglehub

# Download the latest version of the dataset; `path` is the local directory
# that holds the downloaded files.
path = kagglehub.dataset_download("magdawjcicka/emotic")

print(f"Path to dataset files: {path}")

Downloading from https://www.kaggle.com/api/v1/datasets/download/magdawjcicka/emotic?dataset_version_number=1...


100%|██████████| 6.16G/6.16G [01:18<00:00, 83.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/magdawjcicka/emotic/versions/1


In [5]:
import os
import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# -------------------------
# Transformations for images
# -------------------------
def get_transform():
    """
    Build the preprocessing pipeline applied to every image array.

    Returns:
        transforms.Compose: ToPILImage -> Resize to 224x224 -> ToTensor ->
        Normalize with the ImageNet channel statistics listed below.
    """
    # ImageNet per-channel statistics
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        transforms.ToPILImage(),           # NumPy array -> PIL image
        transforms.Resize((224, 224)),     # fixed 224x224 input size
        transforms.ToTensor(),             # PIL image -> PyTorch tensor
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)

# -------------------------
# Dataset for EMOTIC data
# -------------------------
class EMOTICDataset(Dataset):
    """
    Map-style dataset yielding ``(image, multi-hot label)`` pairs for EMOTIC.

    Every annotation is a dict with a ``'filename'`` entry (an array file,
    relative to ``img_dir``, that ``np.load`` can read) and a ``'categories'``
    entry (an iterable of integer class indices).
    """

    def __init__(self, annotations, img_dir, transform=None, num_categories=26):
        """
        Store the configuration; no file is touched until an item is requested.

        Args:
            annotations (list): List of annotations containing filenames and categories.
            img_dir (str): Directory containing the images.
            transform (callable, optional): Transformations to apply to images.
            num_categories (int): Number of categories for multi-label classification.
        """
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform
        self.num_categories = num_categories

    def __len__(self):
        """Return the number of annotations."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """
        Load one sample.

        Args:
            idx (int): Position of the annotation to load.

        Returns:
            tuple: ``(image, categories)`` where ``image`` is the loaded array
            (after ``transform`` when one was given) and ``categories`` is a
            float32 tensor of length ``num_categories`` holding 1.0 at every
            annotated class index and 0.0 elsewhere.

        Raises:
            FileNotFoundError: If the image file does not exist.
            ValueError: If the array is not 2-D and its last axis is not of size 3.
        """
        entry = self.annotations[idx]
        img_path = os.path.join(self.img_dir, entry['filename'])

        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")

        image = np.load(img_path)  # Load image as a NumPy array

        # Ensure the image has three channels on its last axis
        if image.ndim == 2:
            image = np.stack([image] * 3, axis=-1)  # Replicate the grayscale plane
        elif image.shape[-1] != 3:
            raise ValueError(f"Unexpected image shape: {image.shape}")

        # Compare against None so that a callable which happens to be falsy
        # (for example an empty container module) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        # Build the multi-hot label vector
        categories = torch.zeros(self.num_categories, dtype=torch.float32)
        for category in entry['categories']:
            # Out-of-range indices are ignored. The lower bound matters because a
            # negative index would otherwise wrap around and flag the wrong class.
            if 0 <= category < self.num_categories:
                categories[category] = 1.0

        return image, categories

# -------------------------
# Load annotations
# -------------------------
def parse_annotations(csv_path, category_columns=None, filename_column='Crop_name'):
    """
    Load annotations from a CSV file into a list of dictionaries.

    Args:
        csv_path (str): Path to the CSV file containing annotations.
        category_columns (sequence of str, optional): Names of the one-hot label
            columns, in class-index order. When omitted, the columns at
            positions 9..38 of the file are used (the previous hard-coded
            behaviour).
        filename_column (str): Name of the column holding the image file name.

    Returns:
        list: One dict per CSV row with the keys 'filename' and 'categories';
        'categories' lists the positions (within the selected label columns)
        whose cell equals 1.
    """
    import pandas as pd

    df = pd.read_csv(csv_path)

    if category_columns is None:
        # NOTE(review): this default spans 30 columns while the dataset class
        # defaults to 26 categories - confirm the slice against the CSV header.
        label_frame = df.iloc[:, 9:39]
    else:
        label_frame = df[list(category_columns)]

    # One vectorised comparison replaces a Python-level iterrows() loop.
    active_flags = label_frame.eq(1).to_numpy().tolist()
    filenames = df[filename_column].tolist()

    return [
        {
            'filename': filename,
            'categories': [idx for idx, flag in enumerate(flags) if flag],
        }
        for filename, flags in zip(filenames, active_flags)
    ]

# -------------------------
# Integration into training/validation pipeline
# -------------------------
def load_data(train_csv, val_csv, img_dir, batch_size=16, num_categories=26):
    """
    Create the training and validation DataLoaders.

    Both CSV files are parsed with parse_annotations and both datasets share
    one transform pipeline and one image directory.

    Args:
        train_csv (str): Path to the CSV file for training annotations.
        val_csv (str): Path to the CSV file for validation annotations.
        img_dir (str): Directory containing the images.
        batch_size (int): Batch size used by both loaders.
        num_categories (int): Number of categories for multi-label classification.

    Returns:
        DataLoader, DataLoader: The shuffled training loader followed by the
        unshuffled validation loader.
    """
    train_annotations = parse_annotations(train_csv)
    val_annotations = parse_annotations(val_csv)
    shared_transform = get_transform()

    def build_loader(annotations, shuffle):
        # Wrap one annotation list in a dataset and then in a DataLoader.
        dataset = EMOTICDataset(
            annotations,
            img_dir,
            transform=shared_transform,
            num_categories=num_categories,
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    return build_loader(train_annotations, True), build_loader(val_annotations, False)

# Example usage: build the loaders and show the shapes of one training batch
if __name__ == "__main__":
    # Reuse the directory returned by kagglehub.dataset_download() in the
    # download cell instead of hard-coding the cache location and version.
    base_dir = path
    train_csv = os.path.join(base_dir, "annots_arrs/annot_arrs_train.csv")
    val_csv = os.path.join(base_dir, "annots_arrs/annot_arrs_test.csv")
    img_dir = os.path.join(base_dir, "img_arrs")

    # Fail early, with a specific message, when part of the dataset is missing
    if not os.path.exists(train_csv):
        raise FileNotFoundError(f"Training CSV file not found: {train_csv}")
    if not os.path.exists(val_csv):
        raise FileNotFoundError(f"Validation CSV file not found: {val_csv}")
    if not os.path.exists(img_dir):
        raise FileNotFoundError(f"Image directory not found: {img_dir}")

    train_loader, val_loader = load_data(train_csv, val_csv, img_dir, batch_size=16)

    # Pull a single batch as a smoke test of the whole input pipeline
    for images, labels in train_loader:
        print("Batch of images:", images.shape)
        print("Batch of labels:", labels.shape)
        break

Batch of images: torch.Size([16, 3, 224, 224])
Batch of labels: torch.Size([16, 26])


In [6]:
import torch

# Select the compute device: CUDA when it is available, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [7]:
from torch import nn
from torchvision.models import vit_b_16, ViT_B_16_Weights

# ViT-B/16 backbone whose classification head is replaced for emotion prediction
class VisionTransformerEmotionModel(nn.Module):
    """
    torchvision ViT-B/16 initialised from the IMAGENET1K_V1 weights, with its
    final linear head swapped for a new ``num_classes``-output layer.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        backbone = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)
        head_inputs = backbone.heads.head.in_features
        # The new head emits one raw output per class for multi-label classification
        backbone.heads.head = nn.Linear(head_inputs, num_classes)
        self.vit = backbone

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.vit(x)


# Build the 26-class model, then move it to the selected device
num_classes = 26
model = VisionTransformerEmotionModel(num_classes=num_classes)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 132MB/s]


In [9]:
def vca_loss(logits, labels, alpha=0.1):
    """
    Variance-Constrained Agreement (VCA) loss for multi-label classification.

    The loss is the mean squared error between the sigmoid probabilities and
    the labels, plus ``alpha`` times the per-class variance of the
    probabilities across the batch (averaged over classes). With a positive
    ``alpha`` the second term penalises batch-wise spread of the predictions.

    Args:
        logits (Tensor): Predicted logits (batch_size, num_classes).
        labels (Tensor): Ground-truth labels (batch_size, num_classes).
        alpha (float): Weight for the variance term.

    Returns:
        Tensor: Scalar VCA loss.
    """
    # Apply sigmoid to logits to get probabilities
    probabilities = torch.sigmoid(logits)

    # Mean squared error between probabilities and labels (agreement term)
    mean_agreement = torch.mean((probabilities - labels) ** 2)

    # Variance across the batch dimension. torch.var applies Bessel's correction
    # by default, which is undefined (NaN) for a single sample - e.g. a trailing
    # batch of size 1 - so the term is taken as zero in that case rather than
    # letting NaN reach the loss and the gradients.
    if probabilities.dim() > 0 and probabilities.shape[0] > 1:
        variance = torch.var(probabilities, dim=0).mean()
    else:
        variance = probabilities.new_zeros(())

    # Combine the agreement loss and the variance term
    return mean_agreement + alpha * variance

# Plain training loop (prints one summary line per epoch)
def train_model(model, train_loader, optimizer, device, alpha=0.1, num_epochs=10):
    """
    Train ``model`` with the VCA loss for ``num_epochs`` passes over the data.

    Args:
        model: Module mapping an image batch to logits.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device to which every batch is moved before the forward pass.
        alpha (float): Weight of the variance term passed to ``vca_loss``.
        num_epochs (int): Number of passes over ``train_loader``.
    """
    for epoch_number in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass and loss
            loss = vca_loss(model(images), labels, alpha)
            running_loss += loss.item()

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        epoch_mean = running_loss / len(train_loader)
        print(f"Epoch {epoch_number}/{num_epochs}, Loss: {epoch_mean:.4f}")

In [17]:
!pip install tqdm

from tqdm import tqdm

def train_model_with_progress_bar(model, train_loader, optimizer, device, alpha=0.1, num_epochs=10):
    """
    Train ``model`` with the VCA loss while showing a tqdm bar for every epoch.

    Args:
        model: Module mapping an image batch to logits.
        train_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        optimizer: Optimizer that updates the model's parameters.
        device: Device to which every batch is moved before the forward pass.
        alpha (float): Weight of the variance term passed to ``vca_loss``.
        num_epochs (int): Number of passes over ``train_loader``.
    """
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        # Wrap the loader in tqdm to display a progress bar for this epoch
        progress_bar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch + 1}/{num_epochs}")

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            logits = model(images)

            # Compute loss; read the Python scalar once and reuse it for both the
            # running total and the progress-bar postfix
            loss = vca_loss(logits, labels, alpha)
            batch_loss = loss.item()
            total_loss += batch_loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Refresh the progress bar with the latest batch loss
            progress_bar.set_postfix({"Batch Loss": batch_loss})

        print(f"Epoch [{epoch + 1}/{num_epochs}] completed, Average Loss: {total_loss / len(train_loader):.4f}")



In [19]:
import torch.optim as optim

# AdamW over all model parameters with a learning rate of 1e-4
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4)

In [20]:
train_model_with_progress_bar(
    model=model,
    train_loader=train_loader,
    optimizer=optimizer,
    device=device,
    alpha=0.1,
    num_epochs=10,
)

Epoch 1/10: 100%|██████████| 1540/1540 [15:41<00:00,  1.64it/s, Batch Loss=0.0572]


Epoch [1/10] completed, Average Loss: 0.0568


Epoch 2/10: 100%|██████████| 1540/1540 [15:23<00:00,  1.67it/s, Batch Loss=0.0642]


Epoch [2/10] completed, Average Loss: 0.0562


Epoch 3/10: 100%|██████████| 1540/1540 [15:27<00:00,  1.66it/s, Batch Loss=0.0442]


Epoch [3/10] completed, Average Loss: 0.0558


Epoch 4/10: 100%|██████████| 1540/1540 [15:25<00:00,  1.66it/s, Batch Loss=0.038]


Epoch [4/10] completed, Average Loss: 0.0552


Epoch 5/10: 100%|██████████| 1540/1540 [15:25<00:00,  1.66it/s, Batch Loss=0.0698]


Epoch [5/10] completed, Average Loss: 0.0544


Epoch 6/10: 100%|██████████| 1540/1540 [15:24<00:00,  1.67it/s, Batch Loss=0.0659]


Epoch [6/10] completed, Average Loss: 0.0532


Epoch 7/10: 100%|██████████| 1540/1540 [15:29<00:00,  1.66it/s, Batch Loss=0.0389]


Epoch [7/10] completed, Average Loss: 0.0515


Epoch 8/10: 100%|██████████| 1540/1540 [15:28<00:00,  1.66it/s, Batch Loss=0.0422]


Epoch [8/10] completed, Average Loss: 0.0492


Epoch 9/10: 100%|██████████| 1540/1540 [15:22<00:00,  1.67it/s, Batch Loss=0.0544]


Epoch [9/10] completed, Average Loss: 0.0464


Epoch 10/10: 100%|██████████| 1540/1540 [15:23<00:00,  1.67it/s, Batch Loss=0.0696]

Epoch [10/10] completed, Average Loss: 0.0433





In [22]:
def evaluate_model(model, val_loader, device, alpha=0.1):
    """
    Evaluate ``model`` on ``val_loader`` without computing gradients.

    Predictions are obtained by thresholding the sigmoid probabilities at 0.5.

    Args:
        model: Module mapping an image batch to logits.
        val_loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
        device: Device to which every batch is moved before the forward pass.
        alpha (float): Weight of the variance term passed to ``vca_loss``.

    Returns:
        tuple: ``(avg_loss, precision, recall, f1)`` - the mean of the per-batch
        losses followed by the macro-averaged precision, recall and F1 score.
    """
    from sklearn.metrics import precision_score, recall_score, f1_score

    model.eval()
    total_loss = 0.0
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Evaluating"):
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            loss = vca_loss(logits, labels, alpha)
            total_loss += loss.item()

            # Convert logits to probabilities, then to hard 0/1 predictions
            probabilities = torch.sigmoid(logits)
            predictions = (probabilities > 0.5).float()

            all_targets.append(labels.cpu().numpy())
            all_predictions.append(predictions.cpu().numpy())

    # Calculate average loss (mean over batches)
    avg_loss = total_loss / len(val_loader)

    # Stack the per-batch arrays into (num_samples, num_classes) matrices
    all_targets = np.vstack(all_targets)
    all_predictions = np.vstack(all_predictions)

    # A class without predicted (or true) positives has an undefined score.
    # zero_division=0 scores it as 0 - the value scikit-learn already falls back
    # to - without emitting an undefined-metric warning on every evaluation.
    metric_options = {"average": "macro", "zero_division": 0}
    precision = precision_score(all_targets, all_predictions, **metric_options)
    recall = recall_score(all_targets, all_predictions, **metric_options)
    f1 = f1_score(all_targets, all_predictions, **metric_options)

    return avg_loss, precision, recall, f1

# Evaluate on the validation loader and report the loss and macro metrics
val_loss, precision, recall, f1 = evaluate_model(
    model=model,
    val_loader=val_loader,
    device=device,
    alpha=0.1,
)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

Evaluating: 100%|██████████| 455/455 [01:37<00:00,  4.67it/s]


Validation Loss: 0.1205
Precision: 0.2645, Recall: 0.0422, F1-Score: 0.0577


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
