In [1]:
!pip install git+https://github.com/mlfoundations/open_clip.git

Collecting git+https://github.com/mlfoundations/open_clip.git
  Cloning https://github.com/mlfoundations/open_clip.git to /tmp/pip-req-build-ilyu5c_7
  Running command git clone --filter=blob:none --quiet https://github.com/mlfoundations/open_clip.git /tmp/pip-req-build-ilyu5c_7
  Resolved https://github.com/mlfoundations/open_clip.git to commit a87f11eaf354000d2736580855ae0d9b76ad2a22
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ftfy (from open_clip_torch==2.32.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.9.0->open_clip_torch==2.32.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.9.0->open_clip_torch==2.32.0)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-non

**no of samples**

In [2]:
import os

root_dir = "/kaggle/input/pacs-dataset/kfold"
domains = ["photo", "art_painting", "cartoon", "sketch"]

for domain in domains:
    domain_path = os.path.join(root_dir, domain)
    count = 0
    for cls in os.listdir(domain_path):
        cls_path = os.path.join(domain_path, cls)
        count += len(os.listdir(cls_path))
    print(f"{domain} has {count} images")

photo has 1670 images
art_painting has 2048 images
cartoon has 2344 images
sketch has 3929 images


**Multi-Domain Training with RN50 and ViT-B16 Backbones**

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
import open_clip

device = "cuda" if torch.cuda.is_available() else "cpu"
PACS_PATH = "/kaggle/input/pacs-dataset/kfold"

class CLIPMLPClassifier(nn.Module):
    def __init__(self, clip_model, num_classes):
        super().__init__()
        self.clip = clip_model
        self.mlp = nn.Sequential(
            nn.Linear(self.clip.visual.output_dim, 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        with torch.no_grad():
            feats = self.clip.encode_image(x)
        return self.mlp(feats)

def get_loader(domains, root, transform, batch_size=32, shuffle=False):
    dataset = []
    for domain in domains:
        ds = datasets.ImageFolder(os.path.join(root, domain), transform=transform)
        dataset.extend(ds.samples)
    base_ds = datasets.ImageFolder(os.path.join(root, domains[0]), transform=transform)
    base_ds.samples = dataset
    loader = DataLoader(base_ds, batch_size=batch_size, shuffle=shuffle)
    return loader, len(base_ds.classes)

train_domains = ["photo", "sketch", "art_painting"]
test_domain = "cartoon"
results = {}

backbones = ['RN50', 'ViT-B-16']

for backbone in backbones:
    print(f"\n= Training with {backbone} =")

    clip_model, _, preprocess = open_clip.create_model_and_transforms(backbone, pretrained='openai')
    clip_model.eval().to(device)
    for param in clip_model.parameters():
        param.requires_grad = False

    train_loader, num_classes = get_loader(train_domains, PACS_PATH, preprocess, shuffle=True)
    test_loader, _ = get_loader([test_domain], PACS_PATH, preprocess, shuffle=False)

    model = CLIPMLPClassifier(clip_model, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.mlp.parameters(), lr=1e-4)

    best_acc = 0
    wait = 0
    patience = 10

    for epoch in range(50):
        model.train()
        total_loss = 0

        for imgs, labels in tqdm(train_loader, desc=f"[{backbone}] Epoch {epoch+1} - Training"):
            imgs, labels = imgs.to(device), labels.to(device)
            logits = model(imgs)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"[{backbone}] Epoch {epoch+1} - Training Loss: {total_loss:.4f}")

        # Evaluation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for imgs, labels in test_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                preds = torch.argmax(outputs, dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        acc = 100 * correct / total
        print(f"[{backbone}] Test Accuracy on '{test_domain}': {acc:.2f}%")

        if acc > best_acc:
            best_acc = acc
            wait = 0
            print(f"[{backbone}] Accuracy improved.")
        else:
            wait += 1
            print(f"[{backbone}] No improvement. Wait count: {wait}/{patience}")
            if wait >= patience:
                print(f"[{backbone}] Early stopping triggered.")
                break

    results[backbone] = best_acc
    print(f"[{backbone}] Best Accuracy Achieved: {best_acc:.2f}%")

print("\n= Summary of Test Accuracies =")
for name, acc in results.items():
    print(f"{name:10s}: {acc:.2f}%")



= Training with RN50 =


open_clip_model.safetensors:   0%|          | 0.00/408M [00:00<?, ?B/s]

[RN50] Epoch 1 - Training: 100%|██████████| 239/239 [01:11<00:00,  3.35it/s]


[RN50] Epoch 1 - Training Loss: 424.6849
[RN50] Test Accuracy on 'cartoon': 59.64%
[RN50] Accuracy improved.


[RN50] Epoch 2 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.70it/s]


[RN50] Epoch 2 - Training Loss: 312.1026
[RN50] Test Accuracy on 'cartoon': 74.02%
[RN50] Accuracy improved.


[RN50] Epoch 3 - Training: 100%|██████████| 239/239 [00:44<00:00,  5.40it/s]


[RN50] Epoch 3 - Training Loss: 231.8337
[RN50] Test Accuracy on 'cartoon': 76.02%
[RN50] Accuracy improved.


[RN50] Epoch 4 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.80it/s]


[RN50] Epoch 4 - Training Loss: 188.3210
[RN50] Test Accuracy on 'cartoon': 77.52%
[RN50] Accuracy improved.


[RN50] Epoch 5 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.70it/s]


[RN50] Epoch 5 - Training Loss: 161.7625
[RN50] Test Accuracy on 'cartoon': 79.14%
[RN50] Accuracy improved.


[RN50] Epoch 6 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.88it/s]


[RN50] Epoch 6 - Training Loss: 147.1130
[RN50] Test Accuracy on 'cartoon': 78.46%
[RN50] No improvement. Wait count: 1/10


[RN50] Epoch 7 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.92it/s]


[RN50] Epoch 7 - Training Loss: 135.6252
[RN50] Test Accuracy on 'cartoon': 78.07%
[RN50] No improvement. Wait count: 2/10


[RN50] Epoch 8 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.79it/s]


[RN50] Epoch 8 - Training Loss: 128.9197
[RN50] Test Accuracy on 'cartoon': 79.39%
[RN50] Accuracy improved.


[RN50] Epoch 9 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.87it/s]


[RN50] Epoch 9 - Training Loss: 124.4329
[RN50] Test Accuracy on 'cartoon': 79.78%
[RN50] Accuracy improved.


[RN50] Epoch 10 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.84it/s]


[RN50] Epoch 10 - Training Loss: 118.6082
[RN50] Test Accuracy on 'cartoon': 79.65%
[RN50] No improvement. Wait count: 1/10


[RN50] Epoch 11 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.91it/s]


[RN50] Epoch 11 - Training Loss: 115.5327
[RN50] Test Accuracy on 'cartoon': 78.92%
[RN50] No improvement. Wait count: 2/10


[RN50] Epoch 12 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.80it/s]


[RN50] Epoch 12 - Training Loss: 110.8202
[RN50] Test Accuracy on 'cartoon': 79.86%
[RN50] Accuracy improved.


[RN50] Epoch 13 - Training: 100%|██████████| 239/239 [00:42<00:00,  5.58it/s]


[RN50] Epoch 13 - Training Loss: 110.6558
[RN50] Test Accuracy on 'cartoon': 81.06%
[RN50] Accuracy improved.


[RN50] Epoch 14 - Training: 100%|██████████| 239/239 [00:45<00:00,  5.27it/s]


[RN50] Epoch 14 - Training Loss: 104.8697
[RN50] Test Accuracy on 'cartoon': 79.86%
[RN50] No improvement. Wait count: 1/10


[RN50] Epoch 15 - Training: 100%|██████████| 239/239 [00:44<00:00,  5.34it/s]


[RN50] Epoch 15 - Training Loss: 103.9631
[RN50] Test Accuracy on 'cartoon': 81.02%
[RN50] No improvement. Wait count: 2/10


[RN50] Epoch 16 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.71it/s]


[RN50] Epoch 16 - Training Loss: 104.0227
[RN50] Test Accuracy on 'cartoon': 80.84%
[RN50] No improvement. Wait count: 3/10


[RN50] Epoch 17 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.79it/s]


[RN50] Epoch 17 - Training Loss: 102.5895
[RN50] Test Accuracy on 'cartoon': 79.86%
[RN50] No improvement. Wait count: 4/10


[RN50] Epoch 18 - Training: 100%|██████████| 239/239 [00:40<00:00,  5.88it/s]


[RN50] Epoch 18 - Training Loss: 98.9410
[RN50] Test Accuracy on 'cartoon': 80.25%
[RN50] No improvement. Wait count: 5/10


[RN50] Epoch 19 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.80it/s]


[RN50] Epoch 19 - Training Loss: 99.8799
[RN50] Test Accuracy on 'cartoon': 79.39%
[RN50] No improvement. Wait count: 6/10


[RN50] Epoch 20 - Training: 100%|██████████| 239/239 [00:47<00:00,  5.04it/s]


[RN50] Epoch 20 - Training Loss: 97.2938
[RN50] Test Accuracy on 'cartoon': 80.33%
[RN50] No improvement. Wait count: 7/10


[RN50] Epoch 21 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.75it/s]


[RN50] Epoch 21 - Training Loss: 97.5575
[RN50] Test Accuracy on 'cartoon': 80.84%
[RN50] No improvement. Wait count: 8/10


[RN50] Epoch 22 - Training: 100%|██████████| 239/239 [00:41<00:00,  5.72it/s]


[RN50] Epoch 22 - Training Loss: 95.0696
[RN50] Test Accuracy on 'cartoon': 79.91%
[RN50] No improvement. Wait count: 9/10


[RN50] Epoch 23 - Training: 100%|██████████| 239/239 [00:44<00:00,  5.32it/s]


[RN50] Epoch 23 - Training Loss: 95.7041
[RN50] Test Accuracy on 'cartoon': 79.86%
[RN50] No improvement. Wait count: 10/10
[RN50] Early stopping triggered.
[RN50] Best Accuracy Achieved: 81.06%

= Training with ViT-B-16 =


open_clip_model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

[ViT-B-16] Epoch 1 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.11it/s]


[ViT-B-16] Epoch 1 - Training Loss: 236.3461
[ViT-B-16] Test Accuracy on 'cartoon': 98.38%
[ViT-B-16] Accuracy improved.


[ViT-B-16] Epoch 2 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.12it/s]


[ViT-B-16] Epoch 2 - Training Loss: 57.7756
[ViT-B-16] Test Accuracy on 'cartoon': 98.63%
[ViT-B-16] Accuracy improved.


[ViT-B-16] Epoch 3 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.11it/s]


[ViT-B-16] Epoch 3 - Training Loss: 37.0170
[ViT-B-16] Test Accuracy on 'cartoon': 98.42%
[ViT-B-16] No improvement. Wait count: 1/10


[ViT-B-16] Epoch 4 - Training: 100%|██████████| 239/239 [01:15<00:00,  3.15it/s]


[ViT-B-16] Epoch 4 - Training Loss: 30.3771
[ViT-B-16] Test Accuracy on 'cartoon': 98.42%
[ViT-B-16] No improvement. Wait count: 2/10


[ViT-B-16] Epoch 5 - Training: 100%|██████████| 239/239 [01:22<00:00,  2.91it/s]


[ViT-B-16] Epoch 5 - Training Loss: 26.8525
[ViT-B-16] Test Accuracy on 'cartoon': 98.34%
[ViT-B-16] No improvement. Wait count: 3/10


[ViT-B-16] Epoch 6 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.12it/s]


[ViT-B-16] Epoch 6 - Training Loss: 24.5477
[ViT-B-16] Test Accuracy on 'cartoon': 98.38%
[ViT-B-16] No improvement. Wait count: 4/10


[ViT-B-16] Epoch 7 - Training: 100%|██████████| 239/239 [01:41<00:00,  2.35it/s]


[ViT-B-16] Epoch 7 - Training Loss: 22.8588
[ViT-B-16] Test Accuracy on 'cartoon': 98.29%
[ViT-B-16] No improvement. Wait count: 5/10


[ViT-B-16] Epoch 8 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.12it/s]


[ViT-B-16] Epoch 8 - Training Loss: 21.5112
[ViT-B-16] Test Accuracy on 'cartoon': 98.29%
[ViT-B-16] No improvement. Wait count: 6/10


[ViT-B-16] Epoch 9 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.12it/s]


[ViT-B-16] Epoch 9 - Training Loss: 20.3005
[ViT-B-16] Test Accuracy on 'cartoon': 98.34%
[ViT-B-16] No improvement. Wait count: 7/10


[ViT-B-16] Epoch 10 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.11it/s]


[ViT-B-16] Epoch 10 - Training Loss: 19.3122
[ViT-B-16] Test Accuracy on 'cartoon': 98.38%
[ViT-B-16] No improvement. Wait count: 8/10


[ViT-B-16] Epoch 11 - Training: 100%|██████████| 239/239 [01:16<00:00,  3.12it/s]


[ViT-B-16] Epoch 11 - Training Loss: 18.3867
[ViT-B-16] Test Accuracy on 'cartoon': 98.38%
[ViT-B-16] No improvement. Wait count: 9/10


[ViT-B-16] Epoch 12 - Training: 100%|██████████| 239/239 [01:17<00:00,  3.09it/s]


[ViT-B-16] Epoch 12 - Training Loss: 17.8875
[ViT-B-16] Test Accuracy on 'cartoon': 98.21%
[ViT-B-16] No improvement. Wait count: 10/10
[ViT-B-16] Early stopping triggered.
[ViT-B-16] Best Accuracy Achieved: 98.63%

= Summary of Test Accuracies =
RN50      : 81.06%
ViT-B-16  : 98.63%
