# In [None]:
# test_clip_rl.ipynb

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader, random_split
from transformers import CLIPProcessor, CLIPModel

# Select the compute device once; the model is moved here and the batches
# must end up on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned model and processor.
# Adjust both paths to wherever the fine-tuned model and its processor were saved.
clip_model = CLIPModel.from_pretrained("clip_rl_finetuned")
clip_processor = CLIPProcessor.from_pretrained("clip_rl_finetuned_processor")
clip_model.to(device)
clip_model.eval()  # switch to evaluation mode for testing

# Load the dataset and carve out the test portion of an 80/10/10 split.
dataset = load_dataset("JotDe/mscoco_100k")
full_split = dataset['train']
num_examples = len(full_split)
train_size = int(0.8 * num_examples)
val_size = int(0.1 * num_examples)
# The test split takes the remainder so the three sizes always sum to the total.
test_size = num_examples - train_size - val_size

# Seed the split so the test subset is the same on every run.
# NOTE(review): this only yields a truly held-out set if the training
# notebook split the data with the same generator seed -- confirm.
split_generator = torch.Generator().manual_seed(42)
_, _, test_dataset = random_split(
    full_split,
    [train_size, val_size, test_size],
    generator=split_generator,
)

def preprocess_data(batch: list) -> dict:
    """Collate raw dataset items into model-ready tensors.

    Args:
        batch: Items from the dataset, each providing an ``'image'`` and a
            ``'text'`` entry.

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``pixel_values``
        tensors, placed on the same device as ``clip_model`` so it can be
        passed straight to ``clip_model(**batch)``.
    """
    images = [item['image'] for item in batch]
    texts = [item['text'] for item in batch]
    # truncation=True keeps over-long captions within the tokenizer's maximum
    # length instead of failing inside the text encoder.
    inputs = clip_processor(
        text=texts,
        images=images,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(clip_model.device)  # follow the model rather than a separate global
    return {
        "input_ids": inputs["input_ids"],
        # Texts are padded to a common length; forward the mask so the model
        # knows which positions are padding.
        "attention_mask": inputs["attention_mask"],
        "pixel_values": inputs["pixel_values"],
    }

test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=preprocess_data)

# Evaluate on the test set.
# Metric: in-batch image-to-text retrieval accuracy. For every image in a
# batch, the prediction is the index of the highest-scoring text of that same
# batch; it counts as correct when it is the image's own caption.
correct = 0
total = 0
print("Starting testing...")

with torch.no_grad():
    for batch in test_dataloader:
        outputs = clip_model(**batch)
        logits = outputs.logits_per_image
        predictions = logits.argmax(dim=1)
        # Image i and text i come from the same dataset item, so the target
        # index for row i is i. Build the labels on the logits' own device so
        # the comparison never mixes devices.
        labels = torch.arange(logits.size(0), device=logits.device)

        correct += (predictions == labels).sum().item()
        total += labels.numel()

# An empty test set would otherwise raise ZeroDivisionError; report NaN instead.
accuracy = correct / total if total else float("nan")
print(f"Test Accuracy: {accuracy:.4f}")
