In [None]:
import json
import torch
import os

from datasets import load_dataset

from ram import inference_ram_openset as inference
from ram import get_transform
from ram.models import ram_plus

### Load HallusionBench benchmark dataset

In [None]:
# Download (or load from the local cache) HallusionBench and keep only the
# entry stored under the 'image' key; the bare name on the last line displays
# the resulting dataset summary as the cell output.
dataset = load_dataset("lmms-lab/HallusionBench", "default")['image']
dataset

In [None]:
def generate_image_tags(dataset):
    """Tag every distinct image in ``dataset`` with RAM++ and save the tags.

    Loads the RAM++ (Swin-L) checkpoint from
    ``pretrained/ram_plus_swin_large_14m.pth``, runs open-set tag inference on
    each image, and writes one ``{filename: tags}`` JSON object per line, each
    followed by a comma, to ``../../results/hallusionBench_tags.json``
    (relative to the current working directory). Any previous content of that
    file is discarded at the start of the run.

    Consecutive rows that share the same ``filename`` are treated as the same
    image and are tagged only once.

    NOTE: the output is a sequence of comma-terminated JSON objects, not a
    single valid JSON document. The format is kept as-is so that existing
    consumers of the file keep working.

    Args:
        dataset: Row-indexable dataset (e.g. a Hugging Face ``Dataset``) with
            an ``image`` column (presumably PIL images -- whatever the RAM
            transform accepts) and a ``filename`` column.

    Returns:
        None. Results are written to disk and echoed to stdout.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Device:', device)

    img_size = 384
    model_weights = "pretrained/ram_plus_swin_large_14m.pth"
    output_path = os.path.join(os.curdir, "../../results/hallusionBench_tags.json")

    # Make sure the results directory exists, then truncate any previous
    # output. Doing this before loading the model surfaces path/permission
    # problems without first paying for the expensive checkpoint load.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as f:
        f.write('')

    transform = get_transform(image_size=img_size)

    model = ram_plus(pretrained=model_weights,
                     image_size=img_size,
                     vit='swin_l')
    model.eval()
    model = model.to(device)

    # Read the (cheap) filename column once. Indexing the column inside the
    # loop -- dataset['image'][idx] -- can materialise and decode the whole
    # image column on every iteration.
    filenames = list(dataset['filename'])

    prev_img_source = None
    with open(output_path, 'a') as f:
        for idx, img_source in enumerate(filenames):
            # Skip duplicate images (only consecutive repeats are detected)
            if img_source == prev_img_source:
                continue
            prev_img_source = img_source

            # Row-first access decodes only this row's image, and only for
            # rows that are actually tagged.
            img = transform(dataset[idx]['image']).unsqueeze(0).to(device)

            tags = inference(img, model)

            json.dump({img_source: tags}, f)
            f.write(',\n')
            # Flush per record so partial results survive an interrupted run.
            f.flush()

            print(img_source, ": ", tags)

    print('Tags generated and saved to', output_path)


In [None]:
# Run RAM++ tagging over the loaded benchmark images. This loads the model
# checkpoint and overwrites the tags file whose path is set inside
# generate_image_tags, so re-running this cell regenerates all results.
generate_image_tags(dataset)