In [1]:
import pandas as pd
import os

# Read the combined per-image Artemis CSV, then keep only the rows whose
# `split` column equals 'test'.
artemis_csv_path = "../data/artemis-v2/dataset/combined/train/artemis_per_image_weighted.csv"
artemis_df = pd.read_csv(artemis_csv_path)
test_df = artemis_df.loc[artemis_df['split'] == 'test']

In [2]:
from EmotionCLIP import model, preprocess, tokenizer
from PIL import Image
import torch
import matplotlib.pyplot as plt
from torch.nn import functional as F

# Paths of the images to classify, one entry per row of `test_df`
image_files = test_df['image_path'].tolist()

# Emotion label mapping: emotion name -> class index
consist_json = {
    'amusement': 0,
    'anger': 1,
    'awe': 2,
    'contentment': 3,
    'disgust': 4,
    'excitement': 5,
    'fear': 6,
    'sadness': 7,
    'something else': 8
}
# Inverse mapping: class index -> emotion name
reversal_json = {v: k for k, v in consist_json.items()}
# One text prompt per emotion, in dict insertion order. The loop below looks up
# the top-scoring index in `reversal_json`, so prompt order must match the
# class indices above.
text_list = [f"This picture conveys a sense of {key}" for key in consist_json.keys()]
text_input = tokenizer(text_list)
# The prompts are identical for every image, so move them to the model's
# device once instead of on every loop iteration.
# NOTE(review): the recorded run of this notebook ended with
# "Torch not compiled with CUDA enabled"; presumably `model.device` (or the
# EmotionCLIP import itself) targets CUDA on a CPU-only torch build - confirm.
text_input_on_device = text_input.to(device=model.device)

# Create subplots
num_images = len(image_files)
rows = 3  # 3 rows
cols = 3  # 3 columns
# The grid has only rows * cols axes; showing more images than that would
# raise IndexError on `axes[idx]`, so cap the number of images displayed.
num_shown = min(num_images, rows * cols)
fig, axes = plt.subplots(rows, cols, figsize=(15, 10))  # Adjust the canvas size
axes = axes.flatten()  # Flatten the subplots to a 1D array
title_fontsize = 20

# Iterate through the images that fit in the grid
for idx, img_path in enumerate(image_files[:num_shown]):
    # Load image. convert() returns an in-memory copy, so the underlying file
    # handle can be closed as soon as the `with` block exits.
    with Image.open(img_path) as img_file:
        img = img_file.convert("RGB")
    img_input = preprocess(img)

    # Predict emotion: add a batch dimension, score the image against every
    # prompt, and take the highest-probability class.
    with torch.no_grad():
        logits_per_image, _ = model(
            img_input.unsqueeze(0).to(device=model.device, dtype=model.dtype),
            text_input_on_device,
        )
    softmax_logits_per_image = F.softmax(logits_per_image, dim=-1)
    top_k_values, top_k_indexes = torch.topk(softmax_logits_per_image, k=1, dim=-1)
    predicted_emotion = reversal_json[top_k_indexes.item()]

    # Display image and prediction result
    ax = axes[idx]
    ax.imshow(img)
    ax.set_title(f"Predicted: {predicted_emotion}", fontsize=title_fontsize)
    ax.axis('off')

# Hide any subplots that did not receive an image
for idx in range(num_shown, rows * cols):
    axes[idx].axis('off')

plt.tight_layout()
plt.show()


AssertionError: Torch not compiled with CUDA enabled