## Test Split Images

In [1]:
import numpy as np
import pandas as pd

import os
import shutil

In [2]:
# Root of the WikiArt download; the copy step below looks for one sub-folder
# per art style underneath it.
parent_directory = 'E:/Pitt/Spring 2024/CS 2002/wikiart/wikiart'
# Flat destination folder that receives the copied TEST-split images.
new_folder_path = 'E:/Pitt/Spring 2024/CS 2002/wikiart/CS 2756/v3_Test' 
# exist_ok=True keeps this cell safe to re-run.
os.makedirs(new_folder_path, exist_ok=True)

In [3]:
# Load the split table; the cells below read its 'split', 'painting' and
# 'art_style' columns.
test_df =  pd.read_csv('E:/Pitt/Spring 2024/CS 2002/final-splits.csv')

In [4]:
# Keep only the rows assigned to the TEST split.
is_test_row = test_df['split'] == 'TEST'
sampled_df = test_df.loc[is_test_row]

In [6]:
 
# Module-level logs of every (art_style, painting) pair the copy routine visits.
folder = []
painting = []


def copy_sampled_images(sampled_df, parent_directory, new_folder_path):
    """Copy each painting listed in ``sampled_df`` into ``new_folder_path``.

    For every row, the image is looked up as
    ``<parent_directory>/<art_style>/<painting><ext>``, trying ``.jpg``,
    ``.jpeg`` and ``.png`` in that order; the first existing file is copied.
    Rows whose image cannot be found are reported with ``print``.

    Side effect: each row's ``art_style`` and ``painting`` values are appended
    to the module-level ``folder`` and ``painting`` lists.
    """
    candidate_suffixes = ('.jpg', '.jpeg', '.png')

    for _, record in sampled_df.iterrows():
        title = record['painting']
        style = record['art_style']
        folder.append(style)
        painting.append(title)

        style_dir = os.path.join(parent_directory, style)

        for suffix in candidate_suffixes:
            source_file = os.path.join(style_dir, title + suffix)
            if os.path.exists(source_file):
                shutil.copy(source_file, new_folder_path)
                break
        else:
            # The loop finished without a break: no extension matched.
            print(f"Painting not found: {title} in {style_dir}")

# Copy every TEST-split painting into the flat destination folder; paintings
# that cannot be located are printed by the function.
copy_sampled_images(sampled_df, parent_directory, new_folder_path)

In [7]:
# Number of rows in the TEST split.
len(sampled_df)

3800

In [8]:
# Number of distinct painting names; if it equals the row count above, each
# TEST painting appears exactly once.
len(sampled_df['painting'].unique())

3800

In [1]:
import torch 
import gc

# Collect unreachable Python objects first so any GPU tensors they still hold
# are handed back to PyTorch's caching allocator, then release the cached
# blocks to the driver. In the opposite order, memory freed by the collector
# would stay cached.
gc.collect()
torch.cuda.empty_cache()
# Displayed as the cell output: whether a CUDA device is usable.
torch.cuda.is_available()

True

In [2]:
import numpy as np
from pkg_resources import packaging

# Record the PyTorch version used for this run.
print("Torch version:", torch.__version__)

Torch version: 2.2.1


In [3]:
import clip

# List the model names accepted by clip.load().
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [4]:
# Load CLIP ViT-B/32 together with its matching image transform, move it to
# the GPU and switch to inference mode.
model, preprocess = clip.load("ViT-B/32")
model.cuda().eval()

input_resolution = model.visual.input_resolution
context_length = model.context_length
vocab_size = model.vocab_size

# Total number of scalar weights across all parameter tensors.
total_parameters = sum(weights.numel() for weights in model.parameters())

print("Model parameters:", f"{total_parameters:,}")
for caption, value in (
    ("Input resolution:", input_resolution),
    ("Context length:", context_length),
    ("Vocab size:", vocab_size),
):
    print(caption, value)

Model parameters: 151,277,313
Input resolution: 224
Context length: 77
Vocab size: 49408


In [5]:
# Show the image transform pipeline returned by clip.load().
preprocess

Compose(
    Resize(size=224, interpolation=bicubic, max_size=None, antialias=True)
    CenterCrop(size=(224, 224))
    <function _convert_image_to_rgb at 0x0000024BD5DFE430>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)

## Setting up input images and texts

In [6]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from torchvision import transforms
import pickle

%matplotlib inline
%config InlineBackend.figure_format = 'retina'


# Folder holding the TEST-split images to classify.
images_dir = 'E:/Pitt/Spring 2024/CS 2002/wikiart/CS 2756/v3_Test' 
# Split table; later cells read its 'painting' and 'emotion' columns to build
# the ground-truth labels.
descriptions_df = pd.read_csv('E:/Pitt/Spring 2024/CS 2002/final-splits.csv')

In [7]:
# Emotion classes; a class id used elsewhere is an index into this list.
ARTEMIS_EMOTIONS = [
    'excitement-amusement',
    'awe',
    'contentment',
    'disgust-anger',
    'fear',
    'sadness',
    'something else',
]

# One text prompt per class, in the same order as ARTEMIS_EMOTIONS.
emotions = [f"A picture making me feel {label}" for label in ARTEMIS_EMOTIONS]

In [8]:

# Count the image files (.png / .jpg / .jpeg) present in the test folder.
image_entries = [entry for entry in os.listdir(images_dir) if entry.endswith(('.png', '.jpg', '.jpeg'))]
total_images = len(image_entries)
print(total_images)

3800


In [9]:
import torch
import numpy as np
from PIL import Image
import os

# List the directory once and derive both lists from that single listing so
# they are guaranteed to share the same order.
# NOTE(review): only .png/.jpg are selected here, while the copy step and the
# image count also accept .jpeg; confirm no .jpeg files exist in images_dir,
# otherwise they are silently left out of inference.
image_filenames = [filename for filename in os.listdir(images_dir) if filename.endswith((".png", ".jpg"))]
# Flat list of painting names (file name without extension), one per image.
# This was previously wrapped in an extra list, yielding [[...]].
image_filenames_ext = [os.path.splitext(filename)[0] for filename in image_filenames]

def process_in_batches(image_filenames, images_dir, text_tokens, model, preprocess, batch_size=32):
    """Score images against the text prompts in mini-batches.

    Args:
        image_filenames: File names (relative to ``images_dir``) to classify.
        images_dir: Directory containing the image files.
        text_tokens: Tokenised prompts, one row per class. They are encoded
            here, so the function no longer depends on a global
            ``text_features`` defined in another cell.
        model: Model exposing ``encode_image``, ``encode_text`` and
            ``parameters()``.
        preprocess: Callable turning an RGB PIL image into a tensor.
        batch_size: Number of images encoded per forward pass.

    Returns:
        ``(all_top_probs, all_top_labels)``: CPU tensors with one row per
        image, holding the ``k`` highest softmax probabilities and their class
        indices, where ``k = min(5, number of prompts)``. Both have zero rows
        when ``image_filenames`` is empty.
    """
    # Run on whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    with torch.no_grad():
        text_features = model.encode_text(text_tokens.to(device)).float()
        text_features /= text_features.norm(dim=-1, keepdim=True)

    # topk raises if asked for more entries than there are classes.
    top_k = min(5, text_features.shape[0])

    if len(image_filenames) == 0:
        # torch.cat rejects an empty list, so return empty results explicitly.
        return torch.empty((0, top_k)), torch.empty((0, top_k), dtype=torch.long)

    all_top_probs = []
    all_top_labels = []

    for start_idx in range(0, len(image_filenames), batch_size):
        batch_filenames = image_filenames[start_idx:start_idx + batch_size]

        # Preprocess the batch; the context manager closes each file handle.
        processed_images = []
        for filename in batch_filenames:
            with Image.open(os.path.join(images_dir, filename)) as img:
                processed_images.append(preprocess(img.convert("RGB")))
        image_input = torch.stack(processed_images).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image_input).float()
            image_features /= image_features.norm(dim=-1, keepdim=True)

            # Scaled cosine similarity to every prompt -> class probabilities.
            text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
            top_probs, top_labels = text_probs.cpu().topk(top_k, dim=-1)

        all_top_probs.append(top_probs)
        all_top_labels.append(top_labels)

    return torch.cat(all_top_probs, dim=0), torch.cat(all_top_labels, dim=0)



In [10]:


# Tokenise the emotion prompts and move them to the GPU.
text_tokens = clip.tokenize(emotions).cuda()

# Encode the prompts once and scale every embedding to unit length.
with torch.no_grad():
    raw_text_features = model.encode_text(text_tokens).float()
    text_features = raw_text_features / raw_text_features.norm(dim=-1, keepdim=True)


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


In [None]:
# Number of images encoded per forward pass.
batch_size = 32 
# Classify every test image; both results have one row per image, in the same
# order as image_filenames.
all_top_probs, all_top_labels = process_in_batches(
    image_filenames, images_dir, text_tokens, model, preprocess, batch_size=batch_size
)



In [None]:
# Spot-check one image: its ranked probabilities, then the matching class ids.
for ranked in (all_top_probs, all_top_labels):
    print(ranked[10])

In [None]:
# Derive the painting names from the exact list that was fed to the model so
# the ground-truth labels stay index-aligned with the predictions; listing the
# directory a second time does not guarantee the same order.
image_filenames_ext = [os.path.splitext(filename)[0] for filename in image_filenames]

In [None]:
# One integer class id per image, filled in by the next cell. Entries start at
# 0, which is also a valid class id (ARTEMIS_EMOTIONS[0]).
gt_label = np.zeros(len(image_filenames_ext),dtype=int)

In [None]:
# Build the painting -> emotion lookup once instead of filtering the whole
# DataFrame for every image. When a painting has several rows the last one
# wins, matching the row-by-row overwrite this replaces.
emotion_by_painting = dict(zip(descriptions_df['painting'], descriptions_df['emotion']))

missing_paintings = [name for name in image_filenames_ext if name not in emotion_by_painting]
if missing_paintings:
    # Leaving these at the zero-initialised label would silently score them as
    # ARTEMIS_EMOTIONS[0], so fail loudly instead.
    raise ValueError(
        f"{len(missing_paintings)} image(s) have no row in descriptions_df, "
        f"e.g. {missing_paintings[:5]}"
    )

for i, painting in enumerate(image_filenames_ext):
    gt_label[i] = ARTEMIS_EMOTIONS.index(emotion_by_painting[painting])

In [None]:
# Ground-truth emotion name of the first image.
ARTEMIS_EMOTIONS[gt_label[0]]

In [None]:
# NumPy copy of the ranked probabilities for inspection.
top_probs_cpu = all_top_probs.cpu().numpy()
print(top_probs_cpu[0])
# NOTE(review): this argmax is a position within the top-k row, not a class
# id; the rows come from topk(), which presumably returns them sorted
# descending, so this would always print 0. The predicted class id is
# all_top_labels[0][0]. Confirm what was intended here.
print(np.argmax(top_probs_cpu[0]))  

In [None]:
import numpy as np
# Top-k accuracy: a prediction counts as correct when the ground-truth class
# appears anywhere in that image's ranked labels.
correct_predictions = sum(
    1 for truth, ranked in zip(gt_label, all_top_labels) if truth in ranked
)

accuracy = correct_predictions / len(gt_label)

print(f'Accuracy: {accuracy:.2f}')


In [None]:

# Top-1 accuracy: only the highest-ranked label of each image is compared with
# the ground truth.
correct_predictions = sum(
    1 for truth, ranked in zip(gt_label, all_top_labels) if truth == ranked[0]
)

accuracy = correct_predictions / len(gt_label)

print(f'Accuracy: {accuracy:.2f}')


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

# Highest-ranked class id for every image.
max_pred = all_top_labels[:, 0]

# Pass every class id explicitly so the matrix always has one row and column
# per emotion, even for classes that never occur.
class_ids = np.arange(len(ARTEMIS_EMOTIONS))
cm = confusion_matrix(gt_label, max_pred, labels=class_ids)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Raw-count confusion matrix: rows are actual emotions, columns are predictions.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=ARTEMIS_EMOTIONS,
    yticklabels=ARTEMIS_EMOTIONS,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()


In [None]:
import plotly.figure_factory as ff

# Row-normalise so every row shows how one actual class was distributed over
# the predicted classes.
row_totals = cm.sum(axis=1, keepdims=True)
# Divide only where the row is non-empty: classes with no samples stay all-zero
# instead of producing 0/0 NaNs (and a RuntimeWarning) that need patching later.
cm_normalized = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

fig = ff.create_annotated_heatmap(
    z=cm_normalized,
    x=ARTEMIS_EMOTIONS,
    y=ARTEMIS_EMOTIONS,
    colorscale='Blues',
    annotation_text=np.around(cm_normalized, decimals=2),
)
fig.update_layout(
    title='Confusion Matrix - Normalized',
    xaxis=dict(title='Predicted'),
    yaxis=dict(title='Actual'),
    # Extra top/left margin so the title and long class names are not clipped.
    margin=dict(t=50, l=200),
)

# Show the plot
fig.show()