In [None]:
from PIL import Image
import os
import io
import numpy as np
import torch
import torchvision
import das.rewards as rewards
import csv

aesthetic_fn = rewards.aesthetic_score(torch_dtype = torch.float32, device = 'cuda')
hps_fn = rewards.hps_score(inference_dtype = torch.float32, device = 'cuda')
imagereward = rewards.ImageReward(inference_dtype = torch.float32, device = 'cuda')
pick_fn = rewards.PickScore(inference_dtype = torch.float32, device = 'cuda')
clip_fn = rewards.clip_score(inference_dtype = torch.float32, device = 'cuda')

ModuleNotFoundError: No module named 'fairscale'

In [4]:
import os
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from tqdm import tqdm
import numpy as np
from scipy.spatial.distance import pdist
import csv
import lpips
from torchvision import transforms

# Load the CLIP model and processor (using openai/clip-vit-large-patch14)
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Load LPIPS model
lpips_model = lpips.LPIPS(net='alex')

# Device configuration
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
lpips_model = lpips_model.to(device)

# Image preprocessing function
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    return processor(images=image, return_tensors="pt")['pixel_values'].squeeze(0)

# Function to preprocess image for LPIPS
def preprocess_image_lpips(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0)

# Function to calculate CLIP-based metrics and LPIPS
def calculate_metrics(image_folder, K=20):
    image_folder = os.path.join(image_folder, "eval_vis")
    embeddings = []
    lpips_images = []
    image_files = [os.path.join(image_folder, file) for file in os.listdir(image_folder) if (file.endswith(('png', 'jpg', 'jpeg')) and not "ess" in file and not "intermediate_rewards" in file)]

    if len(image_files) == 0:
        raise ValueError(f"No images found in the folder: {image_folder}")

    # Preprocess images and compute embeddings
    for image_path in tqdm(image_files):
        try:
            # For CLIP
            pixel_values = preprocess_image(image_path).unsqueeze(0).to(device)
            with torch.no_grad():
                embedding = model.get_image_features(pixel_values).cpu().numpy().squeeze()
            embeddings.append(embedding)

            # For LPIPS
            lpips_image = preprocess_image_lpips(image_path).to(device)
            lpips_images.append(lpips_image)
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")
            continue

    embeddings = np.array(embeddings)

    if len(embeddings) == 0:
        raise ValueError("No embeddings were generated. Please check your images and preprocessing steps.")
    
    # ---- Calculate Mean Pairwise Distance (CLIP-based) ----
    pairwise_distances = pdist(embeddings, metric='cosine')
    mean_distance = np.mean(pairwise_distances)
    num_distances = pairwise_distances.size
    std_error = np.std(pairwise_distances) / np.sqrt(num_distances)
    
    # ---- Calculate Truncated CLIP Entropy (TCE) ----
    covariance_matrix = np.cov(embeddings, rowvar=False)
    eigenvalues = np.linalg.eigvalsh(covariance_matrix)[-K:]
    TCE_K = (K / 2) * np.log(2 * np.pi * np.e) + (1 / 2) * np.sum(np.log(eigenvalues))
    
    # ---- Calculate LPIPS-based diversity ----
    lpips_distances = []
    num_images = len(lpips_images)
    for i in range(num_images):
        for j in range(i+1, num_images):
            with torch.no_grad():
                distance = lpips_model(lpips_images[i], lpips_images[j]).item()
            lpips_distances.append(distance)
    
    mean_lpips = np.mean(lpips_distances)
    std_lpips = np.std(lpips_distances)
    
    return mean_distance, std_error, TCE_K, mean_lpips, std_lpips

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /home/jovyan/conda/dfs/lib/python3.10/site-packages/lpips/weights/v0.1/alex.pth


In [17]:
img_folder = "logs/SMC/aesthetic/2024.09.26_01.12.19"

In [18]:
aesthetic_score = []
hps_score = []
imagereward_score = []
pick_score = []
clip_score = []
image_names = [file for file in os.listdir(img_folder + "/eval_vis") if (file.endswith(('png', 'jpg', 'jpeg')) and not "ess" in file and not "intermediate_rewards" in file)]
for image_name in image_names:

    image_path = os.path.join(img_folder + "/eval_vis", image_name)

    image = Image.open(image_path).convert("RGB")
    image = torchvision.transforms.ToTensor()(image).unsqueeze(0).to('cuda')

    prompt = image_name.split("|")[0].split("_")[-1][:-1]
    # print(prompt)

    with torch.no_grad():
        clip_score.append(clip_fn(image, prompt).item())
        aesthetic_score.append(aesthetic_fn(image, prompt).item())
        hps_score.append(hps_fn(image, prompt).item())
        imagereward_score.append(imagereward(image, prompt).item())
        pick_score.append(pick_fn(image, prompt).item())

print(f"Finished evaluating images in {img_folder}")
print("Aesthetic score: ", np.mean(aesthetic_score))
print("Aesthetic score std: ", np.std(aesthetic_score))
print("HPS score: ", np.mean(hps_score))
print("HPS score std: ", np.std(hps_score))
print("Image reward score: ", np.mean(imagereward_score))
print("Image reward score std: ", np.std(imagereward_score))
print("Pick score: ", np.mean(pick_score))
print("Pick score std: ", np.std(pick_score))
print("Clip score: ", np.mean(clip_score))
print("Clip score std: ", np.std(clip_score))
    
# Save the results to a text file
names = ["Aesthetic score", "Aesthetic score std", "HPS score", "HPS score std",
         "Image reward score", "Image reward score std", "Pick score", "Pick score std", "CLIP score", "CLIP score std"]

values = [np.mean(aesthetic_score), np.std(aesthetic_score),
          np.mean(hps_score), np.std(hps_score),
          np.mean(imagereward_score), np.std(imagereward_score),
          np.mean(pick_score), np.std(pick_score),
          np.mean(clip_score), np.std(clip_score)]

# Format the values to 5 decimal places
formatted_values = [f"{v:.5f}" for v in values]

with open(os.path.join(img_folder, "eval_results.csv"), "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(names)
    writer.writerow(formatted_values)

Finished evaluating images in logs/SMC/aesthetic/2024.09.26_01.12.19
Aesthetic score:  7.056802809238434
Aesthetic score std:  0.3690912412318431
HPS score:  0.27003603242337704
HPS score std:  0.008796761060747487
Image reward score:  1.1079169440781698
Image reward score std:  0.5541496030510243
Pick score:  0.21461675222963095
Pick score std:  0.00884714490436561
Clip score:  0.25687733199447393
Clip score std:  0.019748996170760567


In [19]:
# Calculate metrics
try:
    mean_distance, std_error, TCE, mean_lpips, std_lpips = calculate_metrics(img_folder, K=20)
    print(f"Finished evaluating images in {img_folder}")
    print(f"Mean Pairwise Distance (CLIP-based Diversity Metric): {mean_distance}")
    print(f"Standard Error of the Distance: {std_error}")
    print(f"Truncated CLIP Entropy (TCE): {TCE}")
    print(f"Mean LPIPS Distance: {mean_lpips}")
    print(f"Standard Deviation of LPIPS Distance: {std_lpips}")

    # Save the results to a CSV file
    names = ["Mean Pairwise Distance (CLIP)", "Standard Error of the Distance (CLIP)", 
             "Truncated CLIP Entropy (TCE)", "Mean LPIPS Distance", "Std Dev LPIPS Distance"]
    values = [mean_distance, std_error, TCE, mean_lpips, std_lpips]

    # Format the values to 5 decimal places
    formatted_values = [f"{v:.5f}" for v in values]

    with open(os.path.join(img_folder, "eval_diversity_results.csv"), "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(names)
        writer.writerow(formatted_values)

except Exception as e:
    print(f"An error occurred: {e}")

100%|██████████| 16/16 [00:02<00:00,  5.85it/s]


Finished evaluating images in logs/SMC/aesthetic/2024.09.26_01.12.19
Mean Pairwise Distance (CLIP-based Diversity Metric): 0.33313817014833663
Standard Error of the Distance: 0.007745194941195248
Truncated CLIP Entropy (TCE): -42.314139664497574
Mean LPIPS Distance: 0.647677265604337
Standard Deviation of LPIPS Distance: 0.04706209534226535
