In [None]:
import pandas as pd

# Load the uploaded CSV file
df = pd.read_csv('your directory/dataset.csv')

# Add the output columns that the processing loop fills in row by row.
# `sd_image` holds base64 text, so it starts as an empty string. The score
# columns are numeric, so they start as NaN (float dtype): unprocessed rows
# are then skipped by mean/median/std instead of mixing strings with floats.
columns_to_add = ["sd_image", "sd_ITA", "sd_IEA", "PickScore", "Asthetic"]
text_columns = {"sd_image"}
for column in columns_to_add:
    df[column] = "" if column in text_columns else float("nan")

df.head()

In [None]:
!pip install diffusers
!pip install torchmetrics
!pip install accelerate
!pip install git+https://github.com/openai/CLIP.git
!pip install open-clip-torch

In [None]:
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch
from torchmetrics.functional.multimodal import clip_score
from functools import partial
from PIL import Image
import io
import base64
from huggingface_hub import notebook_login
import IPython.display as display
import cv2
import numpy as np
from io import BytesIO
from transformers import AutoProcessor, AutoModel
import clip
import os
import torch.nn as nn
from os.path import expanduser
from urllib.request import urlretrieve

In [None]:
# Authenticate with the Hugging Face Hub via huggingface_hub.notebook_login;
# required to get access to the Stable Diffusion model.
notebook_login()

In [None]:
# Import and initialize the Stable Diffusion model.
# Pick the device first so the pipeline and every later model share it;
# the pipeline no longer assumes that a GPU is present.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "stabilityai/stable-diffusion-2-1"
# Half precision is only requested on GPU; on CPU fall back to float32
# (assumption: float16 CPU inference is unsupported or very slow -- TODO confirm).
sd_dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=sd_dtype)
# Replace the default scheduler with DPM-Solver++ built from the same config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)

def numpy_to_base64(image_np):
    """Encode an RGB image array as a base64 string of its JPEG bytes.

    Args:
        image_np: NumPy array holding the image; it is cast to uint8 and
            interpreted as an "RGB" image.

    Returns:
        str: base64 text of the JPEG-encoded image.
    """
    pixels = image_np.astype("uint8")
    with BytesIO() as jpeg_buffer:
        Image.fromarray(pixels, "RGB").save(jpeg_buffer, format="JPEG")
        jpeg_bytes = jpeg_buffer.getvalue()
    return base64.b64encode(jpeg_bytes).decode()

def generate_image(prompt):
    """Run the Stable Diffusion pipeline on a text prompt.

    Args:
        prompt: text passed to the global ``pipe`` pipeline.

    Returns:
        numpy.ndarray: the first image of the pipeline output, converted
        with ``np.array``.
    """
    # Inference only: gradients are not needed.
    with torch.no_grad():
        result = pipe(prompt)
    first_image = result.images[0]
    return np.array(first_image)

# CLIP score metric pre-bound (via functools.partial) to the
# "openai/clip-vit-base-patch16" checkpoint; used by calculate_clip_score.
clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")
def calculate_clip_score(image, prompt):
    """Return the CLIP score between one image and one text prompt.

    Args:
        image: NumPy image array laid out as (height, width, channels).
            Integer arrays are taken to already hold 0-255 pixel values;
            floating-point arrays are taken to be in [0, 1] and are rescaled
            to 0-255.
        prompt: text to compare against the image.

    Returns:
        float: the value returned by ``clip_score_fn``, rounded to 4 decimals.
    """
    pixels = np.asarray(image)
    if np.issubdtype(pixels.dtype, np.floating):
        # Float images: scale [0, 1] up to the 0-255 range, clipping stray
        # values so the uint8 cast below cannot wrap around.
        pixels = np.clip(pixels * 255, 0, 255)
    # Integer images (e.g. np.array of a PIL image) are used as-is.
    # Multiplying a uint8 array by 255 would overflow modulo 256 and score a
    # corrupted image.
    pixels_uint8 = np.ascontiguousarray(pixels.astype("uint8"))
    # Add a batch axis and move channels first: (1, C, H, W).
    batch = torch.from_numpy(pixels_uint8).unsqueeze(0).permute(0, 3, 1, 2)
    score = clip_score_fn(batch, [prompt]).detach()
    return round(float(score), 4)

# PickScore setup: inputs are prepared by the processor of the LAION CLIP
# ViT-H/14 checkpoint, the scoring weights come from PickScore_v1. The model
# is switched to eval mode and moved to `device`.
processor = AutoProcessor.from_pretrained("laion/CLIP-ViT-H-14-laion2B-s32B-b79K")
model = AutoModel.from_pretrained("yuvalkirstain/PickScore_v1")
model = model.eval().to(device)

def calc_prob(prompt, image):
    """Score how well ``image`` matches ``prompt`` with the PickScore model.

    Both inputs go through the global ``processor``; their embeddings are
    L2-normalised and the text/image dot product is multiplied by the
    exponentiated ``model.logit_scale``. Despite the name, the result is that
    scaled similarity, not a probability.

    Args:
        prompt: text prompt.
        image: image handed to ``processor(images=...)``.

    Returns:
        float: the scaled text-image similarity.
    """
    processor_options = dict(padding=True, truncation=True, max_length=77, return_tensors="pt")
    image_batch = processor(images=image, **processor_options).to(device)
    text_batch = processor(text=prompt, **processor_options).to(device)

    with torch.no_grad():
        # Embed both modalities, then project onto the unit sphere.
        image_features = model.get_image_features(**image_batch)
        text_features = model.get_text_features(**text_batch)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        similarity = (text_features @ image_features.T)[0]
        scaled_similarity = model.logit_scale.exp() * similarity
    return scaled_similarity.item()

def get_aesthetic_model(clip_model="vit_l_14"):
    """Load the linear aesthetic-score head for a CLIP embedding size.

    The weights are cached under ``~/.cache/emb_reader`` and downloaded from
    the LAION-AI/aesthetic-predictor GitHub repository on first use.

    Args:
        clip_model: ``"vit_l_14"`` (768-dim input) or ``"vit_b_32"``
            (512-dim input).

    Returns:
        torch.nn.Linear: the head with loaded weights, in eval mode.

    Raises:
        ValueError: if ``clip_model`` is not one of the supported names.
    """
    embedding_dims = {"vit_l_14": 768, "vit_b_32": 512}
    # Validate before touching disk or network, so a typo fails immediately
    # instead of after a failed download attempt.
    if clip_model not in embedding_dims:
        raise ValueError(
            f"Unsupported clip_model {clip_model!r}; expected one of {sorted(embedding_dims)}"
        )

    cache_folder = os.path.join(expanduser("~"), ".cache", "emb_reader")
    path_to_model = os.path.join(cache_folder, "sa_0_4_" + clip_model + "_linear.pth")
    if not os.path.exists(path_to_model):
        os.makedirs(cache_folder, exist_ok=True)
        url_model = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_"+clip_model+"_linear.pth?raw=true"
        )
        # Download to a temporary name and rename afterwards, so an
        # interrupted download never leaves a truncated file in the cache.
        partial_path = path_to_model + ".part"
        urlretrieve(url_model, partial_path)
        os.replace(partial_path, path_to_model)

    m = nn.Linear(embedding_dims[clip_model], 1)
    # Load onto CPU regardless of the device the checkpoint was saved from.
    s = torch.load(path_to_model, map_location="cpu")
    m.load_state_dict(s)
    m.eval()
    return m

# Aesthetic-score head for ViT-L/14 embeddings, kept in eval mode for inference
amodel = get_aesthetic_model(clip_model="vit_l_14").eval()

def calculate_aesthetic_score(pil_image):
    """Predict the aesthetic score of an image.

    The image is run through the global ``preprocess`` transform and
    ``c_model.encode_image``; the embedding is L2-normalised and passed to
    the ``amodel`` head.

    Args:
        pil_image: image accepted by ``preprocess``.

    Returns:
        float: the scalar prediction of ``amodel``.
    """
    batch = preprocess(pil_image).unsqueeze(0)  # add a batch dimension
    with torch.no_grad():
        embedding = c_model.encode_image(batch)
        unit_embedding = embedding / embedding.norm(dim=-1, keepdim=True)
        score = amodel(unit_embedding)
    return score.item()

import open_clip
# CLIP ViT-L/14 (OpenAI weights) image encoder and its preprocessing transform,
# used by calculate_aesthetic_score.
c_model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
# The encoder is only used for inference, so put it in eval mode explicitly
# (the open_clip README notes that models are created in train mode).
# Assigning the result keeps the model repr out of the cell output.
c_model = c_model.eval()

In [None]:
# Iterate over the DataFrame: generate one image per row, score it, and
# checkpoint the results after every row.
checkpoint_path = 'your directory/dataset.csv'
for index, row in df.iterrows():
    print(index)
    emotion = row['emotion']
    utterance = row['utterance']

    # Image generation (kept both as an array and as a PIL image)
    image = generate_image(utterance)
    generated_image_pil = Image.fromarray(image.astype("uint8"), "RGB")
    df.at[index, 'sd_image'] = numpy_to_base64(image)

    # CLIP score calculation: image vs. utterance and image vs. emotion
    sd_ITA = calculate_clip_score(image, utterance)
    sd_IEA = calculate_clip_score(image, emotion)
    df.at[index, 'sd_ITA'] = sd_ITA
    df.at[index, 'sd_IEA'] = sd_IEA

    # PickScore calculation
    pick_score = calc_prob(utterance, generated_image_pil)
    df.at[index, 'PickScore'] = pick_score

    # Aesthetic score calculation
    aesthetic_score = calculate_aesthetic_score(generated_image_pil)
    df.at[index, 'Asthetic'] = aesthetic_score

    # Checkpoint. The target is also the input dataset, so write to a
    # temporary file and swap it in with os.replace: an interruption in the
    # middle of the write can then no longer truncate the only copy.
    checkpoint_tmp_path = checkpoint_path + '.tmp'
    df.to_csv(checkpoint_tmp_path, index=False)
    os.replace(checkpoint_tmp_path, checkpoint_path)

In [None]:
# Summary statistics (mean, median, standard deviation) of the four scores.
# The score columns may hold non-numeric placeholders for unprocessed rows
# (e.g. empty strings), so coerce to numeric first; such cells become NaN and
# are skipped by the statistics.
score_columns = ["sd_ITA", "sd_IEA", "PickScore", "Asthetic"]
score_values = df[score_columns].apply(pd.to_numeric, errors="coerce")
score_summary = score_values.agg(["mean", "median", "std"])

# mean
sd_ITA_average, sd_IEA_average, PickScore_average, Asthetic_average = score_summary.loc["mean"]

# median
sd_ITA_median, sd_IEA_median, PickScore_median, Asthetic_median = score_summary.loc["median"]

# std
sd_ITA_std, sd_IEA_std, PickScore_std, Asthetic_std = score_summary.loc["std"]

# print result
# Labels are Korean: 평균 = mean, 중앙값 = median, 표준편차 = standard deviation.
# The standard-deviation rows for PickScore and Asthetic were previously
# mislabelled as median.
for stat_name, label in (("mean", "평균"), ("median", "중앙값"), ("std", "표준편차")):
    for column in score_columns:
        print(f"{column} {label}:", score_summary.loc[stat_name, column])