# Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!tar -xf "/content/drive/MyDrive/EECS592/dev.tar" -C "/content"
!tar -xf "/content/drive/MyDrive/EECS592/test.tar" -C "/content"
!tar -xf "/content/drive/MyDrive/EECS592/train.tar" -C "/content"
!mkdir images

tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.quarantine'
tar: Ignoring unknown extended header keyword 'LIBARCHIVE.xattr.com.apple.quarantine'
mkdir: cannot create directory ‘images’: File exists


# Dataloader

In [None]:
from transformers import CLIPProcessor, CLIPModel

import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

import torch
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn as nn
from torchvision import transforms
from torchvision.utils import save_image
import torch.nn.functional as F

from tqdm import tqdm, trange


In [None]:
device = 'cpu'
if torch.backends.mps.is_available():
    device = 'mps'
if torch.cuda.is_available():
    device = 'cuda'
print(f"Using '{device}' device")

Using 'cuda' device


In [None]:
from transformers import CLIPProcessor, CLIPModel


def get_clip_features(image, preprocess, model, device):
    image = preprocess(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        features = model.get_image_features(**image)
    return features.squeeze(0)


In [None]:

class MIT5KDataset(Dataset):
    def __init__(self, root_dir, transform=None, clip_model=None, preprocess=None, device=None):
        self.root_dir = root_dir
        self.transform = transform
        self.clip_model = clip_model
        self.preprocess = preprocess
        self.device = device
        self.image_files = [f for f in os.listdir(os.path.join(root_dir, "original")) if (f.endswith('.png') and f[0]!='.')]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        original_img_path = os.path.join(self.root_dir, "original", img_name)
        expert_img_path = os.path.join(self.root_dir, "expert", img_name)

        original_image = Image.open(original_img_path).convert("RGB")
        expert_image = Image.open(expert_img_path).convert("RGB")

        if self.transform:
            original_image = self.transform(original_image)
            expert_image = self.transform(expert_image)
        diff = original_image - expert_image
        mean_diff = diff.view(-1, 3).mean(dim=0).view(1, 3, 1, 1)  # Calculate mean per channel and reshape
        original_features = None

        if self.clip_model and self.preprocess:
            original_features = get_clip_features(original_image, self.preprocess, self.clip_model, self.device)

        return original_image, expert_image, mean_diff, original_features


In [None]:

# transform = transforms.Compose([
#     transforms.Resize((256, 256)),
#     transforms.ToTensor()
# ])

# clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
# preprocess = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# dataset = MIT5KDataset(root_dir='train', transform=transform, clip_model=clip_model, preprocess=preprocess, device=device)
# dataloader = DataLoader(dataset, batch_size=4, shuffle=True)


# GAN Test

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Generator(nn.Module):
    def __init__(self, feature_dim=512):
        super(Generator, self).__init__()
        # Output three values, one for each channel in RGB
        self.fc = nn.Linear(feature_dim, 3)  # Only three outputs for RGB

    def forward(self, features):
        # Generate one RGB value per image
        rgb_values = self.fc(features)  # shape: [batch_size, 3]
        rgb_values = torch.sigmoid(rgb_values)  # Normalize to range [0, 1]

        # Expand RGB values across the spatial dimensions (e.g., 256x256)
        output_size = (features.shape[0], 3, 256, 256)  # Define the desired output size
        img = rgb_values.view(features.shape[0], 3, 1, 1)
        img = img.expand(output_size)  # Expand the single RGB value to fill the image size

        return img


In [None]:
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(6, 64, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 64, 4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 64, 4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 1, 4, padding=0),
            nn.AdaptiveAvgPool2d(1),
            nn.Sigmoid()
        )

    def forward(self, img_a, img_b):
        img_input = torch.cat((img_a, img_b), 1)
        validity = self.model(img_input)
        return validity.view(-1, 1)


In [None]:

generator = Generator().to(device)
discriminator = Discriminator().to(device)

adversarial_loss = nn.BCELoss()
loss_fn = nn.MSELoss()

optimizer_G = Adam(generator.parameters(), lr=0.0001)
optimizer_D = Adam(discriminator.parameters(), lr=0.001)

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
preprocess = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

dataset = MIT5KDataset(root_dir='train', transform=transform, clip_model=clip_model, preprocess=preprocess, device=device)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)


In [None]:
!rm -rf images
!mkdir images

In [None]:
from tqdm.notebook import tqdm
import torch
from torchvision.utils import save_image

num_epochs = 1
print_interval = 100

for epoch in tqdm(range(num_epochs), desc='Epochs'):
    for i, (imgs, experts, diff, clip_features) in enumerate(tqdm(dataloader, desc='Batches', leave=False)):
        valid = torch.ones(imgs.size(0), 1, device=device, dtype=torch.float32)
        fake = torch.zeros(imgs.size(0), 1, device=device, dtype=torch.float32)

        real_imgs = imgs.to(device)
        expert_imgs = experts.to(device)
        clip_features = clip_features.to(device)


        optimizer_D.zero_grad()

        # Train Discriminator with real and fake data
        # Pass the separate images directly to the discriminator
        real_loss = adversarial_loss(discriminator(real_imgs, expert_imgs), valid)
        gen_filter = generator(clip_features)
        synthetic_expert = real_imgs + gen_filter
        fake_loss = adversarial_loss(discriminator(expert_imgs, synthetic_expert.detach()), fake)

        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

        optimizer_G.zero_grad()

        # Train Generator to fool Discriminator
        # Pass the separate images directly to the discriminator
        g_loss = adversarial_loss(discriminator(expert_imgs, synthetic_expert), valid)
        g_loss.backward()
        optimizer_G.step()

        if i % print_interval == 0:
            tqdm.write(f"[Epoch {epoch+1}/{num_epochs}] [Batch {i+1}/{len(dataloader)}] [D loss: {d_loss.item()}] [G loss: {g_loss.item()}]")

Epochs:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/3999 [00:00<?, ?it/s]

[Epoch 1/1] [Batch 1/3999] [D loss: 0.6833770871162415] [G loss: 0.8110776543617249]
[Epoch 1/1] [Batch 101/3999] [D loss: 0.0334535613656044] [G loss: 3.9016170501708984]
[Epoch 1/1] [Batch 201/3999] [D loss: 0.03291986137628555] [G loss: 4.783243179321289]
[Epoch 1/1] [Batch 301/3999] [D loss: 0.005513912998139858] [G loss: 6.292965412139893]
[Epoch 1/1] [Batch 401/3999] [D loss: 0.023850250989198685] [G loss: 4.079901218414307]
[Epoch 1/1] [Batch 501/3999] [D loss: 0.11755391210317612] [G loss: 2.3103814125061035]
[Epoch 1/1] [Batch 601/3999] [D loss: 0.12529295682907104] [G loss: 3.3434560298919678]
[Epoch 1/1] [Batch 701/3999] [D loss: 0.07022759318351746] [G loss: 2.3368096351623535]
[Epoch 1/1] [Batch 801/3999] [D loss: 0.033492717891931534] [G loss: 3.8232979774475098]
[Epoch 1/1] [Batch 901/3999] [D loss: 0.005539760924875736] [G loss: 4.806713581085205]
[Epoch 1/1] [Batch 1001/3999] [D loss: 0.0066316272132098675] [G loss: 4.339112281799316]
[Epoch 1/1] [Batch 1101/3999] [D l

In [None]:
!zip -r images.zip images

  adding: images/ (stored 0%)
  adding: images/0_2500_expert.png (deflated 0%)
  adding: images/0_500_original.png (deflated 0%)
  adding: images/0_3300_synthetic_expert.png (deflated 0%)
  adding: images/0_3600_synthetic_expert.png (deflated 0%)
  adding: images/0_100_expert.png (deflated 0%)
  adding: images/0_1300_original.png (deflated 0%)
  adding: images/0_2700_original.png (deflated 0%)
  adding: images/0_900_expert.png (deflated 0%)
  adding: images/0_800_original.png (deflated 0%)
  adding: images/0_800_synthetic_expert.png (deflated 0%)
  adding: images/0_2700_expert.png (deflated 0%)
  adding: images/0_3900_synthetic_expert.png (deflated 0%)
  adding: images/0_2800_expert.png (deflated 0%)
  adding: images/0_1900_synthetic_expert.png (deflated 0%)
  adding: images/0_600_expert.png (deflated 0%)
  adding: images/0_900_original.png (deflated 0%)
  adding: images/0_2000_original.png (deflated 0%)
  adding: images/0_200_expert.png (deflated 0%)
  adding: images/0_3700_expert.png

In [None]:
# Save the Generator and Discriminator models
torch.save(generator.state_dict(), 'generator.pth')
torch.save(discriminator.state_dict(), 'discriminator.pth')


## dev test

In [None]:
# Load the models for testing
generator.load_state_dict(torch.load('generator.pth'))
discriminator.load_state_dict(torch.load('discriminator.pth'))
generator.eval()  # Set to evaluation mode
discriminator.eval()  # Set to evaluation mode


Discriminator(
  (model): Sequential(
    (0): Conv2d(6, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2, inplace=True)
    (2): Conv2d(64, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace=True)
    (5): Conv2d(64, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (6): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace=True)
    (8): Conv2d(64, 1, kernel_size=(4, 4), stride=(1, 1))
    (9): AdaptiveAvgPool2d(output_size=1)
    (10): Sigmoid()
  )
)

In [None]:
dev_dataset = MIT5KDataset(root_dir='dev', transform=transform, clip_model=clip_model, preprocess=preprocess, device=device)
dev_dataloader = DataLoader(dev_dataset, batch_size=1, shuffle=False)

# Testing loop
with torch.no_grad():
    for i, (dev_imgs, dev_experts, _, dev_clip_features) in enumerate(dev_dataloader):
        dev_real_imgs = dev_imgs.to(device)
        dev_clip_features = dev_clip_features.to(device)
        dev_gen_filter = generator(dev_clip_features)
        dev_synthetic_expert = dev_real_imgs + dev_gen_filter

        # Save dev images
        save_image(dev_synthetic_expert.data, f"/content/images/dev_{i}_synthetic_expert.png", nrow=1, normalize=True)
        save_image(dev_experts.data, f"/content/images/dev_{i}_expert.png", nrow=1, normalize=True)
        save_image(dev_real_imgs.data, f"/content/images/dev_{i}_original.png", nrow=1, normalize=True)


# CNN Test

In [None]:
# reference: https://www.hackersrealm.net/post/extract-features-from-image-python
# reference: https://github.com/yuukicammy/mit-adobe-fivek-dataset
#from torch.utils.data.dataloader import DataLoader
#from dataset.fivek import MITAboveFiveK
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Reshape, Conv2D, MaxPooling2D, Flatten, Input, Conv2DTranspose
import numpy as np
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import tensorflow
import matplotlib.pyplot as plt
import os
import cv2

MODEL_SAVE_PATH = 'model/'
DEV_IMAGE_DIR = 'dev/original/'

def CNN_model():
  model = Sequential([
      Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
      MaxPooling2D((2,2)),
      Conv2D(64, (3, 3), activation='relu'),
      MaxPooling2D((2,2)),
      Conv2D(64, (3, 3), activation='relu'),
      Flatten(),
      Dense(64, activation='relu'),
      Reshape((224, 224))
  ])
  return model

def fully_connected_model():
    model = Sequential([
       Dense(64, activation='relu', input_shape=(224, 224)),
       Dense(32, activation='relu'),
       Dense(4096, activation='linear'),
       Reshape((224,224))
    ])
    return model

def CNN_model_2():
    input_tensor = Input(shape=(224, 224, 3))
    conv2D1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_tensor)
    conv2D2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2D1)

    tranpose1 = Conv2DTranspose(64, (3, 3), strides=(1, 1), activation='relu', padding='same')(conv2D2)
    output_tensor = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(tranpose1)
    model = Model(inputs=input_tensor, outputs=output_tensor)
    return model

In [None]:
# read from train dataset
path_edited = "train/expert/"
path_original = "train/original/"
#VGG_model = VGG16()
#VGG_model = Model(inputs=VGG_model.inputs, outputs=VGG_model.layers[-2].output)

CLIP_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
CLIP_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
emotion_list = ["amusement", "awe", "contentment", "excitement", "anger", "disgust", "fear", "sadness"]

# {"amusement":[value1, value2], "awe":[value1, value2]}
color_shift_values = {}
original_images_dict = {}
# initialize the color_shift_values
for emotion in emotion_list:
    color_shift_values[emotion] = []
    original_images_dict[emotion] = []
edited_list = os.listdir(path_edited)
original_list = os.listdir(path_original)
num = len(original_list)
num_epochs = 10
for i in tqdm(range(100)):
    original_img = cv2.imread(path_original+original_list[i])
    original_img = cv2.resize(original_img, (224, 224))
    # classification using CLIP
    inputs_clip = CLIP_processor(text=["image evokes amusement", "image evokes awe", "image evokes contentment", "image evokes excitement", "image evokes anger", "image evokes disgust", "image evokes fear", "image evokes sadness"], images=original_img, return_tensors="pt", padding=True)
    outputs_clip = CLIP_model(**inputs_clip)
    logits_per_image = outputs_clip.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    # get the highest prob in probs list
    highest_prob = torch.max(probs)
    highest_prob_index = 0
    #print(probs)
    for i in range(8):
        if (probs[0][i].item()==highest_prob):
            highest_prob_index = i
    image_class = emotion_list[highest_prob_index]
    original_images_dict[image_class].append(original_img)

    edited_img = cv2.imread(path_edited+edited_list[i])
    edited_img = cv2.resize(edited_img, (224, 224))
    #original_img = img_to_array(original_img)
    #edited_img = img_to_array(edited_img)
    #original_color = cv2.cvtColor(original_img, cv2.COLOR_BGR2LAB)
    original_color = edited_img
    #print("original_color")
    #print(original_color)
    #edited_color = cv2.cvtColor(edited_img, cv2.COLOR_BGR2LAB)
    edited_color = edited_img
    #print("edited_color")
    #print(edited_color)
    # get color_shift_value
    color_diff = cv2.subtract(edited_color, original_color)
    color_shift_values[image_class].append(color_diff)

model_dict = {}
for emotion in emotion_list:
    if(len(original_images_dict[emotion])!=0):
        #print("len")
        #print(len(original_images_dict[emotion]))
        X = np.array(original_images_dict[emotion])
        print(X.shape)
        #print(X)
        y = np.array(color_shift_values[emotion])
        print(y.shape)
        #print(y)

        # build model with fully connected layer
        model_dict[emotion] = CNN_model_2()

        # print(model_dict[emotion].summary())
        model_dict[emotion].compile(optimizer='adam', loss='mean_squared_error')
        model_dict[emotion].fit(X, y, epochs=num_epochs, batch_size=32)

for emotion, model in model_dict.items():
    model.save(f'model/{emotion}.h5')  # Save each model in an H5 file

100%|██████████| 100/100 [00:38<00:00,  2.59it/s]

(8, 224, 224, 3)
(8, 224, 224, 3)
Epoch 1/10





Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(33, 224, 224, 3)
(33, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(17, 224, 224, 3)
(17, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(5, 224, 224, 3)
(5, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(4, 224, 224, 3)
(4, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(3, 224, 224, 3)
(3, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(17, 224, 224, 3)
(17, 224, 224, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
(13, 224, 224, 3)
(13, 2

## dev test

In [None]:
from tensorflow.keras.models import load_model as keras_load_model

def load_emotion_models(emotion_list, model_dir='model/'):

    model_dict = {}
    for emotion in emotion_list:
        try:
            model_path = os.path.join(model_dir, f'{emotion}.h5')
            model_dict[emotion] = keras_load_model(model_path)
            print(f"Loaded model for {emotion} from {model_path}.")
        except Exception as e:
            print(f"Error loading model for {emotion}: {e}")
    return model_dict


In [None]:
!rm -rf dev/gen/

In [None]:
DEV_IMAGE_DIR = 'dev/original/'
GEN_IMAGE_DIR = 'dev/gen/'

model_dict = load_emotion_models(emotion_list)

# Ensure the generated images directory exists
if not os.path.exists(GEN_IMAGE_DIR):
    os.makedirs(GEN_IMAGE_DIR)

# Process each image in the dev/original directory
for filename in os.listdir(DEV_IMAGE_DIR):
    file_path = os.path.join(DEV_IMAGE_DIR, filename)
    img = cv2.imread(file_path)
    img_resized = cv2.resize(img, (224, 224))
    print(filename)
    #cv2.imwrite(output_path, img_resized)

    # Classify image using CLIP model
    inputs_clip = CLIP_processor(text=[f"image evokes {emotion}" for emotion in emotion_list], images=img_resized, return_tensors="pt", padding=True)
    outputs_clip = CLIP_model(**inputs_clip)
    logits_per_image = outputs_clip.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    highest_prob_index = probs.argmax().item()
    image_class = emotion_list[highest_prob_index]

    # Use the corresponding model to generate the modified image
    if image_class in model_dict:
        #img_transformed = model_dict[image_class].predict(np.expand_dims(img_resized, axis=0))[0]
        # Convert back to BGR for saving with OpenCV
        #img_transformed = cv2.cvtColor(img_transformed, cv2.COLOR_RGB2BGR)
        input_img = np.expand_dims(img_resized, axis=0)
        predicted_color_adjustment_value = model_dict[image_class].predict(input_img)
        #print(predicted_color_adjustment_value)
        #test_color = cv2.cvtColor(img_resized, cv2.COLOR_BGR2LAB)
        test_color = img_resized
        predicted_color_adjustment_value = np.reshape(predicted_color_adjustment_value, (1, 224, 224, 3))
        print("test_color")
        print(test_color)
        print(test_color.shape)
        print("predicted_color_adjustment_value")
        print(predicted_color_adjustment_value[0])
        print(predicted_color_adjustment_value[0].shape)
        edited_color = test_color+(predicted_color_adjustment_value[0])
        #edited_color = np.clip(edited_color, 0, 255)
        print("edited_color")
        print(edited_color)
        new_edited_img = cv2.cvtColor(test_color, cv2.COLOR_LAB2BGR)
        output_path = os.path.join(GEN_IMAGE_DIR, filename)
        cv2.imwrite(output_path, edited_color)

print("Image processing completed. Modified images are saved in 'dev/gen'.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  ...
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [2.13669447e-34 0.00000000e+00 1.17174414e-37]]

 [[0.00000000e+00 0.00000000e+00 3.66463872e-21]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  ...
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [1.77301577e-29 0.00000000e+00 1.82003800e-31]]

 [[2.14847719e-22 5.66178189e-21 5.65319180e-10]
  [3.43357991e-32 0.00000000e+00 3.17201437e-20]
  [4.98409354e-36 0.00000000e+00 2.29470270e-25]
  ...
  [3.08678115e-37 0.00000000e+00 3.35481198e-30]
  [5.84520460e-29 0.00000000e+00 4.76061927e-25]
  [1.63722761e-14 3.28976696e-34 2.36028460e-18]]]
(224, 224, 3)
edited_color
[[[52. 64. 61.]
  [55. 66. 65.]
  [49. 68. 62.]
  ...
  [55. 68. 65.]
  [51. 64. 57.]
  [56. 69. 63.]]

 [[51. 61. 58.]
  [46. 62. 55.]
  [48. 60. 57

# Evaluation

In [None]:
!pip install kornia


Collecting kornia
  Downloading kornia-0.7.2-py2.py3-none-any.whl (825 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.4/825.4 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting kornia-rs>=0.1.0 (from kornia)
  Downloading kornia_rs-0.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m66.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.9.1->kornia)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.9.1->kornia)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.9.1->kornia)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.

In [None]:
from typing import List
import numbers

import kornia as K
from kornia.geometry import resize
import numpy as np
import torch
import math
from kornia.filters import get_gaussian_kernel2d
from kornia.metrics import ssim, psnr
from torch import nn
import torch.nn.functional as F
from scipy.ndimage import gaussian_filter
from scipy.signal import get_window
from skimage.metrics import structural_similarity as scipy_ssim

In [None]:
gen_dir = 'dev/gen/'
expert_dir = 'dev/expert/'

gen_images = sorted(os.listdir(gen_dir))
expert_images = sorted(os.listdir(expert_dir))
total_kornia_ssim, total_scipy_ssim, total_kornia_psnr = 0.0, 0.0, 0.0
new_size = (224, 224)
for gen_img, exp_img in zip(gen_images, expert_images):
    gen_path = os.path.join(gen_dir, gen_img)
    exp_path = os.path.join(expert_dir, exp_img)

    origin = resize(K.io.load_image(exp_path, K.io.ImageLoadType.RGB32)[None, ...], new_size)
    adjusted = resize(K.io.load_image(gen_path, K.io.ImageLoadType.RGB32)[None, ...], new_size)

    img1_np = np.moveaxis(origin.cpu().detach().numpy().squeeze(), 0, 2)
    img2_np = np.moveaxis(adjusted.cpu().detach().numpy().squeeze(), 0, 2)

    # Compute SSIM and PSNR
    k_ssim_value = torch.mean(ssim(origin, adjusted, window_size=11)).item()
    sc_ssim_value = scipy_ssim(im1=img1_np, im2=img2_np, gaussian_weights=True, data_range=1.0, use_sample_covariance=False, channel_axis=2)
    k_psnr_value = psnr(origin, adjusted, max_val=2.).item()

    total_kornia_ssim += k_ssim_value
    total_scipy_ssim += sc_ssim_value
    total_kornia_psnr += k_psnr_value

avg_kornia_ssim = total_kornia_ssim / 500
avg_scipy_ssim = total_scipy_ssim / 500
avg_kornia_psnr = total_kornia_psnr / 500

print(f"kornia ssim: {avg_kornia_ssim}")
print(f"scipy ssim: {avg_scipy_ssim}")


kornia ssim: 0.7686039545238018
scipy ssim: 0.7697984846234321


In [None]:
print(f"kornia psnr: {avg_kornia_psnr}")

kornia psnr: 23.906390384674072


# Test Data Generation

In [None]:
import os
import cv2
import numpy as np
import torch

TEST_IMAGE_DIR = 'test/original/'
GENERATED_IMAGE_DIR = 'test/gen/'

if not os.path.exists(GENERATED_IMAGE_DIR):
    os.makedirs(GENERATED_IMAGE_DIR)

for filename in os.listdir(TEST_IMAGE_DIR):
    file_path = os.path.join(TEST_IMAGE_DIR, filename)
    img = cv2.imread(file_path)
    if img is None:
        continue
    img_resized = cv2.resize(img, (224, 224))

    inputs_clip = CLIP_processor(text=[f"image evokes {emotion}" for emotion in emotion_list], images=img_resized, return_tensors="pt", padding=True)
    outputs_clip = CLIP_model(**inputs_clip)
    logits_per_image = outputs_clip.logits_per_image
    probs = logits_per_image.softmax(dim=1)
    highest_prob_index = probs.argmax().item()
    image_class = emotion_list[highest_prob_index]

    # Predict the transformation using the appropriate model
    if image_class in model_dict:
        img_transformed = model_dict[image_class].predict(np.expand_dims(img_resized, axis=0))[0]
        img_transformed = (img_transformed * 255).astype(np.uint8)  # Scale output to 0-255 range for saving
        output_path = os.path.join(GENERATED_IMAGE_DIR, filename)
        cv2.imwrite(output_path, img_transformed)
        print(f"Processed and saved: {output_path}")

print("All images have been processed and saved in the test/gen directory.")


Processed and saved: test/gen/a0535-jmac_MG_6029.png
Processed and saved: test/gen/a1852-_DSC8964.png
Processed and saved: test/gen/a3065-jmac_DSC0967.png
Processed and saved: test/gen/a3170-IMG_0123.png
Processed and saved: test/gen/a1354-IMG_8018.png
Processed and saved: test/gen/a3688-jmac_MG_1424.png
Processed and saved: test/gen/a1909-KE_-0029-2.png
Processed and saved: test/gen/a1896-kme_558.png
Processed and saved: test/gen/a4562-_MG_7033.png
Processed and saved: test/gen/a4644-Duggan_090214_5136.png
Processed and saved: test/gen/a4349-DSC_0395.png
Processed and saved: test/gen/a3480-dgw_151.png
Processed and saved: test/gen/a3527-IMG_4244.png
Processed and saved: test/gen/a0037-jmacAlgarve_Sagres_07.png
Processed and saved: test/gen/a0464-NKIM_130.png
Processed and saved: test/gen/a0010-jmac_MG_4807.png
Processed and saved: test/gen/a3242-20080623_at_15h18m22__MG_9919.png
Processed and saved: test/gen/a3283-IMG_1370.png
Processed and saved: test/gen/a3156-20080514_101818__MG_98