In [1]:
# Sets CUDA_VISIBLE_DEVICES=0 for this kernel process (intended to restrict CUDA to the first GPU).
# NOTE(review): presumably this must run before the first CUDA call to take effect - confirm if cells are reordered.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import os
from PIL import Image
import numpy as np
import torch
from torchvision import models, transforms
from torch.nn.functional import mse_loss
from scipy.linalg import sqrtm
import pandas as pd
from tqdm import tqdm

## Show Data

In [3]:
def highlight_best(data, dark_mode=True):
    """Return CSS styles marking the best and second-best entry of each metric.

    Intended for ``pandas`` ``Styler.apply``. A column whose lower-cased name
    contains ``'loss'`` or ``'fid'`` is treated as lower-is-better; every other
    column is treated as higher-is-better. Cells may be numbers or strings of
    the form ``"mean ±std"``; for strings only the part before ``±`` is compared.

    Ties: every cell equal to the best value gets the best style. The
    second-best value is the second entry of the sorted column, so when the two
    top entries are tied no cell is styled as second best.

    Args:
        data: A Series (one column, as passed by ``Styler.apply(..., axis=0)``)
            or a DataFrame (as passed with ``axis=None``).
        dark_mode: If true use the dark palette, otherwise the light palette.

    Returns:
        For a Series, a list of CSS strings (``''`` for unstyled cells), one per
        element. For a DataFrame, a DataFrame of CSS strings with the same index
        and columns.
    """
    if dark_mode:
        best_attr = 'background-color: #ec7063; font-weight: bold; color: black'
        second_best_attr = 'background-color: #2e86c1; text-decoration: underline; color: black'
    else:
        # The light palette has never applied a background colour to the
        # second-best cell; that rendering is kept as it was.
        best_attr = 'background-color: #f5b7b1; font-weight: bold'
        second_best_attr = 'color: black; text-decoration: underline'

    def extract_value(val):
        # "17.580 ±2.715" -> 17.58; numeric cells pass through untouched.
        if isinstance(val, str):
            return float(val.split('±')[0].strip())
        return val

    def column_styles(column, name):
        # Parse once; the same values are used for ranking and for matching.
        values = column.apply(extract_value)
        # str() keeps unnamed Series and non-string column labels from crashing.
        lowered = str(name).lower()
        lower_is_better = 'loss' in lowered or 'fid' in lowered
        ranked = values.sort_values(ascending=lower_is_better)

        styles = [''] * len(values)
        if len(ranked) == 0:
            return styles
        best = ranked.iloc[0]
        # A single-row column has a best entry but no second-best one.
        second_best = ranked.iloc[1] if len(ranked) > 1 else None

        for position, value in enumerate(values):
            if value == best:
                styles[position] = best_attr
            elif second_best is not None and value == second_best:
                styles[position] = second_best_attr
        return styles

    if data.ndim == 1:  # Single row or column (Series)
        return column_styles(data, data.name)

    # DataFrame case: style each column independently with the same rule as above,
    # so the best style always wins over the second-best style on ties.
    styled_df = pd.DataFrame('', index=data.index, columns=data.columns)
    for col in data.columns:
        styled_df[col] = column_styles(data[col], col)
    return styled_df

In [4]:
# Show the summary table written by an earlier run of the "Summary Results" cell
# (it saves evaluation/evaluation.csv). On a fresh checkout that file does not
# exist yet, so skip the preview instead of aborting "Run All" with
# FileNotFoundError.
csv_file = "evaluation/evaluation.csv"

if os.path.exists(csv_file):
    df = pd.read_csv(csv_file)
    # Keep 'Method' first and order the metric columns alphabetically.
    df = df.reindex(['Method'] + sorted(df.columns[1:]), axis=1)
    metrics = df.columns[1:]
    # display() is needed because the Styler is no longer the cell's last expression.
    display(df.style.apply(highlight_best, subset=metrics, axis=0))
else:
    print(f"{csv_file} not found - run the evaluation cells below to create it.")

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,FID↓,Style Loss↓,Style Similarity↑
0,original,17.580 ±2.715,0.269 ±0.032,10.421 ±3.987,0.536 ±0.441,0.508 ±0.042
1,lab,16.619 ±2.240,0.285 ±0.034,17.044 ±6.521,0.619 ±0.535,0.482 ±0.032
2,luv,16.927 ±2.246,0.286 ±0.031,16.134 ±4.729,0.605 ±0.498,0.481 ±0.032
3,pca,16.512 ±2.520,0.290 ±0.027,15.843 ±4.870,0.515 ±0.513,0.484 ±0.031


## Prepare Evaluation Data

In [5]:
def _leading_number(filename):
    """Return the integer before the first '.' in ``filename``, or None if there is none."""
    try:
        return int(filename.split('.')[0])
    except ValueError:
        return None


def _save_resized(src_path, dst_path, size=(256, 256)):
    """Resize the image at ``src_path`` to ``size`` and save it to ``dst_path``."""
    # The context manager closes the source file as soon as the copy is written.
    with Image.open(src_path) as image:
        image.resize(size).save(dst_path)


def prepare_eval_images(group_id, methods):
    """Build the aligned 256x256 evaluation set for one content/style group.

    For every method and every (style, content) pair whose stylized result
    ``output/styleshot/style{g}_content{g}/{SS}_{CC}_{method}_styleshot.png``
    exists, the content, style and stylized images are resized to 256x256 and
    saved under ``evaluation/Content``, ``evaluation/Style`` and
    ``evaluation/Stylized`` with the shared file name ``{SS}_{CC}.png``.
    Pairs without a stylized result are skipped.

    Args:
        group_id: Group suffix used in the ``content{g}`` / ``style{g}`` folder names.
        methods: Iterable of method names appearing in the stylized file names.
    """
    content_dir = f'data/Content/content{group_id}'
    style_dir = f'data/Style/style{group_id}'
    stylized_dir = f'output/styleshot/style{group_id}_content{group_id}'

    content_out = f'evaluation/Content/content{group_id}'
    style_out = f'evaluation/Style/style{group_id}'

    os.makedirs(content_out, exist_ok=True)
    os.makedirs(style_out, exist_ok=True)

    # Entries whose stem is not a number (e.g. '.ipynb_checkpoints', '.DS_Store')
    # cannot be mapped to a stylized file name, so they are ignored instead of
    # crashing int().
    content_files = [name for name in sorted(os.listdir(content_dir)) if _leading_number(name) is not None]
    style_files = [name for name in sorted(os.listdir(style_dir)) if _leading_number(name) is not None]

    for method in methods:
        stylized_out = f'evaluation/Stylized/style{group_id}_content{group_id}/{method}'
        os.makedirs(stylized_out, exist_ok=True)

        for style_file in tqdm(style_files, desc=f'Processing styles for method {method}'):
            style_number = _leading_number(style_file)
            for content_file in tqdm(content_files, desc=f'Processing contents for style {style_file}', leave=False):
                content_number = _leading_number(content_file)
                stylized_path = f"{stylized_dir}/{style_number:02d}_{content_number:02d}_{method}_styleshot.png"
                # Check first: nothing is decoded for pairs that were never stylized.
                if not os.path.exists(stylized_path):
                    continue

                pair_name = f'{style_number:02d}_{content_number:02d}.png'
                _save_resized(os.path.join(content_dir, content_file), os.path.join(content_out, pair_name))
                _save_resized(os.path.join(style_dir, style_file), os.path.join(style_out, pair_name))
                _save_resized(stylized_path, os.path.join(stylized_out, pair_name))

## Evaluation

In [6]:
class VGG19Extractor(torch.nn.Module):
    """Frozen VGG19 feature extractor that returns selected intermediate activations.

    ``forward`` feeds the input through the ``features`` stack of torchvision's
    pretrained VGG19 and returns a dict keyed by the names in ``self.layers``.
    """

    def __init__(self):
        super().__init__()
        self.model = models.vgg19(weights='VGG19_Weights.DEFAULT').features.eval()
        # This notebook only measures with the network, it never trains it.
        # Freezing the weights stops autograd from recording a graph (and keeping
        # every intermediate activation alive) on each forward pass.
        self.model.requires_grad_(False)
        # Index of the module inside ``features`` -> key used in the returned dict.
        self.layers = {
            '0': 'conv1_1',  # Style layer
            '5': 'conv2_1',  # Style layer
            '10': 'conv3_1', # Style layer
            '19': 'conv4_1', # Style layer
            '21': 'conv4_2', # Content layer
            '28': 'conv5_1'  # Style layer
        }

    def forward(self, x):
        """Run ``x`` through every module of ``self.model`` and collect the selected outputs.

        Args:
            x: Input tensor accepted by the first module of ``self.model``.

        Returns:
            Dict mapping each name in ``self.layers`` to the tensor produced by
            the module with the corresponding index.
        """
        features = {}
        for name, layer in self.model.named_children():
            x = layer(x)
            if name in self.layers:
                # NOTE(review): torchvision appears to build VGG with in-place ReLUs,
                # so the tensor stored here is presumably overwritten by the next
                # activation (callers would see post-ReLU values). For that reason the
                # loop is not cut short after the last selected layer - confirm before
                # changing.
                features[self.layers[name]] = x
        return features

class VGG16Extractor(torch.nn.Module):
    """Frozen VGG16 feature extractor that returns selected intermediate activations.

    ``forward`` feeds the input through the ``features`` stack of torchvision's
    pretrained VGG16 and returns a dict keyed by the names in ``self.layers``.
    """

    def __init__(self):
        super().__init__()
        self.model = models.vgg16(weights='VGG16_Weights.DEFAULT').features.eval()
        # This notebook only measures with the network, it never trains it.
        # Freezing the weights stops autograd from recording a graph (and keeping
        # every intermediate activation alive) on each forward pass.
        self.model.requires_grad_(False)
        # Index of the module inside ``features`` -> key used in the returned dict.
        self.layers = {
            '0': 'conv1_1',  # Style layer
            '5': 'conv2_1',  # Style layer
            '10': 'conv3_1', # Style layer
            '17': 'conv4_1', # Style layer
            '19': 'conv4_2', # Content layer
            '24': 'conv5_1'  # Style layer
        }

    def forward(self, x):
        """Run ``x`` through every module of ``self.model`` and collect the selected outputs.

        Args:
            x: Input tensor accepted by the first module of ``self.model``.

        Returns:
            Dict mapping each name in ``self.layers`` to the tensor produced by
            the module with the corresponding index.
        """
        features = {}
        for name, layer in self.model.named_children():
            x = layer(x)
            if name in self.layers:
                # NOTE(review): torchvision appears to build VGG with in-place ReLUs,
                # so the tensor stored here is presumably overwritten by the next
                # activation (callers would see post-ReLU values). For that reason the
                # loop is not cut short after the last selected layer - confirm before
                # changing.
                features[self.layers[name]] = x
        return features

def gram_matrix(tensor):
    """Return the un-normalised Gram matrix of every feature map in a batch.

    Args:
        tensor: 4-D tensor unpacked as (batch, channels, height, width).

    Returns:
        Tensor of shape (batch, channels, channels); entry ``[n, i, j]`` is the
        dot product of channels ``i`` and ``j`` of sample ``n`` over all spatial
        positions. No normalisation is applied here.
    """
    b, c, h, w = tensor.size()
    # reshape (not view) so non-contiguous inputs, e.g. permuted or
    # channels-last tensors, work instead of raising.
    features = tensor.reshape(b, c, h * w)
    return torch.bmm(features, features.transpose(1, 2))

def calculate_fid(mean1, cov1, mean2, cov2, eps=1e-6):
    """Fréchet distance between two Gaussians given their means and covariances.

    Computes ``||mean1 - mean2||^2 + trace(cov1 + cov2 - 2 * sqrtm(cov1 @ cov2))``.

    Args:
        mean1: Mean vector (or scalar) of the first distribution.
        cov1: Covariance matrix (or scalar variance) of the first distribution.
        mean2: Mean vector (or scalar) of the second distribution.
        cov2: Covariance matrix (or scalar variance) of the second distribution.
        eps: Value added to both covariance diagonals when the matrix square
            root of their product is not finite.

    Returns:
        The distance as a real scalar.
    """
    # Accept scalar statistics too (np.cov returns a 0-d array for one feature).
    mean1 = np.atleast_1d(mean1)
    mean2 = np.atleast_1d(mean2)
    cov1 = np.atleast_2d(cov1)
    cov2 = np.atleast_2d(cov2)

    diff = np.sum((mean1 - mean2) ** 2)
    cov_mean = sqrtm(np.dot(cov1, cov2))
    if not np.isfinite(cov_mean).all():
        # A (near-)singular product can yield inf/nan; retry with a small ridge
        # on both covariances so the result stays usable.
        offset = np.eye(cov1.shape[0]) * eps
        cov_mean = sqrtm(np.dot(cov1 + offset, cov2 + offset))
    if np.iscomplexobj(cov_mean):
        cov_mean = cov_mean.real  # drop the imaginary part
    fid = diff + np.trace(cov1 + cov2 - 2 * cov_mean)
    return fid

def preprocess_images(image_paths, gray=False, device=None):
    """Load images from disk into one normalised batch tensor.

    Each image is converted to RGB (optionally via grayscale), resized to
    224x224, converted to a tensor and normalised with the channel mean/std
    constants below (these look like the usual ImageNet statistics).

    Args:
        image_paths: Iterable of image file paths; must not be empty.
        gray: If true, convert each image to grayscale and back to 3-channel RGB.
        device: Target device for the batch. Defaults to CUDA when available
            (the previous behaviour) and falls back to CPU otherwise.

    Returns:
        Tensor of shape (len(image_paths), 3, 224, 224) on ``device``.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    images = []
    for path in image_paths:
        # convert() returns a fully loaded copy, so the file can be closed right away.
        with Image.open(path) as handle:
            image = handle.convert('RGB')
        if gray:
            image = image.convert('L').convert('RGB')
        images.append(transform(image))
    return torch.stack(images, dim=0).to(device)

def compute_metrics(content_images, style_images_gray, stylized_images, stylized_images_gray, model, batch_size=1):
    """Compute content, style and feature-distribution metrics for stylized images.

    Row ``i`` of every input tensor must describe the same (content, style,
    stylized) triple. All metrics are averages over samples, so the result does
    not depend on ``batch_size``.

    Args:
        content_images: Batch of content images; the first dimension indexes samples.
        style_images_gray: Matching style images (the caller passes grayscale versions).
        stylized_images: Matching stylized results.
        stylized_images_gray: Grayscale versions of ``stylized_images``.
        model: Callable mapping an image batch to a dict with the keys
            ``'conv1_1'``, ``'conv2_1'``, ``'conv3_1'``, ``'conv4_1'``,
            ``'conv4_2'`` and ``'conv5_1'``, each a 4-D
            (batch, channels, height, width) tensor.
        batch_size: Number of samples fed to ``model`` at once.

    Returns:
        Tuple ``(content_loss, style_loss_gram, content_similarity,
        style_similarity, fid)``:
        content_loss - mean squared error between stylized and content ``conv4_2``;
        style_loss_gram - weighted, normalised squared Gram-matrix difference
        between grayscale style and grayscale stylized features;
        content_similarity - mean channel-wise cosine similarity on ``conv4_2``;
        style_similarity - weighted mean channel-wise cosine similarity on the
        style layers;
        fid - Fréchet distance between Gaussians fitted to the ``conv2_1``
        feature vectors (one per spatial position) of the grayscale style and
        grayscale stylized images.

    Raises:
        ValueError: If there are no samples or ``batch_size`` is smaller than 1.
    """
    num_samples = content_images.size(0)
    if num_samples == 0:
        raise ValueError("compute_metrics needs at least one image")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
    style_weights = {'conv1_1': 1.,
                     'conv2_1': 0.75,
                     'conv3_1': 0.2,
                     'conv4_1': 0.2,
                     'conv5_1': 0.2}
    weight_total = sum(style_weights.values())
    cosine_similarity = torch.nn.functional.cosine_similarity

    content_loss = 0.0
    content_similarity = 0.0
    style_loss_gram = 0.0
    style_loss_similarity = 0.0
    style_features_list = []
    stylized_features_list = []

    # Pure evaluation: without no_grad the conv2_1 features kept for the Fréchet
    # distance would hold on to the autograd graph of every sample.
    with torch.no_grad():
        for start in range(0, num_samples, batch_size):
            batch = slice(start, start + batch_size)
            current = min(batch_size, num_samples - start)  # the last batch may be smaller

            # Extract features
            content_features = model(content_images[batch])
            gray_style_features = model(style_images_gray[batch])
            stylized_features = model(stylized_images[batch])
            gray_stylized_features = model(stylized_images_gray[batch])

            # Content metrics: per-batch means are weighted by the batch size so
            # that dividing by num_samples below yields a true per-sample mean.
            content_loss += current * mse_loss(stylized_features['conv4_2'], content_features['conv4_2']).item()
            content_similarity += current * cosine_similarity(
                stylized_features['conv4_2'], content_features['conv4_2'], dim=1).mean().item()

            # Style metrics
            for layer in style_layers:
                style_feat = gray_style_features[layer]
                stylized_feat = gray_stylized_features[layer]
                _, c, h, w = style_feat.size()
                gram_style = gram_matrix(style_feat)
                gram_stylized = gram_matrix(stylized_feat)
                # torch.sum already adds up every sample of the batch, so this
                # term is not weighted by the batch size.
                style_loss_gram += style_weights[layer] * torch.sum((gram_style - gram_stylized) ** 2) / (4 * (c ** 2) * (h * w) ** 2)
                style_loss_similarity += current * style_weights[layer] * cosine_similarity(
                    stylized_feat, style_feat, dim=1).mean().item()

            # Keep conv2_1 features as (batch, positions, channels) on the CPU
            # for the Fréchet distance.
            style_features_list.append(torch.flatten(gray_style_features['conv2_1'], start_dim=2).permute(0, 2, 1).cpu())
            stylized_features_list.append(torch.flatten(gray_stylized_features['conv2_1'], start_dim=2).permute(0, 2, 1).cpu())

    # Stack all batches into (samples * positions, channels) matrices
    style_flattened = torch.cat(style_features_list, dim=0).reshape(-1, style_features_list[0].shape[-1]).numpy()
    stylized_flattened = torch.cat(stylized_features_list, dim=0).reshape(-1, stylized_features_list[0].shape[-1]).numpy()

    # Fréchet distance between the two feature distributions
    mean_style = np.mean(style_flattened, axis=0)
    mean_stylized = np.mean(stylized_flattened, axis=0)
    cov_style = np.cov(style_flattened, rowvar=False)
    cov_stylized = np.cov(stylized_flattened, rowvar=False)
    fid = calculate_fid(mean_style, cov_style, mean_stylized, cov_stylized)

    # Per-sample averages (identical to the previous result for batch_size=1)
    content_loss /= num_samples
    content_similarity /= num_samples
    style_loss_gram = style_loss_gram.item() / num_samples / weight_total
    style_loss_similarity = style_loss_similarity / num_samples / weight_total
    return content_loss, style_loss_gram, content_similarity, style_loss_similarity, fid


In [7]:
def evaluate_methods(group_id, methods, vgg_extractor):
    """Compute the evaluation metrics of every method for one group.

    Reads the aligned images under ``evaluation/Content``, ``evaluation/Style``
    and ``evaluation/Stylized`` for ``group_id``, where one file name identifies
    the same (style, content) pair in all three folders.

    Args:
        group_id: Group suffix used in the folder names.
        methods: Iterable of method names (sub-folders of the stylized folder).
        vgg_extractor: Feature extractor module passed on to ``compute_metrics``.

    Returns:
        List with one dict per method holding the keys ``"Method"``,
        ``"Content Loss↓"``, ``"Content Similarity↑"``, ``"Style Loss↓"``,
        ``"Style Similarity↑"`` and ``"FID↓"``.

    Raises:
        AssertionError: If a method has no stylized images, or a stylized image
            has no content/style counterpart.
    """
    content_dir = f"evaluation/Content/content{group_id}"
    style_dir = f"evaluation/Style/style{group_id}"
    vgg_extractor = vgg_extractor.cuda()  # moved once, not once per method

    results = []
    for method in methods:
        stylized_dir = f"evaluation/Stylized/style{group_id}_content{group_id}/{method}"

        # os.listdir returns entries in arbitrary order. All three path lists are
        # derived from ONE sorted list of stylized file names so that index i
        # refers to the same (style, content) pair in each of them.
        names = sorted(os.listdir(stylized_dir))
        stylized_paths = [os.path.join(stylized_dir, name) for name in names]
        content_paths = [os.path.join(content_dir, name) for name in names
                         if os.path.exists(os.path.join(content_dir, name))]
        style_paths = [os.path.join(style_dir, name) for name in names
                       if os.path.exists(os.path.join(style_dir, name))]

        assert len(stylized_paths) != 0
        # Equal lengths mean every stylized image has both counterparts, which
        # also guarantees the three lists are aligned.
        assert len(content_paths) == len(style_paths) == len(stylized_paths), f"content: {len(content_paths)}, style: {len(style_paths)}, stylized: {len(stylized_paths)}"

        # preprocess images
        content_images = preprocess_images(content_paths)
        style_images_gray = preprocess_images(style_paths, gray=True)
        stylized_images = preprocess_images(stylized_paths)
        stylized_images_gray = preprocess_images(stylized_paths, gray=True)

        # extract features and calculate metrics
        content_loss, style_loss, content_similarity, style_loss_similarity, fid_score = compute_metrics(
            content_images, style_images_gray, stylized_images, stylized_images_gray, vgg_extractor)

        results.append({
            "Method": method,
            "Content Loss↓": content_loss,
            "Content Similarity↑": content_similarity,
            "Style Loss↓": style_loss,
            "Style Similarity↑": style_loss_similarity,
            "FID↓": fid_score
        })

    return results

In [8]:
# Feature extractor shared by all group cells below (VGG19 by default).
vgg_extractor = VGG19Extractor().eval()
# Alternative backbone: uncomment to evaluate with VGG16 instead.
# vgg_extractor = VGG16Extractor().eval()
# One entry per evaluated group (each entry is that group's list of per-method result
# dicts). Re-run this cell before re-running group cells; otherwise their results are
# appended again and the summary statistics count them twice.
result_all = []

## Group 1

In [9]:
# Evaluate group 1. The evaluation images are prepared only when this group's
# Stylized folder does not exist yet; an existing folder is reused as-is.
group_id = "1"
methods = ["original", "lab", "luv", "pca"]
data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

if not os.path.exists(data_path):
    prepare_eval_images(group_id, methods)

result = evaluate_methods(group_id, methods, vgg_extractor)
# NOTE: re-running this cell appends this group's results to result_all a second time.
result_all.append(result)

In [10]:
# Tabulate the metrics of the group evaluated in the previous cell
# (uses that cell's `result` and `group_id`).
df = pd.DataFrame(result)
# Save this group's table as evaluation/evaluation<group_id>.csv
df.to_csv(f"evaluation/evaluation{group_id}.csv", index=False)

# Show the table with the best and second-best value of each metric highlighted
df.style.apply(highlight_best, subset=['Content Loss↓', 'Content Similarity↑','Style Loss↓', 'Style Similarity↑', 'FID↓'], axis=0)


Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,15.497891,0.323624,0.145619,0.55859,5.109168
1,lab,15.39556,0.341935,0.205119,0.518647,12.530405
2,luv,15.675183,0.336472,0.214714,0.517471,14.301745
3,pca,15.607295,0.335831,0.200359,0.519843,11.739694


## Group 2

In [11]:
# Evaluate group 2. The evaluation images are prepared only when this group's
# Stylized folder does not exist yet; an existing folder is reused as-is.
group_id = "2"
methods = ["original", "lab", "luv", "pca"]
data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

if not os.path.exists(data_path):
    prepare_eval_images(group_id, methods)

result = evaluate_methods(group_id, methods, vgg_extractor)
# NOTE: re-running this cell appends this group's results to result_all a second time.
result_all.append(result)

In [12]:
# Tabulate the metrics of the group evaluated in the previous cell
# (uses that cell's `result` and `group_id`).
df = pd.DataFrame(result)
# Save this group's table as evaluation/evaluation<group_id>.csv
df.to_csv(f"evaluation/evaluation{group_id}.csv", index=False)

# Show the table with the best and second-best value of each metric highlighted
df.style.apply(highlight_best, subset=['Content Loss↓', 'Content Similarity↑','Style Loss↓', 'Style Similarity↑', 'FID↓'], axis=0)

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,16.366145,0.256099,0.436056,0.53713,11.355264
1,lab,15.904366,0.268169,0.681322,0.496811,25.495914
2,luv,16.662245,0.266417,0.640204,0.493869,20.325739
3,pca,14.325919,0.286613,0.214448,0.500018,16.359606


## Group 3

In [13]:
# Evaluate group 3. The evaluation images are prepared only when this group's
# Stylized folder does not exist yet; an existing folder is reused as-is.
group_id = "3"
methods = ["original", "lab", "luv", "pca"]
data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

if not os.path.exists(data_path):
    prepare_eval_images(group_id, methods)

result = evaluate_methods(group_id, methods, vgg_extractor)
# NOTE: re-running this cell appends this group's results to result_all a second time.
result_all.append(result)

In [14]:
# Tabulate the metrics of the group evaluated in the previous cell
# (uses that cell's `result` and `group_id`).
df = pd.DataFrame(result)
# Save this group's table as evaluation/evaluation<group_id>.csv
df.to_csv(f"evaluation/evaluation{group_id}.csv", index=False)

# Show the table with the best and second-best value of each metric highlighted
df.style.apply(highlight_best, subset=['Content Loss↓', 'Content Similarity↑','Style Loss↓', 'Style Similarity↑', 'FID↓'], axis=0)

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,15.644429,0.245464,0.332734,0.482074,9.794449
1,lab,15.125591,0.265952,0.280925,0.463771,19.04605
2,luv,15.751747,0.26399,0.314446,0.461118,17.180875
3,pca,15.162542,0.268222,0.302196,0.464591,21.600693


## Group 4

In [15]:
# Evaluate group 4. The evaluation images are prepared only when this group's
# Stylized folder does not exist yet; an existing folder is reused as-is.
group_id = "4"
methods = ["original", "lab", "luv", "pca"]
data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

if not os.path.exists(data_path):
    prepare_eval_images(group_id, methods)

result = evaluate_methods(group_id, methods, vgg_extractor)
# NOTE: re-running this cell appends this group's results to result_all a second time.
result_all.append(result)

In [16]:
# Tabulate the metrics of the group evaluated in the previous cell
# (uses that cell's `result` and `group_id`).
df = pd.DataFrame(result)
# Save this group's table as evaluation/evaluation<group_id>.csv
df.to_csv(f"evaluation/evaluation{group_id}.csv", index=False)

# Show the table with the best and second-best value of each metric highlighted
df.style.apply(highlight_best, subset=['Content Loss↓', 'Content Similarity↑','Style Loss↓', 'Style Similarity↑', 'FID↓'], axis=0)

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,18.426396,0.270236,0.474793,0.5097,9.638389
1,lab,16.102889,0.289258,0.407552,0.494526,8.751085
2,luv,15.671494,0.297639,0.404758,0.495874,8.879282
3,pca,16.715511,0.287331,0.438298,0.496148,10.161249


## Group 5

In [17]:
# Evaluate group 5. The evaluation images are prepared only when this group's
# Stylized folder does not exist yet; an existing folder is reused as-is.
group_id = "5"
methods = ["original", "lab", "luv", "pca"]
data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

if not os.path.exists(data_path):
    prepare_eval_images(group_id, methods)

result = evaluate_methods(group_id, methods, vgg_extractor)
# NOTE: re-running this cell appends this group's results to result_all a second time.
result_all.append(result)

In [18]:
# Tabulate the metrics of the group evaluated in the previous cell
# (uses that cell's `result` and `group_id`).
df = pd.DataFrame(result)
# Save this group's table as evaluation/evaluation<group_id>.csv
df.to_csv(f"evaluation/evaluation{group_id}.csv", index=False)

# Show the table with the best and second-best value of each metric highlighted
df.style.apply(highlight_best, subset=['Content Loss↓', 'Content Similarity↑','Style Loss↓', 'Style Similarity↑', 'FID↓'], axis=0)

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,21.964197,0.24818,1.290964,0.4531,16.209106
1,lab,20.564994,0.259485,1.5185,0.437355,19.395971
2,luv,20.875338,0.265755,1.449009,0.43767,19.981917
3,pca,20.748142,0.27227,1.417236,0.441699,19.356191


## Summary Results

In [19]:
# Flatten result_all (one list of per-method dicts per group) into a single list of dicts
flat_results = [item for sublist in result_all for item in sublist]

# One row per (group, method) pair
df = pd.DataFrame(flat_results)

# Columns to summarise as mean and standard deviation across groups
metrics = ['Content Loss↓', 'Content Similarity↑', 'Style Loss↓', 'Style Similarity↑', 'FID↓']

# For every method, format each metric as "mean ±std"
formatted_results = [
    {
        'Method': method,
        **{metric: f"{group[metric].mean():.3f} ±{group[metric].std():.3f}" for metric in metrics},
    }
    for method, group in df.groupby('Method')
]
formatted_df = pd.DataFrame(formatted_results)

# groupby orders methods alphabetically; restore the presentation order
method_order = ['original', 'lab', 'luv', 'pca']
formatted_df['Method'] = pd.Categorical(formatted_df['Method'], categories=method_order, ordered=True)
formatted_df = formatted_df.sort_values('Method').reset_index(drop=True)

# Save the summary; the "Show Data" cell at the top of the notebook reads this file
formatted_df.to_csv("evaluation/evaluation.csv", index=False)

# Show the summary with the best and second-best value of each metric highlighted
formatted_df.style.apply(highlight_best, subset=metrics, axis=0)

Unnamed: 0,Method,Content Loss↓,Content Similarity↑,Style Loss↓,Style Similarity↑,FID↓
0,original,17.580 ±2.715,0.269 ±0.032,0.536 ±0.441,0.508 ±0.042,10.421 ±3.987
1,lab,16.619 ±2.240,0.285 ±0.034,0.619 ±0.535,0.482 ±0.032,17.044 ±6.521
2,luv,16.927 ±2.246,0.286 ±0.031,0.605 ±0.498,0.481 ±0.032,16.134 ±4.729
3,pca,16.512 ±2.520,0.290 ±0.027,0.515 ±0.513,0.484 ±0.031,15.843 ±4.870


## Run All Groups (optional)

In [20]:
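# Optional: run all groups (1-5) in a single loop. This is a commented-out alternative
# to the Group 1-5 cells above; it resets result_all, so use it instead of those cells
# (not in addition to them) and then re-run the Summary Results cell.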
# result_all = []

# for id in tqdm(range(1, 6), desc="Processing all groups"):
#     group_id = str(id)
#     methods = ["original", "lab", "luv", "pca"]
#     data_path = os.path.join('evaluation', 'Stylized', f'style{group_id}_content{group_id}')

#     if not os.path.exists(data_path):
#         prepare_eval_images(group_id, methods)

#     result = evaluate_methods(group_id, methods, vgg_extractor)
#     result_all.append(result)