In [None]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from collections import OrderedDict

from src.visualizations import * 
from src.statistical_tests import *
from src.fairness import kl, cheb, tv, chi 
from src.fairness import compute_RDP, compute_PR, compute_UCPR 
from src.evaluation import compute_losses, compute_diversity 

# 1. Loading all Images 

In [None]:
# Upsampling methods under evaluation. Each name is also used as the
# sub-directory of that method's reconstructions and in cached CSV file names.
METHODS = [
    "pulse",
    "psp",
    "fairpsp",
    "posteriorSampling",
    "ddrm",
]

# Race labels as passed to the fairness/diversity helpers in this notebook.
# Note the mixed spelling: "Latino_Hispanic" uses an underscore, the others spaces.
RACES = [
    "White", "Indian", "Black", "Latino_Hispanic",
    "Southeast Asian", "East Asian", "Middle Eastern",
]

In [None]:
# For every evaluation setting, map each method to the directory that holds
# its upsampled images.
img_paths = {
    "fairface": {method: f"upsampled_imgs/fairface/16_to_128/{method}/" for method in METHODS},
    "fairface_avg": {method: f"upsampled_imgs/fairface/4_to_128/{method}/" for method in METHODS},
    "fairface_noisy_avg": {method: f"upsampled_imgs/fairface/4noise_to_128/{method}/" for method in METHODS},
    "unfairface": {method: f"upsampled_imgs/unfairface/16_to_128/{method}/" for method in METHODS},
    "unfairface_avg": {method: f"upsampled_imgs/unfairface/4_to_128/{method}/" for method in METHODS},
    "unfairface_noisy_avg": {method: f"upsampled_imgs/unfairface/4noise_to_128/{method}/" for method in METHODS},
}

# Label file and the directories of the reference ("real") images.
labels_path = "data/fairface/fairface_label_val.csv"
real_img_path = "data/fairface/test_correct_prediction/"
avg_img_path = "data/fairface/avg_faces/"
noisy_avg_img_path = "data/fairface/avg_noisy_faces/"

# Attach the reference directory to each setting. The "lr" entry stays empty
# because the low-resolution images are computed on the fly.
for dataset_name, reference_dir in [
    ("fairface", real_img_path),
    ("unfairface", real_img_path),
    ("fairface_avg", avg_img_path),
    ("unfairface_avg", avg_img_path),
    ("fairface_noisy_avg", noisy_avg_img_path),
    ("unfairface_noisy_avg", noisy_avg_img_path),
]:
    img_paths[dataset_name]["real"] = reference_dir
    img_paths[dataset_name]["lr"] = ""

# Fix the key order of every setting to: real - lr - <methods in METHODS order>.
keys = ["real", "lr", *METHODS]
for dataset_name in list(img_paths):
    unordered_paths = img_paths[dataset_name]
    img_paths[dataset_name] = OrderedDict((key, unordered_paths[key]) for key in keys)

# 2. Qualitative Results

In [None]:
# Output directory for the reconstruction figures saved in this section;
# exist_ok=True makes re-running this cell harmless.
os.makedirs("plots/reconstructions", exist_ok=True)

## 2.1. Visualize Random Reconstructions 

In [None]:
# UnfairFace paths: visualize reconstructions for 4 images
# (no image names are given, so the helper picks them itself).
visualize_reconstructions(img_paths["unfairface"], num_imgs=4)

In [None]:
# FairFace paths: visualize reconstructions for 4 images
# (no image names are given, so the helper picks them itself).
visualize_reconstructions(img_paths["fairface"], num_imgs=4)

In [None]:
# UnfairFace vs. FairFace: comparison plot of both path sets for 4 images.
visualize_reconstructions_comparison(img_paths["unfairface"], img_paths["fairface"], num_imgs=4)

### Teaser Image: 

In [None]:
# Fixed image names used for the teaser figure.
img_names_teaser = ["22.jpg", "106.jpg", "118.jpg", "511.jpg"]
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_teaser)
# Save the current matplotlib figure as PDF.
plt.savefig("plots/reconstructions/teaser_unfairface.pdf")

In [None]:
# Same fixed image names as for the UnfairFace teaser, redefined here so this
# cell can be run on its own.
img_names_teaser = ["22.jpg", "106.jpg", "118.jpg", "511.jpg"]
visualize_reconstructions(img_paths["fairface"], img_names=img_names_teaser)
# Save the current matplotlib figure as PDF.
plt.savefig("plots/reconstructions/teaser_fairface.pdf")

## 2.2. Visualize Samples from a specific Ethnicity 

In [None]:
# Label table; the code below reads its "file" and "race" columns.
labels_df = pd.read_csv(labels_path)

def return_img_names_in_test_set(test_set_dir, race="White", num_imgs=8, labels=None):
    """Return the first ``num_imgs`` image names of one race that exist in a directory.

    The label table is scanned in order. For each row whose ``race`` column
    equals ``race``, the base name of its ``file`` entry (the part after the
    last ``/``) is kept if a file of that name is present in ``test_set_dir``.

    Args:
        test_set_dir: Directory whose file names define the test set.
        race: Value of the ``race`` column to filter on.
        num_imgs: Number of image names to return.
        labels: DataFrame with ``file`` and ``race`` columns. Defaults to the
            notebook-level ``labels_df``.

    Returns:
        A list of exactly ``num_imgs`` file names, in label-table order.

    Raises:
        ValueError: If fewer than ``num_imgs`` matching images exist in
            ``test_set_dir``. Previously this case ran past the end of the
            list and surfaced as an uninformative ``IndexError``.
    """
    if labels is None:
        labels = labels_df

    # A set makes each membership test O(1) instead of scanning the listing.
    filenames_test = set(os.listdir(test_set_dir))

    img_names_return = []
    for file_path in labels["file"][labels["race"] == race]:
        if len(img_names_return) >= num_imgs:
            break
        img_name = file_path.split("/")[-1]
        if img_name in filenames_test:
            img_names_return.append(img_name)

    if len(img_names_return) < num_imgs:
        raise ValueError(
            f"Requested {num_imgs} images of race {race!r}, but only "
            f"{len(img_names_return)} are present in {test_set_dir!r}."
        )
    return img_names_return

# Candidate images per race, all taken from the FairFace "real" directory.
# Twelve candidates are fetched for every race that is sub-selected by index
# below; "White" is used as-is, so four are enough.
fairface_real_dir = img_paths["fairface"]["real"]
img_names_black = return_img_names_in_test_set(fairface_real_dir, race="Black", num_imgs=12)
img_names_white = return_img_names_in_test_set(fairface_real_dir, race="White", num_imgs=4)
img_names_indian = return_img_names_in_test_set(fairface_real_dir, race="Indian", num_imgs=12)
img_names_me = return_img_names_in_test_set(fairface_real_dir, race="Middle Eastern", num_imgs=12)
img_names_ea = return_img_names_in_test_set(fairface_real_dir, race="East Asian", num_imgs=12)
img_names_sea = return_img_names_in_test_set(fairface_real_dir, race="Southeast Asian", num_imgs=12)
img_names_lh = return_img_names_in_test_set(fairface_real_dir, race="Latino_Hispanic", num_imgs=12)

# Cherry-picked images: three hand-chosen positions within each candidate list.
img_names_black_selected = [img_names_black[pos] for pos in (2, 7, 8)]
img_names_indian_selected = [img_names_indian[pos] for pos in (1, 4, 8)]
img_names_me_selected = [img_names_me[pos] for pos in (0, 6, 11)]
img_names_ea_selected = [img_names_ea[pos] for pos in (0, 6, 9)]
img_names_sea_selected = [img_names_sea[pos] for pos in (1, 4, 10)]
img_names_lh_selected = [img_names_lh[pos] for pos in (1, 4, 5)]

In [None]:
# "White" images, UnfairFace paths; the current figure is then saved as PDF.
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_white)
plt.savefig("plots/reconstructions/white_unfairface.pdf")

In [None]:
# "White" images, FairFace paths; the current figure is then saved as PDF.
visualize_reconstructions(img_paths["fairface"], img_names=img_names_white)
plt.savefig("plots/reconstructions/white_fairface.pdf")

In [None]:
# "White" images, UnfairFace vs. FairFace comparison plot, saved as PDF.
visualize_reconstructions_comparison(img_paths["unfairface"], img_paths["fairface"], img_names=img_names_white )
plt.savefig("plots/reconstructions/comparison_white.pdf")

### Black 

In [None]:
# Cherry-picked "Black" images, UnfairFace paths; figure saved as PDF.
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_black_selected)
plt.savefig("plots/reconstructions/black_unfairface.pdf")

In [None]:
# Cherry-picked "Black" images, FairFace paths; figure saved as PDF.
visualize_reconstructions(img_paths["fairface"], img_names=img_names_black_selected)
plt.savefig("plots/reconstructions/black_fairface.pdf")

In [None]:
# Cherry-picked "Black" images, UnfairFace vs. FairFace comparison, saved as PDF.
visualize_reconstructions_comparison(img_paths["unfairface"], img_paths["fairface"], img_names=img_names_black_selected)
plt.savefig("plots/reconstructions/comparison_black.pdf")

### Plot More Headscarves, Bindis, and Monolid Eyes

In [None]:
# Hand-picked image numbers for the "bindis" figure (per the section heading).
img_names_bindis = [f"{nr}.jpg" for nr in [136, 900, 1637]] 
# UnfairFace paths first ...
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_bindis)
plt.savefig("plots/reconstructions/bindis_unfairface.pdf")

# ... then the same images with the FairFace paths.
visualize_reconstructions(img_paths["fairface"], img_names=img_names_bindis)
plt.savefig("plots/reconstructions/bindis_fairface.pdf")

In [None]:
# Hand-picked image numbers for the "headscarves" figure (per the section heading).
img_names_scarves = [f"{nr}.jpg" for nr in [1214, 1404, 1901]] 
# UnfairFace paths first ...
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_scarves)
plt.savefig("plots/reconstructions/scarves_unfairface.pdf")

# ... then the same images with the FairFace paths.
visualize_reconstructions(img_paths["fairface"], img_names=img_names_scarves)
plt.savefig("plots/reconstructions/scarves_fairface.pdf")

In [None]:
# Hand-picked image numbers for the "monolid eyes" figure (per the section heading).
img_names_monolid = [f"{nr}.jpg" for nr in [138, 346, 399]] 
# UnfairFace paths first ...
visualize_reconstructions(img_paths["unfairface"], img_names=img_names_monolid)
plt.savefig("plots/reconstructions/monolid_unfairface.pdf")

# ... then the same images with the FairFace paths.
visualize_reconstructions(img_paths["fairface"], img_names=img_names_monolid)
plt.savefig("plots/reconstructions/monolid_fairface.pdf")

### Remaining Races:

In [None]:
# Cherry-picked image names for every race not plotted individually above.
dict_race2img = {
    "Indian": img_names_indian_selected,
    "Southeast Asian": img_names_sea_selected,
    "East Asian": img_names_ea_selected,
    "Middle Eastern": img_names_me_selected,
    "Latino_Hispanic": img_names_lh_selected,
}

# One figure per race and setting. The dict preserves insertion order, so the
# races are processed in the order listed above. The race name becomes part of
# the file name, spaces included.
for race, img_names in dict_race2img.items():
    visualize_reconstructions(img_paths["fairface"], img_names=img_names)
    plt.savefig(f"plots/reconstructions/{race}_fairface.pdf")
    visualize_reconstructions(img_paths["unfairface"], img_names=img_names)
    plt.savefig(f"plots/reconstructions/{race}_unfairface.pdf")

## 2.3. Visualize multiple Samples given LowRes Downsampled to 4x4

In [None]:
# Average-face inputs, race "White", UnfairFace paths, 3 images; saved as PDF.
visualize_reconstructions_avg(img_paths["unfairface_avg"], race="White", num_imgs=3)
plt.savefig("plots/reconstructions/white_unfairface_avg.pdf")

In [None]:
# Average-face inputs, race "White", FairFace paths, 3 images; saved as PDF.
visualize_reconstructions_avg(img_paths["fairface_avg"], race="White", num_imgs=3)
plt.savefig("plots/reconstructions/white_fairface_avg.pdf")

In [None]:
# Average-face inputs, race "White": UnfairFace vs. FairFace comparison (displayed only, not saved).
visualize_reconstructions_avg_comparison(img_paths["unfairface_avg"], img_paths["fairface_avg"], race="White", num_imgs=4)

In [None]:
# Average-face inputs, race "Black", UnfairFace paths, 3 images; saved as PDF.
visualize_reconstructions_avg(img_paths["unfairface_avg"], race="Black", num_imgs=3)
plt.savefig("plots/reconstructions/black_unfairface_avg.pdf")

In [None]:
# Average-face inputs, race "Black", FairFace paths, 3 images; saved as PDF.
visualize_reconstructions_avg(img_paths["fairface_avg"], race="Black", num_imgs=3)
plt.savefig("plots/reconstructions/black_fairface_avg.pdf")

In [None]:
# Average-face inputs, race "Black": UnfairFace vs. FairFace comparison (displayed only, not saved).
visualize_reconstructions_avg_comparison(img_paths["unfairface_avg"], img_paths["fairface_avg"], race="Black", num_imgs=4)

### Noisy Averages as Input:

In [None]:
# Directory of the (noise-free) average faces, i.e. the "real" entry of the
# unfairface_avg paths. It is passed as `path_avg` to the noisy-average plots below.
path_avg = img_paths["unfairface_avg"]["real"]
print(path_avg)

In [None]:
# Noisy average-face inputs, race "White", UnfairFace paths; saved as PDF.
visualize_reconstructions_noisy_avg(img_paths["unfairface_noisy_avg"], path_avg=path_avg, race="White", num_imgs=3)
plt.savefig("plots/reconstructions/white_unfairface_noisy_avg.pdf")

In [None]:
# Noisy average-face inputs, race "White", FairFace paths; saved as PDF.
visualize_reconstructions_noisy_avg(img_paths["fairface_noisy_avg"], path_avg=path_avg, race="White", num_imgs=3)
plt.savefig("plots/reconstructions/white_fairface_noisy_avg.pdf")

In [None]:
# Noisy average-face inputs, race "Black": UnfairFace paths first, then FairFace.
# Each savefig call stores the figure that is current at that point.
visualize_reconstructions_noisy_avg(img_paths["unfairface_noisy_avg"], path_avg=path_avg, race="Black", num_imgs=3)
plt.savefig("plots/reconstructions/black_unfairface_noisy_avg.pdf")
visualize_reconstructions_noisy_avg(img_paths["fairface_noisy_avg"], path_avg=path_avg, race="Black", num_imgs=3)
plt.savefig("plots/reconstructions/black_fairface_noisy_avg.pdf")

# 3. Quantitative Results
## 3.1. Calculate all Losses and obtain Losses-DFs 

In [None]:
def losses_to_dfs(setting):
    """Load or compute the per-method loss tables of one setting.

    For every method in ``METHODS`` the table is read from
    ``evaluation/<setting>/losses_<method>.csv`` if that file exists.
    Otherwise it is computed with ``compute_losses`` from the setting's
    "real" and method image directories and written to that file.

    Args:
        setting: Either ``"fairface"`` or ``"unfairface"``.

    Returns:
        A dict mapping each method name to its loss DataFrame.
    """
    assert setting in ["fairface", "unfairface"]
    cache_dir = os.path.join("evaluation", setting)
    os.makedirs(cache_dir, exist_ok=True)

    losses_by_method = {}
    for method in METHODS:
        cache_file = os.path.join(cache_dir, f"losses_{method}.csv")
        if not os.path.exists(cache_file):
            computed = compute_losses(img_paths[setting]["real"], img_paths[setting][method], labels_path=labels_path)
            # NOTE(review): the index is written here, but read_csv below does
            # not restore it, so a cached table gains an extra leading column
            # compared to a freshly computed one - confirm this is harmless.
            computed.to_csv(cache_file, index=True)
            losses_by_method[method] = computed
        else:
            losses_by_method[method] = pd.read_csv(cache_file)
    return losses_by_method
      
# Nested lookup of loss tables: dfs[setting][method] -> DataFrame.
# Further settings are added to this dict in the diversity section.
dfs = {}      
for setting in ["fairface", "unfairface"]:
    dfs[setting] = losses_to_dfs(setting)
    
# Quick sanity check of one table.
print(dfs["fairface"]["pulse"].head())

## 3.2. Evaluating Performance

In [None]:
# Names of the loss columns that are read from every loss table.
LOSSES = ["lpips", "ssim", "race_cos", "race_0-1", "niqe16", "blur"]

def evaluate_performance(dfs, setting, methods=None, losses=None):
    """Average every loss column per method for one setting.

    Args:
        dfs: Nested mapping ``dfs[setting][method]`` -> DataFrame that
            contains one column per loss.
        setting: Key of the setting to evaluate.
        methods: Methods to evaluate. Defaults to the notebook-level ``METHODS``.
        losses: Loss columns to average. Defaults to the notebook-level ``LOSSES``.

    Returns:
        A DataFrame with one row per method: a ``method`` column followed by
        one column per loss holding the column mean. Two columns are
        post-processed:

        * ``race_0-1`` is reported as ``1 - mean`` (presumably turning a 0-1
          error into an accuracy - TODO confirm against ``compute_losses``).
        * ``blur``, if present, is multiplied by 100.
    """
    methods = METHODS if methods is None else methods
    losses = LOSSES if losses is None else losses

    performances = []
    for method in methods:
        method_df = dfs[setting][method]
        performance = {"method": method}
        for loss in losses:
            mean_loss = np.mean(method_df[loss])
            performance[loss] = 1 - mean_loss if loss == "race_0-1" else mean_loss
        performances.append(performance)

    # Build the frame once from the records instead of concatenating
    # one-row frames.
    losses_df = pd.DataFrame(performances)
    # Only rescale when the column was actually requested.
    if "blur" in losses_df.columns:
        losses_df["blur"] = losses_df["blur"] * 100
    return losses_df


In [None]:
# Mean performance per method for the UnfairFace setting; the bare name
# displays the table as the cell output.
losses_unfairface = evaluate_performance(dfs, "unfairface")
losses_unfairface

In [None]:
# Mean performance per method for the FairFace setting; the bare name
# displays the table as the cell output.
losses_fairface = evaluate_performance(dfs, "fairface")
losses_fairface

In [None]:
# Interleave the UnfairFace and FairFace performance columns in one table
# (see Table 1 in the paper). Columns suffixed with "-F" hold the FairFace values.
losses_both = pd.DataFrame({"method": list(map(name_to_str, losses_unfairface["method"]))})
for label in losses_unfairface.columns.drop("method"):
    losses_both[label] = losses_unfairface[label]
    losses_both[f"{label}-F"] = losses_fairface[label]
print(losses_both)

## 3.2.1. Testing whether the values differ significantly between FairFace and UnfairFace

### 1. Wilcoxon Test (fewer assumptions; nonparametric version of the paired t-test)

In [None]:
# Wilcoxon test between the two settings for every method and loss, at
# significance level alpha=0.05 (default return mode of the helper).
two_sample_wilcoxon(dfs, alpha=0.05, methods=METHODS, losses=LOSSES)

In [None]:
# Same test with return_decision=False; the result is stored as p_values
# and displayed as the cell output.
p_values = two_sample_wilcoxon(dfs, alpha=0.05, return_decision=False, methods=METHODS, losses=LOSSES)
p_values 

### Note that we cannot run a paired t-test because normality of the features is violated in all cases except the NIQE score:

In [None]:
# Check the assumption of the paired t-test for every method and loss.
test_paired_t_test_assumption(dfs, methods=METHODS, losses=LOSSES)

In [None]:
# Paired t-test with return_decision=False.
# NOTE: this rebinds `p_values`, replacing the Wilcoxon result of the earlier cell.
p_values = two_sample_paired_ttest(dfs, alpha=0.05, return_decision=False, methods=METHODS, losses=LOSSES)
# The test is only valid for NIQE, because the other scores cannot be assumed
# to be normally distributed.
p_values 

### 2. Pearson's Chi-squared test for race_0-1

In [None]:
# Chi-squared test between the two settings; 0.05 is passed positionally as
# the second argument (presumably the significance level - see the helper).
two_sample_chi2(dfs, 0.05, methods=METHODS)

## 3.3. Evaluating Fairness
### 3.3.1 Plotting Performance per Race

In [None]:
# Fixed y-axis limits for selected losses; every other loss passes ylim=None.
ylim_per_loss = {
    "race_cos": [0, 0.4],
    "race_0-1": [0, 1],
}

# One per-race plot for each loss, first for FairFace and then for UnfairFace.
for loss in LOSSES:
    ylim = ylim_per_loss.get(loss)
    plot_performance_per_race(loss, dfs, "fairface", methods=METHODS, ylim=ylim)
    plot_performance_per_race(loss, dfs, "unfairface", methods=METHODS, ylim=ylim)

### Rescaling the above (for the race prediction accuracy) provides a visualization of RDP

In [None]:
# RDP plots for both settings on a shared y-range of [0, 0.5].
plot_rdp(dfs, "unfairface", methods=METHODS, ylim=[0,0.5])
plot_rdp(dfs, "fairface", methods=METHODS, ylim=[0,0.5])

### 3.3.1 Plotting the Proportional Representation Distribution

In [None]:
# PR plots for both settings on a shared y-range of [0, 0.6].
plot_pr(dfs, "unfairface", methods=METHODS, ylim=[0,0.6])
plot_pr(dfs, "fairface", methods=METHODS, ylim=[0, 0.6])

### 3.3.2. Evaluating Fairness according to the introduced Metrics

In [None]:
# RDP and PR for the FairFace setting. These notebook-level variables are not
# referenced again in the cells shown here; evaluating_fairness recomputes
# both per setting.
rdps = compute_RDP(dfs, "fairface", methods=METHODS, races=RACES)
prs = compute_PR(dfs, "fairface", methods=METHODS, races=RACES)

In [None]:
# Evaluation
# Divergences to report. kl and tv are imported as well; add entries such as
# "KL": kl or "TV": tv here to report them too.
divergences = {"chi": chi, "Cheb": cheb}

def evaluating_fairness(dfs, setting):
    """Summarize RDP and PR of every method with each selected divergence.

    Args:
        dfs: Nested mapping ``dfs[setting][method]`` -> loss DataFrame, as
            consumed by ``compute_RDP`` and ``compute_PR``.
        setting: Key of the setting to evaluate.

    Returns:
        A DataFrame with one row per method in ``METHODS``: a ``method``
        column (display name from ``name_to_str``), then one ``RDP-<name>``
        column per entry of ``divergences``, then one ``PR-<name>`` column
        per entry.
    """
    rdps = compute_RDP(dfs, setting, methods=METHODS, races=RACES)
    prs = compute_PR(dfs, setting, methods=METHODS, races=RACES)

    list_fairness = []
    for method in METHODS:
        fairness = {"method": name_to_str(method)}
        # All RDP columns come before all PR columns.
        for prefix, distribution in (("RDP", rdps[method]), ("PR", prs[method])):
            for div_name, divergence in divergences.items():
                fairness[f"{prefix}-{div_name}"] = divergence(distribution)
        list_fairness.append(fairness)

    # Build the frame once from the records instead of concatenating
    # one-row frames.
    return pd.DataFrame(list_fairness)

In [None]:
# Fairness summary for the UnfairFace setting, displayed as the cell output.
fairness_unfairface = evaluating_fairness(dfs, "unfairface")
fairness_unfairface

In [None]:
# Fairness summary for the FairFace setting, displayed as the cell output.
fairness_fairface = evaluating_fairness(dfs, "fairface")
fairness_fairface 

In [None]:
# Interleave the UnfairFace and FairFace fairness columns in one table.
# Columns suffixed with "-F" hold the FairFace values.
fairness_both = fairness_unfairface[["method"]].copy()
for label in fairness_unfairface.columns.drop("method"):
    fairness_both[label] = fairness_unfairface[label]
    fairness_both[f"{label}-F"] = fairness_fairface[label]
print(fairness_both)

### 3.3.3. Statistically Testing Fairness

In [None]:
# Statistical fairness test: UnfairFace setting, metric "rdp".
testing_fairness(dfs, "unfairface", methods=METHODS, races=RACES, metric="rdp")

In [None]:
# Statistical fairness test: FairFace setting, metric "rdp".
testing_fairness(dfs, "fairface", methods=METHODS, races=RACES, metric="rdp")

In [None]:
# Statistical fairness test: UnfairFace setting, metric "pr".
testing_fairness(dfs, "unfairface", methods=METHODS, races=RACES, metric="pr")

In [None]:
# Statistical fairness test: FairFace setting, metric "pr".
testing_fairness(dfs, "fairface", methods=METHODS, races=RACES, metric="pr")

# 4. Evaluating Diversity

## 4.1. Generate Diversity DataFrames 

In [None]:
def diversity_to_dfs(setting):
    """Load or compute the per-method diversity tables of one setting.

    For every method in ``METHODS`` the table is read from
    ``evaluation/<setting>/losses_<method>.csv`` if that file exists.
    Otherwise it is computed with ``compute_diversity`` and written to that
    file (the ``losses_`` file prefix is kept as is).

    Args:
        setting: One of ``"fairface_avg"``, ``"unfairface_avg"``,
            ``"fairface_noisy_avg"`` or ``"unfairface_noisy_avg"``.

    Returns:
        A dict mapping each method name to its diversity DataFrame.
    """
    assert setting in ["fairface_avg", "unfairface_avg", "fairface_noisy_avg", "unfairface_noisy_avg"]

    # Passed through to compute_diversity on a cache miss: 100 for the plain
    # average inputs, 1 for the noisy ones.
    if setting in ["fairface_avg", "unfairface_avg"]:
        num_duplicates = 100
    elif setting in ["fairface_noisy_avg", "unfairface_noisy_avg"]:
        num_duplicates = 1

    cache_dir = os.path.join("evaluation", setting)
    os.makedirs(cache_dir, exist_ok=True)

    diversity_by_method = {}
    for method in METHODS:
        cache_file = os.path.join(cache_dir, f"losses_{method}.csv")
        if os.path.exists(cache_file):
            diversity_by_method[method] = pd.read_csv(cache_file)
            continue
        computed = compute_diversity(
            img_paths[setting]["real"],
            img_paths[setting][method],
            labels_path=labels_path,
            num_duplicates=num_duplicates,
        )
        computed.to_csv(cache_file, index=True)
        diversity_by_method[method] = computed
    return diversity_by_method
      
# Add the four diversity settings to the notebook-level `dfs` dict that
# already holds the loss tables.
for setting in ["fairface_avg", "unfairface_avg", "fairface_noisy_avg", "unfairface_noisy_avg"]:
    dfs[setting] = diversity_to_dfs(setting)

In [None]:
# UCPR plots for the average-face settings on a shared y-range of [0, 1].
plot_ucpr(dfs, "unfairface_avg", methods=METHODS, races=RACES, ylim=[0, 1.])
plot_ucpr(dfs, "fairface_avg", methods=METHODS, races=RACES, ylim=[0, 1.])

In [None]:
# Evaluation
# Divergences to report. kl and tv are imported as well; add entries such as
# "KL": kl or "TV": tv here to report them too.
divergences = {"chi": chi, "Cheb": cheb}

def evaluating_diversity(dfs, setting):
    """Summarize the UCPR of every method with each selected divergence.

    Args:
        dfs: Nested mapping ``dfs[setting][method]`` -> diversity DataFrame,
            as consumed by ``compute_UCPR``.
        setting: Key of the setting to evaluate.

    Returns:
        A DataFrame with one row per method in ``METHODS``: a ``method``
        column (display name from ``name_to_str``) followed by one
        ``UCPR-<name>`` column per entry of ``divergences``.
    """
    ucprs = compute_UCPR(dfs, setting, methods=METHODS, races=RACES)

    list_diversity = []
    for method in METHODS:
        diversity = {"method": name_to_str(method)}
        for div_name, divergence in divergences.items():
            diversity[f"UCPR-{div_name}"] = divergence(ucprs[method])
        list_diversity.append(diversity)

    # Build the frame once from the records instead of concatenating
    # one-row frames.
    return pd.DataFrame(list_diversity)

In [None]:
# Diversity summary for the UnfairFace average-face setting, displayed as the cell output.
diversity_unfairface = evaluating_diversity(dfs, "unfairface_avg")
diversity_unfairface 

In [None]:
# Diversity summary for the FairFace average-face setting, displayed as the cell output.
diversity_fairface = evaluating_diversity(dfs, "fairface_avg")
diversity_fairface 

In [None]:
# Interleave the UnfairFace and FairFace diversity columns in one table.
# Columns suffixed with "-F" hold the FairFace values.
diversity_both = diversity_unfairface[["method"]].copy()
for label in diversity_unfairface.columns.drop("method"):
    diversity_both[label] = diversity_unfairface[label]
    diversity_both[f"{label}-F"] = diversity_fairface[label]
print(diversity_both)

## 4.2. Testing Diversity

In [None]:
# Statistical test of diversity: UnfairFace average-face setting, metric "ucpr".
testing_fairness(dfs, "unfairface_avg", methods=METHODS, races=RACES, metric="ucpr")

In [None]:
# Statistical test of diversity: FairFace average-face setting, metric "ucpr".
testing_fairness(dfs, "fairface_avg", methods=METHODS, races=RACES, metric="ucpr")

## 4.3. Testing Diversity with Noisy Inputs
Instead of reconstructing each image 100 times, we take one image, compute 100 perturbed versions of it, and reconstruct each of these. The advantage is that this lets us measure diversity for pSp and fairpSp, whose reconstructions are deterministic, i.e., reconstructing the same image always yields the same output. With perturbed inputs, we obtain 100 different reconstructions even for pSp and fairpSp.

In [None]:
# UCPR plots for the noisy average-face settings on a shared y-range of [0, 1].
plot_ucpr(dfs, "unfairface_noisy_avg", methods=METHODS, races=RACES, ylim=[0, 1.])
plot_ucpr(dfs, "fairface_noisy_avg", methods=METHODS, races=RACES, ylim=[0, 1.])

In [None]:
# Diversity summaries for the noisy average-face settings.
# NOTE: this rebinds diversity_unfairface, diversity_fairface and
# diversity_both from the earlier average-face cells; re-run those cells to
# get the noise-free tables back.
diversity_unfairface = evaluating_diversity(dfs, "unfairface_noisy_avg")
diversity_fairface = evaluating_diversity(dfs, "fairface_noisy_avg")

# Interleave both settings in one table; columns suffixed with "-F" hold the
# FairFace values.
diversity_both = diversity_unfairface[["method"]].copy()
for label in diversity_unfairface.columns.drop("method"):
    diversity_both[label] = diversity_unfairface[label]
    diversity_both[f"{label}-F"] = diversity_fairface[label]
print(diversity_both)

In [None]:
# Statistical test of diversity: UnfairFace noisy average-face setting, metric "ucpr".
testing_fairness(dfs, "unfairface_noisy_avg", methods=METHODS, races=RACES, metric="ucpr")

In [None]:
# Statistical test of diversity: FairFace noisy average-face setting, metric "ucpr".
testing_fairness(dfs, "fairface_noisy_avg", methods=METHODS, races=RACES, metric="ucpr")