In [2]:
import os
# Move to the project root (the parent of the directory the kernel starts in).
# The root is computed only on the first execution and remembered, so that
# re-running this cell does not climb one more directory each time.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.dirname(os.getcwd())
os.chdir(PROJECT_ROOT)
print(f"cwd: {os.getcwd()}")  # sanity check

cwd: /home/dude/dev/uni/cs760


In [3]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Directory (relative to the working directory) that holds the evaluation CSVs.
EVAL_DIR = "EvaluationOutputs"

In [5]:
# Evaluation CSVs are named {model_name}_{metric}.csv, where metric is one of
# clip / fov / fvd.
METRICS = ["clip", "fov", "fvd"]
# Model-name prefixes of the evaluation CSVs; also used as result column names.
MODELS = ["wan_1.3", "wan_14", "StableDiffusion", "Diffueraser"]

def load_file(model, metric):
    """Read one evaluation CSV into a DataFrame.

    The file is looked up as ``{EVAL_DIR}/{model}_{metric}.csv``.

    Args:
        model: Model name used as the filename prefix.
        metric: Metric name used as the filename suffix.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_name = f"{model}_{metric}.csv"
    return pd.read_csv(os.path.join(EVAL_DIR, csv_name))

In [6]:
def load_fov_metrics():
    """Collect every model's average FOV ratio into one per-video table.

    For each entry of ``MODELS`` the ``{model}_fov.csv`` file is read through
    ``load_file``, indexed by its ``Video_Name`` column, and its
    ``Avg_FOV_Ratio`` column is kept.

    Returns:
        pd.DataFrame: indexed by video name, one column per model. Only videos
        that have a non-NaN value for every model are kept.
    """
    per_model = [
        load_file(model, "fov").set_index("Video_Name")["Avg_FOV_Ratio"]
        for model in MODELS
    ]
    # Explicit inner join on the video name: keeps only videos present for
    # every model instead of relying on column assignment into an empty frame
    # to decide which rows survive.
    fov_df = pd.concat(per_model, axis=1, join="inner", keys=MODELS)
    return fov_df.dropna()  # drop rows with any NaN values

# Load the per-video FOV ratios for all models and display summary statistics.
fov_df = load_fov_metrics()
fov_df.describe()

Unnamed: 0,wan_1.3,wan_14,StableDiffusion,Diffueraser
count,495.0,495.0,495.0,495.0
mean,0.988156,0.989853,0.995021,0.991632
std,0.033604,0.032953,0.008693,0.0268
min,0.712507,0.710899,0.931252,0.791623
25%,0.995744,0.999004,0.996117,0.998242
50%,0.999814,0.999991,0.997775,0.999969
75%,0.999999,1.0,0.998385,1.0
max,1.0,1.0,0.999504,1.0


In [7]:
def load_clip_metrics():
    """Collect every model's CLIP scores into one per-video table.

    For each entry of ``MODELS`` the ``{model}_clip.csv`` file is read through
    ``load_file`` and indexed by its ``video`` column. The ``inpainted_score``
    column becomes the model's column; the ``original_score`` columns of all
    models are averaged into a single ``original_score`` column.

    Returns:
        pd.DataFrame: indexed by video, with one column per model followed by
        ``original_score``. Only videos with non-NaN inpainted and original
        scores for every model are kept.
    """
    original_cols = [f"{m}_original_score" for m in MODELS]

    inpainted, original = [], []
    for model in MODELS:
        per_video = load_file(model, "clip").set_index("video")
        inpainted.append(per_video["inpainted_score"])
        original.append(per_video["original_score"])

    # Explicit inner join on the video name over all score columns, then drop
    # rows with any NaN, so only videos fully scored by every model remain.
    combined = pd.concat(
        inpainted + original,
        axis=1,
        join="inner",
        keys=MODELS + original_cols,
    ).dropna()

    clip_df = combined[MODELS].copy()
    # average the original scores across models; the per-model original
    # columns are not carried into the result
    clip_df["original_score"] = combined[original_cols].mean(axis=1)
    return clip_df

# Load the per-video CLIP scores for all models and display summary statistics.
clip_df = load_clip_metrics()
clip_df.describe()

Unnamed: 0,wan_1.3,wan_14,StableDiffusion,Diffueraser,original_score
count,495.0,495.0,495.0,495.0,495.0
mean,0.988656,0.987757,0.928142,0.982706,0.987354
std,0.004968,0.005304,0.01461,0.008347,0.005941
min,0.968773,0.968564,0.858954,0.946347,0.966607
25%,0.985744,0.984515,0.919604,0.978363,0.984005
50%,0.989377,0.988629,0.930149,0.984268,0.988499
75%,0.992363,0.991719,0.93759,0.988421,0.991497
max,0.998698,0.998469,0.963287,0.997485,0.998337


In [8]:
def load_fvd_metrics():
    """Collect every model's FVD score into one per-video table.

    For each entry of ``MODELS`` the ``{model}_fvd.csv`` file is read through
    ``load_file``, indexed by its ``video_name`` column, and its ``fvd_score``
    column is kept.

    Returns:
        pd.DataFrame: indexed by video name, one column per model. Only videos
        that have a non-NaN value for every model are kept.
    """
    per_model = [
        load_file(model, "fvd").set_index("video_name")["fvd_score"]
        for model in MODELS
    ]
    # Explicit inner join on the video name: keeps only videos present for
    # every model instead of relying on column assignment into an empty frame
    # to decide which rows survive.
    fvd_df = pd.concat(per_model, axis=1, join="inner", keys=MODELS)
    return fvd_df.dropna()  # drop rows with any NaN values

# Load the per-video FVD scores for all models and display summary statistics.
fvd_df = load_fvd_metrics()
fvd_df.describe()

Unnamed: 0,wan_1.3,wan_14,StableDiffusion,Diffueraser
count,495.0,495.0,495.0,495.0
mean,126.903961,105.287072,287.13297,207.864499
std,72.950566,67.074618,128.229042,98.073899
min,13.807621,10.478917,81.477386,28.819233
25%,72.712381,51.703164,198.020436,127.37592
50%,114.965544,93.944943,256.602709,204.354432
75%,167.009653,141.462522,338.730415,263.580506
max,551.929092,494.688997,826.465739,551.958455


In [None]:
# Build the manual-evaluation worklist: for every model and metric, the worst-k
# and best-k videos. Columns written: "video_name", "model", "metric", "rank",
# "completed".
# NOTE(review): the file is opened in "w" mode and every row is written with
# completed=0, so re-running this cell resets any flags edited by hand — confirm
# that is intended before re-executing.
summary_fp = os.path.join(EVAL_DIR, "manual_eval.csv")
fieldnames = ["video_name", "model", "metric", "rank", "completed"]

worst_k = 3
best_k = 3

# (metric name, per-video score table, whether a larger score is better)
ranked_metrics = [
    ("clip", clip_df, True),   # higher is better
    ("fvd", fvd_df, False),    # lower is better
]

# Open the file once: header first, then all rows in model/metric order.
with open(summary_fp, "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # for each metric, find the worst k and best k videos for each model
    for model in MODELS:
        print(f"Model: {model}")
        for metric, df, higher_is_better in ranked_metrics:
            scores = df[model]
            if higher_is_better:
                worst_videos = scores.nsmallest(worst_k).index.tolist()
                best_videos = scores.nlargest(best_k).index.tolist()
            else:
                worst_videos = scores.nlargest(worst_k).index.tolist()
                best_videos = scores.nsmallest(best_k).index.tolist()

            # worst rows first, then best rows; rank labels are 1-based
            for label, videos in (("worst", worst_videos), ("best", best_videos)):
                for rank, video in enumerate(videos, start=1):
                    writer.writerow({
                        "video_name": video,
                        "model": model,
                        "metric": metric,
                        "rank": f"{label}_{rank}",
                        "completed": 0,
                    })

Model: wan_1.3
Model: wan_14
Model: StableDiffusion
Model: Diffueraser
