In [None]:
from src.toolkit.process_results import extract_results
from src.toolkit.post_metrics import compute_forgetting, compute_average, compute_average_forgetting, compute_AAA, compute_mean_std_metric, compute_wcacc, decorate_with_training_task
import seaborn as sns
import matplotlib.pyplot as plt
import os
plt.style.use("matplotlibrc.template")
colors = plt.cm.Dark2.colors

In [None]:
def create_results_dict(benchmark, memory_size, include_gdumb=False):
    """Map plot labels to result directory names for one benchmark / memory size.

    Args:
        benchmark: Benchmark identifier embedded in every directory name
            (e.g. ``"split_cifar100"``).
        memory_size: Memory size embedded in every directory name.
        include_gdumb: When True, a ``"GDumb"`` entry is appended after
            ``"i.i.d"``.

    Returns:
        dict: Insertion-ordered mapping ``plot label -> directory name``.
        The plotting cells pick ``colors[i]`` from the position of each entry,
        so the order of the entries is part of the contract.
    """
    # NOTE(review): the literal "20" / "1" in the names presumably encode the
    # number of experiences of the run ("1" for the i.i.d baseline) -- confirm
    # against the experiment naming scheme.
    results_dict = {
        "ER": f"er_{benchmark}_20_{memory_size}",
        # Backslash is escaped explicitly: "\o" is not a valid escape sequence
        # and triggers a warning on recent Python versions. The resulting key
        # is unchanged (a literal backslash followed by "operatorname").
        "$\\operatorname{ER-ACE}$": f"er_ace_{benchmark}_20_{memory_size}",
        "DER++": f"der_{benchmark}_20_{memory_size}",
        "MIR": f"mir_{benchmark}_20_{memory_size}",
        "ER + LwF": f"er_lwf_{benchmark}_20_{memory_size}",
        #"AGEM":  f"agem_{benchmark}_20_{memory_size}",
        "RAR": f"rar_{benchmark}_20_{memory_size}",
        "SCR": f"scr_{benchmark}_20_{memory_size}",
        "i.i.d": f"er_{benchmark}_1_{memory_size}",
    }

    if include_gdumb:
        results_dict["GDumb"] = f"gdumb_{benchmark}_20_{memory_size}"

    return results_dict

In [None]:
# Directory onto which the per-method result directory names are joined.
results_path = "ENTER RESULTS PATH HERE"
memory_size = 2000
benchmark = "split_cifar100"
# Build the label -> directory mapping with the shared helper instead of a
# hand-copied dict literal, so the two definitions cannot drift apart.
# GDumb stays excluded (include_gdumb defaults to False), as it was in the
# literal this replaces; entries and their order are identical.
results_dict = create_results_dict(benchmark, memory_size)
print(results_dict)

# Adjusted accuracy curve

In [None]:
%matplotlib qt
# Number of per-experience accuracy columns; also the denominator of the
# adjustment applied to the stream accuracy below.
n_experiences = 20
stream_acc_key = "Top1_Acc_Stream/eval_phase/valid_stream/Task000"
metric_list = [
    f"Top1_Acc_Exp/eval_phase/valid_stream/Task000/Exp{exp_id:03d}"
    for exp_id in range(n_experiences)
]

# Draw on a dedicated figure. With the persistent `%matplotlib qt` backend the
# implicit "current axes" may still be the figure of a previously run cell, in
# which case these curves would be drawn on top of it.
fig, ax = plt.subplots()

for i, (method_label, method_path) in enumerate(results_dict.items()):
    frames_method = extract_results(os.path.join(results_path, method_path))
    df = frames_method["continual"]
    if method_label != "i.i.d":
        # Per row: number of per-experience accuracy columns that are non-null.
        # NOTE(review): presumably only experiences seen so far are evaluated,
        # so this is the index of the current training experience -- confirm.
        df["training_exp"] = df[metric_list].count(axis=1)
        # Rescale the stream accuracy by the fraction of experiences counted.
        df["StreamAcc"] = df[stream_acc_key] * df["training_exp"] / n_experiences
    else:
        df["StreamAcc"] = df[stream_acc_key]

    # Rolling mean over 10 rows, computed independently for every seed;
    # dropping the "seed" index level re-aligns the result with df's index.
    df["smoothed_StreamAcc"] = (
        df.groupby("seed")["StreamAcc"]
        .rolling(10, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)
    )

    # colors[i]: the palette is indexed by the method's position in results_dict.
    sns.lineplot(
        data=df,
        x="mb_index",
        y="smoothed_StreamAcc",
        errorbar="sd",
        color=colors[i],
        ax=ax,
    )

ax.set_xlabel("Batch index")
ax.set_ylabel("Stream Accuracy")

# Forgetting Figures

In [None]:
# Forgetting (on normal stream)

# Passed as the second positional argument of compute_average_forgetting and
# used for the x ticks (1..n_experiences).
# NOTE(review): presumably the number of experiences in the stream -- confirm
# against compute_average_forgetting's signature.
n_experiences = 20

# Dedicated figure: with a persistent GUI backend the implicit current axes
# may still belong to the figure of a previously run cell.
fig, ax = plt.subplots()

for i, (method_label, method_path) in enumerate(results_dict.items()):
    # The i.i.d baseline is left out of the forgetting plot.
    if method_label == "i.i.d":
        continue

    frames = extract_results(os.path.join(results_path, method_path))

    # Solid, labelled line: average forgetting from the per-experience accuracy.
    df = compute_average_forgetting(
        frames["training"],
        n_experiences,
        base_name="Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp",
    )
    sns.lineplot(
        data=df,
        x="training_exp",
        y="Average_Forgetting",
        errorbar=None,
        label=method_label,
        color=colors[i],
        ax=ax,
    )

    # Dashed line in the same colour (no label, so no second legend entry):
    # the same computation on the cumulative-accuracy columns.
    df = compute_average_forgetting(
        frames["training"],
        n_experiences,
        base_name="CumulativeAccuracy/eval_phase/test_stream/Exp",
        name="Average_Cumulative_Forgetting",
    )
    sns.lineplot(
        data=df,
        x="training_exp",
        y="Average_Cumulative_Forgetting",
        errorbar=None,
        color=colors[i],
        linestyle="--",
        ax=ax,
    )

ax.set_xlabel("Training task")
ax.set_ylabel("Forgetting")
ax.set_xticks(list(range(1, n_experiences + 1)))

# Task shift figure

In [None]:

# NOTE(review): 238 is presumably the number of mini-batches per experience,
# which would put step1 at the boundary after the 4th experience -- confirm.
batches_per_exp = 238
step1 = batches_per_exp * 4
half_window = batches_per_exp // 2

key1 = "Top1_Acc_Exp/eval_phase/valid_stream/Task000/Exp003"
key2 = "Top1_Acc_Exp/eval_phase/valid_stream/Task000/Exp004"

# Dedicated figure: with a persistent GUI backend the implicit current axes
# may still belong to the figure of a previously run cell.
fig, ax = plt.subplots()

for i, (method_label, method_path) in enumerate(results_dict.items()):
    # Only these two methods are compared in this figure.
    if method_label not in ["ER", "SCR"]:
        continue

    frames = extract_results(os.path.join(results_path, method_path))

    df = frames["continual"]
    # Keep only the rows strictly inside the window centred on step1.
    # .copy() makes the selection an independent frame, so the column
    # assignments below do not raise SettingWithCopyWarning.
    in_window = (df.mb_index > step1 - half_window) & (df.mb_index < step1 + half_window)
    df = df[in_window].copy()

    # Per-seed rolling mean (10 rows) of both accuracy columns, computed on the
    # windowed rows only.
    for key in (key1, key2):
        df[f"smoothed_{key}"] = (
            df.groupby("seed")[key]
            .rolling(10, min_periods=1)
            .mean()
            .reset_index(level=0, drop=True)
        )

    # Exp003 is dashed and carries the legend label; Exp004 is solid, in the
    # same colour, without a label.
    sns.lineplot(
        data=df,
        x="mb_index",
        y=f"smoothed_{key1}",
        label=method_label,
        color=colors[i],
        linestyle="--",
        ax=ax,
    )
    sns.lineplot(data=df, x="mb_index", y=f"smoothed_{key2}", color=colors[i], ax=ax)

ax.set_xlabel("Batch index")
ax.set_ylabel("Task accuracy")

# Cumulative Accuracy figure

In [None]:

# Column plotted for every selected method.
key = "CumulativeAccuracy/eval_phase/valid_stream/Exp010"

# Dedicated figure: with a persistent GUI backend the implicit current axes
# may still belong to the figure of a previously run cell.
fig, ax = plt.subplots()

for i, (method_label, method_path) in enumerate(results_dict.items()):
    # Only these two methods are compared in this figure.
    if method_label not in ["ER", "SCR"]:
        continue

    frames = extract_results(os.path.join(results_path, method_path))

    df = frames["continual"]

    # Per-seed rolling mean over 10 rows; dropping the "seed" index level
    # re-aligns the result with df's index.
    df[f"smoothed_{key}"] = (
        df.groupby("seed")[key]
        .rolling(10, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)
    )

    sns.lineplot(
        data=df,
        x="mb_index",
        y=f"smoothed_{key}",
        label=method_label,
        color=colors[i],
        ax=ax,
    )

# Explicit labels, as in the other figures; otherwise the axes show the raw
# column names ("mb_index" and the long "smoothed_..." metric key).
ax.set_xlabel("Batch index")
ax.set_ylabel("Cumulative accuracy")
    

# Multiple memory sizes

In [None]:
from collections import defaultdict

filtered_methods = ["ER", "i.i.d", "GDumb"]
final_acc_key = "Top1_Acc_Stream/eval_phase/test_stream/Task000"

# means : {"method-name": [mean_mem_1, mean_mem_2, ...]}
# stds has the same layout; list positions follow `memory_sizes`.
means, stds = defaultdict(list), defaultdict(list)

memory_sizes = [500, 2000, 8000]

# Loop-local names are used on purpose: rebinding the notebook-level
# `memory_size` / `results_dict` here would leave them at the last sweep value
# (including the extra GDumb entry) for any plotting cell that is re-run later.
for swept_memory_size in memory_sizes:
    swept_results = create_results_dict(benchmark, swept_memory_size, include_gdumb=True)
    for method in filtered_methods:
        run_dir = os.path.join(results_path, swept_results[method])
        frame = extract_results(run_dir)["training"]
        mean, std = compute_mean_std_metric(frame, final_acc_key)
        means[method].append(mean)
        stds[method].append(std)


In [None]:
# Palette index per method. "ER" (0) and "i.i.d" (7) match the positions these
# labels have in the dict built by create_results_dict, so they keep the same
# colours as in the per-method figures.
method_colors = {"ER": 0, "i.i.d": 7, "GDumb": 5}

# Dedicated figure: with a persistent GUI backend the implicit current axes
# may still belong to the figure of a previously run cell.
fig, ax = plt.subplots()

# One error-bar curve per method: mean over memory sizes, std as the bar.
for method in filtered_methods:
    ax.errorbar(
        x=memory_sizes,
        y=means[method],
        yerr=stds[method],
        color=colors[method_colors[method]],
        label=method,
    )

ax.legend()
ax.set_xlabel("Memory size")
ax.set_ylabel("Final Accuracy")
# Tick only at the memory sizes that were actually evaluated.
ax.set_xticks(memory_sizes)

# Joint adjusted accuracy (Split-CIFAR100 and Split-TinyImageNet)

In [None]:
def _plot_adjusted_stream_acc(method_dirs, root, ax, with_labels, n_experiences=20):
    """Plot the smoothed, adjusted stream accuracy of every method on `ax`.

    Args:
        method_dirs: Ordered mapping ``plot label -> result directory name``;
            the position of an entry selects its colour in `colors`.
        root: Directory onto which each result directory name is joined.
        ax: Matplotlib axes to draw on.
        with_labels: When True each line is given its method label (and so
            shows up in the legend); when False the lines are unlabelled.
        n_experiences: Number of per-experience accuracy columns, also used as
            the denominator of the adjustment.
    """
    stream_acc_key = "Top1_Acc_Stream/eval_phase/valid_stream/Task000"
    exp_acc_keys = [
        f"Top1_Acc_Exp/eval_phase/valid_stream/Task000/Exp{exp_id:03d}"
        for exp_id in range(n_experiences)
    ]

    for idx, (label, run_dir) in enumerate(method_dirs.items()):
        df = extract_results(os.path.join(root, run_dir))["continual"]
        if label != "i.i.d":
            # Per row: number of per-experience accuracy columns that are
            # non-null; the stream accuracy is rescaled by that fraction.
            df["training_exp"] = df[exp_acc_keys].count(axis=1)
            df["StreamAcc"] = df[stream_acc_key] * df["training_exp"] / n_experiences
        else:
            df["StreamAcc"] = df[stream_acc_key]

        # Per-seed rolling mean over 10 rows, re-aligned with df's index.
        df["smoothed_StreamAcc"] = (
            df.groupby("seed")["StreamAcc"]
            .rolling(10, min_periods=1)
            .mean()
            .reset_index(level=0, drop=True)
        )

        # Only pass `label` when requested, so unlabelled panels add nothing
        # to the legend.
        label_kwargs = {"label": label} if with_labels else {}
        sns.lineplot(
            data=df,
            x="mb_index",
            y="smoothed_StreamAcc",
            errorbar="sd",
            color=colors[idx],
            ax=ax,
            **label_kwargs,
        )


# NOTE(review): hard-coded absolute path for the TinyImageNet runs. It is kept
# in its own variable instead of overwriting `results_path`, so re-running this
# cell (or any earlier one) still reads the CIFAR-100 runs from the configured
# location. Consider moving it next to `results_path` in the configuration cell.
tinyimagenet_results_path = "/DATA/ocl_survey/results_server/"

fig, axes = plt.subplots(1, 2)

# Left panel: Split-CIFAR100, memory size 2000, unlabelled lines.
_plot_adjusted_stream_acc(
    create_results_dict("split_cifar100", 2000),
    results_path,
    axes[0],
    with_labels=False,
)
axes[0].set_xlabel("Batch index")
axes[0].set_ylabel("Stream Accuracy")

# Right panel: Split-TinyImageNet, memory size 4000; its labelled lines provide
# the legend.
_plot_adjusted_stream_acc(
    create_results_dict("split_tinyimagenet", 4000),
    tinyimagenet_results_path,
    axes[1],
    with_labels=True,
)
axes[1].set_xlabel("Batch index")
axes[1].set_ylabel("")