In [1]:
import os
import sys

import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr, spearmanr

sys.path.insert(0, "..")

from validator_tests.eval_validators import _get_correlation, group_by_task_validator
from validator_tests.utils.constants import TARGET_ACCURACY
from validator_tests.utils.df_utils import get_name_from_df, unify_validator_columns
from validator_tests.utils.plot_val_vs_acc import scatter_plot

In [2]:
# Processed results of the two experiment runs analysed in this notebook.
# NOTE(review): these are absolute, machine-specific paths; pickles should
# only ever be loaded from trusted locations.
PROCESSED_PICKLES = (
    "/home/tkm45/NEW_STUFF/2021_experiments/office31_amazon_webcam_fl6_Adam_lr1/all_dfs_processed.pkl",
    "/home/tkm45/NEW_STUFF/2021_experiments/office31_amazon_webcam_fl3_Adam_lr1/all_dfs_processed.pkl",
)

# Load each run into its own frame, then stack them row-wise into `df`.
df1, df2 = (pd.read_pickle(pickle_path) for pickle_path in PROCESSED_PICKLES)
df = pd.concat([df1, df2], axis=0)

In [3]:
def add_noise(original_df, scale, rng=None):
    """Return a copy of ``original_df`` with Gaussian noise added to the target accuracy.

    One zero-mean normal noise value is drawn per unique
    ``(adapter, trial_num, epoch)`` combination and shared by every row that
    has that combination, so all such rows receive the same perturbation.
    The perturbed ``TARGET_ACCURACY`` column is clipped to ``[0, 1]``.

    Args:
        original_df: DataFrame with the columns ``adapter``, ``trial_num``,
            ``epoch`` and ``TARGET_ACCURACY``. It is not modified.
        scale: Standard deviation of the noise.
        rng: Optional random source exposing ``normal(scale=..., size=...)``,
            e.g. ``np.random.default_rng(seed)``. When omitted, numpy's
            global random state is used.

    Returns:
        A new DataFrame holding the rows of ``original_df`` in their original
        order, with an added ``noise`` column and the noisy, clipped
        ``TARGET_ACCURACY``.
    """
    group_cols = ["adapter", "trial_num", "epoch"]
    sampler = np.random if rng is None else rng

    # One row per unique key combination, each with its own noise draw.
    keys = original_df[group_cols].drop_duplicates()
    noise_df = keys.assign(noise=sampler.normal(scale=scale, size=len(keys)))

    # Merge on the keys only; a left join keeps every original row and its
    # order, and `validate` guards against accidental row multiplication.
    noisy = original_df.merge(
        noise_df, on=group_cols, how="left", validate="many_to_one"
    )
    noisy[TARGET_ACCURACY] = (noisy[TARGET_ACCURACY] + noisy["noise"]).clip(
        lower=0, upper=1
    )
    return noisy


def get_correlation(df, per_adapter):
    """Run ``_get_correlation`` on a copy of ``df`` under the name ``weighted_spearman``.

    Args:
        df: Input DataFrame; a copy is passed on, so the caller's frame is
            not handed to ``_get_correlation`` directly.
        per_adapter: Forwarded unchanged to ``_get_correlation``.

    Returns:
        Whatever ``_get_correlation`` returns when called with
        ``src_threshold=0.0`` and ``name="weighted_spearman"``.
    """
    frame = df.copy()
    return _get_correlation(
        frame,
        per_adapter=per_adapter,
        src_threshold=0.0,
        name="weighted_spearman",
    )


def get_acc(df, per_adapter, N):
    """Run ``_get_correlation`` on a copy of ``df`` with a top-``N`` accuracy score function.

    Args:
        df: Input DataFrame; a copy is passed on to ``_get_correlation``.
        per_adapter: Forwarded unchanged to ``_get_correlation``.
        N: Number of largest ``TARGET_ACCURACY`` values averaged by the
            score function; also used in the result name ``top_{N}_acc``.

    Returns:
        Whatever ``_get_correlation`` returns when called with
        ``src_threshold=0.0``, ``name=f"top_{N}_acc"`` and the score function
        defined below.
    """

    def score_fn(x):
        # `x` is whatever `_get_correlation` hands to its score function; it
        # is indexed by TARGET_ACCURACY and must expose `.values`.
        accuracies = x[TARGET_ACCURACY].values
        # Negate, sort ascending, negate back: accuracies in descending order.
        descending = -np.sort(-accuracies)
        return np.mean(descending[:N])

    result_name = f"top_{N}_acc"
    return _get_correlation(
        df.copy(),
        per_adapter=per_adapter,
        src_threshold=0.0,
        name=result_name,
        score_fn=score_fn,
    )

In [5]:
# Noise-free reference results that every noisy run is compared against.
N = 5
corr = get_correlation(df, per_adapter=False)
acc = get_acc(df, per_adapter=False, N=N)
print(len(corr), len(acc))

# Spearman rank agreement with the reference, one entry per noise scale.
s = {"corr_with_noise": [], f"top_{N}_with_noise": []}
for scale in np.linspace(0, 1, 101):
    # Perturb the target accuracy, then recompute both result tables.
    df_with_noise = add_noise(df, scale)
    corr_with_noise = get_correlation(df_with_noise, per_adapter=False)
    acc_with_noise = get_acc(df_with_noise, per_adapter=False, N=N)

    # Rows are compared positionally (via `.values`), so both tables are
    # assumed to come back in the same row order — TODO confirm.
    noisy_corr_values = corr_with_noise["weighted_spearman"].values
    clean_corr_values = corr["weighted_spearman"].values
    s1 = spearmanr(noisy_corr_values, clean_corr_values).correlation

    noisy_acc_values = acc_with_noise[f"top_{N}_acc"].values
    clean_acc_values = acc[f"top_{N}_acc"].values
    s2 = spearmanr(noisy_acc_values, clean_acc_values).correlation

    s["corr_with_noise"].append(s1)
    s[f"top_{N}_with_noise"].append(s2)
    print(scale, s1, s2)

74 74
