In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import mannwhitneyu
from observational_fear.stats import p_adjust, auc

from pathlib import Path

# NOTE: machine-specific absolute Windows paths; edit before running elsewhere.
DATA_DIR = Path(r"F:\OFL\one-p experiment\data")
# Not referenced by the cells visible in this part of the notebook.
FIG_DIR = Path(r"C:\Users\roryl\repos\observational-fear\figs")
# Parquet inputs are read from, and CSV results are written to, this folder.
SAVE_DIR = DATA_DIR.joinpath("derived", "decoding-aligned")

In [2]:
def compare_by_group(df, group_col, value_col):
    """Compare `value_col` between the two groups of `group_col` (Mann-Whitney U).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both `group_col` and `value_col`.
    group_col : str
        Column whose distinct values define the groups. It must hold exactly
        two distinct values.
    value_col : str
        Column holding the values to compare between the two groups.

    Returns
    -------
    pandas.Series
        Indexed by ``"u"`` (the statistic returned by
        ``scipy.stats.mannwhitneyu``) and ``"p"`` (its two-sided p-value).
        The groups are passed to the test in order of first appearance in
        `df`.

    Raises
    ------
    AssertionError
        If `group_col` does not contain exactly two distinct values.
    """
    groups = df[group_col].unique()
    # The check fails for fewer than two groups as well as for more than two,
    # so the message reports the actual count instead of "too many".
    assert len(groups) == 2, (
        f"Expected exactly 2 groups in {group_col!r}, found {len(groups)}"
    )
    first, second = groups
    x = df.loc[df[group_col] == first, value_col].to_numpy()
    y = df.loc[df[group_col] == second, value_col].to_numpy()
    # Spell out the alternative so the p-value does not depend on which
    # default the installed SciPy version happens to use.
    u, p = mannwhitneyu(x, y, alternative="two-sided")
    return pd.Series({"u": u, "p": p})


def compare_auc_at_intervals(df, auc_intervals):
    """Run `compare_by_group` on per-(config, shuffle) `auc` values for each interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``offset``, ``config``, ``shuffle`` and ``f1score``.
    auc_intervals : iterable of (t_start, t_stop)
        Bounds on ``offset``. Both bounds are exclusive.

    Returns
    -------
    pandas.DataFrame
        One row per interval. The columns are emitted in reverse order of
        creation, i.e. ``t_stop``, ``t_start``, ``p``, ``u``.
    """
    rows = []
    for lower, upper in auc_intervals:
        # Strict inequalities: rows whose offset equals a bound are dropped.
        in_window = (df.offset > lower) & (df.offset < upper)
        windowed = df.loc[in_window]
        # `auc` comes from observational_fear.stats and is applied to each
        # group's f1score values (presumably an area under the curve; confirm).
        auc_per_group = (
            windowed.groupby(["config", "shuffle"])["f1score"]
            .apply(auc)
            .reset_index()
        )
        row = compare_by_group(auc_per_group, group_col="config", value_col="f1score")
        row["t_start"] = lower
        row["t_stop"] = upper
        rows.append(row)
    summary = pd.DataFrame(rows)
    # Flip the column order so the interval bounds precede the test results.
    return summary.iloc[:, ::-1]

def load_data(name):
    """Read ``<name>.parquet.gzip`` from ``SAVE_DIR`` and return it as a DataFrame."""
    parquet_path = SAVE_DIR.joinpath(f"{name}.parquet.gzip")
    return pd.read_parquet(parquet_path)


In [3]:

# (t_start, t_stop) offset windows handed to compare_auc_at_intervals.
auc_intervals = [(-10, 10), (-10, 0), (-2.5, 2.5), (0, 10)]

In [4]:
# Every dataset goes through the same pipeline, so run them in one loop
# instead of one copy-pasted cell per dataset. The order matches the original
# cells, so after the loop `df`, `pval_ts` and `auc_res` hold the results for
# the last dataset, exactly as before.
dataset_names = [
    "USD2",
    "USD4",
    "USCombined",
    "StartD2",
    "StopD2",
    "StartStopD2Combined",
]

for dataset_name in dataset_names:
    # Mean f1score for each (config, shuffle, offset) combination.
    df = (
        load_data(dataset_name)
        .groupby(["config", "shuffle", "offset"], as_index=False)["f1score"]
        .mean()
    )
    # One between-config comparison per offset; `p_adjust` (from
    # observational_fear.stats) then receives the whole column of p-values
    # (presumably a multiple-comparison correction; confirm).
    pval_ts = (
        df.groupby(["offset"])
        .apply(compare_by_group, group_col="config", value_col="f1score")
        .reset_index()
        .assign(p=lambda x: p_adjust(x.p))
    )
    auc_res = compare_auc_at_intervals(df, auc_intervals)
    # File names are kept byte-identical to the originals (including the
    # missing space in " -AUC") so existing readers of these CSVs still work.
    pval_ts.to_csv(SAVE_DIR / f"{dataset_name} - p val time series.csv", index=False)
    auc_res.to_csv(SAVE_DIR / f"{dataset_name} -AUC at intervals.csv", index=False)