In [None]:
## This cell runs independent-samples t-tests on the emotion-label PC1 scores,
## comparing peak vs. trough within each ROI (MD or amygdala), Holm-corrected,
## and plots the peak and trough means of each ROI, annotated with whether
## the difference is significant.

## NOTE: you'll have to run "emotion_labels__PCA_and_inter-rater_reliability_git" first
## to get "portrayed_PC_scores_labeled_by_movie.csv" (read below) and the viewer csv

import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests

## do the stats (t-test with holm-correction)

# 1. Load the per-movie PC scores
scores_raw = pd.read_csv("portrayed_PC_scores_labeled_by_movie.csv")

# 1b. Collapse consecutive row pairs (rows 0+1, 2+3, ...) into one row each:
#   - every numeric column is averaged within the pair
#   - 'label' keeps the first (non-missing) value of the pair
scores_raw = scores_raw.reset_index(drop=True)
pair_id = scores_raw.index // 2  # rows 2k and 2k+1 share pair id k
numeric_part = scores_raw.select_dtypes(include=[np.number])
df = pd.concat(
    [
        scores_raw["label"].groupby(pair_id).first(),
        numeric_part.groupby(pair_id).mean(),
    ],
    axis=1,
).reset_index(drop=True)

# 2. Contrasts (peak vs. trough per ROI), display names, and bar colours
comparisons = [
    ("AM_high", "AM_low"),
    ("MD_high", "MD_low"),
]
# Human-readable name for each condition label
label_map = dict(
    [
        ("AM_high", "amygdala peak"),
        ("AM_low", "amygdala trough"),
        ("MD_high", "MD peak"),
        ("MD_low", "MD trough"),
    ]
)
# Bar colour per ROI prefix
colors = dict([("AM", "#EDC001"), ("MD", "#A5B299")])

# 3. Run t-tests and collect p-values
#    For every (peak, trough) contrast: per-group mean/SEM of PC1 (used for the
#    table and the bars) and an independent-samples t-test with equal variances
#    assumed.
results = []
raw_p = []
for hi, lo in comparisons:
    subset = df[df["label"].isin([hi, lo])]
    desc = subset.groupby("label")["PC1"].agg(["mean", "sem"]).reindex([hi, lo])

    # Drop missing scores once, so that the test, the descriptives (which skip
    # NaN) and the degrees of freedom all refer to the same observations.
    hi_scores = subset.loc[subset["label"] == hi, "PC1"].dropna()
    lo_scores = subset.loc[subset["label"] == lo, "PC1"].dropna()
    t_stat, p_val = ttest_ind(
        hi_scores,
        lo_scores,
        equal_var=True, nan_policy="omit"
    )

    # Degrees of freedom of the pooled-variance test: count only the values
    # that actually entered the test (NaN rows must not inflate df).
    n1 = len(hi_scores)
    n2 = len(lo_scores)
    dfree = n1 + n2 - 2
    results.append({
        "hi": hi,
        "lo": lo,
        "desc": desc,
        "t": t_stat,
        "p_raw": p_val,
        "df": dfree
    })
    raw_p.append(p_val)

# 4. Holm correction across all contrasts; store the adjusted p-value
#    alongside each contrast's other results
_, p_corr, _, _ = multipletests(raw_p, method="holm")
for r, pc in zip(results, p_corr):
    r["p_corr"] = pc

# 5. Print summary table (tab-separated, one row per contrast)
# Loop-local names are deliberately distinct from the module-level `p_corr`
# array produced by the Holm step, so the corrected p-values remain available
# as an array after the table has been printed.
print("Contrast\tMean_hi±SEM\tMean_lo±SEM\tt-stat\t df\tp_corr")
for row in results:
    peak, trough = row["hi"], row["lo"]
    stats_tbl = row["desc"]  # mean/SEM indexed by condition label
    print(
        f"{peak} vs {trough}\t"
        f"{stats_tbl.loc[peak, 'mean']:.3f}±{stats_tbl.loc[peak, 'sem']:.3f}\t"
        f"{stats_tbl.loc[trough, 'mean']:.3f}±{stats_tbl.loc[trough, 'sem']:.3f}\t"
        f"{row['t']:.3f}\t"
        f"{row['df']}\t"
        f"{row['p_corr']:.4g}"
    )

# 6. Significance stars helper
def stars(p):
    """Return the significance marker for p-value *p*.

    "***" for p < 0.001, "**" for p < 0.01, "*" for p < 0.05, otherwise "ns".
    """
    # Cutoffs are checked from strictest to loosest; the first one that the
    # p-value falls below decides the marker.
    for cutoff, marker in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p < cutoff:
            return marker
    return "ns"


# 7. Make the combined figure (uniform fonts, shared y‐axis, generic x‐labels, legend at top right)

# Shared y-axis range: take each panel's (mean ± SEM) extent, pad it, and use
# the widest padded range so both panels match.
panel_tops = [(r["desc"]["mean"] + r["desc"]["sem"]).max() for r in results]
panel_bots = [(r["desc"]["mean"] - r["desc"]["sem"]).min() for r in results]

# Padding is relative to each panel's extent: 10 % below; above, 5 % plus a
# further 30 % of headroom.
lower_limits = [
    bot - 0.1 * (top - bot)
    for top, bot in zip(panel_tops, panel_bots)
]
upper_limits = [
    top + 0.05 * (top - bot) + 0.3 * (top - bot)
    for top, bot in zip(panel_tops, panel_bots)
]

global_lower = min(lower_limits)
global_upper = max(upper_limits)

fig, axes = plt.subplots(1, 2, figsize=(14, 7), constrained_layout=True)

# One panel per contrast: two bars (peak, trough) with SEM error bars and a
# significance bracket annotated with stars and the corrected p-value.
for panel_no, (ax, res) in enumerate(zip(axes, results)):
    stats_tbl = res["desc"]
    p_adj = res["p_corr"]
    roi = res["hi"].split("_")[0]  # label prefix: "AM" or "MD"
    bar_pos = np.arange(2)

    # Bars with error bars, plus a dashed reference line at zero
    ax.bar(
        bar_pos,
        stats_tbl["mean"],
        yerr=stats_tbl["sem"],
        width=0.6,
        color=colors[roi],
        edgecolor="black",
        capsize=5,
    )
    ax.axhline(0, color="gray", linestyle="--", linewidth=1)

    # Bracket geometry, scaled to this panel's (mean ± SEM) extent
    extent_top = (stats_tbl["mean"] + stats_tbl["sem"]).max()
    extent_bot = (stats_tbl["mean"] - stats_tbl["sem"]).min()
    extent = extent_top - extent_bot
    bracket_base = extent_top + 0.05 * extent
    tick = 0.02 * extent
    bracket_top = bracket_base + tick

    ax.plot(
        [0, 0, 1, 1],
        [bracket_base, bracket_top, bracket_top, bracket_base],
        color="black",
        lw=1.2,
    )

    # Stars centred over the bracket, p-value text starting just right of them
    annot_y = bracket_base + tick * 1.2
    annot_style = dict(va="bottom", fontsize=14)
    ax.text(0.5, annot_y, stars(p_adj), ha="center", **annot_style)
    ax.text(0.5 + 0.1, annot_y, f"p = {p_adj:.2g}", ha="left", **annot_style)

    # Apply the shared y-limits
    ax.set_ylim(global_lower, global_upper)

    # Axis labels and ticks, all at fontsize 14
    ax.set_xticks(bar_pos)
    ax.set_xticklabels(["peak", "trough"], fontsize=14)
    ax.set_xlabel("Group", fontsize=14)
    if panel_no:
        ax.set_ylabel("")  # only the first panel carries the y-label
    else:
        ax.set_ylabel("Mean PC1", fontsize=14)

    # Open frame: hide the top and right spines
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)

    # Panel letter (a/b) just above the top-left corner of the axes
    ax.text(
        -0.05, 1.02,
        "b" if panel_no else "a",
        transform=ax.transAxes, fontsize=14,
    )

# One figure-level patch legend at the top right (Amygdala = yellow, MD = green)
legend_handles = [
    Patch(color=colors[roi_key], label=roi_name)
    for roi_key, roi_name in (("AM", "Amygdala"), ("MD", "MD"))
]
legend_style = dict(
    loc="upper right",
    bbox_to_anchor=(0.98, 0.95),
    fontsize=14,
    frameon=False,
)
fig.legend(handles=legend_handles, **legend_style)

# Key explaining the significance markers, centred below the x-axes, fontsize 14
significance_key = "ns: p ≥ 0.05    *: p < 0.05    **: p < 0.01    ***: p < 0.001"
fig.text(0.5, -0.04, significance_key, ha="center", fontsize=14)

plt.show()
