In [None]:
## this block will conduct t-tests on the emotion label PCs
## based on the ROI (MD or amygdala), Holm corrected
## and return a graph showing the mean of peak and trough 
## of the ROI and whether the difference is significant

## NOTE: run "emotion_labels__PCA_and_inter-rater_reliability_git" first
## to generate the "portrayed_PC_scores_labeled_by_movie.csv" file read below

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind

# 1.  Read data
# The CSV must provide a "label" column (group names such as "AM_high") and
# the principal-component column named by PC_COL below.
df = pd.read_csv("portrayed_PC_scores_labeled_by_movie.csv")

# Principal component under test.  Kept in one place so the statistics and the
# printed summary cannot drift apart (earlier comments/messages said PC2 while
# the code analysed PC3).
PC_COL = "PC3"


# 2.  Define the two contrasts we care about (high vs low label per ROI)
comparisons = [("AM_high", "AM_low"),
               ("MD_high", "MD_low")]

# 3.  Run both t-tests and collect results
results = []
raw_pvals = []

for hi, lo in comparisons:
    subset = df[df["label"].isin([hi, lo])]

    # Per-group mean and SEM, ordered (hi, lo) so downstream consumers can
    # rely on row 0 being the "hi" group and row 1 the "lo" group.
    desc = subset.groupby("label")[PC_COL].agg(["mean", "sem"]).reindex([hi, lo])

    hi_vals = subset.loc[subset["label"] == hi, PC_COL]
    lo_vals = subset.loc[subset["label"] == lo, PC_COL]

    # A label that is absent from the CSV would otherwise produce NaN
    # statistics that look like a (non-significant) result further down.
    if hi_vals.empty or lo_vals.empty:
        absent = [name for name, vals in ((hi, hi_vals), (lo, lo_vals)) if vals.empty]
        raise ValueError(f"No rows found for label(s) {absent} in column 'label'")

    # Student's independent-samples t-test (pooled variance); NaNs are dropped.
    t_stat, p_val = ttest_ind(hi_vals, lo_vals, equal_var=True, nan_policy="omit")

    results.append({"hi": hi, "lo": lo,
                    "desc": desc,
                    "t": t_stat, "p_raw": p_val})
    raw_pvals.append(p_val)


# 4.  Multiple-comparison correction
# Only the import is guarded, so an ImportError raised for any other reason
# inside the correction itself is not mistaken for "statsmodels is missing".
try:
    from statsmodels.stats.multitest import multipletests
except ImportError:
    # statsmodels is unavailable: apply the Holm step-down adjustment by hand
    # so the reported correction does not depend on the installed packages.
    method = "holm"
    pvals = np.asarray(raw_pvals, dtype=float)
    m = pvals.size
    order = np.argsort(pvals)                     # ascending; NaNs sort last
    stepped = pvals[order] * (m - np.arange(m))   # multipliers m, m-1, ..., 1
    # Running maximum keeps adjusted p-values monotone; cap them at 1.
    adjusted = np.minimum(np.maximum.accumulate(stepped), 1.0)
    p_corr = np.empty(m)
    p_corr[order] = adjusted                      # back to the original order
else:
    method = "holm"              # or 'bonferroni', 'fdr_bh', …
    _, p_corr, _, _ = multipletests(raw_pvals, method=method)

for r, pc in zip(results, p_corr):
    r["p_corr"] = pc

print(f"Using {method} correction for {len(raw_pvals)} {PC_COL} contrasts\n")
for r in results:
    print(f"{r['hi']} vs {r['lo']}:  "
          f"t = {r['t']:.3f},  p_raw = {r['p_raw']:.4g},  p_corr = {r['p_corr']:.4g}")

# 5.  Helper for significance stars
def stars(p):
    """Return the conventional significance marker for a p-value.

    Parameters
    ----------
    p : float
        P-value to label.

    Returns
    -------
    str
        "***" for p < 0.001, "**" for p < 0.01, "*" for p < 0.05,
        "ns" for any other number, and "n/a" when ``p`` is NaN.
    """
    # NaN compares False against every threshold, so without this guard a
    # test that could not be computed would be reported as "ns".
    if np.isnan(p):
        return "n/a"
    if p < 0.001:
        return "***"
    if p < 0.01:
        return "**"
    if p < 0.05:
        return "*"
    return "ns"


# 6.  Plot each comparison (bars, error bars, bracket, stars)
# One figure per contrast: two bars (group mean ± SEM), a dashed zero line,
# and a bracket above the bars annotated with the corrected-p significance.
for r in results:
    hi, lo = r["hi"], r["lo"]
    desc   = r["desc"]

    fig, ax = plt.subplots(figsize=(8, 6))
    x = np.arange(2)

    ax.bar(x,
           desc["mean"],
           yerr=desc["sem"],
           width=0.6,
           color="#A5B299",
           capsize=5,
           edgecolor="black")
    ax.axhline(0, color="gray", linestyle="--", linewidth=1)

    # Vertical extent of everything drawn so far.  Bars grow from the zero
    # baseline, so zero is always part of the extent: with two negative means
    # the visual "top" of the bars is 0, not the (negative) mean + SEM.
    tops    = desc["mean"] + desc["sem"]
    bottoms = desc["mean"] - desc["sem"]
    y_base  = max(tops.max(), 0.0)
    span    = y_base - min(bottoms.min(), 0.0)

    # Bracket spacing scales with the drawn extent and is always positive, so
    # the bracket sits above the bars even when every value is below zero.
    offset = span * 0.10 if span > 0 else 0.1
    h      = offset * 0.25
    y = y_base + offset

    ax.plot([0, 0, 1, 1], [y, y+h, y+h, y], color="black", lw=1.2)
    ax.text(0.5, y + h + offset * 0.05, stars(r["p_corr"]),
            ha="center", va="bottom", fontsize=12)

    ax.set_xticks(x)
    ax.set_xticklabels([hi, lo], fontsize=12)
    ax.set_ylabel("Mean PC3", fontsize=14)
    ax.set_xlabel("Group", fontsize=14)
    ax.set_title(f"Mean Portrayed PC3 ± SEM: {hi} vs {lo}\n"
                 f"(p = {r['p_corr']:.4g}, {method}-corrected)",
                 fontsize=16)
    plt.tight_layout()
    plt.show()
