In [5]:
# implementing hotelling t^2.
# this is for the faithfulness free-text generation experiment
import json
import numpy as np
from scipy.stats import f

In [3]:
# read in the data
results_path = "../../results/articulation_free_faithfulness/gpt_4_1_2025_04_14/rule_2_17.json"
# The file handle is deliberately NOT called `f`: this notebook imports the
# F distribution as `f` (from scipy.stats import f), and rebinding that name
# here breaks the later `f.sf(...)` call when the cells run top to bottom.
with open(results_path, 'r') as results_file:
    data = json.load(results_file)

# want to extract a vector of TT, TF, FT, FF for each trial
# NOTE(review): the column order comes from the key order of
# 'combined_true_rates' in the JSON -- presumably TT, TF, FT, FF; confirm.
# 15 errors out of 200 (n=185)  <- count noted from an earlier run
vectors = []
error_c = 0  # number of records skipped because the rates were unusable
for x in data['records']:
    try:
        vectors.append(list(x['combined_true_rates'].values()))
    except (KeyError, AttributeError, TypeError):
        # Record has no 'combined_true_rates' mapping (missing key, None, or
        # a non-dict value): count it and move on. Anything else is a real
        # problem and is allowed to propagate instead of being swallowed.
        error_c += 1
X = np.array(vectors)  # one row per usable trial

# hypothesis vector (follows rule_17_2 -- the actual behaviour)
# NOTE(review): the results file above is named rule_2_17 -- confirm which
# naming is intended.
# in this case the multivariate mean should be
hypothesis_vector = np.array([1, 0, 0, 0])
sample_mean = X.mean(axis=0) # array([0.97522077, 0.22643243, 0.96478459, 0.23751351])

In [7]:
# build the test statistic

def hotelling_t2_one_sample(X, mu):
    """One-sample Hotelling T^2 test of H0: the mean of X equals ``mu``.

    Parameters
    ----------
    X : array-like, shape (n, p)
        One observation per row, one variable per column.
    mu : array-like, shape (p,)
        Hypothesised mean vector (anything that broadcasts against the
        column means to shape (p,) is accepted).

    Returns
    -------
    dict
        "T2"   : Hotelling T^2 statistic, n * d' S^+ d with d = xbar - mu.
        "F"    : (n - p) / (p * (n - 1)) * T2.
        "df1"  : numerator degrees of freedom, p.
        "df2"  : denominator degrees of freedom, n - p.
        "p"    : upper-tail probability of F under F(df1, df2).
        "xbar" : column means of X.
        "mu"   : ``mu`` converted to a float array.

    Raises
    ------
    ValueError
        If X is not 2-D, has no columns, has n <= p (the F reference
        distribution needs n - p > 0), or ``mu`` is incompatible with p.
    """
    # Imported locally under a private alias so the function keeps working
    # even if the short module-level name `f` is rebound elsewhere in the
    # notebook (e.g. by `with open(...) as f`).
    from scipy.stats import f as f_dist

    X = np.asarray(X, float)
    mu = np.asarray(mu, float)
    if X.ndim != 2:
        raise ValueError(f"X must be a 2-D (n, p) array, got shape {X.shape}")
    n, p = X.shape
    if p < 1 or n <= p:
        raise ValueError(
            f"need at least one variable and more observations than variables; got n={n}, p={p}"
        )

    xbar = X.mean(axis=0)
    diff = xbar - mu
    if diff.shape != (p,):
        raise ValueError(f"mu must be compatible with shape ({p},), got shape {mu.shape}")

    # np.cov squeezes its result, so for p == 1 it returns a 0-d array that
    # pinv cannot handle; atleast_2d restores the (p, p) shape.
    S = np.atleast_2d(np.cov(X, rowvar=False, ddof=1))

    # Use a pseudoinverse to be safe if S is near-singular.
    # NOTE(review): when S is rank-deficient the F(p, n - p) reference
    # distribution is only approximate -- interpret the p-value with care.
    Sinv = np.linalg.pinv(S)

    T2 = n * diff @ Sinv @ diff
    Fstat = (n - p) / (p * (n - 1)) * T2
    pval = f_dist.sf(Fstat, p, n - p)
    return {
        "T2": float(T2),
        "F": float(Fstat),
        "df1": int(p),
        "df2": int(n - p),
        "p": float(pval),
        "xbar": xbar,
        "mu": mu,
    }

# ---- run the test against the hypothesised mean ----
res = hotelling_t2_one_sample(X=X, mu=hypothesis_vector)
print(res)

# trailing expression so the notebook echoes the p-value
res["p"]

{'T2': 4830915.567673228, 'F': 1188037.6599848564, 'df1': 4, 'df2': 181, 'p': 0.0, 'xbar': array([0.97522077, 0.22643243, 0.96478459, 0.23751351]), 'mu': array([1., 0., 0., 0.])}


0.0