In [2]:
import numpy as np
import pandas as pd
from scipy.stats import norm, chi2_contingency
import os

In [3]:
def dprime(hit_rate, fa_rate, n_trials=112):
    """Compute d′ from hit and false alarm rates with correction for 0/1."""
    # Correction to avoid inf z-scores
    if hit_rate == 1: hit_rate = 1 - 1/(2*n_trials)
    if hit_rate == 0: hit_rate = 1/(2*n_trials)
    if fa_rate == 1:  fa_rate  = 1 - 1/(2*n_trials)
    if fa_rate == 0:  fa_rate  = 1/(2*n_trials)
    return norm.ppf(hit_rate) - norm.ppf(fa_rate)

def chi_square_from_rates(hit_rate, fa_rate, n_trials=112):
    """Run chi test comparing detection vs. guessing."""
    n_signal = n_noise = n_trials / 2
    obs = np.array([
        [hit_rate * n_signal, (1 - hit_rate) * n_signal],
        [fa_rate  * n_noise,  (1 - fa_rate)  * n_noise]
    ])
    chi2, p, _, _ = chi2_contingency(obs)
    return chi2, p

def signif_marker(p):
    """Return asterisk for significance."""
    if p < 0.001: return "**"
    elif p < 0.05: return "*"
    else: return ""

# Example data from dahaene et al.)
prime_durations = [0, 29, 43, 57, 114, 200]
hit_rates = [0.286, 0.402, 0.491, 0.464, 0.786, 0.955]
fa_rates  = [0.348, 0.321, 0.411, 0.304, 0.286, 0.161]

rows = []
for dur, h, f in zip(prime_durations, hit_rates, fa_rates):
    dp = dprime(h, f, n_trials=112)
    chi2, p = chi_square_from_rates(h, f, n_trials=112)
    rows.append({
        "Prime duration (ms)": dur,
        "Hit rate (%)": f"{h*100:.1f}",
        "False alarms (%)": f"{f*100:.1f}",
        "d′": f"{dp:.2f}{signif_marker(p)}",
        "χ²": f"{chi2:.2f}",
        "p": f"{p:.4g}"
    })

df = pd.DataFrame(rows)
print(df.to_string(index=False))


 Prime duration (ms) Hit rate (%) False alarms (%)     d′    χ²         p
                   0         28.6             34.8  -0.17  0.25    0.6157
                  29         40.2             32.1   0.22  0.48    0.4868
                  43         49.1             41.1   0.20  0.44    0.5087
                  57         46.4             30.4   0.42  2.39     0.122
                 114         78.6             28.6 1.36** 26.17 3.124e-07
                 200         95.5             16.1 2.69** 68.39 1.342e-16


In [5]:
def concat_csvs(file_list):
    return pd.concat(file_list, ignore_index=True)

df_list = []
for file in os.listdir("../experiment/control_experiment/data_detection"):
    if file.endswith(".csv"):
        print(f"Loading {file}")
        df_list.append(pd.read_csv(os.path.join("../experiment/control_experiment/data_detection", file)))
df = concat_csvs(df_list)
df

Loading 88776655_001_2025-11-21_10-26-58_detection.csv
Loading 234567_001_2025-11-19_08-25-31_detection.csv
Loading 99999_001_2025-11-20_13-11-42_detection.csv
Loading 1133445_001_2025-11-22_15-56-50_detection.csv


Unnamed: 0,signal_type,compression_level,prime_file,correct_word,babbling_file,mask1_file,mask2_file,practice.thisRepN,practice.thisTrialN,practice.thisN,...,thisRow.t,notes,response_key,rt,trial_outcome,babble_duration,Participant ID,Session,date,Unnamed: 24
0,present,0.1,røver_compressed.wav,røver,babble_72.wav,smerte_reversed.wav,kujon_reversed.wav,0.0,0.0,0.0,...,,,z,11.636342,Miss,1.026219,88776655,1,2025-11-21_10-26-58,
1,absent,0.4,SILENCE,,babble_20.wav,konge_reversed.wav,bygning_reversed.wav,0.0,1.0,1.0,...,,,z,1.758976,Correct Rejection,0.975350,88776655,1,2025-11-21_10-26-58,
2,absent,0.5,SILENCE,,babble_77.wav,illoyal_reversed.wav,oprørt_reversed.wav,0.0,2.0,2.0,...,,,z,1.073361,Correct Rejection,1.385329,88776655,1,2025-11-21_10-26-58,
3,present,0.5,latter_compressed.wav,latter,babble_78.wav,kage_reversed.wav,rotte_reversed.wav,0.0,3.0,3.0,...,,,z,8.832755,Miss,0.833028,88776655,1,2025-11-21_10-26-58,
4,present,0.6,ørken_compressed.wav,ørken,babble_88.wav,skyldig_reversed.wav,museum_reversed.wav,0.0,4.0,4.0,...,,,m,2.972196,Hit,1.007454,88776655,1,2025-11-21_10-26-58,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
891,absent,0.1,SILENCE,,babble_56.wav,rigdom_reversed.wav,viden_reversed.wav,,,,...,,,z,1.079766,Correct Rejection,0.975602,1133445,1,2025-11-22_15-56-50,
892,absent,0.7,SILENCE,,babble_33.wav,rigdom_reversed.wav,motor_reversed.wav,,,,...,,,z,0.746001,Correct Rejection,1.158097,1133445,1,2025-11-22_15-56-50,
893,absent,0.5,SILENCE,,babble_10.wav,nonne_reversed.wav,lampe_reversed.wav,,,,...,,,z,0.416677,Correct Rejection,0.500414,1133445,1,2025-11-22_15-56-50,
894,absent,0.6,SILENCE,,babble_81.wav,utro_reversed.wav,smerte_reversed.wav,,,,...,,,z,0.342045,Correct Rejection,0.755871,1133445,1,2025-11-22_15-56-50,


In [6]:
def compute_rate_results(df):
    list_of_compression_rates = df["compression_level"].unique()
    results = pd.DataFrame()
    for rate in list_of_compression_rates:
        df_rate = df[df["compression_level"] == rate]
        hit_rate = len(df_rate[df_rate["trial_outcome"] == "Hit"]) / len(df_rate)
        false_alarm_rate = len(df_rate[df_rate["trial_outcome"] == "False Alarm"]) / len(df_rate)
        d_prime = dprime(hit_rate, false_alarm_rate, n_trials=len(df_rate))
        chi2, p = chi_square_from_rates(hit_rate, false_alarm_rate, n_trials=len(df_rate))
        # create df with rate on rows and d', chi2, p as columns
        new_row = pd.DataFrame({
            "compression_rate": [rate],
            "d_prime": [f"{d_prime:.2f}{signif_marker(p)}"],
            "chi2": [f"{chi2:.2f}"],
            "p": [f"{p:.3f}"]
        })
        results = pd.concat([results, new_row], ignore_index=True)
    results["compression_rate"] = results["compression_rate"].astype(float)
    results = results.sort_values("compression_rate").reset_index(drop=True)
    return results

results = compute_rate_results(df)
results

Unnamed: 0,compression_rate,d_prime,chi2,p
0,0.1,1.14**,12.35,0.0
1,0.2,0.92*,8.96,0.003
2,0.3,0.95*,7.82,0.005
3,0.4,0.48,2.0,0.157
4,0.5,0.56,2.14,0.143
5,0.6,0.13,0.02,0.886
6,0.7,0.56,2.35,0.125
