In [13]:
import numpy as np
import pandas as pd
from scipy.stats import norm, chi2_contingency
import os

In [14]:
def dprime(hit_rate, fa_rate, n_trials=100):
    """Compute d′ from hit and false alarm rates with correction for 0/1."""
    # Correction to avoid inf z-scores
    if hit_rate == 1: hit_rate = 1 - 1/(2*n_trials)
    if hit_rate == 0: hit_rate = 1/(2*n_trials)
    if fa_rate == 1:  fa_rate  = 1 - 1/(2*n_trials)
    if fa_rate == 0:  fa_rate  = 1/(2*n_trials)
    return norm.ppf(hit_rate) - norm.ppf(fa_rate)

def chi_square_from_rates(hit_rate, fa_rate, n_trials=112):
    """Run chi test comparing detection vs. guessing."""
    n_signal = n_noise = n_trials / 2
    obs = np.array([
        [hit_rate * n_signal, (1 - hit_rate) * n_signal],
        [fa_rate  * n_noise,  (1 - fa_rate)  * n_noise]
    ])
    chi2, p, _, _ = chi2_contingency(obs)
    return chi2, p

def signif_marker(p):
    """Return asterisk for significance."""
    if p < 0.001: return "**"
    elif p < 0.05: return "*"
    else: return ""

# Example data from dahaene et al.)
prime_durations = [0, 29, 43, 57, 114, 200]
hit_rates = [0.286, 0.402, 0.491, 0.464, 0.786, 0.955]
fa_rates  = [0.348, 0.321, 0.411, 0.304, 0.286, 0.161]

rows = []
for dur, h, f in zip(prime_durations, hit_rates, fa_rates):
    dp = dprime(h, f, n_trials=112)
    chi2, p = chi_square_from_rates(h, f, n_trials=112)
    rows.append({
        "Prime duration (ms)": dur,
        "Hit rate (%)": f"{h*100:.1f}",
        "False alarms (%)": f"{f*100:.1f}",
        "d′": f"{dp:.2f}{signif_marker(p)}",
        "χ²": f"{chi2:.2f}",
        "p": f"{p:.4g}"
    })

df = pd.DataFrame(rows)
print(df.to_string(index=False))


 Prime duration (ms) Hit rate (%) False alarms (%)     d′    χ²         p
                   0         28.6             34.8  -0.17  0.25    0.6157
                  29         40.2             32.1   0.22  0.48    0.4868
                  43         49.1             41.1   0.20  0.44    0.5087
                  57         46.4             30.4   0.42  2.39     0.122
                 114         78.6             28.6 1.36** 26.17 3.124e-07
                 200         95.5             16.1 2.69** 68.39 1.342e-16


In [21]:
## Process data
import pandas as pd
df = pd.read_csv("../experiment/control_experiment/data_detection/ny_data/johan_new_001_2025-11-16_23-36-49_detection.csv")
df.head()

Unnamed: 0,signal_type,compression_level,prime_file,correct_word,babbling_file,mask1_file,mask2_file,practice.thisRepN,practice.thisTrialN,practice.thisN,...,thisRow.t,notes,response_key,rt,trial_outcome,babble_duration,Participant ID,Session,date,Unnamed: 24
0,absent,0.2,SILENCE,,babble_43.wav,pervers_reversed.wav,skuffe_reversed.wav,0.0,0.0,0.0,...,,,z,3.409997,Correct Rejection,1.437216,johan_new,1,2025-11-16_23-36-49,
1,absent,0.6,SILENCE,,babble_18.wav,fjendtlig_reversed.wav,bange_reversed.wav,0.0,1.0,1.0,...,,,z,0.874813,Correct Rejection,0.868399,johan_new,1,2025-11-16_23-36-49,
2,present,0.7,redskab_compressed.wav,redskab,babble_47.wav,beskidt_reversed.wav,konkurs_reversed.wav,0.0,2.0,2.0,...,,,z,2.022793,Miss,0.510702,johan_new,1,2025-11-16_23-36-49,
3,absent,0.3,SILENCE,,babble_35.wav,grusom_reversed.wav,enhed_reversed.wav,0.0,3.0,3.0,...,,,z,0.605991,Correct Rejection,1.196351,johan_new,1,2025-11-16_23-36-49,
4,present,0.6,gade_compressed.wav,gade,babble_46.wav,tortur_reversed.wav,mave_reversed.wav,0.0,4.0,4.0,...,,,m,0.501148,Hit,0.814466,johan_new,1,2025-11-16_23-36-49,


In [None]:
def compute_rate_results(df):
    list_of_compression_rates = df["compression_level"].unique()
    results = pd.DataFrame()
    for rate in list_of_compression_rates:
        df_rate = df[df["compression_level"] == rate]
        hit_rate = len(df_rate[df_rate["trial_outcome"] == "Hit"]) / len(df_rate)
        false_alarm_rate = len(df_rate[df_rate["trial_outcome"] == "False Alarm"]) / len(df_rate)
        d_prime = dprime(hit_rate, false_alarm_rate, n_trials=len(df_rate))
        chi2, p = chi_square_from_rates(hit_rate, false_alarm_rate, n_trials=len(df_rate))
        # create df with rate on rows and d', chi2, p as columns
        new_row = pd.DataFrame({
            "compression_rate": [rate],
            "d_prime": [f"{d_prime:.2f}{signif_marker(p)}"],
            "chi2": [f"{chi2:.2f}"],
            "p": [f"{p:.3f}"]
        })
        results = pd.concat([results, new_row], ignore_index=True)
    results["compression_rate"] = results["compression_rate"].astype(float)
    results = results.sort_values("compression_rate").reset_index(drop=True)
    return results

results = compute_rate_results(df)
results



Unnamed: 0,compression_rate,d_prime,chi2,p
0,0.1,1.84*,5.13,0.024
1,0.2,1.92*,5.84,0.016
2,0.3,1.37,2.69,0.101
3,0.4,1.84*,5.13,0.024
4,0.5,1.67,3.79,0.051
5,0.6,1.14,0.98,0.323
6,0.7,1.48,2.57,0.109


In [17]:
d_prime = dprime(hit_rate, false_alarm_rate, n_trials=len(df))
chi2, p = chi_square_from_rates(hit_rate, false_alarm_rate, n_trials=len(df))

print(d_prime, chi2, p)

2.6061418949976667 54.615815556992025 1.4655147410831456e-13


In [18]:
def concat_csvs(file_list):
    return pd.concat(file_list, ignore_index=True)

df_list = []
for file in os.listdir("../experiment/control_experiment/data_detection/ny_data"):
    if file.endswith(".csv"):
        print(f"Loading {file}")
        df_list.append(pd.read_csv(os.path.join("../experiment/control_experiment/data_detection/ny_data", file)))
df = concat_csvs(df_list)
df

Loading johan_new_001_2025-11-16_23-36-49_detection.csv


Unnamed: 0,signal_type,compression_level,prime_file,correct_word,babbling_file,mask1_file,mask2_file,practice.thisRepN,practice.thisTrialN,practice.thisN,...,thisRow.t,notes,response_key,rt,trial_outcome,babble_duration,Participant ID,Session,date,Unnamed: 24
0,absent,0.2,SILENCE,,babble_43.wav,pervers_reversed.wav,skuffe_reversed.wav,0.0,0.0,0.0,...,,,z,3.409997,Correct Rejection,1.437216,johan_new,1,2025-11-16_23-36-49,
1,absent,0.6,SILENCE,,babble_18.wav,fjendtlig_reversed.wav,bange_reversed.wav,0.0,1.0,1.0,...,,,z,0.874813,Correct Rejection,0.868399,johan_new,1,2025-11-16_23-36-49,
2,present,0.7,redskab_compressed.wav,redskab,babble_47.wav,beskidt_reversed.wav,konkurs_reversed.wav,0.0,2.0,2.0,...,,,z,2.022793,Miss,0.510702,johan_new,1,2025-11-16_23-36-49,
3,absent,0.3,SILENCE,,babble_35.wav,grusom_reversed.wav,enhed_reversed.wav,0.0,3.0,3.0,...,,,z,0.605991,Correct Rejection,1.196351,johan_new,1,2025-11-16_23-36-49,
4,present,0.6,gade_compressed.wav,gade,babble_46.wav,tortur_reversed.wav,mave_reversed.wav,0.0,4.0,4.0,...,,,m,0.501148,Hit,0.814466,johan_new,1,2025-11-16_23-36-49,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,absent,0.5,SILENCE,,babble_40.wav,røver_reversed.wav,albue_reversed.wav,,,,...,,,z,0.629058,Correct Rejection,1.012694,johan_new,1,2025-11-16_23-36-49,
220,absent,0.2,SILENCE,,babble_39.wav,fremskridt_reversed.wav,forfald_reversed.wav,,,,...,,,z,0.745249,Correct Rejection,0.735691,johan_new,1,2025-11-16_23-36-49,
221,absent,0.7,SILENCE,,babble_24.wav,taber_reversed.wav,metal_reversed.wav,,,,...,,,z,0.428590,Correct Rejection,1.461272,johan_new,1,2025-11-16_23-36-49,
222,absent,0.6,SILENCE,,babble_38.wav,brænde_reversed.wav,kæle_reversed.wav,,,,...,,,z,0.654537,Correct Rejection,1.271758,johan_new,1,2025-11-16_23-36-49,
