In [105]:
import pandas as pd
import numpy as np
import os

In [106]:
# Folder that is scanned for the Excel result files loaded by this notebook.
results_dir = "Analyzer Results"

# Folder that receives every table exported by this notebook.
output_dir = "Tester Results"

# Animal numbers; the position in this list is used as the recording order
# (animals are matched by the label "Animal<number>").
recording_order = [15, 2, 1, 6, 10, 4]

# Columns analysed with parametric tests (t-test / ANOVA).
strength_metrics = ["peakValue", "RMS", "tau", "AUC"]

# Columns analysed with non-parametric tests (Mann-Whitney U / Kruskal-Wallis).
reaction_metrics = ["reactionTime", "peakTime", "difference"]

In [107]:
# Load every Excel workbook in `results_dir` into `dfs`, keyed by file name without extension,
# then derive the experiment / variable / parameter lists from one reference frame.
os.makedirs(output_dir, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sort so that the positional choice of the
# reference frame below is reproducible. Names starting with "~$" are the lock files Excel
# creates while a workbook is open and cannot be read as workbooks.
files = sorted(
    file for file in os.listdir(results_dir)
    if file.endswith(('.xlsx', '.xls')) and not file.startswith('~$')
)
if not files:
    raise FileNotFoundError(f"No Excel result files found in {results_dir!r}.")

dfs = {}
for file in files:
    file_path = os.path.join(results_dir, file)
    # splitext() only strips the extension, so names containing extra dots stay distinct.
    dfs[os.path.splitext(file)[0]] = pd.read_excel(file_path)


print(list(dfs.keys()))
print(f"Found {len(dfs)} dataframes in {results_dir}.")

# The second frame (in sorted order) serves as the schema reference; fall back to the only
# frame when a single file is present instead of failing with an IndexError.
frame_names = list(dfs)
reference_df = dfs[frame_names[1] if len(frame_names) > 1 else frame_names[0]]

# The first four columns are treated as parameters, all remaining columns as measured variables.
experiments = reference_df['experiment'].unique().tolist()
variables = reference_df.columns[4:].tolist()
parameters = reference_df.columns[:4].tolist()
print(f"\nFound {len(experiments)} experiments, {len(variables)} variables and {len(parameters)} parameters:")
# map(str, ...) keeps the summary printable when a label is not a string (e.g. NaN).
print(" "+', '.join(map(str, experiments)))
print(" "+', '.join(map(str, variables)))
print(" "+', '.join(map(str, parameters)))
print(f"\nStrength metrics are {', '.join(strength_metrics)}, reaction metrics are {', '.join(reaction_metrics)}.")

['RESULTS', 'RESULTS_MERGED', 'RESULTS_MERGED_DATE', 'RESULTS_MERGED_EXP', 'RESULTS_MTT', 'RESULTS_MTT_MERGED', 'RESULTS_MTT_MERGED_DATE', 'RESULTS_MTT_MERGED_EXP', 'RESULTS_TT', 'RESULTS_TT_MERGED', 'RESULTS_TT_MERGED_DATE', 'RESULTS_TT_MERGED_EXP']
Found 12 dataframes in Analyzer Results.

Found 18 experiments, 7 variables and 4 parameters:
 ASR_control, gap_depth, tone_in_noise, gap_duration_4, gap_duration_8, gap_duration_10, gap_duration_20, gap_duration_50, offset_PPI_4, offset_PPI_6, offset_PPI_8, offset_PPI_10, offset_PPI_12, offset_PPI_14, offset_PPI_16, offset_PPI_18, offset_PPI_20, offset_PPI_50
 reactionTime, peakTime, difference, peakValue, RMS, tau, AUC
 animal, sex, date, experiment

Strength metrics are peakValue, RMS, tau, AUC, reaction metrics are reactionTime, peakTime, difference.


---

Expectations:
- date fluctuations
- no repetition differences
- sex differences in strength metrics
- experiment differences, in particular with increased gap / offset

---

## Top Ten

In [108]:
# ...existing code...

# Compare all variables between RESULTS_MTT_MERGED and RESULTS_TT_MERGED, split by sex and date:
# Welch t-test (+ Cohen's d, Tukey HSD) for strength metrics, Mann-Whitney U (+ rank-biserial
# effect, Dunn) for every other variable. Only comparisons with p < ALPHA are kept and exported.
import warnings

from scipy.stats import mannwhitneyu, ttest_ind
import pandas as pd
import scikit_posthocs as sp
# Imported once here instead of inside the innermost loop.
from statsmodels.stats.multicomp import pairwise_tukeyhsd

ALPHA = 0.05  # significance threshold for keeping a comparison

df_mtt = dfs['RESULTS_MTT_MERGED']
df_tt = dfs['RESULTS_TT_MERGED']

results = []
for sex in ['male', 'female']:
    # Dates are taken from the MTT frame; dates that only exist in the TT frame are not compared.
    for date in df_mtt['date'].unique():
        df_mtt_sex_date = df_mtt[(df_mtt['sex'] == sex) & (df_mtt['date'] == date)]
        df_tt_sex_date = df_tt[(df_tt['sex'] == sex) & (df_tt['date'] == date)]
        for var in variables:
            vals_mtt = df_mtt_sex_date[var].dropna()
            vals_tt = df_tt_sex_date[var].dropna()
            # Both samples need at least two observations.
            if len(vals_mtt) < 2 or len(vals_tt) < 2:
                continue

            mean_diff = vals_mtt.mean() - vals_tt.mean()
            group_labels = ['MTT'] * len(vals_mtt) + ['TT'] * len(vals_tt)
            posthoc_p = None

            if var in strength_metrics:
                # Parametric Welch t-test for strength metrics
                stat, p = ttest_ind(vals_mtt, vals_tt, equal_var=False)
                test_type = "t-test"
                posthoc_test = "Tukey HSD"
                # Cohen's d with the root-mean of the two sample variances as denominator
                pooled_std = ((vals_mtt.std(ddof=1) ** 2 + vals_tt.std(ddof=1) ** 2) / 2) ** 0.5
                effect_strength = mean_diff / pooled_std if pooled_std > 0 else None
                try:
                    tukey = pairwise_tukeyhsd(pd.concat([vals_mtt, vals_tt]), group_labels)
                    posthoc_p = tukey.pvalues[0] if len(tukey.pvalues) > 0 else None
                except Exception as exc:
                    # Best effort: keep the primary result, but say why the post hoc value is missing.
                    warnings.warn(f"Tukey HSD failed for {sex}/{date}/{var}: {exc}")
            else:
                # Non-parametric Mann-Whitney U for reaction metrics. One explicitly two-sided
                # call supplies both the reported p-value and the U used for the effect size.
                stat, p = mannwhitneyu(vals_mtt, vals_tt, alternative='two-sided')
                test_type = "mannwhitneyu"
                posthoc_test = "Dunn"
                n1, n2 = len(vals_mtt), len(vals_tt)
                # NOTE(review): with SciPy >= 1.7 the returned U belongs to the first sample, so
                # this value is negative when MTT tends to rank higher - confirm the sign convention.
                effect_strength = 1 - (2 * stat) / (n1 * n2)
                try:
                    data = pd.DataFrame({var: pd.concat([vals_mtt, vals_tt]),
                                         'group': group_labels})
                    dunn = sp.posthoc_dunn(data, val_col=var, group_col='group', p_adjust='bonferroni')
                    posthoc_p = dunn.loc['MTT', 'TT']
                except Exception as exc:
                    warnings.warn(f"Dunn test failed for {sex}/{date}/{var}: {exc}")

            if p < ALPHA:
                results.append({
                    'sex': sex,
                    'date': date,
                    'variable': var,
                    'stat': stat,
                    'p': p,
                    'test': test_type,
                    'posthoc_p': posthoc_p,
                    'posthoc_test': posthoc_test,
                    'effect_strength': effect_strength,
                    'mean_diff': mean_diff
                })

test_top_ten = pd.DataFrame(results)
print(test_top_ten)

test_top_ten.to_excel(os.path.join(output_dir, "TEST_TOP_TEN.xlsx"), index=False)
# ...existing code...

      sex     date      variable        stat         p          test  \
0  female  April16  reactionTime  914.500000  0.003310  mannwhitneyu   
1  female  April16      peakTime  941.500000  0.000362  mannwhitneyu   
2  female   June26           tau    2.710889  0.010070        t-test   
3  female    May20      peakTime  883.500000  0.007059  mannwhitneyu   
4  female    May20           AUC   -2.844176  0.007101        t-test   

   posthoc_p posthoc_test  effect_strength  mean_diff  
0   0.003251         Dunn        -0.411265   0.766667  
1   0.000354         Dunn        -0.452932   0.924074  
2   0.003721    Tukey HSD         0.686849   0.114580  
3   0.006939         Dunn        -0.363426  -0.296296  
4   0.003340    Tukey HSD        -0.719183  -0.125304  


---

## Average Reaction Time

In [109]:
# Range of reactionTime in RESULTS_MTT_MERGED after removing values outside the 1.5 x IQR fences
df = dfs['RESULTS_MTT_MERGED']
vals = df['reactionTime'].dropna()

q1, q3 = vals.quantile(0.25), vals.quantile(0.75)
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# between() includes both fences, i.e. lower <= value <= upper
filtered = vals[vals.between(lower, upper)]

min_val, max_val = filtered.min(), filtered.max()
print(f"Average reaction time (excluding outliers): min = {min_val:.2f}, max = {max_val:.2f}")

# The output file name embeds the two values it contains
filename = f"RT_iqr_{min_val:.2f}-{max_val:.2f}.xlsx"
iqr_df = pd.DataFrame({'min_reaction_time': [min_val], 'max_reaction_time': [max_val]})
iqr_df.to_excel(os.path.join(output_dir, filename), index=False)

Average reaction time (excluding outliers): min = 8.00, max = 13.60


---

## Repetition Differences

In [110]:
# ...existing code...

# Test if the value changes over repetitions (trial order) for each variable in RESULTS_MTT (non-parametric)
import ast
from scipy.stats import kruskal

def test_repetition_effect(df, variables, max_reps=5, alpha=0.05):
    results = []
    for var in variables:
        # Convert string lists to actual lists if needed
        vals = df[var].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else x)
        # Filter to rows that are lists and have enough length
        list_rows = vals[vals.apply(lambda x: isinstance(x, list) and len(x) > 1)]
        if list_rows.empty:
            continue
        # Find the minimum length across all lists (to avoid index errors)
        min_len = min(list_rows.apply(len))
        min_len = min(min_len, max_reps)
        # Gather values by repetition index
        rep_groups = []
        for i in range(min_len):
            group = list_rows.apply(lambda x: x[i] if len(x) > i else np.nan).dropna()
            if len(group) > 1:
                rep_groups.append(group.values)
        if len(rep_groups) < 2:
            continue
        # Kruskal-Wallis test across repetitions
        stat, p = kruskal(*rep_groups)
        if p < alpha:
            mean_diff = np.mean(rep_groups[0]) - np.mean(rep_groups[-1])
            results.append({'variable': var, 'stat': stat, 'p': p, 'test': 'kruskal', 'mean_diff': mean_diff})
    return pd.DataFrame(results)

# Run the repetition test on the un-merged MTT results and export the table only when
# at least one variable is significant.
repetition_effects = test_repetition_effect(dfs['RESULTS_MTT'], variables)
test_repetition = pd.DataFrame(repetition_effects)

if repetition_effects.empty:
    print("No significant repetition effects found for any variable.")
else:
    print(test_repetition)
    test_repetition.to_excel(os.path.join(output_dir, "TEST_REPETITION.xlsx"), index=False)

No significant repetition effects found for any variable.


### ---> merge across repetitions

---

## Date Differences

In [111]:
# ...existing code...

import warnings

import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, mannwhitneyu, f_oneway, kruskal
import scikit_posthocs as sp

# Date effect per sex and variable in RESULTS_MTT_MERGED: one-way ANOVA + Tukey HSD for
# strength metrics, Kruskal-Wallis + Dunn (Bonferroni) for reaction metrics. For significant
# omnibus tests the most different pair of dates is reported with its effect size.
ALPHA = 0.05  # significance threshold of the omnibus test

df = dfs['RESULTS_MTT_MERGED']
# strength_metrics and reaction_metrics are defined once in the configuration cell at the top.


def _cohens_d(first, second):
    """Cohen's d (first minus second) using the root-mean of the two sample variances."""
    pooled_std = ((first.std(ddof=1) ** 2 + second.std(ddof=1) ** 2) / 2) ** 0.5
    return (first.mean() - second.mean()) / pooled_std if pooled_std > 0 else None


def _rank_biserial(first, second):
    """Rank-biserial effect computed as 1 - 2U / (n1 * n2) from a two-sided Mann-Whitney U."""
    u, _ = mannwhitneyu(first, second, alternative='two-sided')
    return 1 - (2 * u) / (len(first) * len(second))


def _smallest_pairwise_p(pvals):
    """Return (label1, label2, p) for the smallest off-diagonal entry of a square p-value matrix.

    Only the part above the diagonal is inspected, so the diagonal can never be selected and
    the returned p-value always belongs to the returned pair. Returns None without any pair.
    """
    above_diagonal = np.triu(np.ones(pvals.shape, dtype=bool), k=1)
    # dropna() is explicit because newer pandas versions keep NaN entries when stacking.
    candidates = pvals.where(above_diagonal).stack().dropna()
    if candidates.empty:
        return None
    label1, label2 = candidates.idxmin()
    return label1, label2, candidates.min()


def _date_effect_row(df_sex, sex, var, parametric):
    """Build one result row for the date effect of `var` within one sex.

    parametric=True uses ANOVA / Tukey HSD / Cohen's d, otherwise Kruskal-Wallis / Dunn /
    rank-biserial. All result fields stay None when the corresponding step was not run.
    """
    row = {
        'sex': sex,
        'variable': var,
        'stat': None,
        'p': None,
        'significant': False,
        'posthoc_p': None,
        'posthoc_test': None,
        'date1': None,
        'date2': None,
        'effect_strength': None,
        'mean_diff': None
    }

    # Use the same observations for the omnibus and the post hoc test: no missing values and
    # only dates with more than one observation.
    clean = df_sex[['date', var]].dropna()
    clean = clean[clean.groupby('date')[var].transform('count') > 1]
    groups = [values.values for _, values in clean.groupby('date')[var]]
    if len(groups) < 2:
        return row

    stat, p = (f_oneway if parametric else kruskal)(*groups)
    row.update(stat=stat, p=p, significant=bool(p < ALPHA))
    if not row['significant']:
        return row

    row['posthoc_test'] = "Tukey HSD" if parametric else "Dunn"
    try:
        if parametric:
            pvals = sp.posthoc_tukey_hsd(clean, val_col=var, group_col='date')
        else:
            pvals = sp.posthoc_dunn(clean, val_col=var, group_col='date', p_adjust='bonferroni')
        best = _smallest_pairwise_p(pvals)
        if best is not None:
            date1, date2, posthoc_p = best
            vals1 = clean.loc[clean['date'] == date1, var]
            vals2 = clean.loc[clean['date'] == date2, var]
            effect = _cohens_d(vals1, vals2) if parametric else _rank_biserial(vals1, vals2)
            # Written in one step so a failure above never leaves a half-filled row.
            row.update(posthoc_p=posthoc_p, date1=date1, date2=date2,
                       effect_strength=effect, mean_diff=vals1.mean() - vals2.mean())
    except Exception as exc:
        # Best effort: keep the omnibus result, but say why the post hoc fields are empty.
        warnings.warn(f"{row['posthoc_test']} failed for {sex}/{var}: {exc}")
    return row


date_results = []

for sex in df['sex'].unique():
    df_sex = df[df['sex'] == sex]
    for var in strength_metrics:
        date_results.append(_date_effect_row(df_sex, sex, var, parametric=True))
    for var in reaction_metrics:
        date_results.append(_date_effect_row(df_sex, sex, var, parametric=False))

test_date = pd.DataFrame(date_results)
print(test_date)

test_date.to_excel(os.path.join(output_dir, "TEST_DATE.xlsx"), index=False)
# ...existing code...

       sex      variable       stat             p  significant     posthoc_p  \
0   female     peakValue  56.469557  2.955105e-19         True  7.570389e-12   
1   female           RMS  68.692063  3.152127e-22         True  9.869439e-12   
2   female           tau   2.956243  5.488011e-02        False           NaN   
3   female           AUC  97.016611  2.882260e-28         True  1.908584e-12   
4   female  reactionTime  43.708603  3.226989e-10         True  3.220414e-10   
5   female      peakTime   6.602514  3.683684e-02         True  4.982239e-02   
6   female    difference  41.644566  9.057270e-10         True  4.913810e-08   
7     male     peakValue  16.828890  2.347149e-07         True  2.560890e-06   
8     male           RMS   5.830719  3.600036e-03         True  2.767602e-03   
9     male           tau   6.269768  2.393950e-03         True  1.987673e-03   
10    male           AUC   3.505178  3.238356e-02         True  2.962492e-02   
11    male  reactionTime   6.049538  4.8

## Date Effects on Metrics (Grouped by Sex)

- **Females**
  - Significant date effects for: `peakValue`, `RMS`, `AUC`, `reactionTime`, `peakTime`, `difference`
  - Largest differences are between **April16** and **June26**
  - Effect strengths are very large (e.g., peakValue: -2.28)
  - `tau` is not significant

- **Males**
  - Significant date effects for: `peakValue`, `RMS`, `tau`, `AUC`, `reactionTime`, `peakTime`
  - Most pronounced for `peakValue`, `RMS`, `tau`, `AUC` (April16 vs June26)
  - Effect strengths are moderate to large (e.g., peakValue: -0.87)
  - `difference` is not significant

**Interpretation:**  
- Date (batch/day) strongly impacts most metrics for both sexes.
- Effect strengths are large, especially for strength metrics.
- Always control for date in analysis; batch effects can overshadow experimental manipulations.

---

In [112]:
import pandas as pd
import statsmodels.formula.api as smf

# Ordinary least squares fit of every strength metric on experiment, sex and date (all
# categorical); the p-value of each model term is collected and exported.
df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']

glm_results = []
for metric in strength_metrics:
    # Keep only rows where the metric and all three predictors are present
    sub_df = df[['experiment', 'sex', 'date', metric]].dropna()
    model = smf.ols(f"{metric} ~ C(experiment) + C(sex) + C(date)", data=sub_df).fit()
    # One record per model term
    glm_results.extend(
        {"metric": metric, "predictor": term, "p_value": term_p}
        for term, term_p in model.pvalues.items()
    )

test_glm = pd.DataFrame(glm_results)
# Wide layout: one row per metric, one column per model term
test_glm_table = test_glm.pivot(index="metric", columns="predictor", values="p_value")
print("GLM p-values for strength metrics (predictors: experiment, sex, date):")
print(test_glm_table)

# Export the long and the wide table
test_glm.to_excel(os.path.join(output_dir, "TEST_GLM.xlsx"), index=False)
test_glm_table.to_excel(os.path.join(output_dir, "TEST_GLM_TABLE.xlsx"))

GLM p-values for strength metrics (predictors: experiment, sex, date):
predictor  C(date)[T.June26]  C(date)[T.May20]  C(experiment)[T.gap_depth]  \
metric                                                                       
AUC             9.352379e-18      7.317571e-06                    0.923026   
RMS             3.523026e-18      4.026825e-09                    0.894243   
peakValue       1.182816e-21      5.227890e-16                    0.880205   
tau             5.429501e-02      1.102856e-04                    0.846302   

predictor  C(experiment)[T.gap_duration_10]  C(experiment)[T.gap_duration_20]  \
metric                                                                          
AUC                                0.732574                          0.826593   
RMS                                0.519635                          0.565744   
peakValue                          0.405871                          0.457347   
tau                                0.783219            

## GLM Summary: Effects of Experiment, Sex, and Date

- **Date effects:**  
  - **AUC, RMS, peakValue:** Extremely strong date effects (June26, May20; p < 1e-5).
  - **tau:** Moderate date effect (significant for May20, borderline for June26).

- **Sex effects:**  
  - **AUC, RMS, peakValue, tau:** All show highly significant sex differences (male vs. female; p < 1e-18).

- **Experiment effects:**  
  - **tone_in_noise:** Significant for AUC (p = 0.0088), RMS (p = 0.0011), peakValue (p = 0.00083).
  - **Other experiments (gap durations, offset_PPI, etc.):** No significant effects (p > 0.05).

**Summary:**  
- The variables most affected are **AUC, RMS, peakValue** (by date, sex, and tone_in_noise experiment).
- **tau** is mainly affected by sex and date.
- Other experimental manipulations do **not** significantly alter strength metrics.

In [113]:
# ...existing code...

import warnings

import pandas as pd
from scipy.stats import ttest_ind, mannwhitneyu
import scikit_posthocs as sp
# Imported once here instead of inside the loop.
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Male vs. female comparison per date in RESULTS_MTT_MERGED: Welch t-test (+ Cohen's d,
# Tukey HSD) for strength metrics, Mann-Whitney U (+ rank-biserial effect, Dunn) for
# reaction metrics. Every date/variable combination yields one row, tested or not.
ALPHA = 0.05  # significance threshold for the 'significant' flag

df = dfs['RESULTS_MTT_MERGED']
# strength_metrics and reaction_metrics are defined once in the configuration cell at the top.

sex_diff_results = []

for date in df['date'].unique():
    df_date = df[df['date'] == date]
    # All strength metrics first, then all reaction metrics, as in the exported table.
    for var in strength_metrics + reaction_metrics:
        parametric = var in strength_metrics
        vals_male = df_date[df_date['sex'] == 'male'][var].dropna()
        vals_female = df_date[df_date['sex'] == 'female'][var].dropna()
        group_labels = ['male'] * len(vals_male) + ['female'] * len(vals_female)
        mean_diff = vals_male.mean() - vals_female.mean() if len(vals_male) > 0 and len(vals_female) > 0 else None

        stat = p = eff = posthoc_p = posthoc_test = dunn_d = None
        # Both samples need at least two observations; otherwise the row keeps its None values.
        if len(vals_male) > 1 and len(vals_female) > 1:
            if parametric:
                stat, p = ttest_ind(vals_male, vals_female, equal_var=False)
                # Cohen's d with the root-mean of the two sample variances as denominator
                pooled_std = ((vals_male.std(ddof=1) ** 2 + vals_female.std(ddof=1) ** 2) / 2) ** 0.5
                eff = mean_diff / pooled_std if pooled_std > 0 else None
                posthoc_test = "Tukey HSD"
                try:
                    tukey = pairwise_tukeyhsd(pd.concat([vals_male, vals_female]), group_labels)
                    posthoc_p = tukey.pvalues[0] if len(tukey.pvalues) > 0 else None
                except Exception as exc:
                    # Best effort: keep the primary result, but say why the post hoc value is missing.
                    warnings.warn(f"Tukey HSD failed for {date}/{var}: {exc}")
            else:
                # A single two-sided call supplies the statistic, the p-value and the U for the effect size.
                stat, p = mannwhitneyu(vals_male, vals_female, alternative='two-sided')
                n1, n2 = len(vals_male), len(vals_female)
                eff = 1 - (2 * stat) / (n1 * n2)
                posthoc_test = "Dunn"
                try:
                    data = pd.DataFrame({var: pd.concat([vals_male, vals_female]),
                                         'group': group_labels})
                    dunn = sp.posthoc_dunn(data, val_col=var, group_col='group', p_adjust='bonferroni')
                    posthoc_p = dunn.loc['male', 'female']
                    # 'dunn_d' is the same rank-biserial value as 'effect_strength' (same two
                    # samples); it is only filled in when the Dunn test itself succeeded.
                    dunn_d = eff
                except Exception as exc:
                    warnings.warn(f"Dunn test failed for {date}/{var}: {exc}")

        row = {
            'date': date,
            'variable': var,
            'stat': stat,
            'p': p,
            'significant': p is not None and p < ALPHA,
            'effect_strength': eff,
            'posthoc_p': posthoc_p,
            'posthoc_test': posthoc_test,
            'mean_diff': mean_diff,
            'test': 't-test' if parametric else 'mannwhitneyu'
        }
        if not parametric:
            # Only reaction-metric rows carry this column.
            row['dunn_d'] = dunn_d
        sex_diff_results.append(row)

test_sex = pd.DataFrame(sex_diff_results)
print(test_sex)

test_sex.to_excel(os.path.join(output_dir, "TEST_SEX.xlsx"), index=False)
# ...existing code...

       date      variable         stat             p  significant  \
0   April16     peakValue    16.746598  1.037408e-25         True   
1   April16           RMS    15.345657  1.345198e-23         True   
2   April16           tau    -6.609417  3.943414e-09         True   
3   April16           AUC    12.856188  5.834794e-20         True   
4   April16  reactionTime   574.500000  4.731508e-08         True   
5   April16      peakTime  1702.000000  1.222350e-01        False   
6   April16    difference  2104.000000  6.944884e-05         True   
7    June26     peakValue     7.270643  1.342305e-10         True   
8    June26           RMS     4.571715  1.471229e-05         True   
9    June26           tau    -6.935457  1.277682e-09         True   
10   June26           AUC     2.071514  4.099126e-02         True   
11   June26  reactionTime  1575.500000  4.525481e-01        False   
12   June26      peakTime  1183.500000  8.026984e-02        False   
13   June26    difference  1277.50

### Summary of Sex Differences by Date

- **Strength metrics** (peakValue, RMS, tau, AUC) show **large and highly significant sex differences** across all dates, with males generally having higher values except for tau (where males are lower).
- **Effect strengths** for strength metrics are very large (often >1), indicating robust differences.
- **Reaction metrics** (reactionTime, peakTime, difference) show **smaller and less consistent sex differences**. Some are significant (e.g., reactionTime and difference on April16, reactionTime and peakTime on May20), but most are not, especially on June26.
- **Post hoc tests** (Tukey HSD for strength metrics, Dunn's for reaction metrics) confirm the primary results and provide adjusted p-values.
- **Interpretation:** Sex differences are strong for strength metrics and moderate or absent for reaction metrics. The magnitude and significance of these differences can vary by date.

---

## Recording Order Differences

In [114]:
# ...existing code...

import warnings

import pandas as pd
from scipy.stats import ttest_ind, mannwhitneyu
import scikit_posthocs as sp

# Early vs. late recorded animals per sex and date in RESULTS_MTT_MERGED: Welch t-test
# (+ Cohen's d, Tukey HSD) for strength metrics, Mann-Whitney U (+ rank-biserial effect, Dunn)
# for reaction metrics. Rows are only produced when both groups have at least two values.
ALPHA = 0.05  # significance threshold for the 'significant' flag

df = dfs['RESULTS_MTT_MERGED']
# strength_metrics, reaction_metrics and recording_order are defined once in the
# configuration cell at the top.

# "Animal<number>" -> 1-based position in the recording sequence
order_map = {f'Animal{num}': position for position, num in enumerate(recording_order, start=1)}

results = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        df_sub = df[(df['sex'] == sex) & (df['date'] == date)].copy()
        df_sub['recording_order'] = df_sub['animal'].map(order_map)
        # Animals missing from recording_order are dropped; copy() so the column assignment
        # below does not write into a slice.
        df_sub = df_sub[df_sub['recording_order'].notnull()].copy()
        # Median split over the remaining rows; rows at the median count as 'early'.
        median_order = df_sub['recording_order'].median()
        df_sub['group'] = ['early' if o <= median_order else 'late' for o in df_sub['recording_order']]

        # All strength metrics first, then all reaction metrics, as in the exported table.
        for metric in strength_metrics + reaction_metrics:
            parametric = metric in strength_metrics
            vals_early = df_sub[df_sub['group'] == 'early'][metric].dropna()
            vals_late = df_sub[df_sub['group'] == 'late'][metric].dropna()
            if len(vals_early) < 2 or len(vals_late) < 2:
                continue

            mean_diff = vals_early.mean() - vals_late.mean()
            data = pd.DataFrame({metric: pd.concat([vals_early, vals_late]),
                                 'group': ['early'] * len(vals_early) + ['late'] * len(vals_late)})
            posthoc_p = None

            if parametric:
                stat, p = ttest_ind(vals_early, vals_late, equal_var=False)
                # Cohen's d with the root-mean of the two sample variances as denominator
                pooled_std = ((vals_early.std(ddof=1) ** 2 + vals_late.std(ddof=1) ** 2) / 2) ** 0.5
                effect_strength = mean_diff / pooled_std if pooled_std > 0 else None
                test_name, posthoc_test = 't-test', 'Tukey HSD'
                try:
                    tukey = sp.posthoc_tukey_hsd(data, val_col=metric, group_col='group')
                    posthoc_p = tukey.loc['early', 'late']
                except Exception as exc:
                    # Best effort: keep the primary result, but say why the post hoc value is missing.
                    warnings.warn(f"Tukey HSD failed for {sex}/{date}/{metric}: {exc}")
            else:
                # A single two-sided call supplies the statistic, the p-value and the U for the effect size.
                stat, p = mannwhitneyu(vals_early, vals_late, alternative='two-sided')
                n1, n2 = len(vals_early), len(vals_late)
                effect_strength = 1 - (2 * stat) / (n1 * n2)
                test_name, posthoc_test = 'mannwhitneyu', 'Dunn'
                try:
                    dunn = sp.posthoc_dunn(data, val_col=metric, group_col='group', p_adjust='bonferroni')
                    posthoc_p = dunn.loc['early', 'late']
                except Exception as exc:
                    warnings.warn(f"Dunn test failed for {sex}/{date}/{metric}: {exc}")

            results.append({
                'sex': sex,
                'date': date,
                'metric': metric,
                'test': test_name,
                'stat': stat,
                'p': p,
                'effect_strength': effect_strength,
                'posthoc_p': posthoc_p,
                'posthoc_test': posthoc_test,
                'significant': p < ALPHA,
                'mean_diff': mean_diff
            })

test_rec_order = pd.DataFrame(results)
print(test_rec_order)

test_rec_order.to_excel(os.path.join(output_dir, "TEST_REC_ORDER.xlsx"), index=False)
# ...existing code...

       sex     date        metric          test        stat             p  \
0   female  April16     peakValue        t-test   -1.420735  1.617757e-01   
1   female  April16           RMS        t-test   -2.561296  1.429029e-02   
2   female  April16           tau        t-test   -2.920564  5.344195e-03   
3   female  April16           AUC        t-test   -3.382671  1.812942e-03   
4   female  April16  reactionTime  mannwhitneyu  469.000000  7.113408e-03   
5   female  April16      peakTime  mannwhitneyu  103.500000  1.624793e-05   
6   female  April16    difference  mannwhitneyu   94.000000  2.060878e-05   
7   female   June26     peakValue        t-test   -2.040056  4.700479e-02   
8   female   June26           RMS        t-test   -2.602420  1.231007e-02   
9   female   June26           tau        t-test   -7.012162  4.782149e-09   
10  female   June26           AUC        t-test   -3.727794  5.218556e-04   
11  female   June26  reactionTime  mannwhitneyu  479.500000  3.827184e-03   

## Summary: Does Recording Order Impact Metrics? (Grouped by Date & Sex)

### **Strength Metrics (Parametric t-test, Cohen's d, Tukey HSD post hoc)**
- **Significant effects of recording order** (p < 0.05, Tukey HSD post hoc also significant) are seen for:
    - **Females:** RMS, tau, AUC (April16 & June26); peakValue (June26)
    - **Males:** peakValue, RMS, tau, AUC (April16, June26, May20)
- **Effect strengths (Cohen's d)** are moderate to very large (e.g., RMS: 1.7, AUC: 2.5, tau: 1.9, peakValue: 1.3–7.4).
- **Interpretation:** Recording order can strongly impact strength metrics, especially in males and on June26.

### **Reaction Metrics (Mann-Whitney U, Rank-biserial, Dunn post hoc)**
- **Significant effects** for:
    - **Females:** reactionTime, peakTime, difference (April16 & June26)
    - **Males:** reactionTime, peakTime, difference (June26)
- **Effect strengths** (rank-biserial) are moderate to large (e.g., peakTime/difference: ~0.7–0.8).
- **Interpretation:** Recording order also impacts reaction metrics, but effects are less consistent than for strength metrics.

### **General Notes**
- **Post hoc tests** (Tukey HSD for strength, Dunn for reaction) confirm most significant findings.
- **Effect strengths** are often large, indicating robust differences between early and late recordings.
- **Some metrics/dates/sexes show no significant effect** (e.g., May20/female, tau in male/June26).


**Conclusion:**  
Recording order can significantly affect both strength and reaction metrics, with large effect sizes and consistent post hoc support, especially in males and on certain dates. Always control for recording order in analysis.

In [115]:
# Show direction of recording order effect for significant results

import pandas as pd

# Inputs for this cell: the merged results table, the two metric families and
# the sequence in which the animals were recorded.
df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['reactionTime', 'peakTime', 'difference']
recording_order = [15, 2, 1, 6, 10, 4]
# 1-based position of each 'Animal<N>' label within recording_order.
order_map = {f'Animal{num}': position for position, num in enumerate(recording_order, start=1)}

summary_rows = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        in_stratum = (df['sex'] == sex) & (df['date'] == date)
        df_sub = df[in_stratum].copy()
        df_sub['recording_order'] = df_sub['animal'].map(order_map)
        # Rows whose animal label is not a key of order_map are discarded.
        df_sub = df_sub[df_sub['recording_order'].notnull()]
        median_order = df_sub['recording_order'].median()

        # Median split: positions up to and including the median are "early".
        is_early = df_sub['recording_order'] <= median_order
        df_sub['group'] = is_early.map({True: 'early', False: 'late'})
        early_rows = df_sub[is_early]
        late_rows = df_sub[~is_early]

        for metric in strength_metrics + reaction_metrics:
            vals_early = early_rows[metric].dropna()
            vals_late = late_rows[metric].dropna()
            # Both halves need at least two observations to be reported.
            if len(vals_early) <= 1 or len(vals_late) <= 1:
                continue
            mean_early, mean_late = vals_early.mean(), vals_late.mean()
            if mean_early > mean_late:
                direction = "early > late"
            else:
                direction = "late > early"
            summary_rows.append({
                'sex': sex,
                'date': date,
                'metric': metric,
                'mean_early': mean_early,
                'mean_late': mean_late,
                'direction': direction,
                'diff': mean_early - mean_late
            })

# One row per (sex, date, metric) that passed the sample-size guard.
test_rec_order_direction = pd.DataFrame(summary_rows)
print(test_rec_order_direction)

direction_path = os.path.join(output_dir, "TEST_REC_ORDER_DIRECTION.xlsx")
test_rec_order_direction.to_excel(direction_path, index=False)

       sex     date        metric  mean_early  mean_late     direction  \
0   female  April16     peakValue    0.110367   0.133918  late > early   
1   female  April16           RMS    0.106122   0.154921  late > early   
2   female  April16           tau    0.664748   0.735505  late > early   
3   female  April16           AUC    0.110491   0.186752  late > early   
4   female  April16  reactionTime   11.719444  11.277778  early > late   
5   female  April16      peakTime   30.058333  30.655556  late > early   
6   female  April16    difference   18.338889  19.377778  late > early   
7   female   June26     peakValue    0.395391   0.486508  late > early   
8   female   June26           RMS    0.446902   0.581448  late > early   
9   female   June26           tau    0.629340   0.816857  late > early   
10  female   June26           AUC    0.510251   0.701457  late > early   
11  female   June26  reactionTime   10.482407   9.459259  early > late   
12  female   June26      peakTime   31

## Summary: Direction of Recording Order Effects

- **Females:**  
  - For all dates, strength metrics (`peakValue`, `RMS`, `tau`, `AUC`) are **higher in late recordings** (`late > early`).
  - Reaction metrics (`reactionTime`, `peakTime`, `difference`) are mostly **higher in late recordings**, except `reactionTime` (which is higher in early recordings).
  - The effect is consistent: **late recordings tend to have higher values** for most metrics.

- **Males:**  
  - For all dates and all metrics, **early recordings have higher values** (`early > late`), except for `reactionTime` on June26 (where late is higher).
  - The effect is strong and consistent: **early recordings show higher strength and reaction metrics**.

- **Magnitude:**  
  - The difference (`diff`) between early and late groups is often substantial, especially for males (e.g., peakValue, RMS, AUC, difference).

**Interpretation:**  
- **Recording order has a clear directional effect:**  
  - **Females:** Metrics increase with recording order (late > early).
  - **Males:** Metrics decrease with recording order (early > late).
- **This effect is robust across dates and metrics.**
- **Always control for recording order in analysis, as it can confound experimental results.**

### ---> peakTime higher and reactionTime lower with less strength?

In [116]:
# ...existing code...

import pandas as pd
from scipy.stats import pearsonr, spearmanr

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
# NOTE: this rebinds the notebook-level reaction_metrics to only the two
# timing variables examined in this cell.
reaction_metrics = ['peakTime', 'reactionTime']

correlation_results = []

# Every (strength metric, timing metric) combination, strength-major order.
metric_pairs = [(s, r) for s in strength_metrics for r in reaction_metrics]

for metric, reaction_var in metric_pairs:
    # Keep only the rows where both variables are present so the two
    # samples stay paired row by row.
    paired = df[[metric, reaction_var]].dropna()
    x_aligned = paired[metric]
    y_aligned = paired[reaction_var]
    # At least three paired observations are required.
    if len(x_aligned) <= 2:
        continue

    pearson_r, pearson_p = pearsonr(x_aligned, y_aligned)      # linear association
    spearman_r, spearman_p = spearmanr(x_aligned, y_aligned)   # rank association
    correlation_results.append({
        'strength_metric': metric,
        'reaction_metric': reaction_var,
        'pearson_r': pearson_r,
        'pearson_p': pearson_p,
        'spearman_r': spearman_r,
        'spearman_p': spearman_p,
        'n': len(x_aligned),
        # Raw difference of the two column means (strength minus timing).
        'mean_diff': x_aligned.mean() - y_aligned.mean()
    })

test_peakTime_reactionTime_to_strength_cor = pd.DataFrame(correlation_results)
print("Correlation between strength metrics and peakTime/reactionTime:")
print(test_peakTime_reactionTime_to_strength_cor)

correlation_path = os.path.join(output_dir, "TEST_PEAKTIME_REACTIONTIME_TO_STRENGTH.xlsx")
test_peakTime_reactionTime_to_strength_cor.to_excel(correlation_path, index=False)
# ...existing code...

Correlation between strength metrics and peakTime/reactionTime:
  strength_metric reaction_metric  pearson_r     pearson_p  spearman_r  \
0       peakValue        peakTime   0.087235  1.170776e-01    0.025790   
1       peakValue    reactionTime  -0.400215  6.811440e-14   -0.536682   
2             RMS        peakTime   0.149436  7.047380e-03    0.077919   
3             RMS    reactionTime  -0.403854  3.849695e-14   -0.538670   
4             tau        peakTime   0.181496  1.032215e-03    0.160016   
5             tau    reactionTime   0.117104  3.511889e-02    0.136935   
6             AUC        peakTime   0.153613  5.591734e-03    0.081657   
7             AUC    reactionTime  -0.385134  6.737467e-13   -0.505347   

     spearman_p    n  mean_diff  
0  6.437244e-01  324 -30.073451  
1  1.449731e-25  324 -10.122217  
2  1.617386e-01  324 -30.075950  
3  8.901558e-26  324 -10.124715  
4  3.879662e-03  324 -30.037172  
5  1.362777e-02  324 -10.085938  
6  1.424866e-01  324 -30.081894

### Correlation between Strength Metrics and PeakTime/ReactionTime

| Strength Metric | Reaction Metric | Pearson r | Pearson p | Spearman r | Spearman p |   n   |
|-----------------|----------------|-----------|-----------|------------|------------|-------|
| peakValue       | peakTime       |  0.087    |  0.117    |   0.026    |   0.644    |  324  |
| peakValue       | reactionTime   | -0.400    | 6.8e-14   |  -0.537    | 1.4e-25    |  324  |
| RMS             | peakTime       |  0.149    | 0.007     |   0.078    |  0.162     |  324  |
| RMS             | reactionTime   | -0.404    | 3.8e-14   |  -0.539    | 8.9e-26    |  324  |
| tau             | peakTime       |  0.181    | 0.001     |   0.160    |  0.004     |  324  |
| tau             | reactionTime   |  0.117    | 0.035     |   0.137    |  0.014     |  324  |
| AUC             | peakTime       |  0.154    | 0.006     |   0.082    |  0.143     |  324  |
| AUC             | reactionTime   | -0.385    | 6.7e-13   |  -0.505    | 2.1e-22    |  324  |

**Interpretation:**
- **PeakTime:** Weak positive correlations with all strength metrics; only tau is significant for Spearman (p = 0.004).
- **ReactionTime:** Moderate negative correlations with peakValue, RMS, and AUC (Pearson r ≈ -0.4, Spearman r ≈ -0.5, p < 1e-12), indicating higher strength is associated with shorter reaction time.
- **tau:** Shows weak positive correlation with reactionTime (Pearson r = 0.12, Spearman r = 0.14, p < 0.05).
- **Summary:** ReactionTime is more strongly and consistently (negatively) correlated with strength metrics than peakTime.

### ---> Despite being negatively correlated with strength, reactionTime still goes down over the course of a day, even though strength also decreases

---

## Experiment Differences

In [117]:
# ...existing code...

import pandas as pd
from scipy.stats import f_oneway, kruskal, mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['reactionTime', 'peakTime', 'difference']


def _tukey_best_pair(data, metric):
    """Run Tukey HSD over experiments and describe the most significant pair.

    Parameters
    ----------
    data : DataFrame with an 'experiment' column and the `metric` column.
    metric : name of the column to compare between experiments.

    Returns
    -------
    tuple (posthoc_p, exp1, exp2, cohens_d, mean_diff) for the pair of
    experiments with the smallest Tukey p-value. cohens_d is None when the
    pooled standard deviation is not positive.
    """
    valid = data[data[metric].notna()]
    tukey = pairwise_tukeyhsd(valid[metric], valid['experiment'])
    idx = tukey.pvalues.argmin()
    # Pairwise results follow the upper-triangle order of the unique groups:
    # two parallel index arrays (first member, second member), one entry per
    # pair. Indexing them as [idx][0] / [idx][1] would pick the wrong groups
    # or raise IndexError for idx >= 2.
    first, second = np.triu_indices(len(tukey.groupsunique), 1)
    exp1 = tukey.groupsunique[first[idx]]
    exp2 = tukey.groupsunique[second[idx]]
    vals1 = valid[valid['experiment'] == exp1][metric]
    vals2 = valid[valid['experiment'] == exp2][metric]
    # Cohen's d with the pooled SD taken as the root of the mean variance.
    pooled_std = ((vals1.std(ddof=1) ** 2 + vals2.std(ddof=1) ** 2) / 2) ** 0.5
    mean_diff = vals1.mean() - vals2.mean()
    eff = mean_diff / pooled_std if pooled_std > 0 else None
    return tukey.pvalues[idx], exp1, exp2, eff, mean_diff


def _dunn_best_pair(data, metric):
    """Run Dunn's test (Bonferroni) and describe the most significant pair.

    Parameters
    ----------
    data : DataFrame with an 'experiment' column and the `metric` column.
    metric : name of the column to compare between experiments.

    Returns
    -------
    tuple (posthoc_p, exp1, exp2, rank_biserial, mean_diff) for the pair of
    experiments with the smallest adjusted p-value.
    """
    valid = data[data[metric].notna()]
    dunn = sp.posthoc_dunn(valid, val_col=metric, group_col='experiment', p_adjust='bonferroni')
    # Exclude self-comparisons explicitly instead of relying on the value the
    # library writes on the diagonal, and take the p-value and the pair from
    # the very same cell so they can never disagree.
    off_diagonal = dunn.where(~np.eye(len(dunn), dtype=bool))
    pair_p = off_diagonal.stack().dropna()
    exp1, exp2 = pair_p.idxmin()
    vals1 = valid[valid['experiment'] == exp1][metric]
    vals2 = valid[valid['experiment'] == exp2][metric]
    u, _ = mannwhitneyu(vals1, vals2, alternative='two-sided')
    n1, n2 = len(vals1), len(vals2)
    # Rank-biserial effect size derived from the U statistic.
    eff = 1 - (2 * u) / (n1 * n2)
    mean_diff = vals1.mean() - vals2.mean()
    return pair_p.loc[(exp1, exp2)], exp1, exp2, eff, mean_diff


# (metrics, omnibus test label, omnibus function, post hoc label, post hoc helper)
# Strength metrics use the parametric route, reaction metrics the rank-based one.
test_plan = [
    (strength_metrics, 'ANOVA', f_oneway, 'Tukey HSD', _tukey_best_pair),
    (reaction_metrics, 'Kruskal-Wallis', kruskal, 'Dunn', _dunn_best_pair),
]

results = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        df_sub = df[(df['sex'] == sex) & (df['date'] == date)]
        for metrics, test_name, omnibus, posthoc_name, posthoc_fn in test_plan:
            for metric in metrics:
                # One sample per experiment; experiments with fewer than two
                # observations are left out of the omnibus test.
                groups = [df_sub[df_sub['experiment'] == exp][metric].dropna()
                          for exp in df_sub['experiment'].unique()]
                groups = [g for g in groups if len(g) > 1]
                if len(groups) <= 1:
                    continue
                stat, p = omnibus(*groups)
                posthoc_p = posthoc_test = exp1 = exp2 = eff = mean_diff = None
                # Post hoc comparison only when the omnibus test is significant.
                if p < 0.05:
                    posthoc_test = posthoc_name
                    try:
                        posthoc_p, exp1, exp2, eff, mean_diff = posthoc_fn(df_sub, metric)
                    except Exception as exc:
                        # Best effort: keep the omnibus row, but report the
                        # failure instead of hiding it.
                        print(f"Warning: {posthoc_name} post hoc failed for {sex}/{date}/{metric}: {exc}")
                results.append({
                    'sex': sex,
                    'date': date,
                    'metric': metric,
                    'test': test_name,
                    'stat': stat,
                    'p': p,
                    'significant': p < 0.05,
                    'posthoc_p': posthoc_p,
                    'posthoc_test': posthoc_test,
                    'exp1': exp1,
                    'exp2': exp2,
                    'effect_strength': eff,
                    'mean_diff': mean_diff
                })

test_experiment = pd.DataFrame(results)
print("Experiment effects on metrics (parametric for strength, non-parametric for reaction metrics):")
print(test_experiment)

test_experiment.to_excel(os.path.join(output_dir, "TEST_EXPERIMENT.xlsx"), index=False)
# ...existing code...

Experiment effects on metrics (parametric for strength, non-parametric for reaction metrics):
       sex     date        metric            test       stat         p  \
0   female  April16     peakValue           ANOVA   0.980462  0.498748   
1   female  April16           RMS           ANOVA   1.164230  0.339109   
2   female  April16           tau           ANOVA   0.577098  0.886888   
3   female  April16           AUC           ANOVA   1.098563  0.391711   
4   female  April16  reactionTime  Kruskal-Wallis  20.457419  0.251504   
5   female  April16      peakTime  Kruskal-Wallis   9.130543  0.936077   
6   female  April16    difference  Kruskal-Wallis  11.613911  0.822932   
7   female   June26     peakValue           ANOVA   0.784332  0.698134   
8   female   June26           RMS           ANOVA   0.721425  0.761322   
9   female   June26           tau           ANOVA   0.343029  0.989380   
10  female   June26           AUC           ANOVA   0.632730  0.842861   
11  female   June2

## Experiment Effects on Metrics (Grouped by Sex and Date)

**Summary Table:**  
No significant experiment effects were found for any metric (all p > 0.05) when grouping by sex and date.

| Sex    | Date    | Metric        | Test            | Stat      | p-value   | Significant |
|--------|---------|--------------|-----------------|-----------|-----------|-------------|
| female | April16 | peakValue    | ANOVA           | 0.98      | 0.50      | False       |
| female | April16 | RMS          | ANOVA           | 1.16      | 0.34      | False       |
| ...    | ...     | ...          | ...             | ...       | ...       | ...         |
| male   | May20   | difference   | Kruskal-Wallis  | 15.05     | 0.59      | False       |

**Interpretation:**
- Across all combinations of sex and date, **none of the strength metrics (peakValue, RMS, tau, AUC) nor reaction metrics (reactionTime, peakTime, difference) showed significant differences between experiments**.
- All p-values are greater than 0.05, so **no experiment effect was detected** on these metrics when the data are stratified by sex and date.
- **Post hoc tests** were not performed since no primary test was significant.
- **Conclusion:**  
  - Experimental manipulations (e.g., gap durations, offset_PPI, tone_in_noise) do **not** significantly alter strength or reaction metrics when sex and date are controlled.
  - **Date and sex effects are much stronger than experiment effects** in this dataset.

---