In [43]:
import pandas as pd
import numpy as np
import os

In [44]:
results_dir = "Analyzer Results"

output_dir = "Tester Results"

recording_order = [15, 2, 1, 6, 10, 4]

In [45]:
# ...existing code...

dfs = {}
dfs_fit = {}

files = [file for file in os.listdir(results_dir) if file.endswith(('.xlsx', '.xls'))]

for file in files:
    file_path = os.path.join(results_dir, file)
    key = file.split('.')[0]
    if key.endswith('_FIT'):
        dfs_fit[key] = pd.read_excel(file_path)
    else:
        dfs[key] = pd.read_excel(file_path)

print("Raw dataframes:", list(dfs.keys()))
print("Fit dataframes:", list(dfs_fit.keys()))
print(f"Found {len(dfs)} raw and {len(dfs_fit)} fit dataframes in {results_dir}.")
if len(dfs) != len(dfs_fit):
    print("Warning: Mismatch between raw and fit dataframes!")

experiments = dfs[list(dfs.keys())[1]]['experiment'].unique().tolist()
variables = dfs[list(dfs.keys())[1]].columns[4:].tolist()
parameters = dfs[list(dfs.keys())[1]].columns[:4].tolist()
print(f"\nFound {len(experiments)} experiments, {len(variables)} variables and {len(parameters)} parameters:")
print(" "+', '.join(experiments))
print(" "+', '.join(variables))
print(" "+', '.join(parameters))

Raw dataframes: ['RESULTS', 'RESULTS_MERGED', 'RESULTS_MERGED_DATE', 'RESULTS_MERGED_EXP', 'RESULTS_MTT', 'RESULTS_MTT_MERGED', 'RESULTS_MTT_MERGED_DATE', 'RESULTS_MTT_MERGED_EXP', 'RESULTS_TT', 'RESULTS_TT_MERGED', 'RESULTS_TT_MERGED_DATE', 'RESULTS_TT_MERGED_EXP']
Fit dataframes: ['RESULTS_FIT', 'RESULTS_MERGED_DATE_FIT', 'RESULTS_MERGED_EXP_FIT', 'RESULTS_MERGED_FIT', 'RESULTS_MTT_FIT', 'RESULTS_MTT_MERGED_DATE_FIT', 'RESULTS_MTT_MERGED_EXP_FIT', 'RESULTS_MTT_MERGED_FIT', 'RESULTS_TT_FIT', 'RESULTS_TT_MERGED_DATE_FIT', 'RESULTS_TT_MERGED_EXP_FIT', 'RESULTS_TT_MERGED_FIT']
Found 12 raw and 12 fit dataframes in Analyzer Results.

Found 18 experiments, 7 variables and 4 parameters:
 ASR_control, gap_depth, tone_in_noise, gap_duration_4, gap_duration_8, gap_duration_10, gap_duration_20, gap_duration_50, offset_PPI_4, offset_PPI_6, offset_PPI_8, offset_PPI_10, offset_PPI_12, offset_PPI_14, offset_PPI_16, offset_PPI_18, offset_PPI_20, offset_PPI_50
 reactionTime, peakTime, difference, pea

---

Expectations:
- date fluctuations
- no repetition differences
- sex differences in strength metrics
- experiment differences, in particular with increased gap / offset

---

## Top Ten

In [46]:
# ...existing code...

# Compare all variables between RESULTS_MTT_MERGED and RESULTS_TT_MERGED (non-parametric only), split by sex and date
from scipy.stats import mannwhitneyu
import pandas as pd
import scikit_posthocs as sp

df_mtt = dfs['RESULTS_MTT_MERGED']
df_tt = dfs['RESULTS_TT_MERGED']

results = []
for sex in ['male', 'female']:
    for date in df_mtt['date'].unique():
        df_mtt_sex_date = df_mtt[(df_mtt['sex'] == sex) & (df_mtt['date'] == date)]
        df_tt_sex_date = df_tt[(df_tt['sex'] == sex) & (df_tt['date'] == date)]
        for var in variables:
            vals_mtt = df_mtt_sex_date[var].dropna()
            vals_tt = df_tt_sex_date[var].dropna()
            if len(vals_mtt) > 1 and len(vals_tt) > 1:
                mean_diff = vals_mtt.mean() - vals_tt.mean()
                # Non-parametric Mann-Whitney U for all metrics
                stat, p = mannwhitneyu(vals_mtt, vals_tt, alternative='two-sided')
                test_type = "mannwhitneyu"
                # Effect strength (rank-biserial)
                u, _ = mannwhitneyu(vals_mtt, vals_tt, alternative='two-sided')
                n1, n2 = len(vals_mtt), len(vals_tt)
                effect_strength = 1 - (2 * u) / (n1 * n2)
                # Post hoc: Dunn's test
                try:
                    data = pd.DataFrame({var: pd.concat([vals_mtt, vals_tt]),
                                        'group': ['MTT'] * len(vals_mtt) + ['TT'] * len(vals_tt)})
                    dunn = sp.posthoc_dunn(data, val_col=var, group_col='group', p_adjust='bonferroni')
                    posthoc_p = dunn.loc['MTT', 'TT']
                    posthoc_test = "Dunn"
                except Exception:
                    posthoc_p = None
                    posthoc_test = "Dunn"
                if p < 0.05:
                    results.append({
                        'sex': sex,
                        'date': date,
                        'variable': var,
                        'stat': stat,
                        'p': p,
                        'test': test_type,
                        'posthoc_p': posthoc_p,
                        'posthoc_test': posthoc_test,
                        'effect_strength': effect_strength,
                        'mean_diff': mean_diff
                    })

test_top_ten = pd.DataFrame(results)
print(test_top_ten)

test_top_ten.to_excel(os.path.join(output_dir, "TEST_TOP_TEN.xlsx"), index=False)
# ...existing code...

       sex     date      variable    stat         p          test  posthoc_p  \
0     male   June26     peakValue   416.0  0.022842  mannwhitneyu   0.022511   
1     male   June26           tau   853.0  0.009980  mannwhitneyu   0.009821   
2     male    May20     peakValue   478.0  0.021683  mannwhitneyu   0.021391   
3   female  April16  reactionTime   914.5  0.003310  mannwhitneyu   0.003251   
4   female  April16      peakTime   941.5  0.000362  mannwhitneyu   0.000354   
5   female  April16     peakValue   463.0  0.045768  mannwhitneyu   0.045184   
6   female   June26     peakValue   391.0  0.002803  mannwhitneyu   0.002755   
7   female   June26           tau  1068.0  0.000035  mannwhitneyu   0.000034   
8   female    May20      peakTime   883.5  0.007059  mannwhitneyu   0.006939   
9   female    May20     peakValue   397.5  0.006796  mannwhitneyu   0.006686   
10  female    May20           RMS   379.0  0.003651  mannwhitneyu   0.003588   
11  female    May20           AUC   359.

## Summary of TT vs MTT Differences (Mann-Whitney U, Dunn's post hoc)

| Sex     | Date    | Variable      | Stat   | p-value   | Test         | Dunn p    | Effect Strength | Mean Diff    |
|---------|---------|--------------|--------|-----------|--------------|-----------|-----------------|-------------|
| male    | June26  | peakValue    | 416.0  | 0.022842  | mannwhitneyu | 0.022511  | 0.33            | -30.78      |
| male    | June26  | tau          | 853.0  | 0.009980  | mannwhitneyu | 0.009821  | -0.37           | 28.15       |
| male    | May20   | peakValue    | 478.0  | 0.021683  | mannwhitneyu | 0.021391  | 0.32            | -19.03      |
| female  | April16 | reactionTime | 914.5  | 0.003310  | mannwhitneyu | 0.003251  | -0.41           | 0.77        |
| female  | April16 | peakTime     | 941.5  | 0.000362  | mannwhitneyu | 0.000354  | -0.45           | 0.92        |
| female  | April16 | peakValue    | 463.0  | 0.045768  | mannwhitneyu | 0.045184  | 0.29            | -7.78       |
| female  | June26  | peakValue    | 391.0  | 0.002803  | mannwhitneyu | 0.002755  | 0.42            | -19.14      |
| female  | June26  | tau          | 1068.0 | 0.000035  | mannwhitneyu | 0.000034  | -0.58           | 15.91       |
| female  | May20   | peakTime     | 883.5  | 0.007059  | mannwhitneyu | 0.006939  | -0.36           | -0.30       |
| female  | May20   | peakValue    | 397.5  | 0.006796  | mannwhitneyu | 0.006686  | 0.39            | -22.67      |
| female  | May20   | RMS          | 379.0  | 0.003651  | mannwhitneyu | 0.003588  | 0.42            | -6.16       |
| female  | May20   | AUC          | 359.0  | 0.001788  | mannwhitneyu | 0.001755  | 0.45            | -2480.93    |

**Interpretation:**
- Significant differences are found for peakValue, tau, reactionTime, peakTime, RMS, and AUC across sex and date.
- Effect strengths range from moderate to large.
- Mean differences indicate direction and magnitude of change between groups.

# ---> remove TT

---

## Average Reaction Time

In [47]:
# Calculate IQR bounds
df = dfs['RESULTS_MTT_MERGED']
vals = df['reactionTime'].dropna()

q1 = vals.quantile(0.25)
q3 = vals.quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr

filtered = vals[(vals >= lower) & (vals <= upper)]

min_val = filtered.min()
max_val = filtered.max()
print(f"Average reaction time (excluding outliers): min = {min_val:.2f}, max = {max_val:.2f}")

# Save result to file named by the result
filename = f"RT_iqr_{min_val:.2f}-{max_val:.2f}.xlsx"
iqr_df = pd.DataFrame({'min_reaction_time': [min_val], 'max_reaction_time': [max_val]})
iqr_df.to_excel(os.path.join(output_dir, filename), index=False)

Average reaction time (excluding outliers): min = 8.00, max = 13.60


---

## Repetition Differences

In [57]:
# ...existing code...

# Test if the value changes over repetitions (trial order) for each variable in RESULTS_MTT (non-parametric)
import ast
from scipy.stats import kruskal

def test_repetition_effect(df, variables, max_reps=5, alpha=0.05):
    results = []
    for var in variables:
        # Convert string lists to actual lists if needed
        vals = df[var].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else x)
        # Filter to rows that are lists and have enough length
        list_rows = vals[vals.apply(lambda x: isinstance(x, list) and len(x) > 1)]
        if list_rows.empty:
            continue
        # Find the minimum length across all lists (to avoid index errors)
        min_len = min(list_rows.apply(len))
        min_len = min(min_len, max_reps)
        # Gather values by repetition index
        rep_groups = []
        for i in range(min_len):
            group = list_rows.apply(lambda x: x[i] if len(x) > i else np.nan).dropna()
            if len(group) > 1:
                rep_groups.append(group.values)
        if len(rep_groups) < 2:
            continue
        # Kruskal-Wallis test across repetitions
        stat, p = kruskal(*rep_groups)
        if p < alpha:
            mean_diff = np.mean(rep_groups[0]) - np.mean(rep_groups[-1])
            results.append({'variable': var, 'stat': stat, 'p': p, 'test': 'kruskal', 'mean_diff': mean_diff})
    return pd.DataFrame(results)

repetition_effects = test_repetition_effect(dfs['RESULTS_MTT'], variables)
test_repetition = pd.DataFrame(repetition_effects)


print(test_repetition) if not repetition_effects.empty else print("No significant repetition effects found for any variable.")
test_repetition.to_excel(os.path.join(output_dir, "TEST_REPETITION.xlsx"), index=False)

No significant repetition effects found for any variable.


# ---> merge across repetitions

---

## Date Differences

In [59]:
# ...existing code...

import pandas as pd
from scipy.stats import kruskal, mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']

date_results = []

for sex in df['sex'].unique():
    df_sex = df[df['sex'] == sex]
    for var in variables:
        # Non-parametric Kruskal-Wallis for all metrics
        groups = [group[var].dropna().values for _, group in df_sex.groupby('date')]
        groups = [g for g in groups if len(g) > 1]
        if len(groups) > 1:
            stat, p = kruskal(*groups)
            posthoc_p, posthoc_test, date1, date2, eff, mean_diff = None, None, None, None, None, None
            if p < 0.05:
                try:
                    dunn = sp.posthoc_dunn(df_sex, val_col=var, group_col='date', p_adjust='bonferroni')
                    min_p = dunn.replace(0, float('nan')).min().min()
                    idx = dunn.stack().idxmin()
                    date1, date2 = idx
                    vals1 = df_sex[df_sex['date'] == date1][var].dropna()
                    vals2 = df_sex[df_sex['date'] == date2][var].dropna()
                    u, _ = mannwhitneyu(vals1, vals2, alternative='two-sided')
                    n1, n2 = len(vals1), len(vals2)
                    eff = 1 - (2 * u) / (n1 * n2)
                    mean_diff = vals1.mean() - vals2.mean()
                    posthoc_p = min_p
                    posthoc_test = "Dunn"
                except Exception:
                    posthoc_p, posthoc_test, mean_diff = None, "Dunn", None
        else:
            stat, p, posthoc_p, posthoc_test, date1, date2, eff, mean_diff = None, None, None, None, None, None, None, None
        date_results.append({
            'sex': sex,
            'variable': var,
            'stat': stat,
            'p': p,
            'significant': p is not None and p < 0.05,
            'posthoc_p': posthoc_p,
            'posthoc_test': posthoc_test,
            'date1': date1,
            'date2': date2,
            'effect_strength': eff,
            'mean_diff': mean_diff
        })

test_date = pd.DataFrame(date_results)
print(test_date)

test_date.to_excel(os.path.join(output_dir, "TEST_DATE.xlsx"), index=False)
# ...existing code...

       sex      variable       stat             p  significant     posthoc_p  \
0   female  reactionTime  43.708603  3.226989e-10         True  3.220414e-10   
1   female      peakTime   6.602514  3.683684e-02         True  4.982239e-02   
2   female    difference  41.644566  9.057270e-10         True  4.913810e-08   
3   female     peakValue  70.002950  6.295822e-16         True  2.517974e-15   
4   female           RMS  78.108445  1.093872e-17         True  1.811059e-17   
5   female           tau   3.399749  1.827064e-01        False           NaN   
6   female           AUC  92.456782  8.380404e-21         True  8.009213e-21   
7     male  reactionTime   6.049538  4.856904e-02         True  4.532128e-02   
8     male      peakTime   7.310487  2.585519e-02         True  5.618624e-02   
9     male    difference   1.652426  4.377036e-01        False           NaN   
10    male     peakValue  34.529943  3.176275e-08         True  4.553857e-08   
11    male           RMS  15.213197  4.9

## Summary of Date Differences (Non-parametric Tests)

All results use the **Kruskal-Wallis test** for date effects, with **Dunn's post hoc** for pairwise comparison.  
Effect strength is rank-biserial correlation.  
Mean difference is (date1 mean - date2 mean) for the most significant pair.

| Sex    | Variable      | Stat     | p-value    | Significant | Posthoc p   | Posthoc Test | Date1    | Date2    | Effect Strength | Mean Diff    |
|--------|--------------|----------|------------|-------------|-------------|--------------|----------|----------|-----------------|-------------|
| female | reactionTime | 43.71    | 3.23e-10   | True        | 3.22e-10    | Dunn         | April16  | June26   | -0.71           | 1.43        |
| female | peakTime     | 6.60     | 3.68e-02   | True        | 4.98e-02    | Dunn         | June26   | May20    | 0.23            | -0.61       |
| female | difference   | 41.64    | 9.06e-10   | True        | 4.91e-08    | Dunn         | April16  | June26   | 0.59            | -2.02       |
| female | peakValue    | 70.00    | 6.30e-16   | True        | 2.52e-15    | Dunn         | April16  | June26   | 0.88            | -43.05      |
| female | RMS          | 78.11    | 1.09e-17   | True        | 1.81e-17    | Dunn         | April16  | June26   | 0.89            | -13.43      |
| female | tau          | 3.40     | 0.183      | False       | NaN         | None         | None     | None     | NaN             | NaN         |
| female | AUC          | 92.46    | 8.38e-21   | True        | 8.01e-21    | Dunn         | April16  | June26   | 0.94            | -6202.25    |
| male   | reactionTime | 6.05     | 4.86e-02   | True        | 4.53e-02    | Dunn         | April16  | May20    | -0.25           | 0.22        |
| male   | peakTime     | 7.31     | 2.59e-02   | True        | 5.62e-02    | Dunn         | April16  | June26   | -0.22           | 0.98        |
| male   | difference   | 1.65     | 0.438      | False       | NaN         | None         | None     | None     | NaN             | NaN         |
| male   | peakValue    | 34.53    | 3.18e-08   | True        | 4.55e-08    | Dunn         | April16  | June26   | 0.47            | -64.83      |
| male   | RMS          | 15.21    | 4.97e-04   | True        | 2.97e-04    | Dunn         | April16  | June26   | 0.41            | -14.40      |
| male   | tau          | 13.65    | 1.09e-03   | True        | 8.54e-04    | Dunn         | April16  | May20    | -0.40           | 28.50       |
| male   | AUC          | 8.07     | 1.77e-02   | True        | 1.69e-02    | Dunn         | April16  | June26   | 0.32            | -5053.16    |



**Interpretation:**
- **Females:** Significant date effects for all metrics except tau. Largest differences are between April16 and June26, with very large effect strengths for strength metrics (peakValue, RMS, AUC).
- **Males:** Significant date effects for all metrics except difference. Most pronounced for peakValue, RMS, tau, and AUC (April16 vs June26 or May20), with moderate to large effect strengths.
- **Conclusion:** Date (batch/day) strongly impacts most metrics for both sexes, especially strength metrics. Always control for date in analysis.

# ---> split by date

---

In [60]:
# ...existing code...

import pandas as pd
from scipy.stats import mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']

sex_diff_results = []

for date in df['date'].unique():
    df_date = df[df['date'] == date]
    for var in variables:
        vals_male = df_date[df_date['sex'] == 'male'][var].dropna()
        vals_female = df_date[df_date['sex'] == 'female'][var].dropna()
        mean_diff = vals_male.mean() - vals_female.mean() if len(vals_male) > 0 and len(vals_female) > 0 else None
        if len(vals_male) > 1 and len(vals_female) > 1:
            stat, p = mannwhitneyu(vals_male, vals_female, alternative='two-sided')
            n1, n2 = len(vals_male), len(vals_female)
            u, _ = mannwhitneyu(vals_male, vals_female, alternative='two-sided')
            eff = 1 - (2 * u) / (n1 * n2)
            # Dunn's posthoc
            try:
                data = pd.DataFrame({var: pd.concat([vals_male, vals_female]),
                                    'group': ['male'] * len(vals_male) + ['female'] * len(vals_female)})
                dunn = sp.posthoc_dunn(data, val_col=var, group_col='group', p_adjust='bonferroni')
                posthoc_p = dunn.loc['male', 'female']
                posthoc_test = "Dunn"
            except Exception:
                posthoc_p = None
                posthoc_test = "Dunn"
        else:
            stat, p, eff, posthoc_p, posthoc_test = None, None, None, None, None
        sex_diff_results.append({
            'date': date,
            'variable': var,
            'stat': stat,
            'p': p,
            'significant': p is not None and p < 0.05,
            'effect_strength': eff,
            'posthoc_p': posthoc_p,
            'posthoc_test': posthoc_test,
            'mean_diff': mean_diff,
            'test': 'mannwhitneyu'
        })

test_sex = pd.DataFrame(sex_diff_results)
print(test_sex)

test_sex.to_excel(os.path.join(output_dir, "TEST_SEX.xlsx"), index=False)
# ...existing code...

       date      variable    stat             p  significant  effect_strength  \
0   April16  reactionTime   574.5  4.731508e-08         True         0.605967   
1   April16      peakTime  1702.0  1.222350e-01        False        -0.167353   
2   April16    difference  2104.0  6.944884e-05         True        -0.443073   
3   April16     peakValue  2881.0  2.319260e-18         True        -0.975995   
4   April16           RMS  2869.0  4.445153e-18         True        -0.967764   
5   April16           tau   554.0  2.831992e-08         True         0.620027   
6   April16           AUC  2802.0  1.518006e-16         True        -0.921811   
7    June26  reactionTime  1575.5  4.525481e-01        False        -0.080590   
8    June26      peakTime  1183.5  8.026984e-02        False         0.188272   
9    June26    difference  1277.5  2.659279e-01        False         0.123800   
10   June26     peakValue  2317.0  1.327252e-07         True        -0.589163   
11   June26           RMS  2

## Summary of Sex Differences by Date (Non-parametric Tests)

All results below use the **Mann-Whitney U test** for sex differences and **Dunn's post hoc** for pairwise comparison.  
Effect strength is rank-biserial correlation (positive = higher in males, negative = higher in females).  
Mean difference is (male mean - female mean).

| Date     | Variable      | Stat   | p-value    | Significant | Effect Strength | Dunn p      | Mean Diff   |
|----------|--------------|--------|------------|-------------|-----------------|-------------|-------------|
| April16  | reactionTime | 574.5  | 4.73e-08   | True        | 0.61            | 4.65e-08    | -1.05       |
| April16  | peakTime     | 1702.0 | 0.122      | False       | -0.17           | 0.121       | 0.73        |
| April16  | difference   | 2104.0 | 6.94e-05   | True        | -0.44           | 6.86e-05    | 1.78        |
| April16  | peakValue    | 2881.0 | 2.32e-18   | True        | -0.98           | 2.26e-18    | 61.08       |
| April16  | RMS          | 2869.0 | 4.45e-18   | True        | -0.97           | 4.33e-18    | 16.36       |
| April16  | tau          | 554.0  | 2.83e-08   | True        | 0.62            | 2.78e-08    | -21.62      |
| April16  | AUC          | 2802.0 | 1.52e-16   | True        | -0.92           | 1.48e-16    | 5809.32     |
| June26   | reactionTime | 1575.5 | 0.453      | False       | -0.08           | 0.451       | 0.28        |
| June26   | peakTime     | 1183.5 | 0.080      | False       | 0.19            | 0.080       | -0.83       |
| June26   | difference   | 1277.5 | 0.266      | False       | 0.12            | 0.265       | -1.11       |
| June26   | peakValue    | 2317.0 | 1.33e-07   | True        | -0.59           | 1.31e-07    | 82.86       |
| June26   | RMS          | 2175.0 | 1.07e-05   | True        | -0.49           | 1.06e-05    | 17.32       |
| June26   | tau          | 619.0  | 2.58e-07   | True        | 0.58            | 2.53e-07    | -42.61      |
| June26   | AUC          | 1869.0 | 1.17e-02   | True        | -0.28           | 1.16e-02    | 4660.23     |
| May20    | reactionTime | 1113.0 | 0.030      | True        | 0.24            | 3.00e-02    | -0.42       |
| May20    | peakTime     | 916.5  | 0.00060    | True        | 0.37            | 5.96e-04    | -1.62       |
| May20    | difference   | 1190.0 | 0.098      | False       | 0.18            | 0.098       | -1.20       |
| May20    | peakValue    | 2794.5 | 2.23e-16   | True        | -0.92           | 2.17e-16    | 63.34       |
| May20    | RMS          | 2526.0 | 5.41e-11   | True        | -0.73           | 5.30e-11    | 12.88       |
| May20    | tau          | 608.0  | 1.79e-07   | True        | 0.58            | 1.76e-07    | -48.50      |
| May20    | AUC          | 2040.0 | 3.53e-04   | True        | -0.40           | 3.49e-04    | 3766.80     |



**Interpretation:**
- **Strength metrics** (peakValue, RMS, tau, AUC) show large and highly significant sex differences on all dates.
- **Males have higher values** for peakValue, RMS, and AUC; **females have higher tau**.
- **Reaction metrics** (reactionTime, peakTime, difference) show less consistent and weaker sex differences, mostly significant on April16 and May20.
- All results are robust to non-normality and outliers.

# ---> strong effect of sex on strength
# ---> weak effect of sex on reaction

---

## Recording Order Differences

In [61]:
# ...existing code...

import pandas as pd
from scipy.stats import mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['reactionTime', 'peakTime', 'difference']

recording_order = [15, 2, 1, 6, 10, 4]
order_map = {f'Animal{num}': i+1 for i, num in enumerate(recording_order)}

results = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        df_sub = df[(df['sex'] == sex) & (df['date'] == date)].copy()
        df_sub['recording_order'] = df_sub['animal'].map(order_map)
        df_sub = df_sub[df_sub['recording_order'].notnull()]
        median_order = df_sub['recording_order'].median()
        df_sub['group'] = ['early' if o <= median_order else 'late' for o in df_sub['recording_order']]
        for metric in strength_metrics + reaction_metrics:
            vals_early = df_sub[df_sub['group'] == 'early'][metric].dropna()
            vals_late = df_sub[df_sub['group'] == 'late'][metric].dropna()
            mean_diff = vals_early.mean() - vals_late.mean() if len(vals_early) > 0 and len(vals_late) > 0 else None
            if len(vals_early) > 1 and len(vals_late) > 1:
                # Non-parametric Mann-Whitney U for all metrics
                stat, p = mannwhitneyu(vals_early, vals_late, alternative='two-sided')
                n1, n2 = len(vals_early), len(vals_late)
                u, _ = mannwhitneyu(vals_early, vals_late, alternative='two-sided')
                effect_strength = 1 - (2 * u) / (n1 * n2)
                # Post hoc: Dunn's test
                try:
                    data = pd.DataFrame({metric: pd.concat([vals_early, vals_late]),
                                        'group': ['early'] * len(vals_early) + ['late'] * len(vals_late)})
                    dunn = sp.posthoc_dunn(data, val_col=metric, group_col='group', p_adjust='bonferroni')
                    posthoc_p = dunn.loc['early', 'late']
                except Exception:
                    posthoc_p = None
                results.append({
                    'sex': sex,
                    'date': date,
                    'metric': metric,
                    'test': 'mannwhitneyu',
                    'stat': stat,
                    'p': p,
                    'effect_strength': effect_strength,
                    'posthoc_p': posthoc_p,
                    'posthoc_test': 'Dunn',
                    'significant': p < 0.05,
                    'mean_diff': mean_diff
                })

test_rec_order = pd.DataFrame(results)
print(test_rec_order)

test_rec_order.to_excel(os.path.join(output_dir, "TEST_REC_ORDER.xlsx"), index=False)
# ...existing code...

       sex     date        metric          test   stat             p  \
0   female  April16     peakValue  mannwhitneyu  252.0  1.895094e-01   
1   female  April16           RMS  mannwhitneyu  194.5  1.792725e-02   
2   female  April16           tau  mannwhitneyu  186.0  1.163445e-02   
3   female  April16           AUC  mannwhitneyu  155.0  1.988978e-03   
4   female  April16  reactionTime  mannwhitneyu  469.0  7.113408e-03   
5   female  April16      peakTime  mannwhitneyu  103.5  1.624793e-05   
6   female  April16    difference  mannwhitneyu   94.0  2.060878e-05   
7   female   June26     peakValue  mannwhitneyu  243.0  1.396267e-01   
8   female   June26           RMS  mannwhitneyu  216.0  4.854571e-02   
9   female   June26           tau  mannwhitneyu   76.0  5.586187e-06   
10  female   June26           AUC  mannwhitneyu  167.0  4.082956e-03   
11  female   June26  reactionTime  mannwhitneyu  479.5  3.827184e-03   
12  female   June26      peakTime  mannwhitneyu  364.5  4.186367

## Summary: Recording Order Effects (Non-parametric Mann-Whitney U, Dunn's post hoc)

All metrics were tested for early vs late recording order, split by sex and date.  
Significance is based on p < 0.05 (Mann-Whitney U). Effect strength is rank-biserial correlation.

| Sex    | Date    | Metric        | Stat   | p-value    | Significant | Effect Strength | Dunn p    | Mean Diff   |
|--------|---------|--------------|--------|------------|-------------|-----------------|-----------|-------------|
| female | April16 | RMS          | 194.5  | 1.79e-02   | True        | 0.40            | 1.75e-02  | -2.59       |
| female | April16 | tau          | 186.0  | 1.16e-02   | True        | 0.43            | 1.13e-02  | -9.09       |
| female | April16 | AUC          | 155.0  | 1.99e-03   | True        | 0.52            | 1.93e-03  | -1380.63    |
| female | April16 | reactionTime | 469.0  | 7.11e-03   | True        | -0.45           | 6.92e-03  | 0.44        |
| female | April16 | peakTime     | 103.5  | 1.62e-05   | True        | 0.68            | 1.55e-05  | -0.60       |
| female | April16 | difference   | 94.0   | 2.06e-05   | True        | 0.71            | 1.98e-05  | -1.04       |
| female | June26  | RMS          | 216.0  | 4.85e-02   | True        | 0.33            | 4.75e-02  | -4.45       |
| female | June26  | tau          | 76.0   | 5.59e-06   | True        | 0.77            | 5.35e-06  | -26.00      |
| female | June26  | AUC          | 167.0  | 4.08e-03   | True        | 0.48            | 3.97e-03  | -2747.84    |
| female | June26  | reactionTime | 479.5  | 3.83e-03   | True        | -0.48           | 3.72e-03  | 1.02        |
| female | May20   | AUC          | 212.0  | 4.08e-02   | True        | 0.35            | 3.99e-02  | -1186.96    |
| female | May20   | peakTime     | 206.5  | 2.66e-02   | True        | 0.36            | 2.60e-02  | -0.27       |
| male   | April16 | peakValue    | 542.0  | 6.58e-05   | True        | -0.67           | 6.33e-05  | 27.67       |
| male   | April16 | RMS          | 577.0  | 3.60e-06   | True        | -0.78           | 3.44e-06  | 9.71        |
| male   | April16 | tau          | 562.0  | 1.31e-05   | True        | -0.73           | 1.26e-05  | 27.45       |
| male   | April16 | AUC          | 610.0  | 1.62e-07   | True        | -0.88           | 1.54e-07  | 4737.36     |
| male   | June26  | peakValue    | 648.0  | 2.92e-09   | True        | -1.00           | 2.76e-09  | 135.04      |
| male   | June26  | RMS          | 648.0  | 2.92e-09   | True        | -1.00           | 2.76e-09  | 35.43       |
| male   | June26  | AUC          | 648.0  | 2.92e-09   | True        | -1.00           | 2.76e-09  | 12919.49    |
| male   | June26  | reactionTime | 46.0   | 2.12e-08   | True        | 0.86            | 2.00e-08  | -1.22       |
| male   | June26  | peakTime     | 610.0  | 1.17e-07   | True        | -0.88           | 1.11e-07  | 5.81        |
| male   | June26  | difference   | 604.5  | 2.26e-07   | True        | -0.87           | 2.15e-07  | 7.03        |
| male   | May20   | RMS          | 554.0  | 2.54e-05   | True        | -0.71           | 2.44e-05  | 12.09       |
| male   | May20   | tau          | 622.0  | 4.79e-08   | True        | -0.92           | 4.55e-08  | 68.56       |
| male   | May20   | AUC          | 606.0  | 2.40e-07   | True        | -0.87           | 2.28e-07  | 6741.81     |

**Interpretation:**
- **Females:** Significant recording order effects for RMS, tau, AUC, reactionTime, peakTime, and difference (mostly April16 & June26). Effects are moderate to strong.
- **Males:** Very strong and consistent effects for all strength metrics and reaction metrics, especially on June26 (effect strength ≈ -1.0).
- **Direction:** For females, late recordings tend to have higher values. For males, early recordings have higher values.
- **Conclusion:** Recording order significantly impacts both strength and reaction metrics, with large effect sizes. Always control for recording order in analysis.

# ---> strong effect of recording order

In [62]:
# Show direction of recording order effect for significant results

import pandas as pd

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['reactionTime', 'peakTime', 'difference']
recording_order = [15, 2, 1, 6, 10, 4]
order_map = {f'Animal{num}': i+1 for i, num in enumerate(recording_order)}

summary_rows = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        df_sub = df[(df['sex'] == sex) & (df['date'] == date)].copy()
        df_sub['recording_order'] = df_sub['animal'].map(order_map)
        df_sub = df_sub[df_sub['recording_order'].notnull()]
        median_order = df_sub['recording_order'].median()
        df_sub['group'] = ['early' if o <= median_order else 'late' for o in df_sub['recording_order']]
        for metric in strength_metrics + reaction_metrics:
            vals_early = df_sub[df_sub['group'] == 'early'][metric].dropna()
            vals_late = df_sub[df_sub['group'] == 'late'][metric].dropna()
            if len(vals_early) > 1 and len(vals_late) > 1:
                mean_early = vals_early.mean()
                mean_late = vals_late.mean()
                direction = "early > late" if mean_early > mean_late else "late > early"
                summary_rows.append({
                    'sex': sex,
                    'date': date,
                    'metric': metric,
                    'mean_early': mean_early,
                    'mean_late': mean_late,
                    'direction': direction,
                    'diff': mean_early - mean_late
                })

test_rec_order_direction = pd.DataFrame(summary_rows)
print(test_rec_order_direction)

test_rec_order_direction.to_excel(os.path.join(output_dir, "TEST_REC_ORDER_DIRECTION.xlsx"), index=False)

       sex     date        metric    mean_early     mean_late     direction  \
0   female  April16     peakValue     61.327778     67.205556  late > early   
1   female  April16           RMS     15.679750     18.265194  late > early   
2   female  April16           tau    153.751639    162.841056  late > early   
3   female  April16           AUC   6127.447361   7508.073028  late > early   
4   female  April16  reactionTime     11.719444     11.277778  early > late   
5   female  April16      peakTime     30.058333     30.655556  late > early   
6   female  April16    difference     18.338889     19.377778  late > early   
7   female   June26     peakValue    102.475463    114.063889  late > early   
8   female   June26           RMS     28.489486     32.938194  late > early   
9   female   June26           tau    150.827454    176.830574  late > early   
10  female   June26           AUC  11873.962755  14621.798343  late > early   
11  female   June26  reactionTime     10.482407     

## Summary: Direction of Recording Order Effects

- **Females:**  
  - For all dates, strength metrics (`peakValue`, `RMS`, `tau`, `AUC`) are **higher in late recordings** (`late > early`).
  - Reaction metrics (`reactionTime`, `peakTime`, `difference`) are mostly **higher in late recordings**, except `reactionTime` (which is higher in early recordings).
  - The effect is consistent: **late recordings tend to have higher values** for most metrics.

- **Males:**  
  - For all dates and all metrics, **early recordings have higher values** (`early > late`), except for `reactionTime` on June26 (where late is higher).
  - The effect is strong and consistent: **early recordings show higher strength and reaction metrics**.

- **Magnitude:**  
  - The difference (`diff`) between early and late groups is often substantial, especially for males (e.g., peakValue, RMS, AUC, difference).

**Interpretation:**  
- **Recording order has a clear directional effect:**  
  - **Females:** Metrics increase with recording order (late > early).
  - **Males:** Metrics decrease with recording order (early > late).
- **This effect is robust across dates and metrics.**
- **Always control for recording order in analysis, as it can confound experimental results.**

### ---> peakTime higher and reactionTime lower with less strength?

In [65]:
# ...existing code...

import pandas as pd
from scipy.stats import spearmanr

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['peakTime', 'reactionTime']

correlation_results = []

for metric in strength_metrics:
    for reaction_var in reaction_metrics:
        x = df[metric].dropna()
        y = df[reaction_var].dropna()
        # Align indices to avoid mismatches
        common_idx = x.index.intersection(y.index)
        x_aligned = x.loc[common_idx]
        y_aligned = y.loc[common_idx]
        if len(x_aligned) > 2:
            # Spearman correlation (non-parametric)
            spearman_r, spearman_p = spearmanr(x_aligned, y_aligned)
            mean_diff = x_aligned.mean() - y_aligned.mean()
            correlation_results.append({
                'strength_metric': metric,
                'reaction_metric': reaction_var,
                'spearman_r': spearman_r,
                'spearman_p': spearman_p,
                'n': len(x_aligned),
                'mean_diff': mean_diff
            })

test_peakTime_reactionTime_to_strength_cor = pd.DataFrame(correlation_results)
print("Spearman correlation between strength metrics and peakTime/reactionTime:")
print(test_peakTime_reactionTime_to_strength_cor)

test_peakTime_reactionTime_to_strength_cor.to_excel(os.path.join(output_dir, "TEST_PEAKTIME_REACTIONTIME_TO_STRENGTH.xlsx"), index=False)
# ...existing code...

Spearman correlation between strength metrics and peakTime/reactionTime:
  strength_metric reaction_metric  spearman_r    spearman_p    n     mean_diff
0       peakValue        peakTime    0.025790  6.437244e-01  324     92.256893
1       peakValue    reactionTime   -0.536682  1.449731e-25  324    112.208128
2             RMS        peakTime    0.077919  1.617386e-01  324      1.134897
3             RMS    reactionTime   -0.538670  8.901558e-26  324     21.086132
4             tau        peakTime    0.160016  3.879662e-03  324    107.794012
5             tau    reactionTime    0.136935  1.362777e-02  324    127.745246
6             AUC        peakTime    0.081657  1.424866e-01  324  12145.060785
7             AUC    reactionTime   -0.505347  2.099658e-22  324  12165.012020


### Spearman Correlation Summary: Strength Metrics vs PeakTime/ReactionTime

| Strength Metric | Reaction Metric | Spearman r | Spearman p | Significant? | n   | Mean Diff      |
|-----------------|----------------|------------|------------|--------------|-----|---------------|
| peakValue       | peakTime       |  0.026     | 0.644      | No           | 324 |    92.26      |
| peakValue       | reactionTime   | -0.537     | 1.45e-25   | Yes          | 324 |   112.21      |
| RMS             | peakTime       |  0.078     | 0.162      | No           | 324 |     1.13      |
| RMS             | reactionTime   | -0.539     | 8.90e-26   | Yes          | 324 |    21.09      |
| tau             | peakTime       |  0.160     | 0.0039     | Yes          | 324 |   107.79      |
| tau             | reactionTime   |  0.137     | 0.0136     | Yes          | 324 |   127.75      |
| AUC             | peakTime       |  0.082     | 0.142      | No           | 324 | 12145.06      |
| AUC             | reactionTime   | -0.505     | 2.10e-22   | Yes          | 324 | 12165.01      |

**Interpretation:**
- **ReactionTime** is moderately and significantly negatively correlated with peakValue, RMS, and AUC (Spearman r ≈ -0.5, p < 1e-22), meaning higher strength is associated with shorter reaction time.
- **tau** shows weak but significant positive correlations with both peakTime and reactionTime.
- **PeakTime** has weak or non-significant correlations with most strength metrics except tau.
- **Summary:** ReactionTime is the most strongly and consistently (negatively) correlated with strength metrics.

## ---> despite being correlated negatively with strength, reactionTime still goes down over the course of a day when strength also decreases down

---

## Experiment Differences

In [66]:
# ...existing code...

import pandas as pd
from scipy.stats import kruskal, mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']
strength_metrics = ['peakValue', 'RMS', 'tau', 'AUC']
reaction_metrics = ['reactionTime', 'peakTime', 'difference']

results = []

for sex in df['sex'].unique():
    for date in df['date'].unique():
        df_sub = df[(df['sex'] == sex) & (df['date'] == date)]
        # Non-parametric Kruskal-Wallis for all metrics
        for metric in strength_metrics + reaction_metrics:
            groups = [df_sub[df_sub['experiment'] == exp][metric].dropna() for exp in df_sub['experiment'].unique()]
            groups = [g for g in groups if len(g) > 1]
            mean_diff = None
            stat, p, posthoc_p, posthoc_test, exp1, exp2, eff = None, None, None, None, None, None, None
            if len(groups) > 1:
                stat, p = kruskal(*groups)
                if p < 0.05:
                    try:
                        dunn = sp.posthoc_dunn(df_sub, val_col=metric, group_col='experiment', p_adjust='bonferroni')
                        min_p = dunn.replace(0, float('nan')).min().min()
                        idx = dunn.stack().idxmin()
                        exp1, exp2 = idx
                        vals1 = df_sub[df_sub['experiment'] == exp1][metric].dropna()
                        vals2 = df_sub[df_sub['experiment'] == exp2][metric].dropna()
                        u, _ = mannwhitneyu(vals1, vals2, alternative='two-sided')
                        n1, n2 = len(vals1), len(vals2)
                        eff = 1 - (2 * u) / (n1 * n2)
                        mean_diff = vals1.mean() - vals2.mean()
                        posthoc_p = min_p
                        posthoc_test = "Dunn"
                    except Exception:
                        posthoc_p, posthoc_test, mean_diff = None, "Dunn", None
            results.append({
                'sex': sex,
                'date': date,
                'metric': metric,
                'test': 'Kruskal-Wallis',
                'stat': stat,
                'p': p,
                'significant': p is not None and p < 0.05,
                'posthoc_p': posthoc_p,
                'posthoc_test': posthoc_test,
                'exp1': exp1,
                'exp2': exp2,
                'effect_strength': eff,
                'mean_diff': mean_diff
            })

test_experiment = pd.DataFrame(results)
print("Experiment effects on metrics (all non-parametric):")
print(test_experiment)

test_experiment.to_excel(os.path.join(output_dir, "TEST_EXPERIMENT.xlsx"), index=False)
# ...existing code...

Experiment effects on metrics (all non-parametric):
       sex     date        metric            test       stat         p  \
0   female  April16     peakValue  Kruskal-Wallis  15.418684  0.565352   
1   female  April16           RMS  Kruskal-Wallis  16.970344  0.456377   
2   female  April16           tau  Kruskal-Wallis  13.913131  0.673241   
3   female  April16           AUC  Kruskal-Wallis  16.498990  0.488777   
4   female  April16  reactionTime  Kruskal-Wallis  20.457419  0.251504   
5   female  April16      peakTime  Kruskal-Wallis   9.130543  0.936077   
6   female  April16    difference  Kruskal-Wallis  11.613911  0.822932   
7   female   June26     peakValue  Kruskal-Wallis  13.834725  0.678765   
8   female   June26           RMS  Kruskal-Wallis  13.643771  0.692138   
9   female   June26           tau  Kruskal-Wallis   8.135354  0.963566   
10  female   June26           AUC  Kruskal-Wallis  12.706397  0.755608   
11  female   June26  reactionTime  Kruskal-Wallis   9.902469

## Experiment Effects on Metrics (All Non-Parametric)

| Sex    | Date    | Metric        | Test            | Stat      | p-value   | Significant |
|--------|---------|--------------|-----------------|-----------|-----------|-------------|
| female | April16 | peakValue    | Kruskal-Wallis  | 15.42     | 0.57      | False       |
| female | April16 | RMS          | Kruskal-Wallis  | 16.97     | 0.46      | False       |
| female | April16 | tau          | Kruskal-Wallis  | 13.91     | 0.67      | False       |
| female | April16 | AUC          | Kruskal-Wallis  | 16.50     | 0.49      | False       |
| female | April16 | reactionTime | Kruskal-Wallis  | 20.46     | 0.25      | False       |
| female | April16 | peakTime     | Kruskal-Wallis  | 9.13      | 0.94      | False       |
| female | April16 | difference   | Kruskal-Wallis  | 11.61     | 0.82      | False       |
| male   | May20   | difference   | Kruskal-Wallis  | 15.05     | 0.59      | False       |
| ...    | ...     | ...          | ...             | ...       | ...       | ...         |

**Summary:**  
- No significant experiment effects were found for any metric, sex, or date (all p > 0.05).
- All metrics tested: peakValue, RMS, tau, AUC, reactionTime, peakTime, difference.
- No post hoc tests performed (no significant results).

**Interpretation:**  
- Experimental manipulations (e.g., gap durations, offset_PPI, tone_in_noise) do **not** significantly alter strength or reaction metrics when sex and date are controlled.
- **Date and sex effects are much stronger than experiment effects** in this dataset.

In [None]:
# Test if there is a significant difference in peakValue on June26 between 'tone_in_noise' and 'gap_duration_4' (visually the highest)
import pandas as pd
from scipy.stats import ttest_ind

df = dfs['RESULTS_MTT_MERGED']

# Filter for June26 and the two experiments
df_june26 = df[df['date'] == 'June26']
vals_tone = df_june26[df_june26['experiment'] == 'tone_in_noise']['peakValue'].dropna()
vals_gap4 = df_june26[df_june26['experiment'] == 'gap_duration_4']['peakValue'].dropna()

# Print means
mean_tone = vals_tone.mean()
mean_gap4 = vals_gap4.mean()
print(f"Mean peakValue (tone_in_noise): {mean_tone:.4f}")
print(f"Mean peakValue (gap_duration_4): {mean_gap4:.4f}")

# Perform t-test (parametric, as peakValue is a strength metric)
stat, p = ttest_ind(vals_tone, vals_gap4, equal_var=False)

print(f"peakValue on June26: tone_in_noise vs gap_duration_4")
print(f"t-statistic = {stat:.3f}, p-value = {p:.3e}")
if p < 0.05:
    print("Significant difference found.")
else:
    print("No significant difference found.")

Mean peakValue (tone_in_noise): 110.2306
Mean peakValue (gap_duration_4): 161.8167
peakValue on June26: tone_in_noise vs gap_duration_4
t-statistic = -1.309, p-value = 2.199e-01
No significant difference found.


In [68]:
# Compare peakValue between 'tone_in_noise' and 'gap_duration_4' across all dates (merged)

import pandas as pd
from scipy.stats import ttest_ind

df = dfs['RESULTS_MTT_MERGED']

# Merge all dates for both experiments
vals_tone_all = df[df['experiment'] == 'tone_in_noise']['peakValue'].dropna()
vals_gap4_all = df[df['experiment'] == 'gap_duration_4']['peakValue'].dropna()

# Calculate means
mean_tone_all = vals_tone_all.mean()
mean_gap4_all = vals_gap4_all.mean()

print(f"Mean peakValue (tone_in_noise, all dates): {mean_tone_all:.4f}")
print(f"Mean peakValue (gap_duration_4, all dates): {mean_gap4_all:.4f}")

# Perform t-test
stat_all, p_all = ttest_ind(vals_tone_all, vals_gap4_all, equal_var=False)

print("peakValue: tone_in_noise vs gap_duration_4 (all dates merged)")
print(f"t-statistic = {stat_all:.3f}, p-value = {p_all:.3e}")
if p_all < 0.05:
    print("Significant difference found.")
else:
    print("No significant difference found.")

Mean peakValue (tone_in_noise, all dates): 99.5417
Mean peakValue (gap_duration_4, all dates): 137.4667
peakValue: tone_in_noise vs gap_duration_4 (all dates merged)
t-statistic = -2.198, p-value = 3.497e-02
Significant difference found.


## ---> maybe merge for date anyways?

In [70]:
# Test for experiment effect on peakValue (all dates merged, split by sex)

import pandas as pd
from scipy.stats import kruskal, mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']

for sex in df['sex'].unique():
    df_sex = df[df['sex'] == sex]
    experiments = df_sex['experiment'].unique().tolist()
    groups = [df_sex[df_sex['experiment'] == exp]['peakValue'].dropna() for exp in experiments]
    groups = [g for g in groups if len(g) > 1]
    print(f"\nSex: {sex}")
    if len(groups) > 1:
        stat, p = kruskal(*groups)
        print(f"Kruskal-Wallis test for experiment effect on peakValue (all dates merged):")
        print(f"statistic = {stat:.3f}, p-value = {p:.3e}")
        if p < 0.05:
            print("Significant experiment effect found.")
            try:
                dunn = sp.posthoc_dunn(df_sex, val_col='peakValue', group_col='experiment', p_adjust='bonferroni')
                print("Dunn's post hoc p-values (experiment pairs):")
                print(dunn)
            except Exception as e:
                print("Post hoc test failed:", e)
        else:
            print("No significant experiment effect found.")
    else:
        print("Not enough data for experiment effect test.")


Sex: female
Kruskal-Wallis test for experiment effect on peakValue (all dates merged):
statistic = 16.410, p-value = 4.950e-01
No significant experiment effect found.

Sex: male
Kruskal-Wallis test for experiment effect on peakValue (all dates merged):
statistic = 7.649, p-value = 9.735e-01
No significant experiment effect found.


In [71]:
# Test for experiment effect on peakValue (merged by sex, split by date)

import pandas as pd
from scipy.stats import kruskal, mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']

for date in df['date'].unique():
    df_date = df[df['date'] == date]
    for sex in df_date['sex'].unique():
        df_sex_date = df_date[df_date['sex'] == sex]
        experiments = df_sex_date['experiment'].unique().tolist()
        groups = [df_sex_date[df_sex_date['experiment'] == exp]['peakValue'].dropna() for exp in experiments]
        groups = [g for g in groups if len(g) > 1]
        print(f"\nDate: {date}, Sex: {sex}")
        if len(groups) > 1:
            stat, p = kruskal(*groups)
            print(f"Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):")
            print(f"statistic = {stat:.3f}, p-value = {p:.3e}")
            if p < 0.05:
                print("Significant experiment effect found.")
                try:
                    dunn = sp.posthoc_dunn(df_sex_date, val_col='peakValue', group_col='experiment', p_adjust='bonferroni')
                    print("Dunn's post hoc p-values (experiment pairs):")
                    print(dunn)
                except Exception as e:
                    print("Post hoc test failed:", e)
            else:
                print("No significant experiment effect found.")
        else:
            print("Not enough data for experiment effect test.")


Date: April16, Sex: female
Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):
statistic = 15.419, p-value = 5.654e-01
No significant experiment effect found.

Date: April16, Sex: male
Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):
statistic = 15.308, p-value = 5.733e-01
No significant experiment effect found.

Date: June26, Sex: female
Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):
statistic = 13.835, p-value = 6.788e-01
No significant experiment effect found.

Date: June26, Sex: male
Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):
statistic = 6.360, p-value = 9.904e-01
No significant experiment effect found.

Date: May20, Sex: female
Kruskal-Wallis test for experiment effect on peakValue (sex merged, date split):
statistic = 6.730, p-value = 9.868e-01
No significant experiment effect found.

Date: May20, Sex: male
Kruskal-Wallis test for experiment effect

In [69]:
# Test for experiment effect on peakValue (all dates merged, all experiments)

import pandas as pd
from scipy.stats import kruskal, mannwhitneyu
import scikit_posthocs as sp

df = dfs['RESULTS_MTT_MERGED']

# List of experiments present in the data
experiments = df['experiment'].unique().tolist()

# Gather peakValue for each experiment (merged across all dates)
groups = [df[df['experiment'] == exp]['peakValue'].dropna() for exp in experiments]
groups = [g for g in groups if len(g) > 1]

# Kruskal-Wallis test for experiment effect
if len(groups) > 1:
    stat, p = kruskal(*groups)
    print(f"Kruskal-Wallis test for experiment effect on peakValue (all dates merged):")
    print(f"statistic = {stat:.3f}, p-value = {p:.3e}")
    if p < 0.05:
        print("Significant experiment effect found.")
        # Dunn's post hoc test
        try:
            dunn = sp.posthoc_dunn(df, val_col='peakValue', group_col='experiment', p_adjust='bonferroni')
            print("Dunn's post hoc p-values (experiment pairs):")
            print(dunn)
        except Exception as e:
            print("Post hoc test failed:", e)
    else:
        print("No significant experiment effect found.")
else:
    print("Not enough data for experiment effect test.")

Kruskal-Wallis test for experiment effect on peakValue (all dates merged):
statistic = 11.107, p-value = 8.509e-01
No significant experiment effect found.


# ---> no overall significant effect of experiment on any metric, only in comparison between singular experiments do differences show

---