-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats.py
104 lines (93 loc) · 3.43 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import itertools
import pandas as pd
import scipy.stats as st
from mne.stats import fdr_correction
from plots import update_names
def load_data(dataset):
    """Read the raw results table for ``dataset`` into a DataFrame.

    The file is looked up at ``../tables/raw/<dataset>.csv``. The path is
    relative, so it resolves against the current working directory.
    """
    csv_path = f"../tables/raw/{dataset}.csv"
    table = pd.read_csv(csv_path)
    return table
def calc_stats(dataset, reverse_grouping=False):
    """Run pairwise paired t-tests within each grouping and save them as CSV.

    The table for ``dataset`` is loaded, passed through ``update_names`` and
    split by its ``Target`` column. Within each target, pairs of distinct
    ``Feature`` values are compared with ``scipy.stats.ttest_rel``, using the
    first matching row of each feature and all columns from the third onward
    as the paired samples. The p-values of all comparisons are FDR-corrected
    together (alpha = 0.05) and the resulting table is written to
    ``../stats/raw/<dataset>[_crossed]_stats.csv``.

    Args:
        dataset: Base name of the CSV file under ``../tables/raw``. If the
            name contains ``"ablation"``, an extra pair filter is applied.
        reverse_grouping: If true, swap the roles of ``Feature`` and
            ``Target`` (group by feature, compare targets) and append
            ``_crossed`` to the output name.

    Returns:
        None. The result is written to disk.
    """
    data = update_names(load_data(dataset))
    if reverse_grouping:
        # Swap both labels in one rename. Going through a placeholder name
        # would clash with any real column that happens to use that name.
        data = data.rename(columns={"Feature": "Target", "Target": "Feature"})

    is_ablation = "ablation" in dataset
    groupings, S1, S2, tval, pval = [], [], [], [], []
    for grouping in data["Target"].unique():
        samples = data[data["Target"] == grouping]
        pairings = samples["Feature"].unique()
        for p1, p2 in itertools.combinations(pairings, 2):
            if is_ablation:
                # Keep only pairs whose first member is a combination
                # (contains "+") and whose second member is not.
                # NOTE(review): the last check iterates over the individual
                # characters of p1, so it passes whenever p1 and p2 share any
                # character. Presumably a component-wise match on
                # p1.split("+") was intended - confirm before changing, since
                # it alters which comparisons enter the FDR correction.
                if "+" not in p1 or "+" in p2 or not any(n in p2 for n in p1):
                    continue
            # Selecting a single row from a mixed-dtype table yields object
            # values; convert to float so the test receives numeric arrays.
            s1 = (
                samples[samples["Feature"] == p1]
                .iloc[0, 2:]
                .to_numpy(dtype=float)
            )
            s2 = (
                samples[samples["Feature"] == p2]
                .iloc[0, 2:]
                .to_numpy(dtype=float)
            )
            t, p = st.ttest_rel(s1, s2)
            groupings.append(grouping)
            S1.append(p1)
            S2.append(p2)
            tval.append(t)
            pval.append(p)

    # One correction across every comparison of every grouping.
    h_corrected, pval_corrected = fdr_correction(pval, alpha=0.05)
    stats = pd.DataFrame(
        {
            "Grouping": groupings,
            "S1": S1,
            "S2": S2,
            "t": tval,
            "p": pval,
            "p (corrected)": pval_corrected,
            "h (corrected)": h_corrected.astype("int"),
        }
    )
    if reverse_grouping:
        dataset = f"{dataset}_crossed"
    stats.to_csv(f"../stats/raw/{dataset}_stats.csv", index=False)
def calc_anova(dataset, reverse_grouping=False):
    """Run a one-way ANOVA within each grouping and save the results as CSV.

    The table for ``dataset`` is loaded, passed through ``update_names`` and
    split by its ``Target`` column. For each target, every row of that subset
    (columns from the third onward) is passed to ``scipy.stats.f_oneway`` as
    one sample group. The p-values of all groupings are FDR-corrected together
    (alpha = 0.05) and the resulting table is written to
    ``../stats/raw/<dataset>[_crossed]_anova_stats.csv``.

    Args:
        dataset: Base name of the CSV file under ``../tables/raw``.
        reverse_grouping: If true, swap the roles of ``Feature`` and
            ``Target`` before grouping and append ``_crossed`` to the output
            name.

    Returns:
        None. The result is written to disk.
    """
    data = update_names(load_data(dataset))
    if reverse_grouping:
        # Swap both labels in one rename. Going through a placeholder name
        # would clash with any real column that happens to use that name.
        data = data.rename(columns={"Feature": "Target", "Target": "Feature"})

    groupings, fval, pval = [], [], []
    for grouping in data["Target"].unique():
        samples = data[data["Target"] == grouping]
        # 2-D array: one row per table row of this grouping.
        samples = samples.iloc[:, 2:].values
        # Unpack the rows so each one becomes a separate group of the ANOVA.
        f, p = st.f_oneway(*samples)
        groupings.append(grouping)
        fval.append(f)
        pval.append(p)

    # Alpha is spelled out to match the pairwise t-test analysis.
    h_corrected, pval_corrected = fdr_correction(pval, alpha=0.05)
    stats = pd.DataFrame(
        {
            "Grouping": groupings,
            "f": fval,
            "p": pval,
            "p (corrected)": pval_corrected,
            "h (corrected)": h_corrected.astype("int"),
        }
    )
    if reverse_grouping:
        dataset = f"{dataset}_crossed"
    dataset = f"{dataset}_anova"
    stats.to_csv(f"../stats/raw/{dataset}_stats.csv", index=False)
def main():
    """Compute t-test and ANOVA statistics for every configured dataset.

    For each dataset name, the ``<name>_subjects`` table is analysed with
    ``calc_stats`` and ``calc_anova``, then again with reversed grouping, and
    finally ``mvpa_properties_rgr_subjects`` is analysed with reversed
    grouping. A failure in any step skips the remaining steps for that
    dataset, prints a notice and moves on to the next dataset.
    """
    datasets = [
        "mvpa_properties_all",
        "mvpa_models",
        "mvpa_properties_all_ablation",
        "mvpa_models_ablation",
    ]
    # NOTE(review): this tests membership in the whole ``datasets`` list, not
    # a substring match against the current ``dataset``, so it is true for
    # every iteration and the reversed-grouping analyses run for all
    # datasets. Behaviour is kept as-is because downstream steps may rely on
    # those outputs; confirm whether ``key in dataset`` was intended.
    run_crossed = any(key in datasets for key in ["mvpa_models"])

    for dataset in datasets:
        try:
            calc_stats(f"{dataset}_subjects")
            calc_anova(f"{dataset}_subjects")
            if run_crossed:
                calc_stats(f"{dataset}_subjects", reverse_grouping=True)
                calc_anova(f"{dataset}_subjects", reverse_grouping=True)
                calc_stats("mvpa_properties_rgr_subjects", reverse_grouping=True)
        except Exception as err:
            # Best-effort: missing supplemental tables must not abort the run,
            # but Ctrl-C / SystemExit now propagate and the cause is reported.
            print("not calculating all supplemental analysis statistics:", dataset)
            print(f"  cause: {type(err).__name__}: {err}")
# Run the analyses only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()