/
stats.py
154 lines (127 loc) · 4.94 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Statistical functions
"""
import logging
import numpy as np
from scipy import stats
from tedana import utils
LGR = logging.getLogger(__name__)
RepLGR = logging.getLogger('REPORT')
RefLGR = logging.getLogger('REFERENCES')
def getfbounds(n_echos):
"""
Gets F-statistic boundaries based on number of echos
Parameters
----------
n_echos : :obj:`int`
Number of echoes
Returns
-------
fmin, fmid, fmax : :obj:`float`
F-statistic thresholds for alphas of 0.05, 0.025, and 0.01,
respectively.
"""
f05 = stats.f.ppf(q=(1 - 0.05), dfn=1, dfd=(n_echos - 1))
f025 = stats.f.ppf(q=(1 - 0.025), dfn=1, dfd=(n_echos - 1))
f01 = stats.f.ppf(q=(1 - 0.01), dfn=1, dfd=(n_echos - 1))
return f05, f025, f01
def computefeats2(data, mmix, mask=None, normalize=True):
"""
Converts `data` to component space using `mmix`
Parameters
----------
data : (S x T) array_like
Input data
mmix : (T [x C]) array_like
Mixing matrix for converting input data to component space, where `C`
is components and `T` is the same as in `data`
mask : (S,) array_like or None, optional
Boolean mask array. Default: None
normalize : bool, optional
Whether to z-score output. Default: True
Returns
-------
data_Z : (S x C) :obj:`numpy.ndarray`
Data in component space
"""
if data.ndim != 2:
raise ValueError('Parameter data should be 2d, not {0}d'.format(data.ndim))
elif mmix.ndim not in [2]:
raise ValueError('Parameter mmix should be 2d, not '
'{0}d'.format(mmix.ndim))
elif (mask is not None) and (mask.ndim != 1):
raise ValueError('Parameter mask should be 1d, not {0}d'.format(mask.ndim))
elif (mask is not None) and (data.shape[0] != mask.shape[0]):
raise ValueError('First dimensions (number of samples) of data ({0}) '
'and mask ({1}) do not match.'.format(data.shape[0],
mask.shape[0]))
elif data.shape[1] != mmix.shape[0]:
raise ValueError('Second dimensions (number of volumes) of data ({0}) '
'and mmix ({1}) do not match.'.format(data.shape[0],
mmix.shape[0]))
# demean masked data
if mask is not None:
data = data[mask, ...]
data_vn = stats.zscore(data, axis=-1)
# get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
data_R = get_coeffs(data_vn, mmix, mask=None)
data_R[data_R < -0.999] = -0.999
data_R[data_R > 0.999] = 0.999
# R-to-Z transform
data_Z = np.arctanh(data_R)
if data_Z.ndim == 1:
data_Z = np.atleast_2d(data_Z).T
# normalize data
if normalize:
data_Zm = stats.zscore(data_Z, axis=0)
data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
data_Z.std(axis=0, keepdims=True))
return data_Z
def get_coeffs(data, X, mask=None, add_const=False):
"""
Performs least-squares fit of `X` against `data`
Parameters
----------
data : (S [x E] x T) array_like
Array where `S` is samples, `E` is echoes, and `T` is time
X : (T [x C]) array_like
Array where `T` is time and `C` is predictor variables
mask : (S [x E]) array_like
Boolean mask array
add_const : bool, optional
Add intercept column to `X` before fitting. Default: False
Returns
-------
betas : (S [x E] x C) :obj:`numpy.ndarray`
Array of `S` sample betas for `C` predictors
"""
if data.ndim not in [2, 3]:
raise ValueError('Parameter data should be 2d or 3d, not {0}d'.format(data.ndim))
elif X.ndim not in [2]:
raise ValueError('Parameter X should be 2d, not {0}d'.format(X.ndim))
elif data.shape[-1] != X.shape[0]:
raise ValueError('Last dimension (dimension {0}) of data ({1}) does not '
'match first dimension of '
'X ({2})'.format(data.ndim, data.shape[-1], X.shape[0]))
# mask data and flip (time x samples)
if mask is not None:
if mask.ndim not in [1, 2]:
raise ValueError('Parameter data should be 1d or 2d, not {0}d'.format(mask.ndim))
elif data.shape[0] != mask.shape[0]:
raise ValueError('First dimensions of data ({0}) and mask ({1}) do not '
'match'.format(data.shape[0], mask.shape[0]))
mdata = data[mask, :].T
else:
mdata = data.T
# coerce X to >=2d
X = np.atleast_2d(X)
if len(X) == 1:
X = X.T
if add_const: # add intercept, if specified
X = np.column_stack([X, np.ones((len(X), 1))])
betas = np.linalg.lstsq(X, mdata, rcond=None)[0].T
if add_const: # drop beta for intercept, if specified
betas = betas[:, :-1]
if mask is not None:
betas = utils.unmask(betas, mask)
return betas