Merge pull request #458 from swaroopgj/featselfix
Few bug fixes to handle feature selection, roi_seed, and datasets with different features
yarikoptic committed Apr 13, 2016
2 parents 7614648 + 07b940c commit b6fd7fc
Showing 2 changed files with 37 additions and 28 deletions.
45 changes: 29 additions & 16 deletions mvpa2/algorithms/searchlight_hyperalignment.py
@@ -24,8 +24,9 @@
from mvpa2.base.progress import ProgressBar
from mvpa2.base import externals, warning
from mvpa2.support import copy
-from mvpa2.featsel.helpers import FractionTailSelector, FixedNElementTailSelector
+from mvpa2.featsel.helpers import FixedNElementTailSelector
from mvpa2.base.types import is_datasetlike
+from mvpa2.misc.surfing.queryengine import SurfaceVerticesQueryEngine

if externals.exists('h5py'):
from mvpa2.base.hdf5 import h5save, h5load
@@ -115,14 +116,14 @@ def __init__(self, hyperalignment=Hyperalignment(ref_ds=0),
def __call__(self, datasets):
ref_ds = self.hyperalignment.params.ref_ds
nsamples, nfeatures = datasets[ref_ds].shape
-if 'roi_seed' in datasets[ref_ds].fa and np.any(datasets[ref_ds].fa['roi_seed']) :
+if 'roi_seed' in datasets[ref_ds].fa and np.any(datasets[ref_ds].fa['roi_seed']):
seed_index = np.where(datasets[ref_ds].fa.roi_seed)
else:
if not self.full_matrix:
raise ValueError(
"Setting full_matrix=False requires roi_seed `fa` in the "
"reference dataset indicating center feature and some "
"feature(s) being marked as `roi_sid`.")
"feature(s) being marked as `roi_seed`.")
seed_index = None
# Voxel selection within Searchlight
# Usual metric of between-subject between-voxel correspondence
@@ -132,10 +133,12 @@ def __call__(self, datasets):
if self.featsel != 1.0:
# computing feature scores from the data
feature_scores = compute_feature_scores(datasets, self.exclude_from_model)
-if self.featsel < 1.0:
-    fselector = FractionTailSelector(self.featsel, tail='upper', mode='select', sort=False)
-else:
-    fselector = FixedNElementTailSelector(np.floor(self.featsel), tail='upper', mode='select', sort=False)
+nfeatures_sel = nfeatures  # default
+if self.featsel < 1.0 and int(self.featsel * nfeatures) > 0:
+    nfeatures_sel = int(self.featsel * nfeatures)
+if self.featsel > 1.0:
+    nfeatures_sel = min(nfeatures, self.featsel)
+fselector = FixedNElementTailSelector(nfeatures_sel, tail='upper', mode='select', sort=False)
# XXX Artificially make the seed_index feature score high to keep it(?)
if self.use_same_features:
if len(self.exclude_from_model):
@@ -177,7 +180,8 @@ def __call__(self, datasets):
mappers = [m.proj.astype(self.dtype) for m in mappers]
if self.featsel != 1.0:
# Reshape the projection matrix from selected to all features
-mappers_full = [np.zeros((nfeatures, nfeatures)) for im in range(len(mappers))]
+mappers_full = [np.zeros((nfeatures_all[im], nfeatures_all[ref_ds]))
+                for im in range(len(mappers))]
if self.use_same_features:
for mf, m in zip(mappers_full, mappers):
mf[np.ix_(features_selected, features_selected)] = m
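Aside: a minimal numpy sketch (not part of this commit; sizes and indices are made up) of what the np.ix_ scatter above does, i.e. placing a projection learned on selected features back into a full-size matrix:

import numpy as np

nfeatures_subject, nfeatures_ref = 8, 8    # hypothetical full feature counts
features_selected = np.array([1, 4, 6])    # indices kept by the selector
m = np.random.rand(3, 3)                   # projection over selected features only
mf = np.zeros((nfeatures_subject, nfeatures_ref))
# np.ix_ builds an open mesh, so rows/columns of the small matrix land on
# the selected-feature positions of the full matrix
mf[np.ix_(features_selected, features_selected)] = m
assert mf[1, 4] == m[0, 1]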
@@ -268,9 +272,9 @@ class SearchlightHyperalignment(ClassWithCollections):
constraints=EnsureBool(),
doc="""This param determines whether to combine mappers for each voxel
from its neighborhood searchlights or just use the mapper for which
-it is the center voxel. Use this option with caution, as enabling
-it might square the runtime memory requirement. If you run into
-memory issues, reduce the nproc in sl.""")
+it is the center voxel. This will not be applicable for certain
+queryengines whose ids and neighborhoods are from different spaces,
+such as for SurfaceVerticesQueryEngine""")
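Usage sketch (assuming the SearchlightHyperalignment constructor as of this commit; the radius value is illustrative):

from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

# keep only the mapper for which each voxel is the SL center -- lighter on memory
slhyper_center_only = SearchlightHyperalignment(radius=2, combine_neighbormappers=False)
# combining mappers from all neighborhood searchlights can square memory use
slhyper_combined = SearchlightHyperalignment(radius=2, combine_neighbormappers=True)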

compute_recon = Parameter(
True,
@@ -286,7 +290,7 @@ class SearchlightHyperalignment(ClassWithCollections):
EnsureInt() & EnsureRange(min=2),
doc="""Determines if feature selection will be performed in each searchlight.
1.0: Use all features. < 1.0 is understood as selecting that
-proportion of features in each searchlight using feature scores;
+proportion of features in each searchlight of ref_ds using feature scores;
> 1.0 is understood as selecting at most that many features in each
searchlight.""")
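A sketch of how the three featsel regimes map onto a number of selected features, mirroring the fixed-up logic above (the helper name n_selected is made up):

from mvpa2.featsel.helpers import FixedNElementTailSelector

def n_selected(featsel, nfeatures):
    nfeatures_sel = nfeatures                      # 1.0: use all features
    if featsel < 1.0 and int(featsel * nfeatures) > 0:
        nfeatures_sel = int(featsel * nfeatures)   # fraction of features
    if featsel > 1.0:
        nfeatures_sel = min(nfeatures, featsel)    # at most that many
    return nfeatures_sel

assert n_selected(0.5, 100) == 50
assert n_selected(150, 100) == 100
assert n_selected(1.0, 100) == 100
fselector = FixedNElementTailSelector(n_selected(0.5, 100), tail='upper', mode='select', sort=False)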

@@ -332,6 +336,8 @@ def __init__(self, **kwargs):
self.ndatasets = 0
self.nfeatures = 0
self.projections = None
+# This option forces the roi_seed in each SL to be selected during feature selection
+self.force_roi_seed = True
if self.params.nproc is not None and self.params.nproc > 1 \
and not externals.exists('pprocess'):
raise RuntimeError("The 'pprocess' module is required for "
@@ -381,10 +387,12 @@ def _proc_block(self, block, datasets, featselhyper, queryengines, seed=None, ib
continue
# selecting neighborhood for all subject for hyperalignment
ds_temp = [sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)]
-roi_seed = np.array(roi_feature_ids_all[self.params.ref_ds]) == node_id
-ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
+if self.force_roi_seed:
+    roi_seed = np.array(roi_feature_ids_all[self.params.ref_ds]) == node_id
+    ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
if __debug__:
-msg = 'ROI (%i/%i), %i features' % (i + 1, len(block), len(roi_seed))
+msg = 'ROI (%i/%i), %i features' % (i + 1, len(block),
+                                    ds_temp[self.params.ref_ds].nfeatures)
debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
hmappers = featselhyper(ds_temp)
assert(len(hmappers) == len(datasets))
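Aside: how a searchlight center gets flagged via fa so that feature selection can retain it -- a toy sketch (not from this commit; the import path and all values are assumptions):

import numpy as np
from mvpa2.datasets.base import Dataset

ds = Dataset(np.random.randn(10, 5))     # 10 samples x 5 features
roi_feature_ids = [3, 7, 11, 42, 56]     # hypothetical neighborhood feature ids
node_id = 11                             # the SL center
ds.fa['roi_seed'] = np.array(roi_feature_ids) == node_id
assert ds.fa.roi_seed.sum() == 1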
@@ -546,7 +554,12 @@ def __call__(self, datasets):
# alignment, i.e. the SL ROIs cover roughly the same area
queryengines = self._get_trained_queryengines(
datasets, params.queryengine, params.radius, params.ref_ds)

+# For surface nodes to voxels queryengines, roi_seed hardly makes sense
+if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
+    self.force_roi_seed = False
+    if not self.params.combine_neighbormappers:
+        raise NotImplementedError("Mapping from voxels to surface nodes is not "
+                                  "implemented yet. Try setting combine_neighbormappers to True.")
self.nfeatures = datasets[params.ref_ds].nfeatures
_shpaldebug("Performing Hyperalignment in searchlights")
# Setting up centers for running SL Hyperalignment
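Configuration sketch for the surface case (assumptions: qe stands in for a trained SurfaceVerticesQueryEngine, construction omitted; API as of this commit):

from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

# qe = ...  # a trained SurfaceVerticesQueryEngine (construction omitted)
# voxel-to-surface-node mapping is not implemented, so
# combine_neighbormappers must stay at its default of True
slhyper = SearchlightHyperalignment(queryengine=qe, combine_neighbormappers=True)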
20 changes: 8 additions & 12 deletions mvpa2/tests/test_searchlight_hyperalignment.py
@@ -153,6 +153,8 @@ def test_hyperalignment_measure(self):
mappers_fsf_same = fsha_fsf_same(dss_rotated)
mappers_fsn = fsha_fsn(dss_rotated)
mappers = fsha(dss_rotated_clean)
+mappers_diffsizedss = fsha_fsf([sd[:, nfs] for nfs, sd in
+                                zip([np.arange(5), np.random.permutation(np.arange(8)), np.arange(8)[::-1], np.arange(8)], dss_rotated)])
# Testing that most of noisy features are eliminated from reference data
assert_true(np.alltrue([np.sum(m[:4, :4].std(0) > 0) > 2 for m in mappers_fsf]))
# using same features make it most likely to eliminate all noisy features
@@ -242,7 +244,7 @@ def test_searchlight_hyperalignment(self):
max_weight = proj[0].proj.toarray().max(0).squeeze()
diag_weight = proj[0].proj.diagonal()
# Check to make sure diagonal is the max weight, in almost all rows for reference subject
-assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.98)
+assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
# and not true for other subjects
for i in range(1, nds - 1):
assert(np.sum(proj[i].proj.toarray().max(0).squeeze() == proj[i].proj.diagonal())
@@ -251,7 +253,7 @@ def test_searchlight_hyperalignment(self):
max_weight = proj[-1].proj.toarray().max(0).squeeze()
diag_weight = proj[-1].proj.diagonal()
# Check to make sure diagonal is the max weight, in almost all rows for reference subject
-assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.95)
+assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
# project data
dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
_ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
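The relaxed 0.90 thresholds above check diagonal dominance of the reference subject's projection; a standalone numpy illustration of the statistic being computed (the matrix here is synthetic):

import numpy as np

proj = np.eye(5) + 0.01 * np.random.rand(5, 5)   # near-identity projection
max_weight = proj.max(axis=0).squeeze()
diag_weight = proj.diagonal()
frac = np.sum(max_weight == diag_weight) / float(len(diag_weight))
assert frac > 0.90   # the diagonal should carry the max weight almost everywhere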
@@ -279,7 +281,6 @@ def test_searchlight_hyperalignment_warnings_and_exceptions(self):
# currently they are just suppressed :-/ So this is just a smoke test
mappers = slhyper([ds_orig, ds_orig.copy()])


@reseed_rng()
def test_custom_qas(self):
# Test if we could provide custom QEs per each of the datasets
@@ -353,19 +354,12 @@ def apply_slhyper(queryengine, dss=[ds0, ds1], return_mappers=False, **kw):
projs = apply_slhyper(qe0)
# Test that in general we get larger coefficients for "correct" transformation
for p, tproj in zip(projs, tprojs_shifted):
-#print np.round(p, decimals=3)
-#print np.asarray(p)[tproj>0]
-assert(np.all(np.asarray(p)[tproj>0] >= 1.0)) # .astype(int), tproj)
-#print np.asarray(p)[tproj==0]
-assert_array_lequal(np.mean(np.asarray(p)[tproj==0]), 0.3)
+assert(np.all(np.asarray(p)[tproj>0] >= 1.0))
+assert_array_lequal(np.mean(np.asarray(p)[tproj == 0]), 0.3)

qe1 = FancyQE([[0, 1, 2, 3], [1, 2, 3], [2, 3], [3]])
# Just a smoke test, for now TODO
projs = apply_slhyper([qe0, qe1])
-# print
-# for p, tproj in zip(projs, tprojs_shifted):
-# print np.round(p, decimals=3)



def assert_no_offdiag(a):
@@ -375,6 +369,7 @@ def assert_no_offdiag(a):
else:
assert_array_equal(a - np.diag(np.diagonal(a)), 0)


class FancyQE(object):
"""Little helper QE which knows what to return for neighbors"""

@@ -395,6 +390,7 @@ def train(self, ds):
def __getitem__(self, i):
return self.results[i]
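Usage sketch for the helper above (the neighborhoods are made up):

qe = FancyQE([[0, 1, 2], [1, 2], [2]])
assert qe[1] == [1, 2]   # __getitem__ replays the precomputed neighborhood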


def suite(): # pragma: no cover
return unittest.makeSuite(SearchlightHyperalignmentTests)

