Merge pull request #458 from swaroopgj/featselfix
Few bug fixes to handle feature selection, roi_seed, and datasets with different features
yarikoptic committed Apr 13, 2016
2 parents 7614648 + 07b940c commit b6fd7fc
Showing 2 changed files with 37 additions and 28 deletions.
45 changes: 29 additions & 16 deletions mvpa2/algorithms/searchlight_hyperalignment.py
@@ -24,8 +24,9 @@
from mvpa2.base.progress import ProgressBar
from mvpa2.base import externals, warning
from mvpa2.support import copy
-from mvpa2.featsel.helpers import FractionTailSelector, FixedNElementTailSelector
+from mvpa2.featsel.helpers import FixedNElementTailSelector
from mvpa2.base.types import is_datasetlike
+from mvpa2.misc.surfing.queryengine import SurfaceVerticesQueryEngine

if externals.exists('h5py'):
from mvpa2.base.hdf5 import h5save, h5load
@@ -115,14 +116,14 @@ def __init__(self, hyperalignment=Hyperalignment(ref_ds=0),
def __call__(self, datasets):
ref_ds = self.hyperalignment.params.ref_ds
nsamples, nfeatures = datasets[ref_ds].shape
-if 'roi_seed' in datasets[ref_ds].fa and np.any(datasets[ref_ds].fa['roi_seed']) :
+if 'roi_seed' in datasets[ref_ds].fa and np.any(datasets[ref_ds].fa['roi_seed']):
seed_index = np.where(datasets[ref_ds].fa.roi_seed)
else:
if not self.full_matrix:
raise ValueError(
"Setting full_matrix=False requires roi_seed `fa` in the "
"reference dataset indicating center feature and some "
"feature(s) being marked as `roi_sid`.")
"feature(s) being marked as `roi_seed`.")
seed_index = None
# Voxel selection within Searchlight
# Usual metric of between-subject between-voxel correspondence
@@ -132,10 +133,12 @@ def __call__(self, datasets):
if self.featsel != 1.0:
# computing feature scores from the data
feature_scores = compute_feature_scores(datasets, self.exclude_from_model)
-if self.featsel < 1.0:
-    fselector = FractionTailSelector(self.featsel, tail='upper', mode='select', sort=False)
-else:
-    fselector = FixedNElementTailSelector(np.floor(self.featsel), tail='upper', mode='select', sort=False)
+nfeatures_sel = nfeatures  # default
+if self.featsel < 1.0 and int(self.featsel * nfeatures) > 0:
+    nfeatures_sel = int(self.featsel * nfeatures)
+if self.featsel > 1.0:
+    nfeatures_sel = min(nfeatures, self.featsel)
+fselector = FixedNElementTailSelector(nfeatures_sel, tail='upper', mode='select', sort=False)
# XXX Artificially make the seed_index feature score high to keep it(?)
if self.use_same_features:
if len(self.exclude_from_model):
@@ -177,7 +180,8 @@ def __call__(self, datasets):
mappers = [m.proj.astype(self.dtype) for m in mappers]
if self.featsel != 1.0:
# Reshape the projection matrix from selected to all features
-mappers_full = [np.zeros((nfeatures, nfeatures)) for im in range(len(mappers))]
+mappers_full = [np.zeros((nfeatures_all[im], nfeatures_all[ref_ds]))
+                for im in range(len(mappers))]
if self.use_same_features:
for mf, m in zip(mappers_full, mappers):
mf[np.ix_(features_selected, features_selected)] = m
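Aside: a minimal numpy sketch (not part of this commit; sizes and indices are made up) of what the np.ix_ scatter above does, i.e. placing a projection learned on selected features back into a full-size matrix:

import numpy as np

nfeatures_subject, nfeatures_ref = 8, 8    # hypothetical full feature counts
features_selected = np.array([1, 4, 6])    # indices kept by the selector
m = np.random.rand(3, 3)                   # projection over selected features only
mf = np.zeros((nfeatures_subject, nfeatures_ref))
# np.ix_ builds an open mesh, so rows/columns of the small matrix land on
# the selected-feature positions of the full matrix
mf[np.ix_(features_selected, features_selected)] = m
assert mf[1, 4] == m[0, 1]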
@@ -268,9 +272,9 @@ class SearchlightHyperalignment(ClassWithCollections):
constraints=EnsureBool(),
doc="""This param determines whether to combine mappers for each voxel
from its neighborhood searchlights or just use the mapper for which
-it is the center voxel. Use this option with caution, as enabling
-it might square the runtime memory requirement. If you run into
-memory issues, reduce the nproc in sl.""")
+it is the center voxel. This will not be applicable for certain
+queryengines whose ids and neighborhoods are from different spaces,
+such as for SurfaceVerticesQueryEngine""")
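Usage sketch (assuming the SearchlightHyperalignment constructor as of this commit; the radius value is illustrative):

from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

# keep only the mapper for which each voxel is the SL center -- lighter on memory
slhyper_center_only = SearchlightHyperalignment(radius=2, combine_neighbormappers=False)
# combining mappers from all neighborhood searchlights can square memory use
slhyper_combined = SearchlightHyperalignment(radius=2, combine_neighbormappers=True)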

compute_recon = Parameter(
True,
@@ -286,7 +290,7 @@ class SearchlightHyperalignment(ClassWithCollections):
EnsureInt() & EnsureRange(min=2),
doc="""Determines if feature selection will be performed in each searchlight.
1.0: Use all features. < 1.0 is understood as selecting that
-proportion of features in each searchlight using feature scores;
+proportion of features in each searchlight of ref_ds using feature scores;
> 1.0 is understood as selecting at most that many features in each
searchlight.""")
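A sketch of how the three featsel regimes map onto a number of selected features, mirroring the fixed-up logic above (the helper name n_selected is made up):

from mvpa2.featsel.helpers import FixedNElementTailSelector

def n_selected(featsel, nfeatures):
    nfeatures_sel = nfeatures                      # 1.0: use all features
    if featsel < 1.0 and int(featsel * nfeatures) > 0:
        nfeatures_sel = int(featsel * nfeatures)   # fraction of features
    if featsel > 1.0:
        nfeatures_sel = min(nfeatures, featsel)    # at most that many
    return nfeatures_sel

assert n_selected(0.5, 100) == 50
assert n_selected(150, 100) == 100
assert n_selected(1.0, 100) == 100
fselector = FixedNElementTailSelector(n_selected(0.5, 100), tail='upper', mode='select', sort=False)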

@@ -332,6 +336,8 @@ def __init__(self, **kwargs):
self.ndatasets = 0
self.nfeatures = 0
self.projections = None
+# This option forces the roi_seed in each SL to be selected during feature selection
+self.force_roi_seed = True
if self.params.nproc is not None and self.params.nproc > 1 \
and not externals.exists('pprocess'):
raise RuntimeError("The 'pprocess' module is required for "
@@ -381,10 +387,12 @@ def _proc_block(self, block, datasets, featselhyper, queryengines, seed=None, ib
continue
# selecting neighborhood for all subject for hyperalignment
ds_temp = [sd[:, ids] for sd, ids in zip(datasets, roi_feature_ids_all)]
-roi_seed = np.array(roi_feature_ids_all[self.params.ref_ds]) == node_id
-ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
+if self.force_roi_seed:
+    roi_seed = np.array(roi_feature_ids_all[self.params.ref_ds]) == node_id
+    ds_temp[self.params.ref_ds].fa['roi_seed'] = roi_seed
if __debug__:
-msg = 'ROI (%i/%i), %i features' % (i + 1, len(block), len(roi_seed))
+msg = 'ROI (%i/%i), %i features' % (i + 1, len(block),
+                                    ds_temp[self.params.ref_ds].nfeatures)
debug('SLC', bar(float(i + 1) / len(block), msg), cr=True)
hmappers = featselhyper(ds_temp)
assert(len(hmappers) == len(datasets))
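Aside: how a searchlight center gets flagged via fa so that feature selection can retain it -- a toy sketch (not from this commit; the import path and all values are assumptions):

import numpy as np
from mvpa2.datasets.base import Dataset

ds = Dataset(np.random.randn(10, 5))     # 10 samples x 5 features
roi_feature_ids = [3, 7, 11, 42, 56]     # hypothetical neighborhood feature ids
node_id = 11                             # the SL center
ds.fa['roi_seed'] = np.array(roi_feature_ids) == node_id
assert ds.fa.roi_seed.sum() == 1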
@@ -546,7 +554,12 @@ def __call__(self, datasets):
# alignment, i.e. the SL ROIs cover roughly the same area
queryengines = self._get_trained_queryengines(
datasets, params.queryengine, params.radius, params.ref_ds)

+# For surface nodes to voxels queryengines, roi_seed hardly makes sense
+if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
+    self.force_roi_seed = False
+    if not self.params.combine_neighbormappers:
+        raise NotImplementedError("Mapping from voxels to surface nodes is not "
+                                  "implemented yet. Try setting combine_neighbormappers to True.")
self.nfeatures = datasets[params.ref_ds].nfeatures
_shpaldebug("Performing Hyperalignment in searchlights")
# Setting up centers for running SL Hyperalignment
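Configuration sketch for the surface case (assumptions: qe stands in for a trained SurfaceVerticesQueryEngine, construction omitted; API as of this commit):

from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

# qe = ...  # a trained SurfaceVerticesQueryEngine (construction omitted)
# voxel-to-surface-node mapping is not implemented, so
# combine_neighbormappers must stay at its default of True
slhyper = SearchlightHyperalignment(queryengine=qe, combine_neighbormappers=True)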
20 changes: 8 additions & 12 deletions mvpa2/tests/test_searchlight_hyperalignment.py
@@ -153,6 +153,8 @@ def test_hyperalignment_measure(self):
mappers_fsf_same = fsha_fsf_same(dss_rotated)
mappers_fsn = fsha_fsn(dss_rotated)
mappers = fsha(dss_rotated_clean)
+mappers_diffsizedss = fsha_fsf([sd[:, nfs] for nfs, sd in
+                                zip([np.arange(5), np.random.permutation(np.arange(8)), np.arange(8)[::-1], np.arange(8)], dss_rotated)])
# Testing that most of noisy features are eliminated from reference data
assert_true(np.alltrue([np.sum(m[:4, :4].std(0) > 0) > 2 for m in mappers_fsf]))
# using same features make it most likely to eliminate all noisy features
@@ -242,7 +244,7 @@ def test_searchlight_hyperalignment(self):
max_weight = proj[0].proj.toarray().max(0).squeeze()
diag_weight = proj[0].proj.diagonal()
# Check to make sure diagonal is the max weight, in almost all rows for reference subject
-assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.98)
+assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
# and not true for other subjects
for i in range(1, nds - 1):
assert(np.sum(proj[i].proj.toarray().max(0).squeeze() == proj[i].proj.diagonal())
@@ -251,7 +253,7 @@ def test_searchlight_hyperalignment(self):
max_weight = proj[-1].proj.toarray().max(0).squeeze()
diag_weight = proj[-1].proj.diagonal()
# Check to make sure diagonal is the max weight, in almost all rows for reference subject
-assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.95)
+assert(np.sum(max_weight == diag_weight) / float(len(diag_weight)) > 0.90)
# project data
dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
_ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
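The relaxed 0.90 thresholds above check diagonal dominance of the reference subject's projection; a standalone numpy illustration of the statistic being computed (the matrix here is synthetic):

import numpy as np

proj = np.eye(5) + 0.01 * np.random.rand(5, 5)   # near-identity projection
max_weight = proj.max(axis=0).squeeze()
diag_weight = proj.diagonal()
frac = np.sum(max_weight == diag_weight) / float(len(diag_weight))
assert frac > 0.90   # the diagonal should carry the max weight almost everywhere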
@@ -279,7 +281,6 @@ def test_searchlight_hyperalignment_warnings_and_exceptions(self):
# currently they are just suppressed :-/ So this is just a smoke test
mappers = slhyper([ds_orig, ds_orig.copy()])


@reseed_rng()
def test_custom_qas(self):
# Test if we could provide custom QEs per each of the datasets
@@ -353,19 +354,12 @@ def apply_slhyper(queryengine, dss=[ds0, ds1], return_mappers=False, **kw):
projs = apply_slhyper(qe0)
# Test that in general we get larger coefficients for "correct" transformation
for p, tproj in zip(projs, tprojs_shifted):
-#print np.round(p, decimals=3)
-#print np.asarray(p)[tproj>0]
-assert(np.all(np.asarray(p)[tproj>0] >= 1.0)) # .astype(int), tproj)
-#print np.asarray(p)[tproj==0]
-assert_array_lequal(np.mean(np.asarray(p)[tproj==0]), 0.3)
+assert(np.all(np.asarray(p)[tproj>0] >= 1.0))
+assert_array_lequal(np.mean(np.asarray(p)[tproj == 0]), 0.3)

qe1 = FancyQE([[0, 1, 2, 3], [1, 2, 3], [2, 3], [3]])
# Just a smoke test, for now TODO
projs = apply_slhyper([qe0, qe1])
-# print
-# for p, tproj in zip(projs, tprojs_shifted):
-# print np.round(p, decimals=3)



def assert_no_offdiag(a):
@@ -375,6 +369,7 @@ def assert_no_offdiag(a):
else:
assert_array_equal(a - np.diag(np.diagonal(a)), 0)


class FancyQE(object):
"""Little helper QE which knows what to return for neighbors"""

@@ -395,6 +390,7 @@ def train(self, ds):
def __getitem__(self, i):
return self.results[i]
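Usage sketch for the helper above (the neighborhoods are made up):

qe = FancyQE([[0, 1, 2], [1, 2], [2]])
assert qe[1] == [1, 2]   # __getitem__ replays the precomputed neighborhood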


def suite(): # pragma: no cover
return unittest.makeSuite(SearchlightHyperalignmentTests)

