Commit
Merge remote-tracking branch 'origin/master' into enh/split_rfe_static_measures

* origin/master:
  ENH: removed some trailing spaces and fixed some typos
  ChangeLog entry for the duecredit
  ENH: add duecredit run to travis's matrix
  BF: We must specify path at this point for module level duecredit references
  RF: reference -> reference-implementation
  RF: cite-module tag is no longer (duecredit 0.3.0) needed if dcite'ing a module
  RF: cite-on-use -> cite-module tag for duecredit
  ENH: Searchlight and SVM references
  ENH/RF: Few more duecredit references (hyper, smlr, lars, ESL) + unified formatting
  BF: adjusted all the cite entries for new API
  BF: updated duecredit stub with docstrings
  DOC: few more entries for duecredit
  ENH: Added duecredit stub and few citations

Conflicts:
	mvpa2/featsel/rfe.py -- there was a stale fixed docstring
yarikoptic committed Oct 27, 2015
2 parents 3cb7072 + a04a0a7 commit f39926b
Showing 19 changed files with 242 additions and 42 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
@@ -20,6 +20,9 @@ env:
# Some DEBUG targets are excluded since either their output could
# not be redirected or they augment output in the doctests
- COVERAGE="coverage run --rcfile=.travis_coveragerc" MVPA_DEBUG=ENFORCE_CA_ENABLED DOCTESTS=--with-doctest
  # So we get the duecredit report printed out, in a run otherwise identical to
  # the original one so we can compare the time impact
- PYTHON=$PY DOCTESTS=--with-doctest DUECREDIT_ENABLE=yes
before_install:
# to prevent IPv6 being used for APT
- sudo bash -c "echo 'Acquire::ForceIPv4 \"true\";' > /etc/apt/apt.conf.d/99force-ipv4"
@@ -41,6 +44,7 @@ before_install:
install:
- make
- if [ ! -z "$COVERAGE" ]; then pip install --user -v coveralls; fi
- if [ ! -z "$DUECREDIT_ENABLE"]; then pip install --user -v duecredit; fi
script:
# Run only nonlabile tests
- export MVPA_EXTERNALS_RAISE_EXCEPTION=off MVPA_MATPLOTLIB_BACKEND=agg
2 changes: 2 additions & 0 deletions Changelog
@@ -37,6 +37,8 @@ Releases

* Enhancements

- "Native" use of :mod:`~duecredit` to provide citations for PyMVPA itself
and functionality/methods it implements.
- Unified use of os.path.join as pathjoin.
- :class:`~mvpa.mappers.procrustean.ProcrusteanMapper` computes reconstruction
now more efficiently (just a transpose with proper scaling) in case of
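The Changelog entry above corresponds to the following end-user flow. This is a minimal sketch assuming duecredit's documented DUECREDIT_ENABLE activation and its default on-disk cache; the analysis step itself is a placeholder:

    import os
    # Must be set before mvpa2 (and hence duecredit) is first imported.
    os.environ["DUECREDIT_ENABLE"] = "yes"

    import mvpa2  # module-level due.cite() calls register PyMVPA's own references

    # ... run an analysis here; @due.dcite-decorated methods add their
    # references only as they actually execute ...

    # At interpreter exit duecredit prints a citation summary and caches the
    # collected entries (by default in .duecredit.p), which the `duecredit
    # summary` command can re-render later, e.g. as BibTeX.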
13 changes: 13 additions & 0 deletions mvpa2/__init__.py
@@ -174,3 +174,16 @@ def _pymvpa_pdb_excepthook(type, value, tb):

if __debug__:
debug('INIT', 'mvpa end')

# Setup duecredit entry for the entire PyMVPA
from .support.due import due, Doi
due.cite(
Doi("10.1007/s12021-008-9041-y"),
description="Multivariate pattern analysis of neural data",
tags=["reference-implementation"],
path="mvpa2")
due.cite(
Doi("10.3389/neuro.11.003.2009"),
description="Demonstration of PyMVPA capabilities concerning multi-modal or modality-agnostic data analysis",
tags=["edu,use"],
path="mvpa2")
6 changes: 6 additions & 0 deletions mvpa2/algorithms/group_clusterthr.py
@@ -32,6 +32,8 @@
EnsureInt, EnsureFloat, EnsureRange, EnsureChoice
from mvpa2.mappers.fx import mean_sample

from mvpa2.support.due import due, Doi


class GroupClusterThreshold(Learner):
"""Statistical evaluation of group-level average accuracy maps
@@ -219,6 +221,10 @@ def _untrain(self):
self._thrmap = None
self._null_cluster_sizes = None

@due.dcite(
Doi("10.1016/j.neuroimage.2012.09.063"),
description="Statistical assessment of (searchlight) MVPA results",
tags=['implementation'])
def _train(self, ds):
# shortcuts
chunk_attr = self.params.chunk_attr
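Note the decorator placement in the hunk above: due.dcite records the reference only when the decorated method actually runs, so importing or instantiating GroupClusterThreshold claims nothing. A toy illustration with a hypothetical DOI, relying on the stub sketched earlier:

    from mvpa2.support.due import due, Doi  # or the stub sketched above

    class Example(object):
        @due.dcite(Doi("10.1000/xyz123"),  # hypothetical DOI
                   description="Toy method-level citation",
                   tags=["implementation"])
        def _train(self, ds):
            return ds

    ex = Example()   # nothing is cited yet
    ex._train(None)  # the reference is collected on this call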
18 changes: 12 additions & 6 deletions mvpa2/algorithms/hyperalignment.py
@@ -33,6 +33,8 @@
from mvpa2.mappers.zscore import zscore, ZScoreMapper
from mvpa2.mappers.staticprojection import StaticProjectionMapper

from mvpa2.support.due import due, Doi

if __debug__:
from mvpa2.base import debug

@@ -107,8 +109,9 @@ class Hyperalignment(ClassWithCollections):
# constructor should accept
# the ``space`` of the mapper determines where the algorithm places the
# common space definition in the datasets
alignment = Parameter(ProcrusteanMapper(space='commonspace'), # might provide allowedtype
# XXX Currently, there's no way to handle this with connstraints
alignment = Parameter(ProcrusteanMapper(space='commonspace'),
# might provide allowedtype
# XXX Currently, there's no way to handle this with constraints
doc="""The multidimensional transformation mapper. If
`None` (default) an instance of
:class:`~mvpa2.mappers.procrustean.ProcrusteanMapper` is
@@ -124,7 +127,7 @@ class Hyperalignment(ClassWithCollections):
level2_niter = Parameter(1, constraints=EnsureInt() & EnsureRange(min=0),
doc="Number of 2nd-level iterations.")

ref_ds = Parameter(None, constraints=(EnsureRange(min=0) & EnsureInt()
ref_ds = Parameter(None, constraints=(EnsureRange(min=0) & EnsureInt()
| EnsureNone()),
doc="""Index of a dataset to use as 1st-level common space
reference. If `None`, then the dataset with the maximum
@@ -164,6 +167,10 @@ def __init__(self, **kwargs):
self.commonspace = None


@due.dcite(
Doi('10.1016/j.neuron.2011.08.026'),
description="Hyperalignment of data to a common space",
tags=["implementation"])
def train(self, datasets):
"""Derive a common feature space from a series of datasets.
@@ -180,7 +187,7 @@ def train(self, datasets):
ndatasets = len(datasets)
nfeatures = [ds.nfeatures for ds in datasets]
alpha = params.alpha

residuals = None
if ca['training_residual_errors'].enabled:
residuals = np.zeros((1 + params.level2_niter, ndatasets))
@@ -198,7 +205,7 @@ def train(self, datasets):
ref_ds = np.argmax(nfeatures)
else:
ref_ds = params.ref_ds
# Making sure that ref_ds is within range.
# Making sure that ref_ds is within range.
#Parameter() already checks for it being a non-negative integer
if ref_ds >= ndatasets:
raise ValueError, "Requested reference dataset %i is out of " \
@@ -458,4 +465,3 @@ def _level3(self, datasets):
residuals[0, i] = np.linalg.norm(data_mapped - self.commonspace)

return mappers

7 changes: 7 additions & 0 deletions mvpa2/clfs/__init__.py
@@ -22,10 +22,17 @@

__docformat__ = 'restructuredtext'

from mvpa2.support.due import due, Doi

if __debug__:
from mvpa2.base import debug
debug('INIT', 'mvpa2.clfs')

due.cite(
Doi('10.1007/b94608'),
path="mvpa2.clfs",
description="Thorough textbook on statistical learning (available online)",
tags=["edu"])

if __debug__:
debug('INIT', 'mvpa2.clfs end')
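As the commit message notes ("We must specify path at this point for module level duecredit references"), the path argument keys a citation to a module or object name so the final report can attribute references to functionality that was actually used. A hedged illustration with a hypothetical DOI, following the call pattern shown in the hunk above:

    from mvpa2.support.due import due, Doi

    # Cited against a module path rather than a decorated callable: the entry
    # is reported for use of mvpa2.clfs functionality in general.
    due.cite(Doi("10.1000/xyz123"),  # hypothetical DOI
             path="mvpa2.clfs",
             description="Illustration of a module-level citation",
             tags=["edu"])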
20 changes: 20 additions & 0 deletions mvpa2/clfs/_svmbase.py
@@ -26,6 +26,9 @@
from mvpa2.base.param import Parameter
from mvpa2.base.constraints import EnsureListOf

from mvpa2.support.due import due, BibTeX


if __debug__:
from mvpa2.base import debug

@@ -301,6 +304,23 @@ def sq_func(x):

return value

@due.dcite(
BibTeX("""
@Book{Vapnik95:SVM,
title = "The Nature of Statistical Learning Theory",
author = "Vladimir Vapnik",
publisher = "Springer",
address = "New York",
isbn = "0-387-94559-8",
year = "1995",
pymvpa-keywords = "support vector machine, SVM"
}"""),
path="mvpa2.clfs.SVM",
description="Support Vector Machines (SVM)",
tags=["implementation"])
def _train(self, dataset):
# exists primarily for dcite, and no parent has it defined
pass

# TODO: make part of kernel object
#def _getDefaultGamma(self, dataset):
7 changes: 6 additions & 1 deletion mvpa2/clfs/lars.py
@@ -14,6 +14,7 @@
import numpy as np

import mvpa2.base.externals as externals
from mvpa2.support.due import due, Doi, BibTeX

# do conditional to be able to build module reference
if externals.exists('lars', raise_=True):
@@ -141,7 +142,11 @@ def __repr__(self):
self.__use_Gram,
str(self.ca.enabled))


@due.dcite(
Doi('10.1214/009053604000000067'),
path="mvpa2.clfs.lars:LARS",
description="Least angle regression",
tags=["implementation"])
def _train(self, data):
"""Train the classifier using `data` (`Dataset`).
"""
15 changes: 13 additions & 2 deletions mvpa2/clfs/libsvmc/svm.py
@@ -1,4 +1,4 @@
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# emacs: -*- coding: utf-8; mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
@@ -26,6 +26,9 @@
from mvpa2.kernels.libsvm import LinearLSKernel
from mvpa2.clfs.libsvmc.sens import LinearSVMWeights

from mvpa2.support.due import due, Doi, BibTeX


if __debug__:
from mvpa2.base import debug

@@ -120,10 +123,18 @@ def __init__(self,
"""Holds the trained SVM."""



@due.dcite(
Doi('10.1145/1961189.1961199'),
description="LIBSVM: A library for support vector machines",
path="libsvm",
tags=["implementation"])
# TODO: conditioned citations for nu-SVM and one-class
# B. Schölkopf, A. Smola, R. Williamson, and P. L. Bartlett. New support vector algorithms. Neural Computation, 12, 2000, 1207-1245.
# B. Schölkopf, J. Platt, J. Shawe-Taylor, A. J. Smola, and R. C. Williamson. Estimating the support of a high-dimensional distribution. Neural Computation, 13, 2001, 1443-1471.
def _train(self, dataset):
"""Train SVM
"""
super(SVM, self)._train(dataset)
targets_sa_name = self.get_space() # name of targets sa
targets_sa = dataset.sa[targets_sa_name] # actual targets sa

30 changes: 28 additions & 2 deletions mvpa2/clfs/sg/svm.py
@@ -1,4 +1,4 @@
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# emacs: -*- coding: utf-8; mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
@@ -78,6 +78,8 @@

from sens import *

from mvpa2.support.due import due, BibTeX

if __debug__:
from mvpa2.base import debug

@@ -286,10 +288,34 @@ def __init__(self, **kwargs):
#% (self, e))


@due.dcite(
BibTeX("""
@article{Sonnenburg+2010:Shogun,
author = {Sonnenburg, Sören and Rätsch, Gunnar and Henschel, Sebastian
and Widmer, Christian and Behr, Jonas and Zien, Alexander
and Bona, Fabio de and Binder, Alexander and Gehl, Christian
and Franc, Vojtěch},
title = {The SHOGUN Machine Learning Toolbox},
journal = {J. Mach. Learn. Res.},
issue_date = {3/1/2010},
volume = {11},
month = aug,
year = {2010},
issn = {1532-4435},
pages = {1799--1802},
numpages = {4},
url = {http://dl.acm.org/citation.cfm?id=1756006.1859911},
acmid = {1859911},
publisher = {JMLR.org},
}"""),
description="Shogun: Machine learning toolbox. SVM implementations",
path="shogun",
version=externals.versions['shogun'],
tags=["implementation"])
def _train(self, dataset):
"""Train SVM
"""

super(SVM, self)._train(dataset)
# XXX watchout
# self.untrain()
newkernel, newsvm = False, False
7 changes: 7 additions & 0 deletions mvpa2/clfs/smlr.py
@@ -22,6 +22,8 @@
from mvpa2.base.state import ConditionalAttribute
from mvpa2.datasets.base import Dataset

from mvpa2.support.due import due, Doi

__all__ = [ "SMLR", "SMLRWeights" ]


@@ -310,6 +312,11 @@ def _python_stepwise_regression(self, w, X, XY, Xw, E,
return cycles


@due.dcite(
Doi('10.1109/TPAMI.2005.127'),
path="mvpa2.clfs.smlr:SMLR",
description="Sparse multinomial-logistic regression classifier",
tags=["implementation"])
def _train(self, dataset):
"""Train the classifier using `dataset` (`Dataset`).
"""
5 changes: 5 additions & 0 deletions mvpa2/clfs/transerror.py
@@ -29,6 +29,7 @@
UnknownStateError
from mvpa2.base.dochelpers import enhanced_doc_string, table2string
from mvpa2.clfs.stats import auto_null_dist
from mvpa2.support.due import due, Doi

if __debug__:
from mvpa2.base import debug
@@ -1282,6 +1283,10 @@ def __init__(self, alpha=None, labels_attr='predictions',
self._postprob = postprob
self._hypotheses = hypotheses

@due.dcite(
Doi("10.1016/j.patcog.2011.04.025"),
description="Bayesian hypothesis testing",
tags=["reference-implementation"])
def _call(self, ds):
from mvpa2.support.bayes.partitioner import Partition
from mvpa2.support.bayes.partial_independence import compute_logp_H
21 changes: 19 additions & 2 deletions mvpa2/featsel/rfe.py
@@ -29,6 +29,7 @@
from mvpa2.base.dochelpers import _str
from mvpa2.generators.base import Repeater

from mvpa2.support.due import due, Doi, BibTeX

import numpy as np
from mvpa2.base.state import ConditionalAttribute
@@ -167,7 +168,24 @@ def __repr__(self, prefixes=[]):
prefixes=prefixes
+ _repr_attrs(self, ['update_sensitivity'], default=True))


@due.dcite(
BibTeX("""
@Article{ GWB+02,
author = "I. Guyon and J. Weston and S. Barnhill and V. Vapnik",
title = "Gene Selection for Cancer Classification using Support Vector Machines",
volume = "46",
year = "2002",
pages = "389--422",
publisher = "Kluwer",
address = "Hingham, MA, USA",
journal = "Machine Learning"
}"""),
description="Recursive feature elimination procedure",
tags=["implementation"])
@due.dcite(
Doi("10.1162/neco.2007.09-06-340"),
description="Full-brain fMRI decoding using SVM RFE",
tags=["use"])
def _train(self, ds):
"""Proceed and select the features recursively eliminating less
important ones.
@@ -177,7 +195,6 @@ def _train(self, ds):
ds : Dataset
used to compute sensitivity maps and train a classifier
to determine the transfer error
"""
# get the initial split into train and test
dataset, testdataset = self._get_traintest_ds(ds)
2 changes: 1 addition & 1 deletion mvpa2/generators/permutation.py
@@ -231,7 +231,7 @@ def _permute_chunks_sanity_check(in_pattrs, chunks, uniques):

def _permute_chunks(self, limit_idx, in_pattrs, out_pattrs, chunks=None):
# limit_idx is doing nothing

if chunks is None:
raise ValueError("Missing 'chunk_attr' for strategy='chunk'")

