From 36e60962714260dd0d25da3d963d1b1ef8f1da4e Mon Sep 17 00:00:00 2001 From: Danylo Ulianych Date: Wed, 31 Jul 2019 16:55:47 +0200 Subject: [PATCH] Integrated GPFA (#233) * Rename neural_trajectory to gpfa and refactor the codes accordingly * Remove codes related to the (not fully implemented) cross-validation feature * added tqdm as an extra requirement * push coverage only for requirements-extras test * gpfa verbose flag; added licence in the docs * Fixed and extended documentation, removed misleading function outputs. --- .travis.yml | 4 +- MANIFEST.in | 3 +- elephant/gpfa.py | 442 +++++++++++++++++++++ elephant/gpfa_src/INFO.md | 7 + elephant/gpfa_src/__init__.py | 0 elephant/gpfa_src/gpfa_core.py | 526 +++++++++++++++++++++++++ elephant/gpfa_src/gpfa_util.py | 596 +++++++++++++++++++++++++++++ elephant/spike_train_generation.py | 2 +- elephant/test/test_gpfa.py | 177 +++++++++ requirements-extras.txt | 1 + 10 files changed, 1753 insertions(+), 5 deletions(-) create mode 100644 elephant/gpfa.py create mode 100644 elephant/gpfa_src/INFO.md create mode 100644 elephant/gpfa_src/__init__.py create mode 100644 elephant/gpfa_src/gpfa_core.py create mode 100644 elephant/gpfa_src/gpfa_util.py create mode 100644 elephant/test/test_gpfa.py diff --git a/.travis.yml b/.travis.yml index c532810c1..20c71d025 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,7 @@ matrix: before_install: sudo apt install -y libopenmpi-dev openmpi-bin before_script: pip install -r requirements-extras.txt script: mpiexec -n 1 nosetests --with-coverage --cover-package=elephant + after_success: coveralls || echo "coveralls failed" - name: "conda 3.7" python: 3.7 @@ -80,6 +81,3 @@ install: script: nosetests --with-coverage --cover-package=elephant - -after_success: - coveralls || echo "coveralls failed" diff --git a/MANIFEST.in b/MANIFEST.in index 7df4bcd1a..fe492b888 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,8 @@ include LICENSE.txt include elephant/VERSION include elephant/current_source_density_src/README.md include elephant/current_source_density_src/test_data.mat -include elephant/spade_src/LICENSE +include elephant/neural_trajectory_src/README.md +include elephant/spade_src/LICENCE recursive-include elephant/spade_src *.so *.pyd include elephant/test/spike_extraction_test_data.txt recursive-include doc * diff --git a/elephant/gpfa.py b/elephant/gpfa.py new file mode 100644 index 000000000..9edcc2d33 --- /dev/null +++ b/elephant/gpfa.py @@ -0,0 +1,442 @@ +""" +Gaussian-process factor analysis (GPFA) is a dimensionality reduction method + [1] for neural trajectory visualization of parallel spike trains. + +The input consists of a set of trials (Y), each containing a list of spike +trains (N neurons). The output is the projection (X) of the data in space +of pre-chosen dimension x_dim < N. + +Under the assumption of a linear relation between the latent +variable X and the actual data Y in addition to a noise term (i.e., +Y = C * X + d + Gauss(0,R)), the projection corresponds to the conditional +probability E[X|Y]. + +A Gaussian process (X) of dimension x_dim < N is adopted to extract smooth +neural trajectories. The parameters (C, d, R) are estimated from the data using +factor analysis technique. GPFA is simply a set of Factor Analyzers (FA), +linked together in the low dimensional space by a Gaussian Process (GP). 
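+
+A toy sketch of this generative model (shapes only; the arrays below are
+random placeholders, for illustration rather than actual use):
+
+>>> import numpy as np
+>>> x_dim, n_units, n_bins = 3, 50, 100
+>>> X = np.random.randn(x_dim, n_bins)        # latent trajectory
+>>> C = np.random.randn(n_units, x_dim)       # loading matrix
+>>> d = np.random.randn(n_units, 1)           # observation mean
+>>> R = np.diag(np.random.rand(n_units))      # observation noise covariance
+>>> noise = np.random.multivariate_normal(np.zeros(n_units), R, n_bins).T
+>>> Y = C.dot(X) + d + noise                  # Y = C * X + d + Gauss(0, R)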
+ +Internally, the analysis consists of the following steps: + +0) bin the data to get a sequence of N dimensional vectors for each time + bin (cf., `gpfa_util.get_seq`), and choose the reduced dimension x_dim + +1) expectation maximization for the parameters C, d, R and the time-scale of + the gaussian process, using all the trials provided as input (cf., + `gpfa_core.em`) + +2) projection of single trial in the low dimensional space (cf., + `gpfa_core.exact_inference_with_ll`) + +3) orthonormalization of the matrix C and the corresponding subspace: + (cf., `gpfa_util.orthogonalize`) + + +There are two principle scenarios of using the GPFA analysis. In the first +scenario, only one single dataset is available. The parameters that describe +the transformation are first extracted from the data, and the orthonormal basis +is constructed. Then the same data is projected into this basis. This analysis +is performed using the `gpfa()` function. + +In the second scenario, both a training and a test data set is available. Here, +the parameters are estimated from the training data. In a second step the test +data is projected into the non-orthonormal space obtained from the training +data, and then orthonormalized. From this scenario, it is possible to perform a +cross-validation between training and test data sets. This analysis is +performed by exectuing first `extract_trajectory()` on the training data, +followed by `postprocess()` on the training and test datasets. + + +References: + +The code was ported from the MATLAB code (see INFO.md for more details). + +[1] Yu MB, Cunningham JP, Santhanam G, Ryu SI, Shenoy K V, Sahani M (2009) +Gaussian-process factor analysis for low-dimensional single-trial analysis of +neural population activity. J Neurophysiol 102:614-635. + +:copyright: Copyright 2015-2019 by the Elephant team, see AUTHORS.txt. +:license: Modified BSD, see LICENSE.txt for details. +""" + + +import numpy as np +import neo +import quantities as pq + +from elephant.gpfa_src import gpfa_core, gpfa_util + + +def extract_trajectory(seqs, bin_size=20., x_dim=3, min_var_frac=0.01, + em_max_iters=500, verbose=False): + """ + Prepares data, estimates parameters of the latent variables and extracts + neural trajectories. + + Parameters + ---------- + seqs : np.recarray + data structure, whose nth entry (corresponding to the nth experimental + trial) has fields + * trialId: unique trial identifier + * T: (1 x 1) number of timesteps + * y: (yDim x T) neural data + bin_size : float, optional + Width of each time bin in ms (magnitude). + Default is 20 ms. + x_dim : int, optional + State dimensionality. + Default is 3. + min_var_frac : float, optional + fraction of overall data variance for each observed + dimension to set as the private variance floor. This is + used to combat Heywood cases, where ML parameter learning + returns one or more zero private variances. (default: 0.01) + (See Martin & McDonald, Psychometrika, Dec 1975.) + em_max_iters : int, optional + Number of EM iterations to run (default: 500). + verbose : bool, optional + specifies whether to display status messages (default: False) + + Returns + ------- + + parameter_estimates: dict + Estimated model parameters. + When the GPFA method is used, following parameters are contained + covType: {'rbf', 'tri', 'logexp'} + type of GP covariance. + Currently, only 'rbf' is supported. 
+ gamma: ndarray of shape (1, #latent_vars) + related to GP timescales by 'bin_width / sqrt(gamma)' + eps: ndarray of shape (1, #latent_vars) + GP noise variances + d: ndarray of shape (#units, 1) + observation mean + C: ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the latent variable + space + R: ndarray of shape (#units, #latent_vars) + observation noise covariance + + seqs_train: np.recarray + Contains the embedding of the training data into the latent variable + space. + Data structure, whose n-th entry (corresponding to the n-th + experimental trial) has fields + * trialId: int + unique trial identifier + * T: int + number of timesteps + * y: ndarray of shape (#units, #bins) + neural data + * xsm: ndarray of shape (#latent_vars, #bins) + posterior mean of latent variables at each time bin + * Vsm: ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + * VsmGP: ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent variable + + fit_info: dict + Information of the fitting process and the parameters used there: + * iteration_time: A list containing the runtime for each iteration + step in the EM algorithm. + * log_likelihood: float, maximized likelihood obtained in the + E-step of the EM algorithm. + * bin_size: int, Width of the bins. + * cvf: int, number for cross-validation folding + Default is 0 (no cross-validation). + * has_spikes_bool: Indicates if a neuron has any spikes across + trials. + * method: str, Method name. + + Raises + ------ + ValueError + If `seqs` is empty. + + """ + if len(seqs) == 0: + raise ValueError("Got empty trials.") + + # Set cross-validation folds + num_trials = len(seqs) + + test_mask = np.full(num_trials, False, dtype=bool) + train_mask = ~test_mask + + tr = np.arange(num_trials, dtype=np.int) + train_trial_idx = tr[train_mask] + test_trial_idx = tr[test_mask] + seqs_train = seqs[train_trial_idx] + seqs_test = seqs[test_trial_idx] + + # Remove inactive units based on training set + has_spikes_bool = (np.hstack(seqs_train['y']).mean(1) != 0) + + for seq_train in seqs_train: + seq_train['y'] = seq_train['y'][has_spikes_bool, :] + for seq_test in seqs_test: + seq_test['y'] = seq_test['y'][has_spikes_bool, :] + + # Check if training data covariance is full rank + y_all = np.hstack(seqs_train['y']) + y_dim = y_all.shape[0] + + if np.linalg.matrix_rank(np.cov(y_all)) < y_dim: + errmesg = 'Observation covariance matrix is rank deficient.\n' \ + 'Possible causes: ' \ + 'repeated units, not enough observations.' + raise ValueError(errmesg) + + if verbose: + print('Number of training trials: {}'.format(len(seqs_train))) + print('Number of test trials: {}'.format(len(seqs_test))) + print('Latent space dimensionality: {}'.format(x_dim)) + print('Observation dimensionality: {}'.format(has_spikes_bool.sum())) + + # The following does the heavy lifting. + params_est, seqs_train, fit_info = gpfa_core.gpfa_engine( + seq_train=seqs_train, + seq_test=seqs_test, + x_dim=x_dim, + bin_width=bin_size, + min_var_frac=min_var_frac, + em_max_iters=em_max_iters, + verbose=verbose) + + fit_info['has_spikes_bool'] = has_spikes_bool + fit_info['min_var_frac'] = min_var_frac + fit_info['bin_size'] = bin_size + + return params_est, seqs_train, fit_info + + +def postprocess(params_est, seqs_train, seqs_test=None): + """ + Orthonormalization and other cleanup. 
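+
+    In essence (a simplified sketch of what `gpfa_util.orthogonalize` does),
+    the loading matrix C is factorized by singular value decomposition and the
+    latent trajectories are transformed accordingly, so that the returned
+    loading matrix has orthonormal columns:
+
+    >>> import numpy as np
+    >>> C = np.random.randn(50, 3)            # (#units, #latent_vars)
+    >>> X = np.random.randn(3, 100)           # (#latent_vars, #bins)
+    >>> U, S, Vt = np.linalg.svd(C, full_matrices=False)
+    >>> Corth = U                             # orthonormalized loading matrix
+    >>> Xorth = np.diag(S).dot(Vt).dot(X)     # correspondingly transformed latents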
+ + Parameters + ---------- + + params_est : dict + First return value of extract_trajectory() on the training data set. + Estimated model parameters. + When the GPFA method is used, following parameters are contained + covType: {'rbf', 'tri', 'logexp'} + type of GP covariance + Currently, only 'rbf' is supported. + gamma: ndarray of shape (1, #latent_vars) + related to GP timescales by 'bin_width / sqrt(gamma)' + eps: ndarray of shape (1, #latent_vars) + GP noise variances + d: ndarray of shape (#units, 1) + observation mean + C: ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the latent variable + space + R: ndarray of shape (#units, #latent_vars) + observation noise covariance + + seqs_train: np.recarray + Contains the embedding of the training data into the latent variable + space. + Data structure, whose n-th entry (corresponding to the n-th + experimental trial) has fields + * trialId: int + unique trial identifier + * T: int + number of timesteps + * y: ndarray of shape (#units, #bins) + neural data + * xsm: ndarray of shape (#latent_vars, #bins) + posterior mean of latent variables at each time bin + * Vsm: ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + * VsmGP: ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent variable + + seqs_test: np.recarray, optional + Contains the embedding of the test data into the latent variable space. + Same data structure as for `seqs_train`. + Default is None. + + Returns + ------- + + params_est : dict + Estimated model parameters, including `Corth`, obtained by + orthonormalizing the columns of C. + seqs_train : np.recarray + Training data structure that contains the new field `xorth`, + the orthonormalized neural trajectories. + seqs_test : np.recarray + Test data structure that contains orthonormalized neural + trajectories in `xorth`, obtained using `params_est`. + When no test dataset is given, None is returned. + + + Raises + ------ + ValueError + If `fit_info['kernSDList'] != kern_sd`. + + """ + C = params_est['C'] + X = np.hstack(seqs_train['xsm']) + Xorth, Corth, _ = gpfa_util.orthogonalize(X, C) + seqs_train = gpfa_util.segment_by_trial(seqs_train, Xorth, 'xorth') + + params_est['Corth'] = Corth + + if seqs_test is not None: + print("Extracting neural trajectories for test data...\n") + + # Copy additional fields from extract_trajectory from training data to + # test data + seqs_test_dup = seqs_train.copy() + seqs_test_dup["T"] = seqs_test["T"] + seqs_test_dup["y"] = seqs_test["y"] + seqs_test_dup["trialId"] = seqs_test["trialId"] + + seqs_test = seqs_test_dup + seqs_test = gpfa_core.exact_inference_with_ll(seqs_test, params_est) + X = np.hstack(seqs_test['xsm']) + Xorth, Corth, _ = gpfa_util.orthogonalize(X, C) + seqs_test = gpfa_util.segment_by_trial(seqs_test, Xorth, 'xorth') + + return params_est, seqs_train, seqs_test + + +def gpfa(data, bin_size=20*pq.ms, x_dim=3, em_max_iters=500): + """ + Prepares data and calls functions for extracting neural trajectories in the + orthonormal space. + + The function combines calls to `extract_trajectory()` and `postprocess()` + in a scenario, where no cross-validation of the embedding is performed. + + Parameters + ---------- + + data : list of list of Spiketrain objects + The outer list corresponds to trials and the inner list corresponds to + the neurons recorded in that trial, such that data[l][n] is the + Spiketrain of neuron n in trial l. 
Note that the number and order of + Spiketrains objects per trial must be fixed such that data[l][n] and + data[k][n] refer to the same spike generator for any choice of l,k and + n. + bin_size : quantities.Quantity, optional + Width of each time bin. + Default is 20 ms. + x_dim : int, optional + State dimensionality. + Default is 3. + em_max_iters : int, optional + Number of EM iterations to run (default: 500). + + Returns + ------- + + parameter_estimates: dict + Estimated model parameters. + When the GPFA method is used, following parameters are contained + covType: {'rbf', 'tri', 'logexp'} + type of GP covariance + Currently, only 'rbf' is supported. + gamma: ndarray of shape (1, #latent_vars) + related to GP timescales by 'bin_width / sqrt(gamma)' + eps: ndarray of shape (1, #latent_vars) + GP noise variances + d: ndarray of shape (#units, 1) + observation mean + C: ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the latent variable + space + Corth: ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the orthonormal + latent variable space + R: ndarray of shape (#units, #latent_vars) + observation noise covariance + + seqs_train: numpy.recarray + Contains the embedding of the data into the latent variable space. + Data structure, whose n-th entry (corresponding to the n-th + experimental trial) has fields + * trialId: int + unique trial identifier + * T: int + number of timesteps + * y: ndarray of shape (#units, #bins) + neural data + * xsm: ndarray of shape (#latent_vars, #bins) + posterior mean of latent variables at each time bin + * Vsm: ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + * VsmGP: ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent variable + * xorth: ndarray of shape (#latent_vars, #bins) + trajectory in the orthonormalized space + + fit_info: dict + Information of the fitting process and the parameters used there: + * iteration_time: A list containing the runtime for each iteration + step in the EM algorithm. + * log_likelihood: float, maximized likelihood obtained in the + E-step of the EM algorithm. + * bin_size: int, Width of the bins. + * cvf: int, number for cross-validation folding + Default is 0 (no cross-validation). + * has_spikes_bool: Indicates if a neuron has any spikes across + trials. + * method: str, Method name. + + Raises + ------ + ValueError + If `data` is an empty list. + If `bin_size` if not a `pq.Quantity`. + If `data[0][1][0]` is not a `neo.SpikeTrain`. + + Examples + -------- + In the following example, we calculate the neural trajectories of 20 + Poisson spike train generators recorded in 50 trials with randomized + rates up to 100 Hz. + + >>> import numpy as np + >>> import quantities as pq + >>> from elephant.gpfa import gpfa + >>> from elephant.spike_train_generation import homogeneous_poisson_process + >>> data = [] + >>> for trial in range(50): + >>> n_channels = 20 + >>> firing_rates = np.random.randint(low=1, high=100, + >>> size=n_channels) * pq.Hz + >>> spike_times = [homogeneous_poisson_process(rate=rate) + >>> for rate in firing_rates] + >>> data.append((trial, spike_times)) + >>> params_est, seqs_train, seqs_test, fit_info = gpfa( + >>> data, bin_size=20 * pq.ms, x_dim=8) + + """ + # todo does it makes sense to explicitly pass trial_id? 
+ if len(data) == 0: + raise ValueError("`data` cannot be empty") + if not isinstance(bin_size, pq.Quantity): + raise ValueError("'bin_size' must be of type pq.Quantity") + if not isinstance(data[0][1][0], neo.SpikeTrain): + raise ValueError("structure of the data is not correct: 0-axis " + "should be trials, 1-axis neo spike trains " + "and 2-axis spike times") + + seqs = gpfa_util.get_seq(data, bin_size) + params_est, seqs_train, fit_info = extract_trajectory( + seqs, bin_size=bin_size.rescale('ms').magnitude, x_dim=x_dim, + em_max_iters=em_max_iters) + params_est, seqs_train, _ = postprocess(params_est, seqs_train) + + return params_est, seqs_train, fit_info diff --git a/elephant/gpfa_src/INFO.md b/elephant/gpfa_src/INFO.md new file mode 100644 index 000000000..84bd5a95e --- /dev/null +++ b/elephant/gpfa_src/INFO.md @@ -0,0 +1,7 @@ +# GPFA +Gaussian process factor analysis (GPFA), based on Byron Yu's MATLAB implementation + +The original MATLAB code is available at Byron Yu's website: +https://users.ece.cmu.edu/~byronyu/software.shtml + +Extra requirements: scikit-learn, tqdm \ No newline at end of file diff --git a/elephant/gpfa_src/__init__.py b/elephant/gpfa_src/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/elephant/gpfa_src/gpfa_core.py b/elephant/gpfa_src/gpfa_core.py new file mode 100644 index 000000000..6d3eee0ec --- /dev/null +++ b/elephant/gpfa_src/gpfa_core.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- +""" +GPFA core functionality. + +:copyright: Copyright 2015-2019 by the Elephant team, see AUTHORS.txt. +:license: Modified BSD, see LICENSE.txt for details. +""" + + +import time +import warnings + +import numpy as np +import scipy.linalg as linalg +import scipy.optimize as optimize +import scipy.sparse as sparse +from sklearn.decomposition import FactorAnalysis +from tqdm import trange + +from . import gpfa_util + + +def learn_gp_params(seq, params, verbose=False): + """Updates parameters of GP state model, given neural trajectories. + + Parameters + ---------- + seq : np.recarray + data structure containing neural trajectories; + params : dict + current GP state model parameters, which gives starting point + for gradient optimization; + verbose : bool, optional + specifies whether to display status messages (default: False) + + Returns + ------- + param_opt : np.ndarray + updated GP state model parameter + + Raises + ------ + ValueError + If `params['covType'] != 'rbf'`. + If `params['notes']['learnGPNoise']` set to True. + + """ + if params['covType'] != 'rbf': + raise ValueError("Only 'rbf' GP covariance type is supported.") + if params['notes']['learnGPNoise']: + raise ValueError("learnGPNoise is not supported.") + param_name = 'gamma' + fname = 'gpfa_util.grad_betgam' + + param_init = params[param_name] + param_opt = {param_name: np.empty_like(param_init)} + + x_dim = param_init.shape[-1] + precomp = gpfa_util.make_precomp(seq, x_dim) + + # Loop once for each state dimension (each GP) + for i in range(x_dim): + const = {'eps': params['eps'][i]} + initp = np.log(param_init[i]) + res_opt = optimize.minimize(eval(fname), initp, + args=(precomp[i], const), + method='L-BFGS-B', jac=True) + param_opt['gamma'][i] = np.exp(res_opt.x) + + if verbose: + print('\n Converged p; xDim:{}, p:{}'.format(i, res_opt.x)) + + return param_opt + + +def exact_inference_with_ll(seq, params, get_ll=True): + """ + Extracts latent trajectories from neural data, given GPFA model parameters. 
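+
+    Since the model is linear-Gaussian, the posterior over the latent
+    trajectory of a trial is available in closed form. Schematically (and up
+    to the persymmetric speed-ups used in the implementation below), the
+    stacked posterior mean over all time bins corresponds to
+
+        xsm = inv(inv(K) + kron(I_T, C' inv(R) C)) kron(I_T, C' inv(R)) (y - d)
+
+    where K is the GP prior covariance across all time bins and latent
+    dimensions. Trials of equal length T share these precomputations and are
+    therefore processed together.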
+ + Parameters + ---------- + seq : np.recarray + Input data structure, whose n-th element (corresponding to the n-th + experimental trial) has fields: + y : ndarray of shape (#units, #bins) + neural data + T : int + number of bins + params : dict + GPFA model parameters whe the following fields: + C : ndarray + FA factor loadings matrix + d : ndarray + FA mean vector + R : ndarray + FA noise covariance matrix + gamma : ndarray + GP timescale + eps : ndarray + GP noise variance + get_ll : bool, optional + specifies whether to compute data log likelihood (default: True) + + Returns + ------- + seq_lat : np.recarray + a copy of the input data structure, augmented with the new + fields: + xsm : ndarray of shape (#latent_vars x #bins) + posterior mean of latent variables at each time bin + Vsm : ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + VsmGP : ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent + variable + ll : float + data log likelihood, np.nan is returned when `get_ll` is set False + """ + y_dim, x_dim = params['C'].shape + + # copy the contents of the input data structure to output structure + dtype_out = [(x, seq[x].dtype) for x in seq.dtype.names] + dtype_out.extend([('xsm', np.object), ('Vsm', np.object), + ('VsmGP', np.object)]) + seq_lat = np.empty(len(seq), dtype=dtype_out) + for dtype_name in seq.dtype.names: + seq_lat[dtype_name] = seq[dtype_name] + + # Precomputations + if params['notes']['RforceDiagonal']: + rinv = np.diag(1.0 / np.diag(params['R'])) + logdet_r = (np.log(np.diag(params['R']))).sum() + else: + rinv = linalg.inv(params['R']) + rinv = (rinv + rinv.T) / 2 # ensure symmetry + logdet_r = gpfa_util.logdet(params['R']) + + c_rinv = params['C'].T.dot(rinv) + c_rinv_c = c_rinv.dot(params['C']) + + t_all = seq_lat['T'] + t_uniq = np.unique(t_all) + ll = 0. + + # Overview: + # - Outer loop on each element of Tu. + # - For each element of Tu, find all trials with that length. + # - Do inference and LL computation for all those trials together. + for t in t_uniq: + k_big, k_big_inv, logdet_k_big = gpfa_util.make_k_big(params, t) + k_big = sparse.csr_matrix(k_big) + + blah = [c_rinv_c for _ in range(t)] + c_rinv_c_big = linalg.block_diag(*blah) # (xDim*T) x (xDim*T) + minv, logdet_m = gpfa_util.inv_persymm(k_big_inv + c_rinv_c_big, x_dim) + + # Note that posterior covariance does not depend on observations, + # so can compute once for all trials with same T. 
+ # xDim x xDim posterior covariance for each timepoint + vsm = np.full((x_dim, x_dim, t), np.nan) + idx = np.arange(0, x_dim * t + 1, x_dim) + for i in range(t): + vsm[:, :, i] = minv[idx[i]:idx[i + 1], idx[i]:idx[i + 1]] + + # T x T posterior covariance for each GP + vsm_gp = np.full((t, t, x_dim), np.nan) + for i in range(x_dim): + vsm_gp[:, :, i] = minv[i::x_dim, i::x_dim] + + # Process all trials with length T + n_list = np.where(t_all == t)[0] + # dif is yDim x sum(T) + dif = np.hstack(seq_lat[n_list]['y']) - params['d'][:, np.newaxis] + # term1Mat is (xDim*T) x length(nList) + term1_mat = c_rinv.dot(dif).reshape((x_dim * t, -1), order='F') + + # Compute blkProd = CRinvC_big * invM efficiently + # blkProd is block persymmetric, so just compute top half + t_half = np.int(np.ceil(t / 2.0)) + blk_prod = np.zeros((x_dim * t_half, x_dim * t)) + idx = range(0, x_dim * t_half + 1, x_dim) + for i in range(t_half): + blk_prod[idx[i]:idx[i + 1], :] = c_rinv_c.dot( + minv[idx[i]:idx[i + 1], :]) + blk_prod = k_big[:x_dim * t_half, :].dot( + gpfa_util.fill_persymm(np.eye(x_dim * t_half, x_dim * t) - + blk_prod, x_dim, t)) + # xsmMat is (xDim*T) x length(nList) + xsm_mat = gpfa_util.fill_persymm(blk_prod, x_dim, t).dot(term1_mat) + + for i, n in enumerate(n_list): + seq_lat[n]['xsm'] = xsm_mat[:, i].reshape((x_dim, t), order='F') + seq_lat[n]['Vsm'] = vsm + seq_lat[n]['VsmGP'] = vsm_gp + + if get_ll: + # Compute data likelihood + val = -t * logdet_r - logdet_k_big - logdet_m \ + - y_dim * t * np.log(2 * np.pi) + ll = ll + len(n_list) * val - (rinv.dot(dif) * dif).sum() \ + + (term1_mat.T.dot(minv) * term1_mat.T).sum() + + if get_ll: + ll /= 2 + else: + ll = np.nan + + return seq_lat, ll + + +def em(params_init, seq, em_max_iters=500, tol=1.0E-8, min_var_frac=0.01, + freq_ll=5, verbose=False): + """ + Fits GPFA model parameters using expectation-maximization (EM) algorithm. + + Parameters + ---------- + params_init : dict + GPFA model parameters at which EM algorithm is initialized + covType : {'rbf', 'tri', 'logexp'} + type of GP covariance + gamma : ndarray of shape (1, #latent_vars) + related to GP timescales by + 'bin_width / sqrt(gamma)' + eps : ndarray of shape (1, #latent_vars) + GP noise variances + d : ndarray of shape (#units, 1) + observation mean + C : ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the + latent variable space + R : ndarray of shape (#units, #latent_vars) + observation noise covariance + seq : np.recarray + training data structure, whose n-th entry (corresponding to the n-th + experimental trial) has fields + trialId : + unique trial identifier + T : int + number of bins + y : ndarray (yDim x T) + neural data + em_max_iters : int, optional + number of EM iterations to run (default: 500) + tol : float, optional + stopping criterion for EM (default: 1e-8) + min_var_frac : float, optional + fraction of overall data variance for each observed + dimension to set as the private variance floor. This is + used to combat Heywood cases, where ML parameter learning + returns one or more zero private variances. (default: 0.01) + (See Martin & McDonald, Psychometrika, Dec 1975.) + freq_ll : int, optional + data likelihood is computed at every freq_ll EM iterations. + freq_ll = 1 means that data likelihood is computed at every + iteration. 
(default: 5) + verbose : bool, optional + specifies whether to display status messages (default: false) + + Returns + ------- + params_est : dict + GPFA model parameter estimates, returned by EM algorithm (same + format as params_init) + seq_lat : dict + a copy of the training data structure, augmented with the new + fields: + xsm : ndarray of shape (#latent_vars x #bins) + posterior mean of latent variables at each time bin + Vsm : ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + VsmGP : ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent + variable + ll : list + list of log likelihoods after each EM iteration + iter_time : list + lisf of computation times (in seconds) for each EM iteration + """ + params = params_init + t = seq['T'] + y_dim, x_dim = params['C'].shape + lls = [] + ll_old = ll_base = ll = 0.0 + iter_time = [] + var_floor = min_var_frac * np.diag(np.cov(np.hstack(seq['y']))) + seq_lat = None + + # Loop once for each iteration of EM algorithm + for iter_id in trange(1, em_max_iters + 1, desc='EM iteration'): + if verbose: + print() + tic = time.time() + + if (np.fmod(iter_id, freq_ll) == 0) or (iter_id <= 2): + get_ll = True + else: + get_ll = False + + # ==== E STEP ===== + if not np.isnan(ll): + ll_old = ll + seq_lat, ll = exact_inference_with_ll(seq, params, get_ll=get_ll) + lls.append(ll) + + # ==== M STEP ==== + sum_p_auto = np.zeros((x_dim, x_dim)) + for seq_lat_n in seq_lat: + sum_p_auto += seq_lat_n['Vsm'].sum(axis=2) \ + + seq_lat_n['xsm'].dot(seq_lat_n['xsm'].T) + y = np.hstack(seq['y']) + xsm = np.hstack(seq_lat['xsm']) + sum_yxtrans = y.dot(xsm.T) + sum_xall = xsm.sum(axis=1)[:, np.newaxis] + sum_yall = y.sum(axis=1)[:, np.newaxis] + + # term is (xDim+1) x (xDim+1) + term = np.vstack([np.hstack([sum_p_auto, sum_xall]), + np.hstack([sum_xall.T, t.sum().reshape((1, 1))])]) + # yDim x (xDim+1) + cd = gpfa_util.rdiv(np.hstack([sum_yxtrans, sum_yall]), term) + + params['C'] = cd[:, :x_dim] + params['d'] = cd[:, -1] + + # yCent must be based on the new d + # yCent = bsxfun(@minus, [seq.y], currentParams.d); + # R = (yCent * yCent' - (yCent * [seq.xsm]') * currentParams.C') + # / sum(T); + c = params['C'] + d = params['d'][:, np.newaxis] + if params['notes']['RforceDiagonal']: + sum_yytrans = (y * y).sum(axis=1)[:, np.newaxis] + yd = sum_yall * d + term = ((sum_yxtrans - d.dot(sum_xall.T)) * c).sum(axis=1) + term = term[:, np.newaxis] + r = d ** 2 + (sum_yytrans - 2 * yd - term) / t.sum() + + # Set minimum private variance + r = np.maximum(var_floor, r) + params['R'] = np.diag(r[:, 0]) + else: + sum_yytrans = y.dot(y.T) + yd = sum_yall.dot(d.T) + term = (sum_yxtrans - d.dot(sum_xall.T)).dot(c.T) + r = d.dot(d.T) + (sum_yytrans - yd - yd.T - term) / t.sum() + + params['R'] = (r + r.T) / 2 # ensure symmetry + + if params['notes']['learnKernelParams']: + res = learn_gp_params(seq_lat, params, verbose=verbose) + params['gamma'] = res['gamma'] + + t_end = time.time() - tic + iter_time.append(t_end) + + # Verify that likelihood is growing monotonically + if iter_id <= 2: + ll_base = ll + elif verbose and ll < ll_old: + print('\nError: Data likelihood has decreased ', + 'from {0} to {1}'.format(ll_old, ll)) + elif (ll - ll_base) < (1 + tol) * (ll_old - ll_base): + break + + if len(lls) < em_max_iters: + print('Fitting has converged after {0} EM iterations.)'.format( + len(lls))) + + if np.any(np.diag(params['R']) == var_floor): + warnings.warn('Private variance 
floor used for one or more observed ' + 'dimensions in GPFA.') + + return params, seq_lat, lls, iter_time + + +def gpfa_engine(seq_train, seq_test, x_dim=8, bin_width=20.0, tau_init=100.0, + eps_init=1.0E-3, min_var_frac=0.01, em_max_iters=500, + verbose=False): + """Extract neural trajectories using GPFA. + + Parameters + ---------- + seq_train : np.recarray + training data structure, whose n-th element (corresponding to + the n-th experimental trial) has fields + trialId : + unique trial identifier + T : int + number of bins + y : ndarray of shape (#units, #bins) + neural data + seq_test : np.recarray + test data structure (same format as seqTrain) + x_dim : int, optional + state dimensionality (default: 3) + bin_width : float, optional + spike bin width in msec (default: 20) + tau_init : float, optional + GP timescale initialization in msec (default: 100) + eps_init : float, optional + GP noise variance initialization (default: 1e-3) + min_var_frac : float, optional + fraction of overall data variance for each observed + dimension to set as the private variance floor. This is + used to combat Heywood cases, where ML parameter learning + returns one or more zero private variances. (default: 0.01) + (See Martin & McDonald, Psychometrika, Dec 1975.) + em_max_iters : int, optional + number of EM iterations to run (default: 500) + verbose : bool, optional + specifies whether to display status messages (default: False) + + Returns + ------- + + parameter_estimates: dict + Estimated model parameters. + When the GPFA method is used, following parameters are contained + covType: {'rbf', 'tri', 'logexp'} + type of GP covariance + gamma: ndarray of shape (1, #latent_vars) + related to GP timescales by 'bin_width / sqrt(gamma)' + eps: ndarray of shape (1, #latent_vars) + GP noise variances + d: ndarray of shape (#units, 1) + observation mean + C: ndarray of shape (#units, #latent_vars) + mapping between the neuronal data space and the latent variable + space + R: ndarray of shape (#units, #latent_vars) + observation noise covariance + + seqs_train: np.recarray + Data structure, whose n-th entry (corresponding to the n-th + experimental trial) has fields + * trialId: int + unique trial identifier + * T: int + number of timesteps + * y: ndarray of shape (#units, #bins) + neural data + * xsm: ndarray of shape (#latent_vars, #bins) + posterior mean of latent variables at each time bin + * Vsm: ndarray of shape (#latent_vars, #latent_vars, #bins) + posterior covariance between latent variables at each + timepoint + * VsmGP: ndarray of shape (#bins, #bins, #latent_vars) + posterior covariance over time for each latent variable + + fit_info: dict + Information of the fitting process and the parameters used there: + * iteration_time: A list containing the runtime for each iteration + step in the EM algorithm. + * log_likelihood: float, maximized likelihood obtained in the + E-step of the EM algorithm. + * bin_size: int, Width of the bins. + * cvf: int, number for cross-validation folding + Default is 0 (no cross-validation). + * has_spikes_bool: Indicates if a neuron has any spikes across + trials. + * method: str, Method name. + """ + # For compute efficiency, train on equal-length segments of trials + seq_train_cut = gpfa_util.cut_trials(seq_train) + if len(seq_train_cut) == 0: + warnings.warn('No segments extracted for training. 
Defaulting to ' + 'segLength=Inf.') + seq_train_cut = gpfa_util.cut_trials(seq_train, seg_length=np.inf) + + # ================================== + # Initialize state model parameters + # ================================== + params_init = dict() + params_init['covType'] = 'rbf' + # GP timescale + # Assume binWidth is the time step size. + params_init['gamma'] = (bin_width / tau_init) ** 2 * np.ones(x_dim) + # GP noise variance + params_init['eps'] = eps_init * np.ones(x_dim) + + # ======================================== + # Initialize observation model parameters + # ======================================== + print('Initializing parameters using factor analysis...') + + y_all = np.hstack(seq_train_cut['y']) + fa = FactorAnalysis(n_components=x_dim, copy=True, + noise_variance_init=np.diag(np.cov(y_all, bias=True))) + fa.fit(y_all.T) + params_init['d'] = y_all.mean(axis=1) + params_init['C'] = fa.components_.T + params_init['R'] = np.diag(fa.noise_variance_) + + # Define parameter constraints + params_init['notes'] = { + 'learnKernelParams': True, + 'learnGPNoise': False, + 'RforceDiagonal': True, + } + + # ===================== + # Fit model parameters + # ===================== + print('\nFitting GPFA model...') + + params_est, seq_train_cut, ll_cut, iter_time = em( + params_init, seq_train_cut, min_var_frac=min_var_frac, + em_max_iters=em_max_iters, verbose=verbose) + + # Extract neural trajectories for original, unsegmented trials + # using learned parameters + seq_train, ll_train = exact_inference_with_ll(seq_train, params_est) + + if len(seq_test) > 0: + pass + # TODO: include the assessment of generalization performance + + fit_info = {'iteration_time': iter_time, 'log_likelihood': ll_train} + return params_est, seq_train, fit_info + + +def two_stage_engine(seqTrain, seqTest, typ='fa', xDim=3, binWidth=20.0): + raise NotImplemented diff --git a/elephant/gpfa_src/gpfa_util.py b/elephant/gpfa_src/gpfa_util.py new file mode 100644 index 000000000..6049ca3b9 --- /dev/null +++ b/elephant/gpfa_src/gpfa_util.py @@ -0,0 +1,596 @@ +# -*- coding: utf-8 -*- +""" +GPFA util functions. + +:copyright: Copyright 2015-2019 by the Elephant team, see AUTHORS.txt. +:license: Modified BSD, see LICENSE.txt for details. +""" + + +import warnings + +import numpy as np +import quantities as pq +import scipy as sp + +from elephant.conversion import BinnedSpikeTrain + + +def get_seq(data, bin_size, use_sqrt=True): + """ + Converts the data into a rec array using internally BinnedSpikeTrain. + + Parameters + ---------- + + data : list of list of Spiketrain objects + The outer list corresponds to trials and the inner list corresponds to + the neurons recorded in that trial, such that data[l][n] is the + Spiketrain of neuron n in trial l. Note that the number and order of + Spiketrains objects per trial must be fixed such that data[l][n] and + data[k][n] refer to the same spike generator for any choice of l,k and + n. + bin_size: quantity.Quantity + Spike bin width + + use_sqrt: bool + Boolean specifying whether or not to use square-root transform on + spike counts (see original paper for motivation). + Default is True + + Returns + ------- + + seq + data structure, whose nth entry (corresponding to the nth experimental + trial) has fields + * trialId: unique trial identifier + * T: (1 x 1) number of timesteps + * y: (yDim x T) neural data + + Raises + ------ + ValueError + if `bin_size` is not a pq.Quantity. 
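+
+    Examples
+    --------
+    A minimal sketch; the spike times below are arbitrary placeholders:
+
+    >>> import neo
+    >>> import quantities as pq
+    >>> from elephant.gpfa_src.gpfa_util import get_seq
+    >>> st = neo.SpikeTrain([10., 50., 120.] * pq.ms, t_stop=200 * pq.ms)
+    >>> data = [(0, [st, st])]                # one trial with two units
+    >>> seq = get_seq(data, bin_size=20 * pq.ms)
+    >>> y0 = seq[0]['y']                      # shape (#units, #bins) = (2, 10)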
+ + """ + if not isinstance(bin_size, pq.Quantity): + raise ValueError("'bin_size' must be of type pq.Quantity") + + seq = [] + for dat in data: + trial_id = dat[0] + sts = dat[1] + binned_sts = BinnedSpikeTrain(sts, binsize=bin_size) + if use_sqrt: + binned = np.sqrt(binned_sts.to_array()) + else: + binned = binned_sts.to_array() + seq.append( + (trial_id, binned_sts.num_bins, binned)) + seq = np.array(seq, + dtype=[('trialId', np.int), ('T', np.int), + ('y', 'O')]) + + # Remove trials that are shorter than one bin width + if len(seq) > 0: + trials_to_keep = seq['T'] > 0 + seq = seq[trials_to_keep] + + return seq + + +def cut_trials(seq_in, seg_length=20): + """ + Extracts trial segments that are all of the same length. Uses + overlapping segments if trial length is not integer multiple + of segment length. Ignores trials with length shorter than + one segment length. + + Parameters + ---------- + + seq_in + data structure, whose nth entry (corresponding to + the nth experimental trial) has fields + * trialId: unique trial identifier + * T: (1 x 1) number of timesteps in trial + * y: (yDim x T) neural data + + seg_length : int + length of segments to extract, in number of timesteps. If infinite, + entire trials are extracted, i.e., no segmenting. + Default is 20 + + + Returns + ------- + + seqOut + data structure, whose nth entry (corresponding to + the nth experimental trial) has fields + * trialId: identifier of trial from which segment was taken + * segId: segment identifier within trial + * T: (1 x 1) number of timesteps in segment + * y: (yDim x T) neural data + + Raises + ------ + ValueError + If `seq_length == 0`. + + """ + if seg_length == 0: + raise ValueError("At least 1 extracted trial must be returned") + if np.isinf(seg_length): + seqOut = seq_in + return seqOut + + dtype_seqOut = [('trialId', np.int), ('segId', np.int), ('T', np.int), + ('y', np.object)] + seqOut_buff = [] + for n, seqIn_n in enumerate(seq_in): + T = seqIn_n['T'] + + # Skip trials that are shorter than segLength + if T < seg_length: + warnings.warn('trialId {0:4d} shorter than one segLength...' + 'skipping'.format(seqIn_n['trialId'])) + continue + + numSeg = np.int(np.ceil(float(T) / seg_length)) + + # Randomize the sizes of overlaps + if numSeg == 1: + cumOL = np.array([0, ]) + else: + totalOL = (seg_length * numSeg) - T + probs = np.ones(numSeg - 1, np.float) / (numSeg - 1) + randOL = np.random.multinomial(totalOL, probs) + cumOL = np.hstack([0, np.cumsum(randOL)]) + + seg = np.empty(numSeg, dtype_seqOut) + seg['trialId'] = seqIn_n['trialId'] + seg['T'] = seg_length + + for s, seg_s in enumerate(seg): + tStart = seg_length * s - cumOL[s] + + seg_s['segId'] = s + seg_s['y'] = seqIn_n['y'][:, tStart:tStart + seg_length] + + seqOut_buff.append(seg) + + if len(seqOut_buff) > 0: + seqOut = np.hstack(seqOut_buff) + else: + seqOut = np.empty(0, dtype_seqOut) + + return seqOut + + +def rdiv(a, b): + """ + Returns the solution to x b = a. Equivalent to MATLAB right matrix + division: a / b + """ + return np.linalg.solve(b.T, a.T).T + + +def logdet(A): + """ + log(det(A)) where A is positive-definite. + This is faster and more stable than using log(det(A)). + + Written by Tom Minka + (c) Microsoft Corporation. All rights reserved. + """ + U = np.linalg.cholesky(A) + return 2 * (np.log(np.diag(U))).sum() + + +def make_k_big(params, n_timesteps): + """ + Constructs full GP covariance matrix across all state dimensions and + timesteps. 
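+
+    For the 'rbf' covariance, the block for latent dimension i has entries
+
+        K_i(t1, t2) = (1 - eps_i) * exp(-gamma_i / 2 * (t1 - t2)**2)
+                      + eps_i * delta(t1, t2),
+
+    i.e. a squared-exponential kernel plus a small diagonal noise term, whose
+    characteristic timescale corresponds to bin_width / sqrt(gamma_i).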
+ + Parameters + ---------- + + params + GPFA model parameters + n_timesteps : int + number of timesteps + + Returns + ------- + + K_big + GP covariance matrix with dimensions (xDim * T) x (xDim * T). + The (t1, t2) block is diagonal, has dimensions xDim x xDim, + and represents the covariance between the state vectors at + timesteps t1 and t2. K_big is sparse and striped. + K_big_inv + Inverse of K_big + logdet_K_big + Log determinant of K_big + + Raises + ------ + ValueError + If `params['covType'] != 'rbf'`. + + """ + if params['covType'] != 'rbf': + raise ValueError("Only 'rbf' GP covariance type is supported.") + + xDim = params['C'].shape[1] + + K_big = np.zeros((xDim * n_timesteps, xDim * n_timesteps)) + K_big_inv = np.zeros((xDim * n_timesteps, xDim * n_timesteps)) + Tdif = np.tile(np.arange(0, n_timesteps), (n_timesteps, 1)).T \ + - np.tile(np.arange(0, n_timesteps), (n_timesteps, 1)) + logdet_K_big = 0 + + for i in range(xDim): + K = (1 - params['eps'][i]) * np.exp(-params['gamma'][i] / 2 * + Tdif ** 2) \ + + params['eps'][i] * np.eye(n_timesteps) + K_big[i::xDim, i::xDim] = K + # the original MATLAB program uses here a special algorithm, provided + # in C and MEX, for inversion of Toeplitz matrix: + # [K_big_inv(idx+i, idx+i), logdet_K] = invToeplitz(K); + # TODO: use an inversion method optimized for Toeplitz matrix + # Below is an attempt to use such a method, not leading to a speed-up. + # # K_big_inv[i::xDim, i::xDim] = sp.linalg.solve_toeplitz((K[:, 0], + # K[0, :]), np.eye(T)) + K_big_inv[i::xDim, i::xDim] = np.linalg.inv(K) + logdet_K = logdet(K) + + logdet_K_big = logdet_K_big + logdet_K + + return K_big, K_big_inv, logdet_K_big + + +def inv_persymm(M, blk_size): + """ + Inverts a matrix that is block persymmetric. This function is + faster than calling inv(M) directly because it only computes the + top half of inv(M). The bottom half of inv(M) is made up of + elements from the top half of inv(M). + + WARNING: If the input matrix M is not block persymmetric, no + error message will be produced and the output of this function will + not be meaningful. + + Parameters + ---------- + + M: numpy.ndarray + The block persymmetric matrix to be inverted + ((blkSize*T) x (blkSize*T)). + Each block is blkSize x blkSize, arranged in a T x T grid. + blk_size: int + Edge length of one block + + Returns + ------- + invM + Inverse of M ((blkSize*T) x (blkSize*T)) + logdet_M + Log determinant of M + """ + T = int(M.shape[0] / blk_size) + Thalf = np.int(np.ceil(T / 2.0)) + mkr = blk_size * Thalf + + invA11 = np.linalg.inv(M[:mkr, :mkr]) + invA11 = (invA11 + invA11.T) / 2 + + # Multiplication of a sparse matrix by a dense matrix is not supported by + # SciPy. Making A12 a sparse matrix here an error later. + off_diag_sparse = False + if off_diag_sparse: + A12 = sp.sparse.csr_matrix(M[:mkr, mkr:]) + else: + A12 = M[:mkr, mkr:] + + term = invA11.dot(A12) + F22 = M[mkr:, mkr:] - A12.T.dot(term) + + res12 = rdiv(-term, F22) + res11 = invA11 - res12.dot(term.T) + res11 = (res11 + res11.T) / 2 + + # Fill in bottom half of invM by picking elements from res11 and res12 + invM = fill_persymm(np.hstack([res11, res12]), blk_size, T) + + logdet_M = -logdet(invA11) + logdet(F22) + + return invM, logdet_M + + +def fill_persymm(p_in, blk_size, n_blocks, blk_size_vert=None): + """ + Fills in the bottom half of a block persymmetric matrix, given the + top half. 
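+
+    Concretely, the bottom half is obtained by mirroring the known top blocks
+    through the centre of the matrix, i.e. at block level
+    Pout[n_blocks - 1 - i, n_blocks - 1 - j] = Pin[i, j].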
+ + Parameters + ---------- + + p_in + Top half of block persymmetric matrix (xDim*Thalf) x (xDim*T), + where Thalf = ceil(T/2) + blk_size : int + Edge length of one block + n_blocks : int + Number of blocks making up a row of Pin + blk_size_vert : int, optional + Vertical block edge length if blocks are not square. + `blk_size` is assumed to be the horizontal block edge length. + + Returns + ------- + + Pout + Full block persymmetric matrix (xDim*T) x (xDim*T) + """ + if blk_size_vert is None: + blk_size_vert = blk_size + + Nh = blk_size * n_blocks + Nv = blk_size_vert * n_blocks + Thalf = np.int(np.floor(n_blocks / 2.0)) + THalf = np.int(np.ceil(n_blocks / 2.0)) + + Pout = np.empty((blk_size_vert * n_blocks, blk_size * n_blocks)) + Pout[:blk_size_vert * THalf, :] = p_in + for i in range(Thalf): + for j in range(n_blocks): + Pout[Nv - (i + 1) * blk_size_vert:Nv - i * blk_size_vert, + Nh - (j + 1) * blk_size:Nh - j * blk_size] \ + = p_in[i * blk_size_vert:(i + 1) * + blk_size_vert, + j * blk_size:(j + 1) * blk_size] + + return Pout + + +def make_precomp(seq, xDim): + """ + Make the precomputation matrices specified by the GPFA algorithm. + + Usage: [precomp] = makePautoSum( seq , xDim ) + + Parameters + ---------- + + seq + The sequence struct of inferred latents, etc. + xDim : int + The dimension of the latent space. + + Returns + ------- + + precomp + The precomp struct will be updated with the posterior covaraince and + the other requirements. + + Notes + ----- + All inputs are named sensibly to those in `learnGPparams`. + This code probably should not be called from anywhere but there. + + We bother with this method because we + need this particular matrix sum to be + as fast as possible. Thus, no error checking + is done here as that would add needless computation. + Instead, the onus is on the caller (which should be + learnGPparams()) to make sure this is called correctly. + + Finally, see the notes in the GPFA README. + """ + + Tall = seq['T'] + Tmax = (Tall).max() + Tdif = np.tile(np.arange(0, Tmax), (Tmax, 1)).T \ + - np.tile(np.arange(0, Tmax), (Tmax, 1)) + + # assign some helpful precomp items + # this is computationally cheap, so we keep a few loops in MATLAB + # for ease of readability. + precomp = np.empty(xDim, dtype=[( + 'absDif', np.object), ('difSq', np.object), ('Tall', np.object), + ('Tu', np.object)]) + for i in range(xDim): + precomp[i]['absDif'] = np.abs(Tdif) + precomp[i]['difSq'] = Tdif ** 2 + precomp[i]['Tall'] = Tall + # find unique numbers of trial lengths + Tu = np.unique(Tall) + # Loop once for each state dimension (each GP) + for i in range(xDim): + precomp_Tu = np.empty(len(Tu), dtype=[( + 'nList', np.object), ('T', np.int), ('numTrials', np.int), + ('PautoSUM', np.object)]) + for j in range(len(Tu)): + T = Tu[j] + precomp_Tu[j]['nList'] = np.where(Tall == T)[0] + precomp_Tu[j]['T'] = T + precomp_Tu[j]['numTrials'] = len(precomp_Tu[j]['nList']) + precomp_Tu[j]['PautoSUM'] = np.zeros((T, T)) + precomp[i]['Tu'] = precomp_Tu + + # at this point the basic precomp is built. The previous steps + # should be computationally cheap. We now try to embed the + # expensive computation in a MEX call, defaulting to MATLAB if + # this fails. The expensive computation is filling out PautoSUM, + # which we initialized previously as zeros. 
+ + ############################################################ + # Fill out PautoSum + ############################################################ + # Loop once for each state dimension (each GP) + for i in range(xDim): + # Loop once for each trial length (each of Tu) + for j in range(len(Tu)): + # Loop once for each trial (each of nList) + for n in precomp[i]['Tu'][j]['nList']: + precomp[i]['Tu'][j]['PautoSUM'] += seq[n]['VsmGP'][:, :, i] \ + + np.outer( + seq[n]['xsm'][i, :], seq[n]['xsm'][i, :]) + return precomp + + +def grad_betgam(p, pre_comp, const): + """ + Gradient computation for GP timescale optimization. + This function is called by minimize.m. + + Parameters + ---------- + + p + variable with respect to which optimization is performed, + where p = log(1 / timescale ^2) + pre_comp + structure containing precomputations + const + contains hyperparameters + + Returns + ------- + + f - value of objective function E[log P({x},{y})] at p + df - gradient at p + """ + Tall = pre_comp['Tall'] + Tmax = Tall.max() + + # temp is Tmax x Tmax + temp = (1 - const['eps']) * np.exp(-np.exp(p) / 2 * pre_comp['difSq']) + Kmax = temp + const['eps'] * np.eye(Tmax) + dKdgamma_max = -0.5 * temp * pre_comp['difSq'] + + dEdgamma = 0 + f = 0 + for j in range(len(pre_comp['Tu'])): + T = pre_comp['Tu'][j]['T'] + Thalf = np.int(np.ceil(T / 2.0)) + + Kinv = np.linalg.inv(Kmax[:T, :T]) + logdet_K = logdet(Kmax[:T, :T]) + + KinvM = Kinv[:Thalf, :].dot(dKdgamma_max[:T, :T]) # Thalf x T + KinvMKinv = (KinvM.dot(Kinv)).T # Thalf x T + + dg_KinvM = np.diag(KinvM) + tr_KinvM = 2 * dg_KinvM.sum() - np.fmod(T, 2) * dg_KinvM[-1] + + mkr = np.int(np.ceil(0.5 * T ** 2)) + numTrials = pre_comp['Tu'][j]['numTrials'] + PautoSUM = pre_comp['Tu'][j]['PautoSUM'] + + pauto_kinv_dot = PautoSUM.ravel('F')[:mkr].dot( + KinvMKinv.ravel('F')[:mkr]) + pauto_kinv_dot_rest = PautoSUM.ravel('F')[-1:mkr - 1:- 1].dot( + KinvMKinv.ravel('F')[:(T ** 2 - mkr)]) + dEdgamma = dEdgamma - 0.5 * numTrials * tr_KinvM \ + + 0.5 * pauto_kinv_dot \ + + 0.5 * pauto_kinv_dot_rest + + f = f - 0.5 * numTrials * logdet_K \ + - 0.5 * (PautoSUM * Kinv).sum() + + f = -f + # exp(p) is needed because we're computing gradients with + # respect to log(gamma), rather than gamma + df = -dEdgamma * np.exp(p) + + return f, df + + +def orthogonalize(x, l): + """ + + Orthonormalize the columns of the loading matrix and + apply the corresponding linear transform to the latent variables. + + yDim: data dimensionality + xDim: latent dimensionality + + Parameters + ---------- + + x : np.ndarray + Latent variables (xDim x T) + l : np.ndarray + Loading matrix (yDim x xDim) + + Returns + ------- + + Xorth + Orthonormalized latent variables (xDim x T) + Lorth + Orthonormalized loading matrix (yDim x xDim) + TT + Linear transform applied to latent variables (xDim x xDim) + """ + xDim = l.shape[1] + if xDim == 1: + TT = np.sqrt(np.dot(l.T, l)) + Lorth = rdiv(l, TT) + Xorth = np.dot(TT, x) + else: + UU, DD, VV = sp.linalg.svd(l, full_matrices=False) + # TT is transform matrix + TT = np.dot(np.diag(DD), VV) + + Lorth = UU + Xorth = np.dot(TT, x) + return Xorth, Lorth, TT + + +def segment_by_trial(seq, x, fn): + """ + Segment and store data by trial. 
+ + Parameters + ---------- + + seq + Data structure that has field T, the number of timesteps + x : np.ndarray + Data to be segmented (any dimensionality x total number of timesteps) + fn + New field name of seq where segments of X are stored + + Returns + ------- + + seq_new + Data structure with new field `fn` + + Raises + ------ + ValueError + If `seq['T']) != x.shape[1]`. + + """ + if np.sum(seq['T']) != x.shape[1]: + raise (ValueError, 'size of X incorrect.') + + dtype_new = [(i, seq[i].dtype) for i in seq.dtype.names] + dtype_new.append((fn, np.object)) + seq_new = np.empty(len(seq), dtype=dtype_new) + for dtype_name in seq.dtype.names: + seq_new[dtype_name] = seq[dtype_name] + + ctr = 0 + for n, T in enumerate(seq['T']): + seq_new[n][fn] = x[:, ctr:ctr + T] + ctr += T + + return seq_new diff --git a/elephant/spike_train_generation.py b/elephant/spike_train_generation.py index d125bb8c6..7a80aecfd 100644 --- a/elephant/spike_train_generation.py +++ b/elephant/spike_train_generation.py @@ -331,7 +331,7 @@ def homogeneous_poisson_process(rate, t_start=0.0 * ms, t_stop=1000.0 * ms, """ if not isinstance(t_start, Quantity) or not isinstance(t_stop, Quantity): raise ValueError("t_start and t_stop must be of type pq.Quantity") - rate = rate.rescale((1 / t_start).units) + rate = rate.rescale(1 / t_start.units) mean_interval = 1 / rate.magnitude return _homogeneous_process( np.random.exponential, (mean_interval,), rate, t_start, t_stop, diff --git a/elephant/test/test_gpfa.py b/elephant/test/test_gpfa.py new file mode 100644 index 000000000..195a01383 --- /dev/null +++ b/elephant/test/test_gpfa.py @@ -0,0 +1,177 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for the GPFA analysis. + +:copyright: Copyright 2014-2016 by the Elephant team, see AUTHORS.txt. +:license: Modified BSD, see LICENSE.txt for details. 
+""" + +import sys +import unittest + +import neo +import numpy as np +import quantities as pq +from numpy.testing import assert_array_equal, assert_array_almost_equal + +from elephant.spike_train_generation import homogeneous_poisson_process + +try: + import sklearn +except ImportError: + HAVE_SKLEARN = False +else: + HAVE_SKLEARN = True + from elephant.gpfa_src import gpfa_util + from elephant.gpfa import gpfa + +python_version_major = sys.version_info.major + + +@unittest.skipUnless(HAVE_SKLEARN, 'requires sklearn') +class GPFATestCase(unittest.TestCase): + def setUp(self): + def gen_gamma_spike_train(k, theta, t_max): + x = [] + for i in range(int(3 * t_max / (k*theta))): + x.append(np.random.gamma(k, theta)) + s = np.cumsum(x) + return s[s < t_max] + + def gen_test_data(rates, durs, shapes=(1, 1, 1, 1)): + s = gen_gamma_spike_train(shapes[0], 1./rates[0], durs[0]) + for i in range(1, 4): + s_i = gen_gamma_spike_train(shapes[i], 1./rates[i], durs[i]) + s = np.concatenate([s, s_i + np.sum(durs[:i])]) + return s + + self.n_iters = 10 + self.bin_size = 20 * pq.ms + + # generate data1 + rates_a = (2, 10, 2, 2) + rates_b = (2, 2, 10, 2) + durs = (2.5, 2.5, 2.5, 2.5) + self.data1 = [] + for tr in range(1): + np.random.seed(tr) + n1 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1*pq.s, + t_start=0*pq.s, t_stop=10*pq.s) + n2 = neo.SpikeTrain(gen_test_data(rates_a, durs), units=1*pq.s, + t_start=0*pq.s, t_stop=10*pq.s) + n3 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1*pq.s, + t_start=0*pq.s, t_stop=10*pq.s) + n4 = neo.SpikeTrain(gen_test_data(rates_b, durs), units=1*pq.s, + t_start=0*pq.s, t_stop=10*pq.s) + self.data1.append((tr, [n1, n2, n3, n4])) + self.x_dim = 4 + + # generate data2 + np.random.seed(27) + self.data2 = [] + n_trials = 10 + n_channels = 20 + for trial in range(n_trials): + rates = np.random.randint(low=1, high=100, size=n_channels) + spike_times = [homogeneous_poisson_process(rate=rate*pq.Hz) + for rate in rates] + self.data2.append((trial, spike_times)) + + def test_data1(self): + params_est, seqs_train, fit_info = gpfa( + self.data1, x_dim=self.x_dim, em_max_iters=self.n_iters) + self.assertEqual(fit_info['bin_size'], 20*pq.ms) + self.assertEqual(fit_info['min_var_frac'], 0.01) + self.assertTrue(np.all(fit_info['has_spikes_bool'])) + self.assertAlmostEqual(fit_info['log_likelihood'], -27.222600197474762) + # Since data1 is inherently 2 dimensional, only the first two + # dimensions of xorth should have finite power. 
+ for i in [0, 1]: + self.assertNotEqual(seqs_train['xorth'][0][i].mean(), 0) + self.assertNotEqual(seqs_train['xorth'][0][i].var(), 0) + for i in [2, 3]: + self.assertEqual(seqs_train['xorth'][0][i].mean(), 0) + self.assertEqual(seqs_train['xorth'][0][i].var(), 0) + + def test_invalid_input_data(self): + invalid_data = [(0, [0, 1, 2])] + invalid_bin_size = 10 + with self.assertRaises(ValueError): + gpfa(data=invalid_data) + with self.assertRaises(ValueError): + gpfa(data=[]) + with self.assertRaises(ValueError): + gpfa(data=self.data2, bin_size=invalid_bin_size) + + def test_data2(self): + params_est, seqs_train, fit_info = gpfa( + self.data2, bin_size=self.bin_size, x_dim=8, + em_max_iters=self.n_iters) + self.assertEqual(fit_info['bin_size'], self.bin_size, + "Input and output bin_size don't match") + n_trials = len(self.data2) + t_start = self.data2[0][1][0].t_stop + t_stop = self.data2[0][1][0].t_start + n_bins = int(((t_start - t_stop) / self.bin_size).magnitude) + assert_array_equal(seqs_train['T'], [n_bins,] * n_trials) + assert_array_equal(seqs_train['trialId'], np.arange(n_trials)) + for key in ['y', 'xsm', 'Vsm', 'VsmGP', 'xorth']: + self.assertEqual(len(seqs_train[key]), n_trials, + msg="Failed ndarray field {0}".format(key)) + self.assertEqual(len(seqs_train), n_trials) + + def test_get_seq_sqrt(self): + data = [self.data2[0]] + seqs = gpfa_util.get_seq(data, bin_size=self.bin_size) + seqs_not_sqrt = gpfa_util.get_seq(data, bin_size=self.bin_size, + use_sqrt=False) + for common_key in ('trialId', 'T'): + self.assertEqual(seqs[common_key], seqs_not_sqrt[common_key]) + self.assertEqual(seqs['y'].shape, seqs_not_sqrt['y'].shape) + + def test_cut_trials_inf(self): + same_data = gpfa_util.cut_trials(self.data2, seg_length=np.Inf) + assert same_data is self.data2 + + def test_cut_trials_zero_length(self): + seqs = gpfa_util.get_seq(self.data2, bin_size=self.bin_size) + with self.assertRaises(ValueError): + gpfa_util.cut_trials(seqs, seg_length=0) + + def test_cut_trials_same_length(self): + data = [self.data2[0]] + seqs = gpfa_util.get_seq(data, bin_size=self.bin_size) + seg_length = seqs[0]['T'] + seqs_cut = gpfa_util.cut_trials(seqs, seg_length=seg_length) + assert_array_almost_equal(seqs[0]['y'], seqs_cut[0]['y']) + + @unittest.skipUnless(python_version_major == 3, "assertWarns requires 3.2") + def test_cut_trials_larger_length(self): + data = [self.data2[0]] + seqs = gpfa_util.get_seq(data, bin_size=self.bin_size) + seg_length = seqs[0]['T'] + 1 + with self.assertWarns(UserWarning): + gpfa_util.cut_trials(seqs, seg_length=seg_length) + + def test_logdet(self): + np.random.seed(27) + # generate a positive definite matrix + matrix = np.random.randn(20, 20) + matrix = matrix.dot(matrix.T) + logdet_fast = gpfa_util.logdet(matrix) + logdet_ground_truth = np.log(np.linalg.det(matrix)) + assert_array_almost_equal(logdet_fast, logdet_ground_truth) + + +def suite(): + suite = unittest.makeSuite(GPFATestCase, 'test') + return suite + + +def run(): + runner = unittest.TextTestRunner(verbosity=2) + runner.run(suite()) + + +if __name__ == "__main__": + unittest.main() diff --git a/requirements-extras.txt b/requirements-extras.txt index f46e91593..17fec19af 100644 --- a/requirements-extras.txt +++ b/requirements-extras.txt @@ -1,3 +1,4 @@ pandas>=0.14.1 scikit-learn mpi4py>=3.0.1 +tqdm