## Run CNMF source extraction on movies
Step 2 of the Caiman processing pipeline for dendritic two-photon calcium imaging movies. This part uses mmap files as input. These are created during motion correction with the Caiman toolbox (see `01_Preprocess_MC_3D.ipynb`). 

### Imports & Setup
The first cells import the various Python modules required by the notebook. In particular, a number of modules are imported from the Caiman package. In addition, we also set up the environment so that everything works as expected.

In [1]:
# Generic imports
# from __future__ import absolute_import, division, print_function
# from builtins import *

import os, platform, glob, sys, re, copy, getpass
import fnmatch, tempfile, shutil
import json, yaml
import time
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import savemat
import scipy.spatial.distance as distance
from sklearn.decomposition import PCA
from tifffile import imsave
import subprocess, ipyparallel

from IPython.display import clear_output

# Import Bokeh library
import bokeh.plotting as plotting
from bokeh.plotting import Figure, show
from bokeh.layouts import gridplot
from bokeh.models import Range1d, CrosshairTool, HoverTool, Legend
from bokeh.io import output_notebook, export_svgs
from bokeh.models.sources import ColumnDataSource

%matplotlib inline

In [2]:
# This has to be in a separate cell, otherwise it won't work.
# Bokeh output is directed to the notebook, using the INLINE resources setting.
from bokeh import resources
output_notebook(resources=resources.INLINE)

In [3]:
# on Linux we have to add the caiman folder to Pythonpath
if platform.system() == 'Linux':
    sys.path.append(os.path.expanduser('~/caiman'))
# environment variables for parallel processing:
# limit MKL / OpenBLAS / vecLib to one thread each
# (presumably so that the parallel engines do not oversubscribe the CPU cores - TODO confirm)
os.environ['MKL_NUM_THREADS']='1'
os.environ['OPENBLAS_NUM_THREADS']='1'
os.environ['VECLIB_MAXIMUM_THREADS']='1'

In [4]:
# CaImAn imports
import caiman as cm
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.components_evaluation import estimate_components_quality as estimate_q
from caiman.components_evaluation import estimate_components_quality_auto
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
from caiman.source_extraction.cnmf import utilities as cnmf_utils
import caiman_utils as cm_utils
import utils as utils
import plotting

### Read parameters from config file

In [6]:
# Read the pipeline configuration (nested dict with the sections
# 'general', 'data', 'analysis', 'mc' and 'metrics').
config_file = 'config.yml'
with open(config_file) as f:
    # safe_load only builds plain Python types (dict, list, str, numbers),
    # which is all this config needs, and cannot instantiate arbitrary objects.
    config = yaml.safe_load(f)
print(config)

{'general': {'ncpus': 4}, 'data': {'connect_storage': 0, 'storage_adress': '//130.60.51.15/Neurophysiology-Storage2', 'storage_user': 'luetcke', 'mountpoint': '/home/luetcke/neurophys-storage', 'copy_to_temp': 0, 'data_folder': '/Users/Henry/Data/temp/Dendrites_Gwen', 'animal_folder': 'M5.2', 'day_folder': 20181211, 'area_folder': 'S1', 'group_id': 'G0', 'mc_output': 'rig'}, 'analysis': {'max_trials': 10, 'n_planes': 4, 'x_crop': 0.5, 'max_group_size': 999, 'remove_bad_frames': 1}, 'mc': {'niter_rig': 5, 'pw_rigid': False, 'splits_rig': 50, 'num_splits_to_process_rig': 'None', 'strides': [24, 24], 'overlaps': [24, 24], 'splits_els': 50, 'num_splits_to_process_els': [28, 'None'], 'upsample_factor_grid': 4, 'max_deviation_rigid': 10, 'border_nan': False}, 'metrics': {'winsize': 100, 'swap_dim': False, 'resize_fact_flow': 0.2, 'iters_flow': 3}}


### Setup cluster for parallel processing

This section starts the IPython cluster (ipcluster).

In [7]:
# number of ipcluster engines to start (passed to the bash cell below)
ncpus = config['general']['ncpus']

In [8]:
%%bash -s "$ncpus"
# $1 = number of engines (value of the Python variable ncpus)
# Activate the caiman conda environment; fall back to `source activate`
# if the absolute Anaconda path does not exist on this machine.
source /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate caiman || source activate caiman
# Stop a cluster that may still be running, wait, then start a fresh one in the background.
ipcluster stop
sleep 5
ipcluster start --daemonize -n $1

bash: line 1: /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate: No such file or directory
2019-04-09 14:42:39.997 [IPClusterStop] CRITICAL | Could not read pid file, cluster is probably not running.


In [9]:
# Give the ipcluster daemon time to come up before connecting.
time.sleep(10)
# connect client
client = ipyparallel.Client()
time.sleep(2)

# Wait until all requested engines have registered. Give up after a timeout
# instead of looping forever when the cluster failed to start.
engine_timeout = 120  # seconds
t_wait_start = time.time()
while len(client) < ncpus:
    if time.time() - t_wait_start > engine_timeout:
        raise RuntimeError('Only %d of %d engines registered after %d s. '
                           'Check that ipcluster started correctly.'
                           % (len(client), ncpus, engine_timeout))
    sys.stdout.write(".")  # Give some visual feedback of things starting
    sys.stdout.flush()     # (de-buffered)
    time.sleep(0.5)

# create dview object
# (the dummy assignment is executed on all engines and blocks until they respond)
client.direct_view().execute('__a=1', block=True)
dview = client[:]
n_processes = len(client)
print('\n\nThe cluster appears to be setup. Number of parallel processes: %d' % (n_processes))



The cluster appears to be setup. Number of parallel processes: 4


### Map network drive
If the data is located on a network drive (i.e. Neurophysiology storage), we first need to connect the drive with the relevant user credentials.

In [None]:
# Settings for the remote storage are only read if connect_storage is set in the config.
connect_storage = config['data']['connect_storage']
if connect_storage:
    storage_user = config['data']['storage_user']
    storage_adress = config['data']['storage_adress']
    mountpoint = config['data']['mountpoint']
    # the password is requested interactively instead of being read from the config file
    storage_pw = getpass.getpass(prompt="Enter password for the remote storage")

In [None]:
# make sure the mountpoint exists (it is created if missing)
if connect_storage:
    os.makedirs(mountpoint, exist_ok=True)
    # list contents of the directory before mounting
    # (a bare expression inside an if-block is not displayed, so print explicitly)
    print(os.listdir(mountpoint))

In [None]:
%%bash -s "$connect_storage" "$storage_user" "$storage_pw" "$storage_adress" "$mountpoint"
# $1 = connect flag, $2 = user, $3 = password, $4 = storage address, $5 = mountpoint
# All expansions are quoted so that spaces or shell metacharacters in the
# credentials / paths do not split the command.
# NOTE(review): a password containing a comma would still break the -o option list,
# and the password is visible in the process list while mount runs.
if [ "$1" = 1 ]; then
    sudo mount -t cifs -o "username=$2,password=$3,uid=$(id -u),gid=$(id -g)" "$4" "$5"
else
    echo "Not mounting storage"
fi

In [None]:
# list contents of the mountpoint directory (only if the storage was connected)
if connect_storage:
    print(os.listdir(mountpoint))

In [None]:
# data parameters
# All folder names are cast to str (e.g. day_folder is stored as a number in the config).
data_folder = str(config['data']['data_folder'])
animal_folder = str(config['data']['animal_folder'])
day_folder = str(config['data']['day_folder'])
area_folder = str(config['data']['area_folder'])
# full path of the recording: <data_folder>/<animal>/<day>/<area>
data_folder = os.path.join(data_folder, animal_folder, day_folder, area_folder)
copy_to_temp = bool(config['data']['copy_to_temp'])

# group_id and mc_output are used below to select the matching metadata / mmap files
group_id = config['data']['group_id']
mc_output = config['data']['mc_output']
remove_bad_frames = config['analysis']['remove_bad_frames']

In [None]:
# get metadata
# Find the metadata JSON of the joined recording. Files ending in 'badFrames.json'
# are skipped explicitly: the previous fnmatch pattern '[!badFrames]' is a character
# class (any single character not in that set), not a negated suffix, so it could
# also reject valid metadata files.
meta_prefix = '%s_%s_Join_%s_' % (day_folder, area_folder, group_id)
meta = None
for file in sorted(os.listdir(data_folder)):  # sorted -> deterministic choice
    if file.startswith(meta_prefix) and file.endswith('.json') and not file.endswith('badFrames.json'):
        with open(os.path.join(data_folder, file)) as f:
            meta = json.load(f)
        break
if meta is None:
    raise FileNotFoundError('No metadata file %s*.json found in %s' % (meta_prefix, data_folder))
# trial index per frame as stored in the metadata
trial_index = np.array(meta['trial_index'])

In [None]:
# Analysis either runs in a fresh temporary directory (copy_to_temp) or directly in the data folder.
if copy_to_temp:
    # create a temp directory for analysis
    temp_dir = tempfile.mkdtemp()
    # create the data folder structure in the temporary directory
    temp_data_folder = os.path.join(temp_dir, animal_folder, day_folder, area_folder)
    os.makedirs(temp_data_folder, exist_ok=True)
    print('Created temporary analysis folder %s' % (temp_data_folder))
else:
    # no copy requested: work in place
    temp_data_folder = data_folder

In [None]:
# select mmap files
all_files = os.listdir(data_folder)
mmap_files = sorted([x for x in all_files if x.startswith('%s_%s' % (day_folder, area_folder)) 
           and x.endswith('.mmap') and mc_output in x and group_id in x and not 'remFrames' in x])
n_planes = len(mmap_files)

print('Found %d mmap files. Check allocation to planes!' % (n_planes))
for i_plane in range(n_planes):
    print('Plane %d: %s' % (i_plane, mmap_files[i_plane]))
mmap_files = [os.path.join(data_folder, x) for x in mmap_files]
frame_rate = meta['frame_rate'] / n_planes

In [None]:
# copy relevant files to temporary analysis folder
if copy_to_temp:
    t_start = time.time()
    
    bad_frame_files = [x.replace('.mmap', 'badFrames.json') for x in mmap_files]
    files_to_copy = mmap_files + bad_frame_files
    out = dview.map_sync(utils.copyFiles, files_to_copy, [temp_data_folder]*len(files_to_copy))
    mmap_files_temp = [x.replace(data_folder, temp_data_folder) for x in mmap_files]

    t_elapsed = time.time() - t_start
    print('Copied %d files to %s in %1.2f s' % (len(files_to_copy), temp_data_folder, t_elapsed))
else:
    mmap_files_temp = mmap_files

### Load data and remove bad frames

In [None]:
t_start = time.time()

bad_frames = np.array([], dtype='int64')
fname_list = []    # file names returned by removeBadFrames, one per plane
images_list = []   # image data returned by removeBadFrames, one per plane

# first, create list of bad frame indices (for all planes combined)
for fname in mmap_files:
    bad_frames = np.concatenate((bad_frames, cm_utils.getBadFrames(fname)))
# a frame flagged in several planes is only listed once
bad_frames = np.unique(bad_frames)

# remove the bad frames from all files
# NOTE(review): removeBadFrames is a project helper; presumably it drops the frames listed in
# bad_frames and writes a new mmap file (fname_rem) into temp_data_folder - confirm in caiman_utils.
for fname in mmap_files_temp:
    Yr, dims = cm_utils.loadData(fname)
    images, Y, fname_rem, bad_frames_by_trial, trial_idx = cm_utils.removeBadFrames(fname, 
                                                                                      trial_index, 
                                                                                      Yr, dims, bad_frames, 
                                                                                      temp_data_folder)
    fname_list.append(fname_rem)
    images_list.append(images)
# The trial index is replaced by the one returned for the last plane. Every plane receives the
# original trial_index and the same bad_frames inside the loop.
# NOTE(review): this overwrites trial_index, so re-running this cell without re-running the
# metadata cell starts from the already reduced index.
trial_index = trial_idx

t_elapsed = time.time() - t_start
print('Loading data / removing frames in %1.2f s' % (t_elapsed))

### Display frame average for each plane

In [None]:
# One subplot row per plane showing the mean over frames.
plt.figure(figsize=(30,30))
for ix_plane in range(n_planes):
    avg_img = np.mean(images_list[ix_plane],axis=0)
    plt.subplot(n_planes, 1, ix_plane+1)
    plt.imshow(avg_img, cmap='gray')
    plt.title('Frame average - Plane %d' % (ix_plane), fontsize=32)
# no spacing between the subplots
plt.subplots_adjust(wspace=0, hspace=0)

### Export data for manual source extraction
The following are exported to the folder where the original data is stored:
- 1 TIFF file per plane of motion corrected images with bad frames removed
- 1 MAT file per plane that contains:
    - motion corrected images with bad frames removed (images)
    - trial index for each frame (trial_index)
    - list of trial names (trial_names)
    - number of frames per trial (trial_frames)
    - frame indices of bad frames (bad_frames)

In [None]:
files_to_copy = []

# Copy of the bad-frames dict with string keys of the form 'trial_<key>'.
bad_frames_by_trial_copy = {
    'trial_%s' % (key): frames for key, frames in bad_frames_by_trial.items()
}

for ix_plane, images in enumerate(images_list):
    # --- TIFF export: same name as the mmap file, extension swapped ---
    tiff_name = fname_list[ix_plane].replace('.mmap', '.tif')
    files_to_copy.append(tiff_name)
    imsave(tiff_name, images)
    print('\nExported TIFF file for plane %d\n%s' % (ix_plane, tiff_name))

    # --- Matlab export: dictionary keys become the variable names in Matlab ---
    mdict = dict(
        images=images,
        trial_index=trial_index,
        trial_names=meta['source_file'],
        trial_frames=meta['source_frames'],
        bad_frames=bad_frames,
        bad_frames_by_trial=bad_frames_by_trial_copy,
    )
    matfile_name = fname_list[ix_plane].replace('.mmap', '.mat')
    files_to_copy.append(matfile_name)
    savemat(matfile_name, mdict=mdict, long_field_names=True)
    print('\nExported MAT file for plane %d\n%s' % (ix_plane, matfile_name))

# When working in a temporary folder, copy the exported files back to the data folder.
if copy_to_temp:
    t_start = time.time()
    out = dview.map_sync(utils.copyFiles, files_to_copy, [data_folder]*len(files_to_copy))
    print('Copied files to %s in %1.2f s' % (data_folder, time.time()-t_start))

### Specify if plane contains dendritic signals
CaImAn uses different initialization methods depending on whether the signals are dendritic or somatic. Therefore, we need to specify the types of signal expected in each plane.

In [None]:
# One flag per plane: True selects 'sparse_nmf' initialization, False selects 'greedy_roi'
# (see the CNMF cell below).
# NOTE(review): hard-coded for 4 planes; the list needs one entry per plane (n_planes).
is_dendritic = [True, True, True, True]

### Parameters for source extraction
Next, we define the important parameters for calcium source extraction. These parameters will have to be iteratively refined for the respective datasets.


In [None]:
# dataset dependent parameters
decay_time = 0.4                            # length of a typical transient in seconds

# parameters for source extraction and deconvolution
p = 1                         # order of the autoregressive system
gnb = 2                       # number of global background components
merge_thresh = 0.8            # merging threshold, max correlation allowed
rf = [7,14]                   # half-size of the patches in pixels. e.g., if rf=25, patches are 50x50
rf = None                     # overrides the value above, so rf=None is what gets passed to CNMFParams
stride_cnmf = 3               # amount of overlap between the patches in pixels
K = 20                        # max. number of components per patch
gSig = [7,35]                 # expected half size of neurons in pixels

method_init = 'sparse_nmf'    # initialization method (if analyzing dendritic data use 'sparse_nmf', else 'greedy_roi')
#alpha_snmf = 10e2            # sparsity penalty for dendritic data analysis through sparse NMF
alpha_snmf = 100
normalize_init = True         # default is True
sigma_smooth_snmf = (0.5, 0.5, 0.5) # defaults to (0.5, 0.5, 0.5)
max_iter_snmf = 500           # defaults to 500

ssub = 1                      # spatial subsampling during initialization
tsub = 1                      # temporal subsampling during initialization


# Parameters for component evaluation
quality_params = {
    'min_SNR': 3,               # signal to noise ratio for accepting a component
    'rval_thr': 0.99,           # space correlation threshold for accepting a component
    'use_cnn': False,           # use CNN classifier
    'cnn_thr': 0.95,            # threshold for CNN based classifier
    'cnn_lowest': 0.1           # neurons with cnn probability lower than this value are rejected
}

In [None]:
# create Parameters object
# unspecified parameters get default values
opts_dict = {'fnames': fname_list[0],
             'fr': frame_rate,
            'decay_time': decay_time,
            'p': p,
            'nb': gnb,
            'rf': rf,
            'K': K,
             'gSig': gSig,
            'stride': stride_cnmf,
            'method_init': method_init,
            'alpha_snmf': alpha_snmf,
            'normalize_init': normalize_init,
            'sigma_smooth_snmf': sigma_smooth_snmf,
            'max_iter_snmf': max_iter_snmf,
            'rolling_sum': True,
            'only_init': True,
            'ssub': ssub,
            'tsub': tsub}

opts = params.CNMFParams(params_dict=opts_dict)

opts.set('quality', quality_params)

To get a dict with all parameters, use `opts.to_dict()`

#### Run CNMF on patches

In [None]:
# Two passes per plane:
#   1) fit with deconvolution switched off (p=0)
#   2) refit with the requested AR order p to refine and deconvolve
opts.set('temporal', {'p': 0})
cnm_list = []

t_start = time.time()
for ix_plane in range(n_planes):
    # every plane works on its own copy of the shared parameters
    opts_plane = copy.deepcopy(opts)
    opts_plane.set('data', {'fnames': [fname_list[ix_plane]]})
    # initialization method depends on the type of signal in the plane
    opts_plane.set('init', {'method_init': 'sparse_nmf' if is_dendritic[ix_plane] else 'greedy_roi'})

    # first pass
    cnm = cnmf.CNMF(n_processes, params=opts_plane, dview=dview)
    cnm.fit(images_list[ix_plane])

    # second pass with deconvolution enabled; only the refitted object is kept
    cnm.params.set('temporal', {'p': p})
    cnm_list.append(cnm.refit(images_list[ix_plane], dview=dview))

    # discard the cell output produced while fitting this plane
    clear_output()

t_elapsed = time.time() - t_start
print('\nFinished Source Extract in %1.2f s' % (t_elapsed))

### Evaluate components

In [None]:
# Evaluate the components of each plane; the indices of accepted / rejected
# components end up in cnm.estimates.idx_components / idx_components_bad.
for ix_plane, cnm in enumerate(cnm_list):
    # Use a per-plane copy of the fitted parameters under its own name, so that the
    # global `opts` (template for the CNMF cell) is not overwritten by the last plane.
    eval_params = copy.deepcopy(cnm.params)
    cnm.estimates.evaluate_components(images_list[ix_plane], eval_params, dview=dview)
    print('\nPlane %d' % (ix_plane))
    print('Found %d good / %d bad components\n' % (len(cnm.estimates.idx_components), 
                                                 len(cnm.estimates.idx_components_bad)))

### Save CNMF results
After the time consuming steps of the source extraction are completed, it makes sense to store the results. Variables are stored in the Numpy-specific `.npz` format.

In [None]:
npz_basename = '%s_%s_Join_%s_results_CNMF.npz' % (day_folder, area_folder, group_id)
npz_name = os.path.join(temp_data_folder, npz_basename)

# notebook state needed to continue the analysis after reloading the file
nb_params = {
    'data_folder': data_folder,
    'day_folder': day_folder,
    'area_folder': area_folder,
    'group_id': group_id,
    'meta': meta,
    'trial_index': trial_index,
    'bad_frames': bad_frames,
    'bad_frames_by_trial': bad_frames_by_trial
}

# drop the reference to the cluster view before saving
# (presumably because the ipyparallel view cannot be pickled - TODO confirm)
for cnm in cnm_list:
    cnm.dview = None
np.savez(npz_name, cnm_list=cnm_list, images_list=images_list, nb_params=nb_params)
print('Saved CNMF results in %s' % (npz_name))

# The file name has to be formatted into the message with `%`; passing it as a second
# argument to print() leaves the literal '%s' in the output.
if copy_to_temp:
    out = utils.copyFiles(npz_name, data_folder)
    print('Copied %s to %s' % (npz_name, data_folder))
    print('\n\nFor further analysis, load CNMF results from file:\n%s' % (os.path.join(data_folder, npz_basename)))
else:
    print('\n\nFor further analysis, load CNMF results from file:\n%s' % (npz_name))

### Delete temporary folder

In [None]:
# temp_dir only exists if the data was copied to a temporary folder
if copy_to_temp:
    # delete the temp. dir (including everything inside it)
    shutil.rmtree(temp_dir)

### Stop the cluster

In [10]:
%%bash
# Activate the caiman environment (with fallback) and stop the ipcluster started earlier.
source /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate caiman || source activate caiman
ipcluster stop
sleep 1

bash: line 1: /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate: No such file or directory
2019-04-09 14:43:42.787 [IPClusterStop] Stopping cluster [pid=68236] with [signal=<Signals.SIGINT: 2>]


### Load CNMF results
Load results from a previous CNMF run. If the analysis is continued right away, the load step can be skipped.

If you reload the notebook, you have to import the required modules (i.e. run cells 1 - 4).

In [11]:
load_data = True

if load_data:
    # NOTE(review): absolute, machine-specific path - adjust to the location of your results file.
    npz_name = '/Users/Henry/Data/temp/Dendrites_Gwen/M5.2/20181211/S1/20181211_S1_Join_G0_results_CNMF.npz'
    # The archive contains pickled Python objects (CNMF objects, parameter dict). Recent
    # NumPy versions refuse to unpickle them unless allow_pickle=True is given.
    # Only load files from a trusted source: unpickling can execute arbitrary code.
    npz_content = np.load(npz_name, allow_pickle=True)
    # [()] unwraps the 0-d object array that holds the dict
    nb_params = npz_content['nb_params'][()]
    cnm_list = npz_content['cnm_list'][()]
    images_list = npz_content['images_list'][()]

    print('Loaded file %s (%d planes)' % (os.path.basename(npz_name), len(cnm_list)))
else:
    # continue with the variables of the current session
    nb_params = {
        'data_folder': data_folder,
        'day_folder': day_folder,
        'area_folder': area_folder,
        'group_id': group_id,
        'meta': meta,
        'trial_index': trial_index,
        'bad_frames': bad_frames,
        'bad_frames_by_trial': bad_frames_by_trial
    }

Loaded file 20181211_S1_Join_G0_results_CNMF.npz (4 planes)


Pre-compute local correlations as they will be used a lot.

In [12]:
# Local correlation image for every plane (NaN entries are set to 0).
Cn = []
for img in images_list:
    # move the first axis to the end before computing the correlations
    cc = cm.local_correlations(np.transpose(img, (1, 2, 0)))
    nan_mask = np.isnan(cc)
    cc[nan_mask] = 0
    Cn.append(cc)

Show average images of the different planes.

In [28]:
# Build one Bokeh figure per plane (one grid row each) showing the mean image.
grid_array = []
for ix_plane, image in enumerate(images_list):
    grid_array.append([])
    # mean over the first axis of the plane's image stack
    image_neurons=np.mean(image,axis=0)
    # figure is drawn at twice the pixel size of the image
    grid_array[ix_plane].append(Figure(plot_width=image_neurons.shape[1]*2, plot_height=image_neurons.shape[0]*2, toolbar_location="below", 
                                  title="Plane %d" % (ix_plane), x_range = [0, image_neurons.shape[1]], y_range = [0, image_neurons.shape[0]]))
    # the image is flipped vertically before plotting
    grid_array[ix_plane][0].image(image=[np.flipud(image_neurons)], x=0, y=0, dw=image_neurons.shape[1], dh=image_neurons.shape[0], palette='Greys256')

grid = gridplot(grid_array, sizing_mode='fixed', toolbar_location='left')
show(grid)

View traces of good or bad components.

In [109]:
# Re-import the local `plotting` module so that edits to plotting.py are picked up
# without restarting the kernel.
from importlib import reload
reload(plotting)

<module 'plotting' from '/Users/Henry/Data/Projects/CaImAn_DendriteAnalysis/plotting.py'>

In [110]:
good_or_bad = 'good'   # which set of components to show: 'good' or 'bad'
plane_ix = 0           # plane to show

cnm = cnm_list[plane_ix]
images = images_list[plane_ix]
if good_or_bad == 'good':
    idx = cnm.estimates.idx_components
elif good_or_bad == 'bad':
    idx = cnm.estimates.idx_components_bad
else:
    # fail early instead of silently re-using an `idx` left over from a previous run
    raise ValueError("good_or_bad must be 'good' or 'bad', got %r" % (good_or_bad,))

# trace per selected component: C plus YrA
Yr = cnm.estimates.YrA[idx] + cnm.estimates.C[idx]

component_images = plotting.nb_view_patches(Yr, idx, cnm.estimates.A, cnm.estimates.C, cnm.estimates.b, cnm.estimates.f, 
                                             cnm.dims[0], cnm.dims[1], YrA=cnm.estimates.YrA, 
                                             image_neurons=np.mean(images,axis=0), denoised_color='red', 
                                             title="Plane %d - %s components" % (plane_ix, good_or_bad))

### Accessing parameters in the CNM object
This cell shows how to access relevant parameters stored in the `cnm` object.

```python
A, C, b, f, YrA, S, sn = cnm.estimates.A, cnm.estimates.C, cnm.estimates.b, cnm.estimates.f, cnm.estimates.YrA, cnm.estimates.S, cnm.estimates.sn
```

<hr>

**Explanation of parameters:**
- A   ... n_pixel x n_components sparse matrix (component locations)
- C   ... n_component x t np.array (fitted signal)
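- b   ... n_pixel x n_background np.array (spatial background components)
- f   ... n_background x t np.array (temporal background components; the number of background components is set by `gnb`)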
- YrA ... n_component x t np.array (residual)
- S   ... deconvolved signal (spike rate(ish))
- sn  ... n_pixel np.array (estimated noise level of each pixel)

<hr>

**Convert sparse component matrix to dense matrix:**
```python
A_dense = A.todense()
```

<hr>

**Indices of good and bad components:**
``` python
idx_comps = cnm.estimates.idx_components
idx_comps_bad = cnm.estimates.idx_components_bad
```

<hr>

### Component post-processing
Create a plot with good components on background image and as component map. This plot is saved as a PNG file in the data folder.

In [None]:
def plot_component_contours(cnm, images, idx_comps, fig_name):
    """Plot the selected spatial components of one plane and save the figure.

    The figure has one row per component: the left panel shows the component
    contour on the mean image, the right panel shows the component map.

    Parameters
    ----------
    cnm : CNMF object
        Fitted object; the spatial components are read from ``cnm.estimates.A``.
    images : array
        Image data of the plane; the mean over axis 0 is used as background.
    idx_comps : sequence of int
        Column indices of ``cnm.estimates.A`` to plot.
    fig_name : str
        Output file name passed to ``plt.savefig``.
    """
    avg_img = np.mean(images,axis=0)

    A = cnm.estimates.A
    # sparse matrices are densified, anything without todense() is used as is
    if hasattr(A, 'todense'):
        A = A.todense()

    n_comps = len(idx_comps)
    fig = plt.figure(figsize=(30,30))
    try:
        for i_comp, comp_ix in enumerate(idx_comps):
            # left panel: contour on the mean image
            plt.subplot(n_comps, 2, 2*i_comp + 1)
            if i_comp == 0:
                plt.title('CNMF Components', fontsize=24)
            cm.utils.visualization.plot_contours(A[:,comp_ix], avg_img, cmap='gray', 
                                                 colors='r', display_numbers=False)

            # right panel: component map (column of A reshaped in Fortran order)
            component_img = np.array(np.reshape(A[:,comp_ix], avg_img.shape, order='F'))
            plt.subplot(n_comps, 2, 2*i_comp + 2)
            plt.imshow(component_img)
            plt.title('Component %1.0f' % (i_comp), fontsize=24)

        plt.tight_layout()
        plt.savefig(fig_name)
    finally:
        # always release the figure, also if plotting or saving failed
        plt.close(fig)

    print('Saved file %s' % (fig_name))

In [None]:
# Save one PNG per plane with all components accepted by the evaluation.
for ix_plane, cnm in enumerate(cnm_list):
    idx_comps = cnm.estimates.idx_components
    fig_name = '%s_%s_Join_%s_P%d_Components_good.png' % (
        nb_params['day_folder'], nb_params['area_folder'], nb_params['group_id'], ix_plane)
    fig_name = os.path.join(nb_params['data_folder'], fig_name)
    plot_component_contours(cnm, images_list[ix_plane], idx_comps, fig_name)

#### Remove spurious components
Try to remove spurious 'good' components consisting of many small spots spread over a large part of the field-of-view. This is done in two ways:
1. Calculate the component sparsity (i.e. the fraction of pixels with value 0).
2. Calculate the cosine distance between non-zero pixels.

For good components, the sparsity should be high (i.e. > 0.99) or the distance between component pixels should be small (i.e. < 0.01)

To make the distinction clearer, it helps to threshold the component map before, i.e. at 10% of the max. value.

In [None]:
# A component is kept if its sparsity is above sparsity_threshold OR its distance is
# below distance_threshold (see next cell).
component_threshold = 0.1 # threshold at thresh*max before calculating sparsity / cosine distance
sparsity_threshold = 0.99 # components with less sparsity will be excluded
distance_threshold = 0.01 # components with larger average cosine distance will be excluded

In [None]:
# Work on a copy so that cnm_list keeps the original component selection.
cnm_list_processed = copy.deepcopy(cnm_list)

for ix_plane, cnm in enumerate(cnm_list):
    A = cnm.estimates.A
    # densify if possible (any error during conversion is ignored and A is used as is)
    try:
        A = A.todense()
    except:
        pass
    idx_components = cnm.estimates.idx_components
    avg_img = np.mean(images_list[ix_plane],axis=0)
    
    sparsity = []
    dist = []
    for ix in idx_components:
        # component map with the shape of the mean image
        component_img = np.array(np.reshape(A[:,ix], avg_img.shape, order='F'))
        # set everything below component_threshold * max to zero
        component_img[component_img < component_threshold*np.max(component_img)] = 0
        zeros = np.where(component_img==0)
        # fraction of zero pixels
        sparsity.append((zeros[0].shape / np.prod(avg_img.shape))[0])
        # np.nonzero returns (row indices, column indices); pdist treats these as two
        # observations, so this is the cosine distance between the row-index vector and
        # the column-index vector of the non-zero pixels.
        # NOTE(review): confirm that this is the intended measure of spatial spread.
        dist.append(distance.pdist(np.nonzero(component_img), metric='cosine')[0])
        
    # keep a component if either criterion is met
    idx_components_proc = [x for (ix,x) in enumerate(idx_components) if sparsity[ix]>sparsity_threshold or dist[ix]<distance_threshold]
    
    fig_name = os.path.join(nb_params['data_folder'], '%s_%s_Join_%s_P%d_Components_processed.png' % 
                            (nb_params['day_folder'], nb_params['area_folder'], nb_params['group_id'], ix_plane))
    
    plot_component_contours(cnm, images_list[ix_plane], idx_components_proc, fig_name)
    
    # store the reduced selection in the copy only
    cnm_list_processed[ix_plane].estimates.idx_components = idx_components_proc

#### Merge correlated components

First, get an idea how much different components are correlated. For this, we calculate the cross-correlation between the different spatial components and plot the results for different planes.

In [None]:
# component cross-correlation
vmin = 0 # min display cutoff
vmax = 1 # max display cutoff

fig, axes = plt.subplots(nrows=1, ncols=len(cnm_list_processed), figsize=(20,20))

for ix_plane, cnm in enumerate(cnm_list_processed):
    A_dense = cnm.estimates.A.todense()
    
    idx_components = cnm.estimates.idx_components
    cc_mat = np.zeros([len(idx_components), len(idx_components)])
    for ix1, c1 in enumerate(idx_components):
        for ix2, c2 in enumerate(idx_components):
            comp1 = np.array(np.reshape(A_dense[:,c1], [1,np.prod(avg_img.shape)]))
            comp2 = np.array(np.reshape(A_dense[:,c2], [1,np.prod(avg_img.shape)]))
            cc_mat[ix1,ix2] = np.corrcoef(comp1, comp2)[0][1]

    im = axes[ix_plane].imshow(cc_mat, vmin=vmin, vmax=vmax, cmap='jet', aspect='equal')
    axes[ix_plane].set_title('Component correlation\nPlane %d' % (ix_plane), fontsize=16)

fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.45, 0.03, 0.1])
fig.colorbar(im, cax=cbar_ax);

Next, run a PCA to determine the additional variance explained by adding components. If some components are highly correlated, then most of the variance should be explained by a subset of components.

In [None]:
# squeeze=False keeps `axes` two-dimensional, so indexing also works for a single plane
fig, axes = plt.subplots(nrows=1, ncols=len(cnm_list_processed), figsize=(20,5), squeeze=False)
axes = axes[0]

for ix_plane, cnm in enumerate(cnm_list_processed):
    # Plain ndarray (n_pixels x n_good_components); recent scikit-learn versions
    # reject the np.matrix returned by todense().
    A_good = np.asarray(cnm.estimates.A.todense())[:, cnm.estimates.idx_components]

    pca = PCA(n_components=A_good.shape[1])
    pca.fit(A_good)

    # Plot explained variance
    axes[ix_plane].plot(np.arange(1,A_good.shape[1]+1), np.cumsum(pca.explained_variance_ratio_),'o-')
    # dashed reference line at 90% explained variance
    axes[ix_plane].plot([1,A_good.shape[1]+1],[0.9, 0.9],'--')
    axes[ix_plane].set_xlabel('number of components')
    axes[ix_plane].set_ylabel('cumulative explained variance');
    axes[ix_plane].set_title('Explained variance\nPlane %d' % (ix_plane), fontsize=16)
    
plt.tight_layout()

Based on the above plot, we can specify how many components per plane should be kept. 90% explained variance is a suggested cut-off.

In [None]:
# set number of components to keep per plane
components_to_keep = [3, 2, 3, 7]

In [None]:
# Select spatial components based on PCA components
cnm_list_pca = copy.deepcopy(cnm_list_processed)

for ix_plane, cnm in enumerate(cnm_list_processed):
    
    print('\nPlane: %d' % (ix_plane))
    
    A = cnm_list_processed[ix_plane].estimates.A
    try:
        A = A.todense()
    except:
        pass
    
    A_good = A[:,cnm_list_processed[ix_plane].estimates.idx_components]
    pca = PCA(n_components=components_to_keep[ix_plane])
    pca.fit(A_good)
    max_idx = np.argmax(pca.components_, axis=1)
    
    # rearrange components
    cnm_list_pca[ix_plane].estimates.idx_components = [cnm_list_processed[ix_plane].estimates.idx_components[x] for x in max_idx]
    
    fig_name = os.path.join(nb_params['data_folder'], '%s_%s_Join_%s_P%d_Components_pca.png' % 
                            (nb_params['day_folder'], nb_params['area_folder'], nb_params['group_id'], ix_plane))
    
    plot_component_contours(cnm_list_pca[ix_plane], images_list[ix_plane], cnm_list_pca[ix_plane].estimates.idx_components, fig_name)

Create component matrices with good components

In [None]:
# One (height x width x n_components) array of component maps per plane.
component_matrix_list = []
for ix_plane, cnm in enumerate(cnm_list_pca):
    idx_comps = cnm.estimates.idx_components
    A = cnm.estimates.A
    # sparse matrices are densified, anything without todense() is used as is
    if hasattr(A, 'todense'):
        A = A.todense()

    # every selected column of A reshaped (Fortran order) to the image dimensions
    component_imgs = [np.array(np.reshape(A[:, ix], cnm.dims, order='F')) for ix in idx_comps]

    if component_imgs:
        # stacking the whole list at once gives a 3-D result for a single component too
        component_matrix = np.dstack(component_imgs)
    else:
        # Plane without components: store an empty stack instead of re-using the matrix
        # of the previous plane (or failing on an undefined variable).
        component_matrix = np.zeros(tuple(cnm.dims) + (0,))

    component_matrix_list.append(component_matrix)

#### Extract DF/F values and discard bad components
The CaImAn function `detrend_df_f` uses a sliding window percentile filter to determine the baseline and compute DFF.
Note: for noisy traces and / or high levels of activity, `detrend_df_f` sometimes seems to produce unexpected results (i.e. traces whose shape differs a lot from the extracted component traces). It might be better to use the extracted component traces (see below) for downstream analysis.

In [None]:
# Compute DF/F in place for every plane; the result is stored in cnm.estimates.F_dff.
# Components are not discarded here (no select_components call), so all arrays stay
# indexable with the original component indices.
for ix_plane, cnm in enumerate(cnm_list_pca):
    cnm.estimates.detrend_df_f(quantileMin=8, frames_window=250)

Interactive plot of selected components

In [None]:
# Interactive component viewer for each plane, on top of the local correlation image.
for ix_plane, cnm in enumerate(cnm_list_pca):
    print('Plane %d' % (ix_plane))
    component_list = cnm.estimates.idx_components
    print(component_list)
    if len(component_list) == 0:
        raise Exception('No valid components')
    elif len(component_list) == 1: # address caiman bug if only 1 component
        # workaround: pass the single component twice
        print('Found 1 component. Duplicating due to Caiman bug.')
        component_list = np.append(component_list, component_list[0])
    cnm.estimates.nb_view_components(img=Cn[ix_plane], denoised_color='red', idx=component_list)

In [None]:
# Take metadata and trial index from nb_params so that this also works after loading results from file.
meta = nb_params['meta']
source_files = meta['source_file']
source_frames = np.array(meta['source_frames'])
trial_index = nb_params['trial_index']

# get corresponding trial name for each frame
# (trial name = source file name without the '_crop.tif' suffix)
trial_names = [x.replace('_crop.tif','') for x in source_files]
trial_names_frames = [trial_names[x] for x in trial_index]

### Create stacked plot of components
Plot stacked traces for some or all good components. The source data and plane can be selected. The plot also shows the trial for each frame.
Types of source data that can be plotted:
- F_dff ... detrended DF/F
- YrA ... residual
- C ... denoised signal
- Y_r ... ROI signal (C + YrA)
- S ... Deconvolved signal

In [None]:
comp_idx = None # select index of components to plot, e.g. [0,1,2] / use None to plot all good components
source = 'Y_r' # select the data that should be plotted ('F_dff', 'Y_r', 'C', 'S', 'YrA')
ix_plane = 1 # select plane that should be plotted

cnm = cnm_list_pca[ix_plane]

if comp_idx is None:
    comp_idx = cnm.estimates.idx_components
    print(comp_idx)

if source == 'F_dff':
    source_data = cnm.estimates.F_dff
elif source == 'Y_r':
    source_data = cnm.estimates.YrA + cnm.estimates.C
elif source == 'YrA':
    source_data = cnm.estimates.YrA
elif source == 'C':
    source_data = cnm.estimates.C
elif source == 'S':
    source_data = cnm.estimates.S
else:
    raise Exception('Specified source_data is not implemented')

# Frame rate per plane, derived from the stored metadata (same formula as in the
# file selection cell) so that this cell also works after loading results from file.
frame_rate = nb_params['meta']['frame_rate'] / len(cnm_list_pca)
# time axis in seconds
t = np.arange(0, source_data.shape[-1]) / frame_rate
    
source_data = source_data[comp_idx,:]

# Named trace_fig rather than `p`, which is the AR order used by the CNMF cell.
trace_fig = Figure(plot_width=900, plot_height=600, title=('%s %s CNMF Results' % (nb_params['day_folder'], nb_params['area_folder'])))    
# legend shows the position in the plot and, in brackets, the component index
legend_text = ['Component %d (%d)' % (x, comp_idx[x]) for x in range(source_data.shape[0])]

# this is the call to the plotting function (change args. as required)
utils.plotTimeseries(trace_fig, t, source_data, legend=legend_text, stack=True, xlabel='Time [s]', ylabel=source,
                     output_backend='canvas', trial_index=trial_index, trial_names_frames=trial_names_frames)

### Split up by trials and save as .mat

In [None]:
# First, check if number of frames match
if not (np.sum(source_frames)-len(nb_params['bad_frames'])) == cnm_list_pca[0].estimates.F_dff.shape[-1]:
    raise Exception('Sum of source frames minus number of bad frames must be equal to number of timepoints.')

In [None]:
bad_frames_by_trial = nb_params['bad_frames_by_trial']

for ix_plane, cnm in enumerate(cnm_list_pca):

    # per-trial results, keyed by a Matlab-compatible field name
    results_dff = dict()
    results_Yr = dict()
    results_C = dict()
    results_S = dict()
    removed_frames = dict()
    
    comp_idx = cnm.estimates.idx_components
    
    # traces of the selected components only
    F_dff = cnm.estimates.F_dff[comp_idx,:]
    C = cnm.estimates.C[comp_idx,:]
    Y_r = cnm.estimates.YrA[comp_idx,:] + C
    S = cnm.estimates.S[comp_idx,:]
    
    for ix, trial_file in enumerate(source_files):
        # get indices for current trial's frames
        trial_indices = np.where(trial_index==ix)[0]

        if ix in bad_frames_by_trial:
            removed_frames_trial = bad_frames_by_trial[ix]
        else:
            removed_frames_trial = []

        # create valid Matlab variables / field names:
        # part of the source file name before the first '/', prefixed with 'x'.
        # split() keeps the whole name if there is no '/', whereas slicing with
        # find() == -1 would cut off the last character.
        field_name = str('x' + trial_file.split('/')[0]).replace('_Live','').replace('-','_')
        results_dff[field_name] = F_dff[:,trial_indices]
        results_Yr[field_name] = Y_r[:,trial_indices]
        results_C[field_name] = C[:,trial_indices]
        results_S[field_name] = S[:,trial_indices]
        removed_frames[field_name] = removed_frames_trial
        
    # dictionary for saving as mat file (field names will be variable names in Matlab)
    mdict = {
        'trials': [str(x) for x in source_files], 
        'dff_trial': results_dff,
        'Yr_trial': results_Yr,
        'C_trial': results_C,
        'Deconv_trial': results_S,
        'removed_frames': removed_frames,
        'mean_image': np.mean(images_list[ix_plane], axis=0),
        'spatial_components': component_matrix_list[ix_plane],
        'local_correlations': Cn[ix_plane]
      }
    
    # save the .mat file
    # matfile_name already contains the data folder; joining the folder a second time
    # would duplicate it whenever data_folder is a relative path.
    matfile_name = os.path.join(nb_params['data_folder'], '%s_%s_Join_%s_P%d_results_CNMF.mat' % 
                                (nb_params['day_folder'], nb_params['area_folder'], nb_params['group_id'], ix_plane))
    savemat(matfile_name, mdict=mdict, long_field_names=True)