## Run CNMF source extraction on movies
Step 2 of the Caiman processing pipeline for dendritic two-photon calcium imaging movies. This part uses mmap files as input. These are created during motion correction with the Caiman toolbox (see `01_Preprocess_MC_3D.ipynb`). 

### Imports & Setup
The first cells import the various Python modules required by the notebook. In particular, a number of modules are imported from the Caiman package. In addition, we also set up the environment so that everything works as expected.

In [None]:
# Generic imports
# from __future__ import absolute_import, division, print_function
# from builtins import *

import os, platform, glob, sys, re
import fnmatch
import json
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import savemat

from IPython.display import clear_output

# Import Bokeh library
import bokeh.plotting as plotting
from bokeh.plotting import Figure, show
from bokeh.layouts import gridplot
from bokeh.models import Range1d, CrosshairTool, HoverTool, Legend
from bokeh.io import output_notebook, export_svgs
from bokeh.models.sources import ColumnDataSource

%matplotlib inline

In [None]:
# This has to be in a separate cell, otherwise it won't work.
from bokeh import resources
# Enable Bokeh output in the notebook, passing the INLINE resources object.
output_notebook(resources=resources.INLINE)

In [None]:
# Environment variables for parallel processing: each of the threading
# variables listed here is set to '1'.
os.environ.update({
    'MKL_NUM_THREADS': '1',
    'OPENBLAS_NUM_THREADS': '1',
    'VECLIB_MAXIMUM_THREADS': '1',
})

# On Linux the caiman folder has to be appended to the Python path.
if platform.system() == 'Linux':
    sys.path.append(os.path.expanduser('~/caiman'))

In [None]:
# CaImAn imports
import caiman as cm
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.components_evaluation import estimate_components_quality as estimate_q
from caiman.components_evaluation import estimate_components_quality_auto
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
from caiman.source_extraction.cnmf import utilities as cnmf_utils
import caiman_utils as cm_utils

### Select files
The following need to be specified:
- data_folder ... location of the data (relative to ~/Data)
- mc_output ... select if output of rigid ('rig') or piece-wise rigid ('els') motion correction should be used (currently only 'rig' is tested and works)
- max_files ... maximum number of files to process, e.g. for testing (if 0, all files will be processed)

In [None]:
# Session to process: the four entries below select animal, recording date,
# session and group; they are also used to build file-name patterns.
animal_folder = 'M3_October_2018'
date_folder = 'M3_2018-10-02'
session_folder = 'S1'
group_id = 'G0'

mc_output = 'rig'  # motion correction output to use: 'rig' or 'els'
remove_bad_frames = True # remove bad frames specified in Json file

# create the complete path to the data folder
if platform.system() == 'Linux':
    data_folder = '/home/ubuntu/Data'
elif platform.system() == 'Darwin':
    data_folder = '/Users/Henry/Data/temp/Dendrites_Gwen'
else:
    # Without this branch data_folder would be undefined on any other
    # platform and the join below would fail with a NameError.
    # Fall back to the Data folder in the user's home directory.
    data_folder = os.path.expanduser(os.path.join('~', 'Data'))
data_folder = os.path.join(data_folder, animal_folder, date_folder, session_folder)

In [None]:
# select the mmap file created during motion correction
all_files = os.listdir(data_folder)
mmap_files = sorted([x for x in all_files if x.startswith('%s_%s' % (date_folder, session_folder)) 
           and x.endswith('.mmap') and mc_output in x and group_id in x])
n_planes = len(mmap_files)

print('Found %d mmap files. Check allocation to planes!' % (n_planes))
for i_plane in range(n_planes):
    print('Plane %d: %s' % (i_plane, mmap_files[i_plane]))

In [None]:
# Turn the bare mmap file names into full paths.
mmap_files = [os.path.join(data_folder, x) for x in mmap_files]

# get metadata
# The metadata is read from the first (alphabetically sorted) JSON file of
# this session/group that is not the bad-frames file. The bad-frames file is
# excluded explicitly: in fnmatch, '[!badFrames]' is a single-character class
# (any one character not in that set), not a negated word.
meta_pattern = '%s_%s_Join_%s_*.json' % (date_folder, session_folder, group_id)
meta = None
for file in sorted(os.listdir(data_folder)):
    if fnmatch.fnmatch(file, meta_pattern) and not file.endswith('badFrames.json'):
        # Context manager so the file handle is closed after parsing.
        with open(os.path.join(data_folder, file)) as meta_file:
            meta = json.load(meta_file)
        break

# Fail with a clear message instead of a NameError / ZeroDivisionError below.
if meta is None:
    raise FileNotFoundError(
        'No metadata file matching %s found in %s' % (meta_pattern, data_folder))
if n_planes == 0:
    raise ValueError('No mmap files were selected, cannot compute the frame rate per plane.')

trial_index = np.array(meta['trial_index'])
# The frame rate from the metadata is divided by the number of planes.
frame_rate = meta['frame_rate'] / n_planes

### Load data and remove bad frames

In [None]:
# Work on the first of the selected mmap files.
fname = mmap_files[0]
# Project helper from caiman_utils; its two return values are kept as
# Yr and dims and passed on to removeBadFrames in the next cell.
Yr, dims = cm_utils.loadData(fname)
# Trailing expression: shown as the cell output to confirm which file was loaded.
fname

In [None]:
# Project helper from caiman_utils. It receives the file name, the trial index
# from the metadata, the loaded data (Yr, dims), the remove_bad_frames flag and
# the data folder. NOTE(review): presumably frames are only dropped when
# remove_bad_frames is True - confirm in caiman_utils.
bad_frames, images, Y, fname_rem = cm_utils.removeBadFrames(fname, trial_index, Yr, dims, remove_bad_frames, data_folder)
# Trailing expression: shown as the cell output (file name used from here on).
fname_rem

### Setup cluster
The default backend mode for parallel processing is through the multiprocessing package. This will allow us to use all the cores in the VM.

In [None]:
# start the cluster (if a cluster already exists terminate it)
n_processes = 8 # number of compute processes (None to select automatically)
if 'dview' in locals() and dview is not None:
    dview.terminate()
c, dview, n_processes = cm.cluster.setup_cluster(
    backend='local', n_processes=n_processes, single_thread=False)

In [None]:
# set dview to None for debugging
if 'dview' in locals():
    dview.terminate()
dview = None

### Parameters for source extraction
Next, we define the important parameters for calcium source extraction. These parameters will have to be iteratively refined for the respective datasets.


#### Spring 2018 Parameter (dendritic)

In [None]:
# Archived parameter set: every line in this cell is commented out, so running
# the cell defines nothing. The active values are set in the next parameter cell.
# parameters for source extraction and deconvolution
# decay_time = 0.4            # length of a typical transient in seconds
# p = 1                       # order of the autoregressive system (normally 1, 2 for fast indicators / imaging)
# gnb = 2                     # number of global background components
# merge_thresh = 0.8          # merging threshold, max correlation allowed
# rf = 10                     # half-size of the patches in pixels. e.g., if rf=25, patches are 50x50 / None: no patches
# stride_cnmf = 5             # amount of overlap between the patches in pixels
# K = None                    # number of components per patch (usually None)
# gSig = [4, 4]               # expected half size of neurons
# method_init = 'sparse_nmf'  # initialization method (if analyzing dendritic data use 'sparse_nmf')
# is_dendrites = True         # flag for analyzing dendritic data
# #alpha_snmf = 10e2           # sparsity penalty for dendritic data analysis through sparse NMF
# alpha_snmf = 1e-6

# method_deconvolution='oasis'# deconvolution method (oasis or cvxpy)

# # parameters for component evaluation
# min_SNR = 2.5               # signal to noise ratio for accepting a component
# rval_thr = 0.8              # space correlation threshold for accepting a component
# use_cnn = False             # whether to use CNN to filter components
# cnn_thr = 0.8               # threshold for CNN based classifier

# final_rate = frame_rate             # final frame rate in Hz

#### Autumn 2018 Parameter

In [None]:
# --- Dataset dependent parameters ---
decay_time = 0.4  # length of a typical transient in seconds

# --- Source extraction and deconvolution ---
p = 1               # order of the autoregressive system
gnb = 2             # number of global background components
merge_thresh = 0.8  # merging threshold, max correlation allowed
rf = 15             # half-size of the patches in pixels (rf=25 gives 50x50 patches)
stride_cnmf = 6     # amount of overlap between the patches in pixels
K = 4               # number of components per patch
gSig = [4, 4]       # expected half size of neurons in pixels

# --- Initialization ---
# 'sparse_nmf' for dendritic data, otherwise 'greedy_roi'
method_init = 'sparse_nmf'
# sparsity penalty for dendritic data analysis through sparse NMF
# (an earlier value of 10e2 is no longer used)
alpha_snmf = 100
normalize_init = True  # default is True

# spatial / temporal subsampling during initialization
ssub, tsub = 1, 1

# --- Component evaluation ---
min_SNR = 2.0      # signal to noise ratio for accepting a component
rval_thr = 0.85    # space correlation threshold for accepting a component
cnn_thr = 0.99     # threshold for CNN based classifier
cnn_lowest = 0.1   # neurons with cnn probability lower than this value are rejected

In [None]:
# create Parameters object
# unspecified parameters get default values
# All values come from the parameter cell above. 'p' uses the variable p
# instead of a hard-coded 1, and gSig / merge_thresh are passed on as well;
# leaving them out of the dict would silently discard those settings in favour
# of the library defaults.
opts_dict = {
    'fnames': fname_rem,
    'fr': frame_rate,
    'decay_time': decay_time,
    'p': p,
    'nb': gnb,
    'rf': rf,
    'K': K,
    'gSig': gSig,
    'stride': stride_cnmf,
    'method_init': method_init,
    'alpha_snmf': alpha_snmf,
    'normalize_init': normalize_init,
    'rolling_sum': True,
    'only_init': True,
    'ssub': ssub,
    'tsub': tsub,
    'merge_thr': merge_thresh,
    'min_SNR': min_SNR,
    'rval_thr': rval_thr,
    'use_cnn': True,
    'min_cnn_thr': cnn_thr,
    'cnn_lowest': cnn_lowest,
}

opts = params.CNMFParams(params_dict=opts_dict)

To get a dict with all parameters, use `opts.to_dict()`

#### Run CNMF on patches

In [None]:
# Show the complete parameter set (trailing expression, displayed as cell output).
opts.to_dict()

In [None]:
# First extract spatial and temporal components on patches and combine them
# for this step deconvolution is turned off (p=0)
# The override is applied to the 'temporal' parameter group of opts before the
# CNMF object is built from it.
opts.set('temporal', {'p': 0})
cnm = cnmf.CNMF(n_processes, params=opts, dview=dview)
# fit returns the object that is used from here on, so cnm is rebound to it.
cnm = cnm.fit(images)

#### Inspect results

In [None]:
# Correlation image used as background for the contour plots.
# The first axis of images is moved to the last position before it is handed
# to local_correlations.
movie_time_last = images.transpose(1, 2, 0)
Cn = cm.local_correlations(movie_time_last)
# Replace NaN entries by 0, in place.
np.putmask(Cn, np.isnan(Cn), 0)
# plot contours of found components
cnm.estimates.plot_contours_nb(img=Cn)

#### Re-run (seeded) CNMF on the full Field of View

In [None]:
# RE-RUN seeded CNMF on accepted patches to refine and perform deconvolution 
# The 'temporal' p is set to the configured value p before refitting; the
# result of refit is kept separately as cnm2.
cnm.params.set('temporal', {'p': p})
cnm2 = cnm.refit(images, dview=dview)

#### Component evaluation
The components are evaluated in three ways:
1. the shape of each component must be correlated with the data
2. a minimum peak SNR is required over the length of a transient
3. each shape passes a CNN based classifier

In [None]:
# Evaluate the refitted components with the parameters stored on cnm2, then
# report how many ended up in the accepted and in the rejected index list.
cnm2.estimates.evaluate_components(images, cnm2.params, dview=dview)
print(
    'Found %d good / %d bad components'
    % tuple(
        len(indices)
        for indices in (cnm2.estimates.idx_components,
                        cnm2.estimates.idx_components_bad)
    )
)

Plot contours of selected and rejected components

In [None]:
# Contour plot on the correlation image Cn, passing the indices of the
# accepted components.
cnm2.estimates.plot_contours_nb(img=Cn, idx=cnm2.estimates.idx_components)

View traces of accepted and rejected components. Note that if you get a data rate error, you can start the Jupyter notebook with: 'jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10'

In [None]:
# accepted components
# Component viewer restricted to the indices in idx_components, with the
# correlation image Cn as background image.
cnm2.estimates.nb_view_components(img=Cn, idx=cnm2.estimates.idx_components)

In [None]:
# rejected components
# The viewer is only opened when at least one component was rejected;
# otherwise a short message is printed instead.
rejected_idx = cnm2.estimates.idx_components_bad
if len(rejected_idx) == 0:
    print("No components were rejected.")
else:
    cnm2.estimates.nb_view_components(img=Cn, idx=rejected_idx)

#### Extract DF/F values

In [None]:
# Extract DF/F values for the components of cnm2, using quantileMin=8 and a
# window of 250 frames.
cnm2.estimates.detrend_df_f(quantileMin=8, frames_window=250)

Select only high quality components.

In [None]:
# Keep only the high quality components.
# NOTE(review): with use_object=True the selection presumably relies on the
# indices stored by evaluate_components - confirm against the CaImAn docs.
cnm2.estimates.select_components(use_object=True)

In [None]:
# View the remaining components on the correlation image; denoised traces are
# drawn in red.
cnm2.estimates.nb_view_components(img=Cn, denoised_color='red')