## Preprocess and motion correct 3D movies
Step 1 of the Caiman processing pipeline for multi-layer two-photon calcium imaging movies. This notebook assumes the movie format acquired with the Scope setup (i.e. the different layers are arranged as a mosaic on top of each other).

### Imports & Setup
The first cells import the various Python modules required by the notebook. In particular, a number of modules are imported from the Caiman package. In addition, we also set up the environment so that everything works as expected.

In [None]:
# Generic imports
import os, sys, platform, re, math, getpass, shutil, tempfile
import json, yaml
import time
import ipyparallel
import xml.etree.ElementTree as ET
import numpy as np
# import skimage.transform
# from tifffile import TiffFile, imread, imsave
from tifffile import imsave
from IPython.display import clear_output

In [None]:
# Make the CaImAn checkout in the home directory importable on Linux
if platform.system() == 'Linux':
    caiman_dir = os.path.expanduser('~/caiman')
    sys.path.append(caiman_dir)
# Environment variables for parallel processing: restrict each of the
# listed thread-count settings to a single thread
for thread_var in ('MKL_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'VECLIB_MAXIMUM_THREADS'):
    os.environ[thread_var] = '1'

In [None]:
# Import CaImAn and custom functions
import caiman as cm
from caiman.motion_correction import MotionCorrect
from caiman.source_extraction.cnmf import params as params
import utils, mc_utils

### Read parameters from config file

In [None]:
# Path of the YAML configuration file (relative to the notebook's working directory)
config_file = 'config.yml'

In [None]:
# Parse the YAML configuration into a Python object and echo it for reference.
# NOTE(review): FullLoader is fine for a trusted local file; do not point
# config_file at untrusted input.
with open(config_file) as config_stream:
    config = yaml.load(config_stream, Loader=yaml.FullLoader)
print(config)

### Setup cluster for parallel processing
This section starts the IPython cluster (ipcluster).

In [None]:
# Number of ipcluster engines to start, taken from the 'general' section of the config
ncpus = config['general']['ncpus']

In [None]:
%%bash -s "$ncpus"
# $1 = number of engines to start (the notebook's ncpus value)
# Activate the caiman environment: try the fixed Anaconda install first and
# fall back to whatever `activate` is found on the PATH.
source /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate caiman || source activate caiman
# Stop a cluster that may still be running and give it time to shut down
ipcluster stop
sleep 5
# Start a new cluster in the background with $1 engines
ipcluster start --daemonize -n $1

In [None]:
# Give the freshly started ipcluster some time before connecting
time.sleep(10)
client = ipyparallel.Client()
time.sleep(2)

# Poll until the requested number of engines is available, printing one dot
# per attempt as visual feedback (flushed so it shows up immediately).
# Note: there is no timeout - interrupt the kernel if the engines never start.
while True:
    if len(client) >= ncpus:
        break
    sys.stdout.write(".")
    sys.stdout.flush()
    time.sleep(0.5)

# Run a trivial blocking statement through a direct view, then create the
# dview object used for all parallel calls in this notebook
client.direct_view().execute('__a=1', block=True)
dview = client[:]
n_processes = len(client)
print('\n\nThe cluster appears to be setup. Number of parallel processes: %d' % (n_processes))

### Map network drive
If the data is located on a network drive (i.e. Neurophysiology storage), we first need to connect the drive with the relevant user credentials.

In [None]:
# Read the network-storage settings; user, address and mountpoint are only
# looked up (and the password only requested) when the storage is connected
storage_config = config['data']
connect_storage = storage_config['connect_storage']
if connect_storage:
    storage_user, storage_adress, mountpoint = (
        storage_config[key] for key in ('storage_user', 'storage_adress', 'mountpoint')
    )
    storage_pw = getpass.getpass(prompt="Enter password for the remote storage")

In [None]:
if connect_storage:
    # create the mountpoint unless it already exists as a directory
    os.makedirs(mountpoint, exist_ok=True)
    # list the directory once (the result is not used)
    os.listdir(mountpoint)

In [None]:
%%bash -s "$connect_storage" "$storage_user" "$storage_pw" "$storage_adress" "$mountpoint"
# Mount the remote CIFS share if the storage connection is enabled.
# Positional arguments:
#   $1 = connect flag, $2 = user name, $3 = password,
#   $4 = storage address, $5 = local mountpoint
# The option string and the paths are quoted so that values containing
# whitespace or glob characters are passed to mount unchanged.
# NOTE(review): the flag is accepted as "1" or "True"; a YAML boolean
# presumably arrives here as the string "True" - confirm.
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list while mount runs; a password containing a comma
# would still break the option string. Consider a credentials file.
if [ "$1" = 1 ] || [ "$1" = True ]; then
    sudo mount -t cifs -o "username=$2,password=$3,uid=$(id -u),gid=$(id -g)" "$4" "$5"
else
    echo "Not mounting storage"
fi

In [None]:
# show what is found in the mountpoint folder
if connect_storage:
    print('Mountpoint folder content:', os.listdir(mountpoint), sep='\n')

### Read other parameters from config file

In [None]:
# data parameters: the data are located in <data_folder>/<animal>/<day>/<area>
data_cfg = config['data']
root_folder = str(data_cfg['data_folder'])
animal_folder = str(data_cfg['animal_folder'])
day_folder = str(data_cfg['day_folder'])
area_folder = str(data_cfg['area_folder'])
data_folder = os.path.join(root_folder, animal_folder, day_folder, area_folder)
copy_to_temp = bool(data_cfg['copy_to_temp'])

# analysis parameters
analysis_cfg = config['analysis']
max_trials, x_crop, n_planes, max_group_size = (
    analysis_cfg[key] for key in ('max_trials', 'x_crop', 'n_planes', 'max_group_size')
)

### Copy data to temporary folder
If the data is stored on a remote network location, it is more robust to copy it to a local temporary folder for analysis. Afterwards, the results are copied back to the original location.

In [None]:
if copy_to_temp:
    # the analysis runs in a fresh temporary directory ...
    temp_dir = tempfile.mkdtemp()
    # ... in which the animal / day / area folder structure is recreated
    folder_levels = (animal_folder, day_folder, area_folder)
    temp_data_folder = os.path.join(temp_dir, *folder_levels)
    os.makedirs(temp_data_folder, exist_ok=True)
    print('Created temporary analysis folder %s' % (temp_data_folder))

In [None]:
# select sessions for processing
# Regular expression that should match the session folder names (ie. 01-23-45_Live).
# A raw string is used so that \d is not treated as a string escape sequence.
p = re.compile(r'\d\d-\d\d-\d\d_Live')
# First keep only the matching folders (sorted by name), then cap the number of
# sessions at max_trials. Capping after filtering means that non-matching
# entries do not use up the limit, and at most max_trials (not max_trials + 1)
# sessions are selected. A negative limit selects nothing.
session_names = [x for x in sorted(os.listdir(data_folder)) if p.match(x)]
sessions = [os.path.join(data_folder, x) for x in session_names[:max(int(max_trials), 0)]]

In [None]:
# copy the session folders to the temporary folder (in parallel via dview)
if copy_to_temp:
    t_start = time.time()
    copy_targets = [temp_data_folder] * len(sessions)
    temp_sessions = dview.map_sync(utils.copyDirectory, sessions, copy_targets)
    # report the elapsed time in total and per session
    t_elapsed = time.time() - t_start
    print('Copied data to %s in %1.2f s (%1.2f s per session)' % (temp_dir, t_elapsed, t_elapsed/len(sessions)))

In [None]:
# From here on the analysis works on the temporary copies (if they were made),
# otherwise directly on the original data folder
working_data_folder = temp_data_folder if copy_to_temp else data_folder
if copy_to_temp:
    sessions = temp_sessions

### Select TIFF files and extract metadata

In [None]:
# For every session pick the raw TIFF file (ends with '.tif' and has no
# 'stacked' in its name) and the parameters.xml file. If several files match,
# the first one in the directory listing is used.
tiff_files = []
xml_files = []
for i_session in sessions:
    # list the session folder once and reuse the listing for both lookups
    session_content = os.listdir(i_session)
    raw_tiffs = [x for x in session_content if x.endswith('.tif') and 'stacked' not in x]
    param_xmls = [x for x in session_content if 'parameters.xml' in x]
    # fail with a message naming the session instead of a bare IndexError
    if not raw_tiffs:
        raise FileNotFoundError('No raw TIFF file found in session folder %s' % (i_session))
    if not param_xmls:
        raise FileNotFoundError('No parameters.xml file found in session folder %s' % (i_session))
    tiff_files.append(os.path.join(i_session, raw_tiffs[0]))
    xml_files.append(os.path.join(i_session, param_xmls[0]))

In [None]:
# read frame rate from parameters.xml
# frame_rates collects the values of all sessions; frame_rate keeps the value
# of the most recently parsed session.
frame_rates = []
for ix, i_session in enumerate(sessions):
    root = ET.parse(xml_files[ix]).getroot()
    n_rates_before = len(frame_rates)
    for child in root:
        if child.tag == 'area0': # Note: only area 0!
            fr = child.find('Framerate_Hz')
            if fr is None or fr.text is None:
                raise ValueError('Missing Framerate_Hz entry for area0 in %s' % (xml_files[ix]))
            frame_rate = float(fr.text)
            frame_rates.append(frame_rate)
    # Without this check a session lacking an area0 entry would silently reuse
    # the frame rate of the previous session (or fail with a NameError).
    if len(frame_rates) == n_rates_before:
        raise ValueError('No area0 frame rate found in %s' % (xml_files[ix]))
    # only print the first few sessions to keep the output short
    if ix <= 10:
        print('Frame rate: %1.4f Hz (%s)' % (frame_rate, os.path.split(i_session)[1]))
# indicate that further sessions were processed but not printed
# (based on the session count so that an empty session list does not fail here)
if len(sessions) > 11:
    print('...')

### Convert TIFF files to ImageJ hyperstack files
This cell calls the `mosaicToStack` function in `utils` through the ipyparallel `map_sync` method to make use of multiple cores.

In [None]:
# Convert every TIFF file in parallel; n_planes and x_crop are repeated so that
# each call receives the same settings
n_tiffs = len(tiff_files)
stacked_files = dview.map_sync(utils.mosaicToStack, tiff_files, [n_planes] * n_tiffs, [x_crop] * n_tiffs)

In [None]:
# Show the first ten stacked files and, for longer lists, also the last five
n_stacked = len(stacked_files)
print('Processing %1.0f files:' % (n_stacked))
head_files = stacked_files[:10]
print(*head_files, sep='\n')
if n_stacked > 10:
    tail_files = stacked_files[-5:]
    print('...')
    print(*tail_files, sep='\n')

### Join cropped TIF files
Next, we create a large joined TIF file from individual cropped files. Further processing will be done on the joined file.

In [None]:
# Split the stacked files into the smallest number of groups that keeps every
# group at or below max_group_size files, then distribute the files evenly
n_stacked = len(stacked_files)
n_groups = math.ceil(n_stacked / float(max_group_size))
files_per_group = math.ceil(n_stacked / n_groups)
stacked_files_by_group = []
print('Processing files in %d groups' % (n_groups))
for group_no in range(int(n_groups)):
    first_ix = int(group_no * files_per_group)
    end_ix = int((group_no+1) * files_per_group)
    group_files = stacked_files[first_ix:end_ix]
    stacked_files_by_group.append(group_files)

    print('Group %d (%d - %d): %d files' % (group_no+1, first_ix, end_ix, len(group_files)))

In [None]:
# For every group: load the stacked movies as one chain, save one joined TIF
# per plane and write a JSON file describing the source files.
joined_tif_list = []      # joined file name per group (without the _P<plane>.tif suffix)
json_fname_list = []      # JSON metadata file name per group
total_frames_list = []    # total number of frames per group
trial_indices_list = []   # per group: trial index of every frame
for i_group, stacked_files_group in enumerate(stacked_files_by_group):
    # load movies
    movies = cm.load_movie_chain(stacked_files_group, is3D=True, outtype=np.int16)

    # axis 0 is used as frames, axis 1 as planes and axes 2/3 as image dimensions
    total_frames = movies.shape[0]
    total_frames_list.append(total_frames)
    n_planes = movies.shape[1]
    dims = (movies.shape[2], movies.shape[3])

    frames_per_movie = dview.map_sync(utils.getFramesTif, stacked_files_group)

    # trial index for each frame (indices restart at 0 within every group)
    trial_indices = []
    for i_trial, frame_count in enumerate(frames_per_movie):
        trial_indices.extend([i_trial] * frame_count)
    trial_indices_list.append(trial_indices)

    # The base name is built once and the plane suffix appended per plane, so
    # the names used below do not depend on the leftover value of the plane
    # loop variable.
    joined_base = '%s_%s_Join_G%d_F%d' % (day_folder, area_folder, i_group, total_frames)
    for i_plane in range(n_planes):
        # derive joined file name and save
        joined_tif = '%s_P%d.tif' % (joined_base, i_plane)
        imsave(os.path.join(working_data_folder, joined_tif), movies[:, i_plane, :, :])
        print('Saved joined TIF file %s' % (joined_tif))

    movies = None # free the memory

    # create a Json file with information about source files
    # NOTE(review): frame_rate is the value left behind by the frame-rate cell
    # (the last parsed session) - confirm that all sessions share it.
    meta = {"joined_file": joined_base,
            "source_frames": frames_per_movie,
            "source_file": [x.replace(working_data_folder + os.path.sep,'') for x in stacked_files_group],
            "trial_index": trial_indices,
            "frame_rate": frame_rate,
            "z_planes": n_planes
           }
    json_fname = joined_base + '.json'
    with open(os.path.join(working_data_folder, json_fname), 'w') as fid:
        json.dump(meta, fid)

    # save output file names in list
    joined_tif_list.append(joined_base)
    json_fname_list.append(json_fname)

    print('Created JSON metadata file %s' % (json_fname))

# delete stacked TIF files (to save disk space)
dummy = dview.map_sync(os.remove, stacked_files)

### Motion correction

First, setup the parameters for motion correction. The following parameters influence the **quality** of the motion correction:
- niter_rig ... number of iterations for rigid registration (larger = better). Little improvement likely above 5-10.
- strides ... intervals at which patches are laid out for motion correction (smaller = better)
- overlaps ... overlap between patches

Note that smaller values for strides / overlap will improve registration but also lead to NaNs in the output image. In general, there is a trade-off between the quality of registration and the presence / number of NaNs in the output (at least if there is significant motion).

In [None]:
# some parameters are derived directly from the data:
# the maximum allowed rigid shift is a tenth of each image dimension
max_shifts = tuple(int(np.round(dim_size/10)) for dim_size in dims)

# the rest are read from the config, where the string 'None' stands for None
# (both as a plain value and as an element of a list)
mc_config = config['mc']
for key, value in mc_config.items():
    if isinstance(value, list):
        mc_config[key] = [None if x == 'None' else x for x in value]
    elif isinstance(value, str) and value == 'None':
        mc_config[key] = None

In [None]:
# parameters for motion correction
mc_settings = config['mc']
opts_dict = {
    'niter_rig': mc_settings['niter_rig'],  # number of iterations rigid
    'pw_rigid': mc_settings['pw_rigid'],    # run piecewise-rigid registration?
    'max_shifts': max_shifts,               # maximum allowed rigid shift
}
# The remaining options are copied from the config under the same name:
#   splits_rig / splits_els ... for parallelization split the movies in
#       num_splits chunks across time
#   num_splits_to_process_rig / num_splits_to_process_els ... if None all the
#       splits are processed and the movie is saved
#   strides ... intervals at which patches are laid out for motion correction
#   overlaps ... overlap between patches (size of patch strides+overlaps)
#   upsample_factor_grid ... upsample factor to avoid smearing when merging patches
#   max_deviation_rigid ... maximum deviation allowed for patch with respect
#       to rigid shift
#   border_nan ... specifies how to deal with borders (True, False, 'copy', 'min')
for option in (
    'splits_rig',
    'num_splits_to_process_rig',
    'strides',
    'overlaps',
    'splits_els',
    'num_splits_to_process_els',
    'upsample_factor_grid',
    'max_deviation_rigid',
    'border_nan',
):
    opts_dict[option] = mc_settings[option]
opts = params.CNMFParams(params_dict=opts_dict)

There are also some parameters for computing the quality metrics. These probably don't have to be changed.

In [None]:
# parameters for computing metrics (read from the 'metrics' section of the config)
metrics_config = config['metrics']
winsize = metrics_config['winsize']
swap_dim = metrics_config['swap_dim']
# downsample for computing ROF
resize_fact_flow = metrics_config['resize_fact_flow']
# iterations for calculation of optic flow
iters_flow = metrics_config['iters_flow']

The helper functions used in the following steps are provided by the `utils` and `mc_utils` modules imported above. See the function doc strings for further information.

Now we are ready to run motion correction for the joined TIF file. If there are a lot of concatenated trials, this might take a while to complete.

The following outputs will be saved:
- result of rigid motion correction in Python mmap format and as TIF file
- result of pw-rigid motion correction in Python mmap format and as TIF file

### Run the motion correction
Next, we run the MC. This can be time consuming for large datasets.

In [None]:
t_start = time.time()
# mc_list[group][plane] holds the MotionCorrect object of each joined file
mc_list = []
for ix_file, i_file in enumerate(joined_tif_list):
    plane_results = []
    mc_list.append(plane_results)
    for i_plane in range(n_planes):
        fname = os.path.join(working_data_folder, i_file + '_P%d.tif' % (i_plane))
        run_pw_rigid = opts.get('motion', 'pw_rigid')

        if run_pw_rigid:
            # Run a rigid-only pass first: pw_rigid is switched off for this
            # pass and switched back on afterwards. The rigid output file
            # names are remembered for later.
            opts.set('motion', {'pw_rigid': False})
            mc = MotionCorrect(fname, dview=dview, **opts.get_group('motion'))
            mc.motion_correct(save_movie=True)
            fname_rig = mc.fname_tot_rig
            opts.set('motion', {'pw_rigid': True})

        # main pass with the configured settings
        mc = MotionCorrect(fname, dview=dview, **opts.get_group('motion'))
        mc.motion_correct(save_movie=True)

        if run_pw_rigid:
            # attach the rigid result of the first pass to the final object
            mc.fname_tot_rig = fname_rig

        # save TIF files next to the mmap files
        rig_mmap = mc.fname_tot_rig[0]
        imsave(rig_mmap.replace('.mmap', '.tif'), cm.load(rig_mmap))
        if run_pw_rigid:
            els_mmap = mc.fname_tot_els[0]
            imsave(els_mmap.replace('.mmap', '.tif'), cm.load(els_mmap))

        plane_results.append(mc)
        print('Finished MC for %s - Plane %d of %d' % (i_file, i_plane+1, n_planes))

# print elapsed time
t_elapsed = time.time() - t_start
print('\nFinished MC in %1.2f s (%1.2f s per frame)' % (t_elapsed, t_elapsed/(sum(total_frames_list)*n_planes)))

### Assess quality of motion correction
A number of key metrics can be calculated to assess how much motion correction reduced the overall motion.
1. Correlation
Correlations of each frame with the template image (binned median) for original, rigid correction and pw-rigid correction. The mean correlation gives an overall impression of motion. The minimum correlation indicates the parts of the movie that are worst affected by motion. Larger correlations indicate less motion.
2. Crispness
Crispness provides a measure of the smoothness of the corrected average image. Intuitively, a dataset with nonregistered motion will have a blurred mean image, resulting in a lower value for the total gradient field norm. Thus, larger values indicate a crisper average image and less residual motion. Crispness is calculated from the gradient field of the mean image (`np.gradient`).
3. Residual optical flow
Optic flow algorithms attempt to match each frame to the template by estimating locally smooth displacement fields. The output is an image where each pixel describes the local displacement between template and frame at this point. The smaller the local displacement, the better the registration. Here we compute the matrix norm of the optic flow matrix as a summary statistic.

In [None]:
# compute quality assessment metrics
# All result lists are indexed as [group][plane]. The per-plane arrays hold one
# value per processing stage: original, rigid and (if enabled) pw-rigid.
crispness = []
norms = []
corr_mean = []
corr_min = []
metrics = []
metrics_files = []
for i_group in range(n_groups):
    crispness.append([])
    norms.append([])
    corr_mean.append([])
    corr_min.append([])
    metrics.append([])
    mc_group = mc_list[i_group]

    # remove Pool object (required for parallel processing)
    # the objects are modified in place, so nothing has to be written back
    for i_mc in mc_group:
        i_mc.dview = None

    n_mc = len(mc_group)
    metrics_list = dview.map_sync(mc_utils.computeMetrics, mc_group, [swap_dim]*n_mc, [winsize]*n_mc,
                                  [resize_fact_flow]*n_mc, [iters_flow]*n_mc)

    # the pw-rigid ('els') results are only collected if pw-rigid correction was run
    if opts.get('motion', 'pw_rigid'):
        stages = ('orig', 'rig', 'els')
    else:
        stages = ('orig', 'rig')

    # collect results
    for mtrs in metrics_list:
        metrics[i_group].append(mtrs)
        # correlations, crispness and norms of residual optic flow as indicators of registration quality
        crispness[i_group].append(np.array([mtrs['crispness_' + stage] for stage in stages]))
        norms[i_group].append(np.array([np.mean(mtrs['norms_' + stage]) for stage in stages]))
        corr_mean[i_group].append(np.array([np.mean(mtrs['corr_' + stage]) for stage in stages]))
        corr_min[i_group].append(np.array([np.min(mtrs['corr_' + stage]) for stage in stages]))

    # save results
    metrics_files.append(json_fname_list[i_group].replace('.json', '_MC_metrics.npy'))
    # The five result lists do not form a regular numeric array, so an object
    # array is requested explicitly. Recent NumPy versions refuse to build a
    # ragged array implicitly. The file has to be loaded with allow_pickle=True.
    group_results = np.array(
        [metrics[i_group], crispness[i_group], norms[i_group], corr_mean[i_group], corr_min[i_group]],
        dtype=object)
    np.save(os.path.join(working_data_folder, metrics_files[i_group]), group_results)

clear_output()

Print different metrics for raw movie and rigid / pw-rigid corrected movies.

In [None]:
# Print the quality metrics of every plane, group by group
for i_group in range(n_groups):
    group_stats = (corr_mean[i_group], corr_min[i_group], crispness[i_group], norms[i_group])
    for i_plane in range(n_planes):
        print('MC evaluation - Group %d - Plane %d:' % (i_group, i_plane))
        plane_stats = [stat[i_plane] for stat in group_stats]
        mc_utils.printMetrics(*plane_stats)
        print('\n')

### Create a log for downstream analysis
Add important information to the config dictionary and save it.

In [None]:
# Collect the configuration and the most important results of this notebook
mc_log = {
    'config': config,
    'data_folder': data_folder,
    'working_data_folder': working_data_folder,
    'n_groups': n_groups,
    'n_planes': n_planes,
    'tiff_files': tiff_files,
    'joined_tif_list': joined_tif_list,
    'json_fname_list': json_fname_list,
    'metrics_files': metrics_files,
    'stacked_files_by_group': stacked_files_by_group,
    'total_frames_list': total_frames_list,
    'trial_indices_list': trial_indices_list,
}

# file names of mmap files (base names only), indexed as [group][plane];
# the pw-rigid lists stay empty if pw-rigid correction was not run
save_els = config['mc']['pw_rigid']
mmap_files_rig = []
mmap_files_els = []
for i_group in range(n_groups):
    rig_names = []
    els_names = []
    mmap_files_rig.append(rig_names)
    mmap_files_els.append(els_names)
    for i_plane in range(n_planes):
        plane_mc = mc_list[i_group][i_plane]
        rig_names.append(os.path.basename(plane_mc.fname_tot_rig[0]))
        if save_els:
            els_names.append(os.path.basename(plane_mc.fname_tot_els[0]))

mc_log['mmap_files_rig'] = mmap_files_rig
if save_els:
    mc_log['mmap_files_els'] = mmap_files_els

# write the log as YAML into the working data folder
log_path = os.path.join(working_data_folder, 'caiman_mc_log.yml')
with open(log_path, 'w') as outfile:
    yaml.dump(mc_log, outfile, default_flow_style=True)

### Copy back results and delete the temporary folder
If copying fails for any reason (e.g. no more disk space), we can still recover the results from the temporary folder.

In [None]:
# Copy the result files from the temporary working folder back to the original
# data folder and remove the temporary directory afterwards.
if copy_to_temp:
    t_start = time.time()
    file_types_to_copy = ['.tif', '.npz', '.npy', '.json', '.mmap', '.yml']
    files_to_copy = []
    # list the working folder once and select the files type by type
    folder_content = os.listdir(working_data_folder)
    for file_type in file_types_to_copy:
        # The directory check has to use the full path: the bare name would be
        # resolved against the current working directory of the notebook.
        files_to_copy.extend(
            x for x in folder_content
            if x.endswith(file_type) and not os.path.isdir(os.path.join(working_data_folder, x))
        )
    out = dview.map_sync(utils.copyFiles, [os.path.join(working_data_folder, x) for x in files_to_copy],
                         [os.path.join(data_folder, x) for x in files_to_copy])
    print('\nFinished copying in %1.2f s' % (time.time()-t_start))
    # delete the temp. dir
    # NOTE(review): this line is only skipped if the copy step raises; whether
    # utils.copyFiles raises on failure (out is not checked) should be
    # confirmed before relying on the temporary folder for recovery.
    shutil.rmtree(temp_dir)

### Stop the cluster

In [None]:
%%bash
# Activate the caiman environment: try the fixed Anaconda install first and
# fall back to whatever `activate` is found on the PATH.
source /opt/Anaconda3-5.1.0-Linux-x86_64/bin/activate caiman || source activate caiman
# Stop the cluster and wait briefly before the cell returns
ipcluster stop
sleep 1