In [None]:
#ipython magic
%reset -f
%matplotlib notebook
%load_ext autoreload
%autoreload 2

In [1]:
#general imports
import sys
sys.path.append(r'C:\Users\Robert Lees\Documents\Code\Vape\suite2p_etc')
sys.path.append('..')

import numpy as np
import os
import time

In [2]:
#notebook specific imports
from utils.gsheets_importer import gsheet2df, correct_behaviour_df, split_df, path_conversion, path_finder
from utils.artifact_removal import artifact_removal
from utils.utils_funcs import *
from utils.paq2py import *
import tifffile
import copy
import math

import suite2p
print(suite2p.__path__)
from suite2p.run_s2p import run_s2p
from settings import ops

import matplotlib.pyplot as plt

['C:\\ProgramData\\Anaconda3\\lib\\site-packages\\suite2p']


In [8]:
# Read the experiment Google Sheet and build the per-row lists (stim type, paths, frame
# counts, ...) that the artifact-removal and suite2p cells below index into.


def _int_or_zero(value):
    """Convert one sheet cell to int, mapping a blank or missing cell to 0.

    A blank cell would otherwise make ``int('')`` raise a bare ValueError before the
    metadata checks below can report which kind of information is missing. Mapping it
    to 0 lets those ``all(...)`` checks fail with the intended message, and lets
    whisker-stim rows leave ``total_stim_duration`` empty.
    """
    if value is None:
        return 0
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return 0
    return int(value)


sheet_ID = '1Z9CvuA1qlLsB0Gar48bkZscLhtlP9iIWk4PYZur8cvc'
SHEET_NAME = '2019-05-23_artifact_removal_s2p'
df = gsheet2df(sheet_ID, HEADER_ROW=4, SHEET_NAME=SHEET_NAME)

# at this point we have lots of files that could be whisker stim or photostim, need to find out which is which

for_processing = split_df(df, 's2p_me') # only files with TRUE in suite2p_me column

if not for_processing.shape[0]:
    raise Exception('ERROR: no files set for processing')

stim = for_processing.loc[:,'stim'] # find out what stims have been carried out

# Positional row indices (0-based within for_processing) of each stim type. The loop
# variable is deliberately not called `stim` so it cannot be confused with the column above.
photostim_idx = [i for i, stim_type in enumerate(stim) if stim_type == 'p'] # photostim exps (need artifact removal)
whisker_stim_idx = [i for i, stim_type in enumerate(stim) if stim_type == 'w'] # whisker stim exps (no artifact removal)

if ( len(photostim_idx) + len(whisker_stim_idx) ) != stim.shape[0]:
    raise Exception('ERROR: stim type is not defined for some files')

tiff_paths = for_processing.loc[:,'tiff_path']
paq_paths = for_processing.loc[:,'paq_path']

if not all(tiff_paths) or not all(paq_paths):
    raise Exception('ERROR: missing tiff or paq paths')

# below is information that will be later required for suite2p and stim removal

n_frames = [_int_or_zero(cell) for cell in for_processing.loc[:,'n_frames']] # for stim removal
stim_dur = [_int_or_zero(cell) for cell in for_processing.loc[:,'total_stim_duration']] # for stim removal
n_planes = [_int_or_zero(cell) for cell in for_processing.loc[:,'n_planes']] # for s2p and stim removal

# stim duration is only required for photostim rows; frame and plane counts are required everywhere
if not all(n_frames) or not all([stim_dur[i] for i in photostim_idx]) or not all(n_planes):
    raise Exception('ERROR: missing important metadata for processing')

packerstation_path = r"P:" # the path to PackerStation on the local machine
# NOTE(review): the converted paths come out as 'P:rlees\...' (no separator after the drive
# letter), which Windows resolves relative to the current directory on P: - confirm this is intended.
# TODO: update this to path_finder rather than conversion, to increase failsafe at this point
tiffs_pstation = path_conversion(tiff_paths, packerstation_path) # convert paths (from Packer1 or PackerStation) to local PackerStation paths
paqs_pstation = path_conversion(paq_paths, packerstation_path)

tiffs_pstation

['P:rlees\\Data\\2019-03-12\\RL025\\2019-03-12_RL025_t-007\\MPTIFF',
 'P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-006',
 'P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-007',
 'P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-008',
 'P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-009']

In [4]:
# Use the first frame of each experiment's TIFF data to find the image resolution,
# required later for calculating cell diameter in pixels for s2p.
# frame_size ends up with exactly one (height, width) entry per entry of tiffs_pstation,
# in the same order, so it can be indexed with the same row index as the other lists.

frame_size = []

for tiff_path in tiffs_pstation:

    if tiff_path.lower().endswith(('.tif', '.tiff')): # the path points directly to a file
        first_tiff = tiff_path
    else: # the path points to a folder: take the first TIFF in it (sorted for determinism)
        tiff_names = sorted(item for item in os.listdir(tiff_path)
                            if item.lower().endswith(('.tif', '.tiff')))
        first_tiff = os.path.join(tiff_path, tiff_names[0]) if tiff_names else None

    if first_tiff is None: # folder without any TIFF file
        print(tiff_path)
        raise Exception('ERROR: could not load TIFF file')

    frame = tifffile.imread(first_tiff, key=0) # key=0 reads only the first page

    if frame.ndim < 2: # not an image
        print(tiff_path)
        raise Exception('ERROR: could not load TIFF file')

    # No `break` here: the previous version broke out of the per-experiment loop after the
    # first direct file path, silently skipping every remaining experiment.
    frame_size.append(frame.shape)

frame_size

[(512, 512), (512, 512), (512, 512), (512, 512), (512, 512)]

In [5]:
# Obtain, for every experiment, the list of TIFF files to run stim removal / suite2p on.
# Every entry of tiff_lists is a list of file paths (even when there is a single file),
# because the cells below index it with tiff_list[0] and test len(tiff_list).

tiff_lists = []

# need to find single multi-page TIFF or many TIFFs/MPTIFFs

for tiff in tiffs_pstation:
    print(tiff)

    if tiff.lower().endswith(('.tif', '.tiff')): # provided path directs to a TIFF file, make sure it is the only one in its folder

        parent_dir = os.path.dirname(tiff)
        tiff_count = sum(1 for name in os.listdir(parent_dir)
                         if name.lower().endswith(('.tif', '.tiff')))

        if tiff_count > 1:
            raise Exception('ERROR: make sure large, single TIFF files are in their own folder')

        tiff_lists.append([tiff]) # wrap in a list so downstream indexing treats it like the folder case

    else: # the path is a folder with an MPTIFF or multiple TIFFs/MPTIFFs

        folder_tiffs = sorted(os.path.join(tiff, name) for name in os.listdir(tiff)
                              if name.lower().endswith(('.tif', '.tiff')))

        if not folder_tiffs:
            raise Exception('ERROR: no TIFF files found in ' + tiff)

        tiff_lists.append(folder_tiffs)

photostim_tiffs = [tiff_lists[i] for i in photostim_idx] # only run artifact removal-specific code on photostim exps

P:rlees\Data\2019-03-12\RL025\2019-03-12_RL025_t-007\MPTIFF
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-006
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-007
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-008
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-009


In [None]:
# Load each photostim experiment's paq file and work out which imaging frames fall
# within (or just after) a photostimulation, i.e. which frames carry the stim artifact.
# remove_frames[k] belongs to photostim_tiffs[k]: both follow the order of photostim_idx.

remove_frames = []

# Iterate over the sheet-row index of each photostim experiment. paqs_pstation and stim_dur
# hold one entry per sheet row (photostim AND whisker), so they must be indexed by row and
# not by position within the photostim subset.
for row in photostim_idx:
    paq = paq_read(paqs_pstation[row])
    stim_frames = stim_start_frame(paq, 'markpoints2packio') # TODO: will fail in certain situations where each stim was triggered every time
    frame_clock = paq_data(paq, 'frame_clock', threshold_ttl=True)

    duration_ms = stim_dur[row] # length of stim
    duration_samples = (int(duration_ms) / 1000) * paq['rate']

    # use this to remove frames during stim based on paq
    to_remove = []

    for stim_frame in stim_frames:

        # frame indexes that fall during the stim
        # (stim_frame is compared directly against frame_clock, so both are presumably in paq samples)
        in_stim = np.where((frame_clock >= stim_frame) & (frame_clock <= stim_frame + duration_samples))[0]

        if in_stim.size == 0: # no imaging frame during this stim; in_stim[-1] below would raise IndexError
            print('WARNING: no frames found during stim at', stim_frame)
            continue

        # empirical observation: the two frames following the stim also contain artifact
        last_in_stim = in_stim[-1]
        in_stim = np.append(in_stim, [last_in_stim + 1, last_in_stim + 2])

        to_remove.append(in_stim)

    # concatenate flattens correctly even when stims cover different numbers of frames
    # (np.ravel on ragged arrays would not produce a flat array of frame indexes)
    if to_remove:
        remove_frames.append(np.concatenate(to_remove))
    else:
        remove_frames.append(np.empty(0, dtype=int))

for frames in remove_frames:
    print(len(frames))

In [None]:
# Load up the photostim TIFFs in blocks, run stim artifact removal on each block and write
# the result to an 'Artifact_removed' folder next to the raw data.
user_block_size = 500 # should be above 100 to work without problems (enough TIFF pages for multiplane thresholding)
n_thresh_frames = 50 # number of stim-free frames per block handed to artifact_removal for thresholding


def _remove_artifact_and_save(substack, first_frame, stim_frames, output_dir, base_filename,
                              iteration, width_thresh, nplanes):
    """Remove the stim artifact from one block of frames and write it to disk.

    Parameters
    ----------
    substack : array whose first axis is frames
        The block of frames to process.
    first_frame : int
        Index, within the whole recording, of the first frame of ``substack``.
    stim_frames : iterable of int
        Recording-wide indexes of every frame to be treated as containing artifact.
    output_dir, base_filename : str
        Output is written to ``<output_dir>/<base_filename>_artifactRemoved<iteration>.tiff``.
    iteration : int
        Running block number, used in the output file name.
    width_thresh, nplanes :
        Passed through unchanged to ``artifact_removal``.

    Returns
    -------
    int
        Recording-wide index of the frame following this block (the next ``first_frame``).

    Raises
    ------
    Exception
        If every frame of the block is a stim frame, so no baseline frame is available.
    """
    n_block_frames = substack.shape[0]
    last_frame = first_frame + n_block_frames

    # stim frames that fall inside this block, re-expressed relative to the block start
    remove_me = [frame - first_frame for frame in stim_frames if first_frame <= frame < last_frame]

    # sometimes a threshold can't be found, so provide stim-free frames to artifact removal
    stim_lookup = set(remove_me) # set membership keeps this linear in the block size
    non_stim_frames = [idx for idx in range(n_block_frames) if idx not in stim_lookup]

    if remove_me and not non_stim_frames:
        raise Exception('ERROR: block contains only stim, cant find baseline')

    # slicing never raises: a block with fewer stim-free frames simply yields a shorter list
    thresh_list = non_stim_frames[:n_thresh_frames]

    if remove_me:
        substack_processed = artifact_removal(substack, thresh_list=thresh_list, remove_me=remove_me, width_thresh=width_thresh, nplanes=nplanes) # remove artifact
    else:
        substack_processed = substack # no frames to be removed in this block

    filename = base_filename + '_artifactRemoved' + str(iteration) + '.tiff'
    tifffile.imwrite(os.path.join(output_dir, filename), substack_processed) # write processed tiff

    return last_frame


for i,tiff_list in enumerate(photostim_tiffs): # for each list of TIFFs

    if isinstance(tiff_list, str): # tolerate a bare path so tiff_list[0] is a file, not a character
        tiff_list = [tiff_list]

    # photostim_tiffs and remove_frames are ordered by photostim experiment (index i), while
    # n_planes, frame_size and n_frames hold one entry per sheet row: translate i to the row.
    row = photostim_idx[i]

    parent_dir = os.path.dirname(tiff_list[0]) # parent directory to create output directory inside of
    output_dir = os.path.join(parent_dir, 'Artifact_removed')
    print(output_dir)

    # an existing folder is fine; any other failure (e.g. permissions) is no longer swallowed
    os.makedirs(output_dir, exist_ok=True)

    base_path = os.path.splitext(tiff_list[0])[0] # first TIFF path without extension
    base_filename = os.path.basename(base_path)
    print(base_filename)

    block_size = myround(user_block_size, base=n_planes[row]) # want the block_size to be in multiples of the number of planes if multiplanar TIFF

    iteration = 0
    prev_frames = 0 # recording-wide index of the first frame of the next block

    width_thresh = 5 * (frame_size[row][0] / 512) # currently using 5 pixels as default for 512 image, scaled for other resolution

    if len(tiff_list)>1: # multiple TIFFs or MPTIFFs
        print('Multiple TIFF files')
        tiff_list = sorted(tiff_list)

        # Frames are collected in a list and concatenated once per block. This avoids
        # re-copying the growing stack on every file and works for any frame resolution.
        pending = []
        pending_frames = 0

        for file_number, tiff in enumerate(tiff_list):

            if '.ome' in tiff and file_number == 0: # sometimes, if it is .ome.tiff, the first file has a header which is mistaken for other pages
                temp_tiff = tifffile.imread(tiff, key=0)
            else:
                temp_tiff = tifffile.imread(tiff)

            if temp_tiff.ndim != 3: # can only join substacks if dimensions match, so match them (should be 3d)
                temp_tiff = np.expand_dims(temp_tiff, axis=0)

            pending.append(temp_tiff)
            pending_frames += temp_tiff.shape[0]

            is_last_file = file_number == len(tiff_list) - 1

            # `>=` (not `>`) so that single-page files give blocks of exactly block_size frames,
            # which keeps each block a multiple of the number of planes
            if pending_frames >= block_size or is_last_file: # block is large enough (RAM conservation) or at last TIFF in list
                substack = np.concatenate(pending, axis=0)
                prev_frames = _remove_artifact_and_save(substack, prev_frames, remove_frames[i], output_dir,
                                                        base_filename, iteration, width_thresh, n_planes[row])
                iteration += 1
                pending = []
                pending_frames = 0

    else: # single, large MPTIFF
        print('Single TIFF file')
        total_frames = n_frames[row]

        while prev_frames < total_frames:

            # load a block of TIFF pages, or whatever is left if fewer than a block remain
            current_frames = min(prev_frames + block_size, total_frames)

            substack = tifffile.imread(tiff_list[0], key=range(prev_frames, current_frames)) # load in all frames for this block starting from the previous block

            if substack.ndim != 3: # a final block of one page comes back as a 2d image
                substack = np.expand_dims(substack, axis=0)

            prev_frames = _remove_artifact_and_save(substack, prev_frames, remove_frames[i], output_dir,
                                                    base_filename, iteration, width_thresh, n_planes[row])
            iteration += 1

In [6]:
# Build one suite2p `db` dict per experiment (these override the default `ops`).
# important: data paths must be lists even if only one element

# sampling rate
fps = [float(i) for i in list(for_processing.loc[:,'fps'])]

# cell diameter
zoom = [float(i) for i in list(for_processing.loc[:,'zoom'])] # for s2p cell diameter calculation

user_batch_size = 500 # number of frames to be processed at once (i.e. registered)

db = []

for i,tiff_list in enumerate(tiff_lists):

    if isinstance(tiff_list, str): # tolerate a bare path so tiff_list[0] is a file, not a character
        tiff_list = [tiff_list]

    print(tiff_list[0])

    # decide by sheet row index rather than by comparing file lists, so two rows with
    # identical file lists cannot be confused
    if i in photostim_idx:
        print('Photostim experiment')
        # NOTE(review): photostim experiments should be run on the artifact-removed TIFFs, but
        # the lookup of the 'Artifact_removed' sub-folder was disabled, so suite2p is currently
        # pointed at the raw data folder. Confirm which is intended before relying on results.
    else:
        print('Whisker stim experiment')

    # data_path entries must be folder strings; the whisker branch used to pass the whole
    # list of files wrapped in two extra lists
    folder_name = os.path.dirname(tiff_list[0])

    sampling_rate = fps[i]/n_planes[i] # per-plane rate: overall frame rate shared between planes
    diameter = (5.5 * zoom[i]) * (frame_size[i][0] / 512) # cell is 5.5 pixels in diameter at 1x zoom, multiplied by the resolution scale
    batch_size = user_batch_size * (512 / frame_size[i][0] ) # larger frames will be more RAM intensive, scaled user batch size based on 512x512 images

    db.append({ 'data_path' : [folder_name],
              'fs' : float(sampling_rate),
              'diameter' : float(diameter),
              'batch_size' : int(batch_size),
              'nimg_init' : int(batch_size),
              'nplanes' : n_planes[i]
              })

db

P:rlees\Data\2019-03-12\RL025\2019-03-12_RL025_t-007\MPTIFF\2019-03-12_RL025_t-007_Cycle00001_Ch3.tif
Photostim experiment
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-006\2019-04-11_RL025_t-006_Cycle00001_Ch3.tif
Photostim experiment
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-007\2019-04-11_RL025_t-007_Cycle00001_Ch3.tif
Photostim experiment
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-008\2019-04-11_RL025_t-008_Cycle00001_Ch3.tif
Photostim experiment
P:aharris\Data\2019-04-11\2019-04-11_RL025_t-009\2019-04-11_RL025_t-009_Cycle00001_Ch3.tif
Photostim experiment


[{'data_path': ['P:rlees\\Data\\2019-03-12\\RL025\\2019-03-12_RL025_t-007\\MPTIFF'],
  'fs': 30.0,
  'diameter': 11.0,
  'batch_size': 500,
  'nimg_init': 500,
  'nplanes': 1},
 {'data_path': ['P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-006'],
  'fs': 30.0,
  'diameter': 11.0,
  'batch_size': 500,
  'nimg_init': 500,
  'nplanes': 1},
 {'data_path': ['P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-007'],
  'fs': 30.0,
  'diameter': 11.0,
  'batch_size': 500,
  'nimg_init': 500,
  'nplanes': 1},
 {'data_path': ['P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-008'],
  'fs': 30.0,
  'diameter': 11.0,
  'batch_size': 500,
  'nimg_init': 500,
  'nplanes': 1},
 {'data_path': ['P:aharris\\Data\\2019-04-11\\2019-04-11_RL025_t-009'],
  'fs': 30.0,
  'diameter': 11.0,
  'batch_size': 500,
  'nimg_init': 500,
  'nplanes': 1}]

In [7]:
# Run suite2p on every experiment in `db` and report the total wall-clock time in seconds.
t1 = time.time()

# Keep the ops returned for every experiment. Previously only the last run's result
# survived, so earlier datasets could not be inspected after an hours-long run.
ops_end_all = []

for dbi in db:
    print(dbi['data_path']) # show which dataset the following suite2p log belongs to
    opsEnd = run_s2p(ops=ops,db=dbi) # opsEnd still holds the most recent result, as before
    ops_end_all.append(opsEnd)

t2 = time.time()
t2 - t1 # elapsed seconds

Found 1 tifs




30770
time 1577.6624. Wrote tifs to binaries for 1 planes
C:\BIN\suite2p\plane0\data_raw.bin
computed reference frame for registration
registered 2500/30770 frames in time 240.55
registered 5000/30770 frames in time 478.96
registered 7500/30770 frames in time 717.83
registered 10000/30770 frames in time 957.22
registered 12500/30770 frames in time 1195.59
registered 15000/30770 frames in time 1433.72
registered 17500/30770 frames in time 1673.76
registered 20000/30770 frames in time 1911.99
registered 22500/30770 frames in time 2150.23
registered 25000/30770 frames in time 2388.64
registered 27500/30770 frames in time 2623.74
registered 30000/30770 frames in time 2860.98
computed registration metrics in time 3048.85
time 4861.7706. Registration complete for 1 planes
[11 11]
nt0=60
(512, 504, 505)
6.0
ROIs: 108, cost: 0.0297, time: 199.2244
ROIs: 160, cost: 0.0266, time: 219.3036
ROIs: 170, cost: 0.0262, time: 225.3040
nt0=60
(512, 504, 505)
ROIs: 170, cost: 0.1387, time: 255.0470
remov



2988
time 145.2548. Wrote tifs to binaries for 1 planes
C:\BIN\suite2p\plane0\data_raw.bin
computed reference frame for registration
registered 2500/2988 frames in time 233.74
computed registration metrics in time 337.46
time 658.8996. Registration complete for 1 planes
[11 11]
nt0=60
(49, 507, 508)
6.0
ROIs: 200, cost: 0.1878, time: 9.7109
ROIs: 400, cost: 0.1581, time: 15.7693
ROIs: 553, cost: 0.1423, time: 20.3832
ROIs: 579, cost: 0.1388, time: 23.3308
ROIs: 585, cost: 0.1375, time: 25.7790
nt0=60
(49, 507, 508)
ROIs: 585, cost: 0.2914, time: 29.1233
removed 31 overlapping ROIs
ROIs: 554, cost: 0.2657, time: 34.7572
ROIs: 554, cost: 0.2645, time: 37.4006
time 49.5184. Found 554 ROIs
extracted 0/2988 frames in 2.02 sec
extracted 2988/2988 frames in 6.53 sec
time 64.8666. Extracted fluorescence from 554 ROIs
results saved to P:aharris\Data\2019-04-11\2019-04-11_RL025_t-006\suite2p\plane0
time 764.9155. Detected spikes in 554 ROIs
C:\ProgramData\Anaconda3\lib\site-packages\suite2p\clas



30472
time 1517.1066. Wrote tifs to binaries for 1 planes
C:\BIN\suite2p\plane0\data_raw.bin
computed reference frame for registration
registered 2500/30472 frames in time 244.77
registered 5000/30472 frames in time 488.98
registered 7500/30472 frames in time 734.23
registered 10000/30472 frames in time 979.23
registered 12500/30472 frames in time 1217.37
registered 15000/30472 frames in time 1456.54
registered 17500/30472 frames in time 1695.16
registered 20000/30472 frames in time 1935.21
registered 22500/30472 frames in time 2174.63
registered 25000/30472 frames in time 2414.67
registered 27500/30472 frames in time 2653.29
registered 30000/30472 frames in time 2900.43
computed registration metrics in time 3085.57
time 4841.5600. Registration complete for 1 planes
[11 11]
nt0=60
(507, 505, 506)
6.0
ROIs: 200, cost: 0.1314, time: 53.7915
ROIs: 400, cost: 0.1061, time: 61.7396
ROIs: 491, cost: 0.0995, time: 67.0337
ROIs: 502, cost: 0.0984, time: 71.1602
nt0=60
(507, 505, 506)
ROIs: 502



3287
time 166.9700. Wrote tifs to binaries for 1 planes
C:\BIN\suite2p\plane0\data_raw.bin
computed reference frame for registration
registered 2500/3287 frames in time 244.31
computed registration metrics in time 387.25
time 737.8819. Registration complete for 1 planes
[11 11]
nt0=60
(54, 509, 509)
6.0
ROIs: 200, cost: 0.1372, time: 18.2481
ROIs: 400, cost: 0.0982, time: 24.9589
ROIs: 504, cost: 0.0899, time: 29.2192
ROIs: 525, cost: 0.0876, time: 31.9559
ROIs: 531, cost: 0.0867, time: 34.5032
nt0=60
(54, 509, 509)
ROIs: 531, cost: 0.2238, time: 38.4076
removed 45 overlapping ROIs
ROIs: 486, cost: 0.2153, time: 44.6417
ROIs: 486, cost: 0.2127, time: 48.7879
time 61.3040. Found 486 ROIs
extracted 0/3287 frames in 2.00 sec
extracted 3287/3287 frames in 7.42 sec
time 77.3891. Extracted fluorescence from 486 ROIs
results saved to P:aharris\Data\2019-04-11\2019-04-11_RL025_t-008\suite2p\plane0
time 856.9694. Detected spikes in 486 ROIs
C:\ProgramData\Anaconda3\lib\site-packages\suite2p\cla



30472
time 1478.5729. Wrote tifs to binaries for 1 planes
C:\BIN\suite2p\plane0\data_raw.bin
computed reference frame for registration
registered 2500/30472 frames in time 248.85
registered 5000/30472 frames in time 498.59
registered 7500/30472 frames in time 746.57
registered 10000/30472 frames in time 995.64
registered 12500/30472 frames in time 1243.85
registered 15000/30472 frames in time 1491.65
registered 17500/30472 frames in time 1738.51
registered 20000/30472 frames in time 1984.35
registered 22500/30472 frames in time 2217.67
registered 25000/30472 frames in time 2451.10
registered 27500/30472 frames in time 2685.01
registered 30000/30472 frames in time 2917.83
computed registration metrics in time 3083.22
time 4797.1679. Registration complete for 1 planes
[11 11]
nt0=60
(507, 507, 508)
6.0
ROIs: 200, cost: 0.2817, time: 49.7698
ROIs: 400, cost: 0.2349, time: 59.2596
ROIs: 507, cost: 0.2205, time: 65.2806
ROIs: 538, cost: 0.2169, time: 69.4039
ROIs: 545, cost: 0.2158, time: 7

17302.224670171738