## This Jupyter Notebook extracts information about experimental conditions from image file names and creates inventories of image files and ROIs within each file for later data analysis
### Input: 
1. csv files containing fluorescent values for each time point of individual images with names specifiying experimental specifications separated by "_";
2. csv file storing the specifications of each image file batch_df_name_copy.csv
3. csv file storing the list of each ROI within each image file
### Output: 
1. Unormalized data with background-subtracted, bleedthrough corrected values with the following columns:
   - Annotation columns (same as batch_df_name_copy.csv)
   - Baseline
   - Unormalized fluorescent values from time point 0 to 123.
3. Normalized data with dF/F values, a column of baseline values and a column of dF/F peak average.
   - Annotation columns and Baseline same as above
   - dF/F_Mean_p (for positive peaks) or dF/F_Mean_n (for negative peaks)
   - Unormalized fluorescent values from time point 0 to 123.

## Import packages and define functions
Import necesssary packages and define the functions for normalization "norm()" and peak detection "find_peaks()"

In [1]:
#Import necessary packages
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

from math import nan
from typing import ClassVar, Type
from glob import glob
import pprint
import pylab as P
from tqdm import tqdm

In [2]:
#Get file directory
import os
from os import listdir
from os.path import isfile, join
cwd = os.getcwd()
cwd

'C:\\Users\\fanr\\OneDrive - Max Planck Florida Institute for Neuroscience\\Codes\\mitoGCaMP\\manuscript'

In [5]:
#Specify the least number of time points in between peaks. For 0.3Hz & 0.2Hz stimulation protocols, INTER_PEAK_DISTANCE = 2
INTER_PEAK_DISTANCE = 2

#Specify the number of baseline timepoints
BS = 5

#Define function for normalization and peak detection

#Returns the normalized values (dF/F) of an array of time-lapse fluorescent value data 
def norm(arr):
    baseline = np.sum(arr[0:BS])/BS
    norm_arr = arr/baseline-1

    return norm_arr

#Detects peaks above threshold (2 times standard deviation of the baseline) in a normalized time-lapse fluorescent value data.
#Returns peak values and peak index.
def find_peaks(arr):
    global THRESHOLD_HEIGHT,INTER_PEAK_DISTANCE
    temp_peaks = []
    temp_peakcen = []
    real_peaks = []
    real_peakcen = []
    
    threshold = 2*np.std(arr[0:BS])
    arr = arr[~np.isnan(arr)]
    
    peak_one = 0
    for i in range(len(arr)):
        
        
        if i > BS-1 and i < len(arr)-1:
            if arr[i] > arr[i-1] and arr[i] > arr[i+1]:
                if peak_one == 0:
                    peak_one = i
                    temp_peakcen.append(i)
                    temp_peaks.append(arr[i])
                else:
                    if i - peak_one > INTER_PEAK_DISTANCE:
                        peak_one = i
                        temp_peakcen.append(i)
                        temp_peaks.append(arr[i])
                    elif arr[i] > arr[peak_one]:
                        peak_one = i
                        temp_peakcen.pop(-1)
                        temp_peaks.pop(-1)
                        temp_peakcen.append(i)
                        temp_peaks.append(arr[i])

    for i in range(len(temp_peaks)):
        if temp_peaks[i] > threshold:
            real_peaks.append(temp_peaks[i])
            real_peakcen.append(temp_peakcen[i])

    return [real_peaks, real_peakcen]



## Import files

In [7]:
#import all files from the source folder
#mypath: path for source folder
mypath = ""

onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
onlyfiles

['052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell2_stim3_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell2_stim3_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell3_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell3_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell3_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell3_stim2_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell4_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell4_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell4_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell4_

In [9]:
#Specify batch, same as the prefix of output files from "Get_file_info"
batch = 'BATCH NAME'

In [19]:
#import name/selection files
df_name = pd.read_csv(batch+'_df_name_copy.csv')
selection =pd.read_csv(batch+'_selection.csv')


## Combine data from all files to an unormalized summary data frame df0
df0 will contain the annotating columns specified by the "batch_df_name_copy.csv", columns for channel and ROI name specified in "batch_selection.csv", and columns for background-subtracted fluoresent intensity values at each time point. The fluorescent intensity profile for each ROI of each channel under a specific condition is represented by one row.

In [27]:
df_unorm = []

ncol = 0
for i in onlyfiles:
    selection_i = selection[selection.File == i]
    selection_i2 = selection_i[selection_i['Selection']!='bg']
    df_name_i = df_name[df_name.File == i]
    
    df = pd.read_csv(mypath+'\\'+i)
    df = df.iloc[:,1:]
    
    
    
    for j in list(range(selection_i2.shape[0])):
        
        stim = selection_i2['Point'].iloc[j]
        channel = selection_i2['Channel'].iloc[j]
        select = selection_i2['Selection'].iloc[j]
       
        bg_arr = df[channel+"_"+str(stim)+"_bg"].values
        f_arr = df[channel+"_"+str(stim)+"_"+select].values      
        bg_subtract_arr = f_arr - bg_arr

        ncol = max(ncol,len(f_arr.tolist()))
        

        unorm_list = df_name_i.values.tolist()[0] + [stim]+[channel]+[select]+bg_subtract_arr.tolist()

        
        df_unorm = df_unorm+[unorm_list]
    

df0 = pd.DataFrame(df_unorm,columns =df_name.columns.to_list()+ ['Point','Channel','Selection'] + list(range(ncol)))



df0

Unnamed: 0,File,Date,Dish,Cell,Stim,C1,C0,Treatment,Point,Channel,...,114,115,116,117,118,119,120,121,122,123
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,488mitoGCaMP6f,561RCaMP,myrCN27,1,C0,...,127.375,100.607,94.382,170.324,127.481,108.759,99.500,166.572,129.498,108.654
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,488mitoGCaMP6f,561RCaMP,myrCN27,1,C1,...,8.274,-0.673,-2.087,0.492,0.404,-0.371,3.074,-2.751,2.110,2.002
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,488mitoGCaMP6f,561RCaMP,myrCN27,1,C0,...,202.972,168.880,143.564,248.077,202.493,158.339,160.682,270.351,201.606,159.992
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,488mitoGCaMP6f,561RCaMP,myrCN27,1,C1,...,21.494,20.426,19.793,21.349,16.880,12.265,24.575,23.399,21.284,17.509
4,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,488mitoGCaMP6f,561RCaMP,ctrl,1,C0,...,158.694,138.287,102.575,188.079,149.673,130.428,115.256,193.885,176.759,131.948
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,488mitoGCaMP6f,561RCaMP,myrCN27,1,C1,...,99.669,94.664,84.893,92.195,85.575,88.943,83.862,99.535,123.782,118.838
92,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,488mitoGCaMP6f,561RCaMP,ctrl,1,C0,...,295.091,217.760,207.677,468.865,271.883,194.855,161.510,444.996,265.756,179.755
93,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,488mitoGCaMP6f,561RCaMP,ctrl,1,C1,...,8.220,4.775,0.022,13.896,6.062,3.889,1.764,15.100,6.235,2.193
94,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,488mitoGCaMP6f,561RCaMP,ctrl,1,C0,...,428.353,320.923,286.189,672.683,497.490,353.113,287.700,593.087,416.951,314.386


## Correcting bleedthrough
The percentage of bleedthrough from 561 channel to 488 channel with the same imaging setting has been measured to be 0.57%. This amount is subtracted from the images before further analysis. Bleedtrough-corrected values are added as additional rows named "C1_correct".

In [29]:
#Create a dataframe "df0_correct" containing the C1 (488) bleedthrough-corrected values
df0 = df0.drop(columns = ['C0','C1'])
df0_C0 = df0[df0['Channel']=='C0'].drop(columns=['Channel'])
df0_C1 = df0[df0['Channel']=='C1'].drop(columns=['Channel'])

df0_rearrange = df0_C0.merge(df0_C1, on =[   'File',    'Date', 'Dish',      'Cell','Stim','Treatment', 'Point',
       'Selection'],how = 'outer',suffixes = ['_C0','_C1'])


for i in range(0,124):
    df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057

col = list(df0_rearrange.columns[8:256])
df0_correct = df0_rearrange.drop(columns = col)

df0_correct['Channel']=['C1_correct']*df0_correct.shape[0]
df0_correct = df0_correct[[  'File',    'Date',     'Dish',      'Cell','Stim','Treatment','Point','Channel', 'Selection']+list(df0_correct.columns[8:132])]

df0_correct

  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_rearrange[str(i)+'_C0']*0.0057
  df0_rearrange[str(i)+'_C1_correct'] = df0_rearrange[str(i)+'_C1']-df0_r

Unnamed: 0,File,Date,Dish,Cell,Stim,Treatment,Point,Channel,Selection,0_C1_correct,...,114_C1_correct,115_C1_correct,116_C1_correct,117_C1_correct,118_C1_correct,119_C1_correct,120_C1_correct,121_C1_correct,122_C1_correct,123_C1_correct
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1_correct,sp,-3.003996,...,7.547963,-1.24646,-2.624977,-0.478847,-0.322642,-0.990926,2.50685,-3.70046,1.371861,1.382672
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1_correct,stim,19.055729,...,20.33706,19.463384,18.974685,19.934961,15.72579,11.362468,23.659113,21.857999,20.134846,16.597046
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C1_correct,sp,1.103063,...,0.721444,5.045764,6.719322,1.12695,2.086864,-5.41244,4.080041,-0.228144,3.400474,3.941896
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C1_correct,stim,7.776503,...,14.500425,8.879116,10.06612,11.694554,14.159679,10.877237,9.962066,14.355455,10.910059,9.97757
4,052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,2,myrCN27,1,C1_correct,sp,3.452827,...,0.195066,6.207745,-0.551168,9.059663,-1.82918,4.820443,0.170348,1.110632,8.262192,4.323084
5,052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,2,myrCN27,1,C1_correct,stim,15.778655,...,109.642237,103.161922,98.660514,104.797758,111.034006,91.875026,100.14942,111.206281,107.793678,96.060485
6,052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,2,ctrl,1,C1_correct,sp,5.430585,...,3.380802,2.841811,7.508979,4.854083,3.172012,6.417098,7.683265,1.727858,1.807715,3.402451
7,052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,2,ctrl,1,C1_correct,stim,18.700661,...,97.784364,96.081665,94.759731,95.241863,101.730134,90.848496,93.876478,94.710003,102.347238,97.884125
8,052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell2,1,myrCN27,1,C1_correct,sp,-2.01866,...,5.827324,-1.01846,-0.439438,1.892954,-1.210547,6.553121,4.003837,0.042018,3.249642,2.85241
9,052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell2,1,myrCN27,1,C1_correct,stim,14.010918,...,28.652034,25.503533,27.879262,24.51956,24.070426,29.28239,25.335876,27.937904,28.570824,29.492711


In [31]:
# Add the bleedthrough-corrected data into the main dataframe df0
df0_correct.columns = [  'File',    'Date', 'Dish',      'Cell','Stim','Treatment','Point','Channel', 'Selection' ]+list(df0.columns[9:])
df0 = pd.concat([df0,df0_correct],axis=0)
df0.replace(0, np.nan, inplace=True)
df0

Unnamed: 0,File,Date,Dish,Cell,Stim,Treatment,Point,Channel,Selection,0,...,114,115,116,117,118,119,120,121,122,123
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,sp,45.087000,...,127.375000,100.607000,94.382000,170.324000,127.481000,108.759000,99.500000,166.572000,129.498000,108.654000
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,sp,-2.747000,...,8.274000,-0.673000,-2.087000,0.492000,0.404000,-0.371000,3.074000,-2.751000,2.110000,2.002000
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,stim,148.644000,...,202.972000,168.880000,143.564000,248.077000,202.493000,158.339000,160.682000,270.351000,201.606000,159.992000
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,stim,19.903000,...,21.494000,20.426000,19.793000,21.349000,16.880000,12.265000,24.575000,23.399000,21.284000,17.509000
4,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C0,sp,49.989000,...,158.694000,138.287000,102.575000,188.079000,149.673000,130.428000,115.256000,193.885000,176.759000,131.948000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,052625_dish1_cell6_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,1,ctrl,1,C1_correct,stim,16.661540,...,71.448396,66.407014,62.533867,78.663016,74.185153,68.061440,61.319028,71.418889,73.224165,70.011097
44,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,sp,-2.665894,...,13.491168,9.715104,1.144497,10.650109,3.484582,3.229086,-2.634690,17.235059,5.759455,-2.579988
45,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,stim,22.345828,...,98.178883,93.326980,83.715483,90.620535,84.218782,87.627412,82.560867,97.346006,122.169960,117.428761
46,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,ctrl,1,C1_correct,sp,3.299147,...,6.537981,3.533768,-1.161759,11.223470,4.512267,2.778327,0.843393,12.563523,4.720191,1.168396


## Calculate & Rearrange Baseline
Calculate the baseline value before stimulation for dF/F analysis in the "normalization" section. For ERGCaMP experiments, the ERGCaMP signal has been observed to have an initial photo-bleaching phase. Therefore, 15 baseline time points were acquired as prior to stimulation but only the last 5 time points were averaged to get the baseline value.

In [33]:
t0 = 9 #The start of the baseline
t1 = 14 #The end of the baseline



df0.iloc[:,t0:t1].dropna(axis=1,inplace=True)
df0['Baseline'] = df0.iloc[:,t0:t1].apply(np.mean,axis=1)
cols = list(df0)
cols.insert(t0, cols.pop(cols.index('Baseline')))
df0 = df0.loc[:, cols]

df0

  df0['Baseline'] = df0.iloc[:,t0:t1].apply(np.mean,axis=1)


Unnamed: 0,File,Date,Dish,Cell,Stim,Treatment,Point,Channel,Selection,Baseline,...,114,115,116,117,118,119,120,121,122,123
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,sp,50.036400,...,127.375000,100.607000,94.382000,170.324000,127.481000,108.759000,99.500000,166.572000,129.498000,108.654000
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,sp,-1.120200,...,8.274000,-0.673000,-2.087000,0.492000,0.404000,-0.371000,3.074000,-2.751000,2.110000,2.002000
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,stim,158.863600,...,202.972000,168.880000,143.564000,248.077000,202.493000,158.339000,160.682000,270.351000,201.606000,159.992000
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,stim,19.746000,...,21.494000,20.426000,19.793000,21.349000,16.880000,12.265000,24.575000,23.399000,21.284000,17.509000
4,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C0,sp,53.829800,...,158.694000,138.287000,102.575000,188.079000,149.673000,130.428000,115.256000,193.885000,176.759000,131.948000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,052625_dish1_cell6_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,1,ctrl,1,C1_correct,stim,17.508097,...,71.448396,66.407014,62.533867,78.663016,74.185153,68.061440,61.319028,71.418889,73.224165,70.011097
44,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,sp,2.920524,...,13.491168,9.715104,1.144497,10.650109,3.484582,3.229086,-2.634690,17.235059,5.759455,-2.579988
45,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,stim,23.141075,...,98.178883,93.326980,83.715483,90.620535,84.218782,87.627412,82.560867,97.346006,122.169960,117.428761
46,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,ctrl,1,C1_correct,sp,1.388332,...,6.537981,3.533768,-1.161759,11.223470,4.512267,2.778327,0.843393,12.563523,4.720191,1.168396


In [35]:
# save unormalized data
df0.to_csv(batch + "_unorm.csv",index=False)

## Normalization
To normalize the data with the baseline values using the formula of dF/F = (F-F0)/F0, where F is the fluorescent value at each time point, and F0 is the baseline value calculated earlier. For ERGCaMP experiments, the first 10 time points are removed from the normalized dataset.

In [37]:
# For mitoGCaMP and RCaMP analysis
c = 10 # Number of columns including Baseline column
df_norm = pd.concat([df0.iloc[:,0:c],df0.iloc[:,c:].apply(norm,axis=1)],axis = 1)
df_norm

Unnamed: 0,File,Date,Dish,Cell,Stim,Treatment,Point,Channel,Selection,Baseline,...,114,115,116,117,118,119,120,121,122,123
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,sp,50.036400,...,1.545647,1.010676,0.886267,2.404002,1.547765,1.173598,0.988552,2.329016,1.588076,1.171499
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,sp,-1.120200,...,-8.386181,-0.399214,0.863060,-1.439207,-1.360650,-0.668809,-3.744153,1.455811,-2.883592,-2.787181
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,stim,158.863600,...,0.277650,0.063050,-0.096307,0.561572,0.274634,-0.003302,0.011446,0.701781,0.269051,0.007103
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,stim,19.746000,...,0.088524,0.034437,0.002380,0.081181,-0.145143,-0.378862,0.244556,0.184999,0.077889,-0.113289
4,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C0,sp,53.829800,...,1.948070,1.568967,0.905543,2.493957,1.780486,1.422970,1.141119,2.601815,2.283664,1.451207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,052625_dish1_cell6_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,1,ctrl,1,C1_correct,stim,17.508097,...,3.080877,2.792932,2.571711,3.492951,3.237191,2.887427,2.502324,3.079192,3.182303,2.998784
44,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,sp,2.920524,...,3.619433,2.326493,-0.608119,2.646642,0.193136,0.105653,-1.902129,4.901357,0.972062,-1.883399
45,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,stim,23.141075,...,3.242624,3.032958,2.617614,2.916004,2.639363,2.786661,2.567720,3.206633,4.279355,4.074473
46,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,ctrl,1,C1_correct,sp,1.388332,...,3.709234,1.545333,-1.836802,7.084137,2.250135,1.001197,-0.392514,8.049363,2.399900,-0.158417


In [117]:
# For ERGCaMP: remove 10 baselines
c=12 # Number of columns including Baseline column
c1 = 22 # c + (number of baseline timepoints to be removed)
df_norm = pd.concat([df0.iloc[:,0:c],df0.iloc[:,c1:].apply(norm,axis=1)],axis = 1)

df_norm.columns=list(df_norm.columns[0:12]) +list(range(50))

df_norm

Unnamed: 0,File,Date,Dish,Cell,Stim1,C1,C0,Treatment,Replicate,Channel,...,40,41,42,43,44,45,46,47,48,49
0,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,ctrl,0,C0,...,-0.959046,-0.706571,-0.274227,-0.356586,0.058056,-0.180018,0.436544,-0.462646,0.392739,-0.905941
1,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,ctrl,0,C1,...,-0.059735,-0.039983,-0.056874,-0.033314,-0.040394,-0.040454,-0.051630,-0.072988,-0.070390,-0.031004
2,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,2APB,0,C0,...,1.119597,0.415710,1.291487,0.405959,0.814141,0.092567,-0.512903,-0.230577,0.670824,-0.204272
3,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,2APB,0,C1,...,-0.051437,0.014318,-0.045588,0.058842,0.030833,-0.048681,0.024859,0.058072,0.122229,0.141520
4,110924_dish1_cell1_stim2_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,2,488ERGCaMP,561PSDmCh,ctrl,0,C0,...,-1.259509,-0.157232,0.215494,-2.232668,4.636687,0.901158,-0.429462,-1.276046,0.825467,0.857906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,ctrl,0,C1,...,-0.221111,-0.345809,-0.233818,-0.172668,-0.118316,-0.218189,-0.175177,-0.044730,-0.179450,-0.153576
70,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,1,C0,...,-3.642183,-3.529906,-4.927597,-8.055614,3.271773,-2.076600,-5.605456,-4.485834,-3.369360,-0.611752
71,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,1,C1,...,-0.258938,-0.675583,-0.428827,-0.338593,-0.398865,-0.336091,-0.471077,-0.296089,-0.350217,-0.582119
72,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,2,C0,...,-6.573770,-29.401639,5.204918,2.639344,46.295082,-13.000000,-38.344262,60.229508,11.590164,58.114754


## Find peaks - Positive peaks
For MitoGCaMP and RCaMP analysis, average the first 10 peaks detected using the "find_peaks" function and add a column after the baseline column.

In [39]:
c = 10 # Number of columns including Baseline
unc = 10 # Number of peaks to be averaged.

df_norm['dF/F_Mean_p'] = (df_norm.iloc[:,c:]).apply(find_peaks,axis=1, result_type='expand')[0].apply(lambda x: np.mean(x[0:unc]))


df_norm = df_norm.fillna(0)

cols = list(df_norm)
cols.insert(c, cols.pop(cols.index('dF/F_Mean_p')))
df_norm = df_norm.loc[:, cols]


df_norm

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,File,Date,Dish,Cell,Stim,Treatment,Point,Channel,Selection,Baseline,...,114,115,116,117,118,119,120,121,122,123
0,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,sp,50.036400,...,1.545647,1.010676,0.886267,2.404002,1.547765,1.173598,0.988552,2.329016,1.588076,1.171499
1,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,sp,-1.120200,...,-8.386181,-0.399214,0.863060,-1.439207,-1.360650,-0.668809,-3.744153,1.455811,-2.883592,-2.787181
2,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C0,stim,158.863600,...,0.277650,0.063050,-0.096307,0.561572,0.274634,-0.003302,0.011446,0.701781,0.269051,0.007103
3,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,myrCN27,1,C1,stim,19.746000,...,0.088524,0.034437,0.002380,0.081181,-0.145143,-0.378862,0.244556,0.184999,0.077889,-0.113289
4,052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell1,1,ctrl,1,C0,sp,53.829800,...,1.948070,1.568967,0.905543,2.493957,1.780486,1.422970,1.141119,2.601815,2.283664,1.451207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43,052625_dish1_cell6_stim1_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,1,ctrl,1,C1_correct,stim,17.508097,...,3.080877,2.792932,2.571711,3.492951,3.237191,2.887427,2.502324,3.079192,3.182303,2.998784
44,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,sp,2.920524,...,3.619433,2.326493,-0.608119,2.646642,0.193136,0.105653,-1.902129,4.901357,0.972062,-1.883399
45,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,myrCN27,1,C1_correct,stim,23.141075,...,3.242624,3.032958,2.617614,2.916004,2.639363,2.786661,2.567720,3.206633,4.279355,4.074473
46,052625_dish1_cell6_stim2_488mitoGCaMP6f_561RCa...,52625,dish1,cell6,2,ctrl,1,C1_correct,sp,1.388332,...,3.709234,1.545333,-1.836802,7.084137,2.250135,1.001197,-0.392514,8.049363,2.399900,-0.158417


## Peak finding - Negative peaks
For ERGCaMP analysis, average the 6 inverse peaks and add one column after the baseline column.

In [119]:
c = 12 # Number of columns including Baseline
unc = 6 # Number of peaks to be averaged.

df_norm['Mean_n'] = -(-df_norm.iloc[:,c:]).apply(find_peaks,axis=1, result_type='expand')[0].apply(lambda x: np.mean(x[0:unc]))
df_norm = df_norm.fillna(0)

cols = list(df_norm)
cols.insert(c, cols.pop(cols.index('Mean_n')))
df_norm = df_norm.loc[:, cols]

df_norm

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,File,Date,Dish,Cell,Stim1,C1,C0,Treatment,Replicate,Channel,...,40,41,42,43,44,45,46,47,48,49
0,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,ctrl,0,C0,...,-0.959046,-0.706571,-0.274227,-0.356586,0.058056,-0.180018,0.436544,-0.462646,0.392739,-0.905941
1,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,ctrl,0,C1,...,-0.059735,-0.039983,-0.056874,-0.033314,-0.040394,-0.040454,-0.051630,-0.072988,-0.070390,-0.031004
2,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,2APB,0,C0,...,1.119597,0.415710,1.291487,0.405959,0.814141,0.092567,-0.512903,-0.230577,0.670824,-0.204272
3,110924_dish1_cell1_stim1_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,1,488ERGCaMP,561PSDmCh,2APB,0,C1,...,-0.051437,0.014318,-0.045588,0.058842,0.030833,-0.048681,0.024859,0.058072,0.122229,0.141520
4,110924_dish1_cell1_stim2_488ERGCaMP_561PSDmCh_...,110924,dish1,cell1,2,488ERGCaMP,561PSDmCh,ctrl,0,C0,...,-1.259509,-0.157232,0.215494,-2.232668,4.636687,0.901158,-0.429462,-1.276046,0.825467,0.857906
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,ctrl,0,C1,...,-0.221111,-0.345809,-0.233818,-0.172668,-0.118316,-0.218189,-0.175177,-0.044730,-0.179450,-0.153576
70,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,1,C0,...,-3.642183,-3.529906,-4.927597,-8.055614,3.271773,-2.076600,-5.605456,-4.485834,-3.369360,-0.611752
71,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,1,C1,...,-0.258938,-0.675583,-0.428827,-0.338593,-0.398865,-0.336091,-0.471077,-0.296089,-0.350217,-0.582119
72,111224_dish1_cell2_stim3_488ERGCaMP_561PSDmCh_...,111224,dish1,cell2,3,488ERGCaMP,561PSDmCh,2APB,2,C0,...,-6.573770,-29.401639,5.204918,2.639344,46.295082,-13.000000,-38.344262,60.229508,11.590164,58.114754


## Save the normalized dataframe

In [41]:
df_norm.to_csv(batch+'_norm.csv',index=False)