# Filepaths need to be adjusted, maybe some minor refactoring bugs left

In [6]:
import os
import sys
import glob

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import ndimage
from skimage import morphology
from skimage import io
from scipy import stats

In [2]:
def open_3ch(folder_path, path_BF, flip_upside_down=False):
    """Load the FI, BF and segmentation stacks belonging to one BF file.

    The FI and SEG files are looked up in the ``FI`` and ``SEG`` subfolders of
    ``folder_path``; their names are derived from the BF file name.

    Parameters
    ----------
    folder_path : str
        Folder that contains the ``FI`` and ``SEG`` subfolders.
    path_BF : str
        Path of the brightfield tif. Both ``\\`` and ``/`` are accepted as
        path separators.
    flip_upside_down : bool, optional
        If True, axis 1 of all three stacks is reversed.

    Returns
    -------
    im_fi, im_bf : ndarray
        Squeezed FI and BF image stacks.
    mask_bin : ndarray of bool
        True where the segmentation is non-zero and neither 2 nor 3.
    T : int
        ``im_fi.shape[0]``.
    """
    FI_folder_path = os.path.join(folder_path, 'FI')
    SEG_folder_path = os.path.join(folder_path, 'SEG')

    # file name without folders, independent of the separator used in path_BF
    file_name_BF = path_BF.replace('\\', '/').rsplit('/', 1)[-1]

    file_name_FI = file_name_BF[:-6] + 'FI.tif'
    path_FI = os.path.join(FI_folder_path, file_name_FI)

    # Two naming schemes exist for the segmentation: the BF name with its last
    # 4 or its last 7 characters replaced by '_SEG.tiff'. Use the first one
    # that exists; if neither does, reading the last candidate raises.
    seg_candidates = [os.path.join(SEG_folder_path, file_name_BF[:-n_strip] + '_SEG.tiff')
                      for n_strip in (4, 7)]
    path_SEG = seg_candidates[-1]
    for candidate in seg_candidates:
        if os.path.isfile(candidate):
            path_SEG = candidate
            break
    mask_bin = io.imread(path_SEG)

    im_fi = np.squeeze(io.imread(path_FI))
    # add [::2] if FI only every second frame
    im_bf = np.squeeze(io.imread(path_BF))

    # segmentation classes 2 and 3 are discarded, everything else non-zero is kept
    mask_bin[mask_bin == 2] = 0
    mask_bin[mask_bin == 3] = 0
    mask_bin = mask_bin.astype(bool)
    T = im_fi.shape[0]

    if flip_upside_down:
        im_fi = im_fi[:, ::-1, :]
        im_bf = im_bf[:, ::-1, :]
        mask_bin = mask_bin[:, ::-1, :]

    return im_fi, im_bf, mask_bin, T

# frame extraction function
def frame_extract(t):
    """Measure the mother cell, the second cell and the background in frame ``t``.

    Reads the notebook-level variables ``mask_bin``, ``im_fi``, ``im_bf``,
    ``verify_frames`` and ``sample_name``.

    The mask of frame ``t`` is eroded, replaced by per-object convex hulls,
    eroded again and labelled. Objects with an area below 50 pixels are
    ignored. The remaining object with the highest ``coordx`` (first
    centre-of-mass component) is taken as mother cell, the one with the second
    highest as second mother cell.

    Parameters
    ----------
    t : int
        Frame index into the image stacks.

    Returns
    -------
    dict
        Keys ``frame``, ``FI``, ``FIstd``, ``area``, ``coord_x``, ``coord_y``,
        ``FI2``, ``FI2std``, ``area2``, ``coord2_x``, ``coord2_y``,
        ``FIchannel``, ``FIchannelstd``, ``FIpdms``, ``FIpdmsstd``. Values of a
        cell that was not found are NaN.
    """
    frame_mask = mask_bin[t]
    frame_fi = im_fi[t]

    # process and label mask
    mask_proc = morphology.binary_erosion(frame_mask)
    mask_proc = morphology.convex_hull_object(mask_proc)
    mask_proc = morphology.binary_erosion(mask_proc)
    mask_label, num_features = ndimage.label(mask_proc)

    # extract centroid, area of every labelled object
    labels = np.arange(1, num_features + 1)
    # pixel count per label; entry 0 (background) is dropped
    area = np.bincount(mask_label.ravel(), minlength=num_features + 1)[1:]
    coord = np.array(
        [ndimage.center_of_mass(frame_mask, mask_label, label) for label in labels],
        dtype=float,
    ).reshape(-1, 2)
    df_frame = pd.DataFrame(
        {'label': labels, 'area': area, 'coordx': coord[:, 0], 'coordy': coord[:, 1]},
        columns=['label', 'area', 'coordx', 'coordy'])

    # filter by area
    area_threshold = 50  # pixels
    cells = df_frame[df_frame['area'] >= area_threshold].set_index('label')

    no_cell = (np.nan,) * 5

    def top_label(candidates):
        # label with the highest coordx, or None when there is no candidate
        if candidates.empty:
            return None
        try:
            return candidates['coordx'].idxmax()
        except ValueError:
            return None

    def cell_stats(label):
        # FI mean/std inside the labelled region plus its area and centroid
        pixels = frame_fi[mask_label == label]
        row = cells.loc[label]
        return np.mean(pixels), np.std(pixels), row['area'], row['coordx'], row['coordy']

    # find mother cell: highest x coord is mother cell
    mother_label = top_label(cells)
    if mother_label is None:
        print('No mother cell found for file ' + str(sample_name) + ' frame ' + str(t), end='\r')
        fi_mean, fi_std, mother_area, mother_coordx, mother_coordy = no_cell
    else:
        # optional: verification with plot
        if t in verify_frames:
            mask_mother = frame_mask.astype(int)
            mask_mother[mask_label == mother_label] = 2
            # NOTE(review): the figure is not closed after saving; many
            # verify_frames will accumulate open figures.
            plt.figure(figsize=(10,10))
            plt.subplot(131)
            plt.imshow(im_bf[t], cmap='Greys')
            plt.title('BF')
            plt.subplot(132)
            plt.imshow(im_fi[t], cmap='Greys')
            plt.title('FI')
            plt.subplot(133)
            plt.imshow(mask_mother)
            plt.title('Mask')
            plt.savefig(str(sample_name) + '_t' + str(t) + '_verify.png', dpi=300)

        fi_mean, fi_std, mother_area, mother_coordx, mother_coordy = cell_stats(mother_label)

    # extract SECOND mother cell value: second highest x coord
    if mother_label is None:
        # without a first mother cell there is no second one either (no message)
        fi2_mean, fi2_std, mother2_area, mother2_coordx, mother2_coordy = no_cell
    else:
        mother2_label = top_label(cells.drop(mother_label))
        if mother2_label is None:
            print('No 2nd mother cell found for file ' + str(sample_name) + ' frame ' + str(t), end='\r')
            fi2_mean, fi2_std, mother2_area, mother2_coordx, mother2_coordy = no_cell
        else:
            fi2_mean, fi2_std, mother2_area, mother2_coordx, mother2_coordy = cell_stats(mother2_label)

    # extract value of background (channel/bright and PDMS/dark)
    roi_channel = np.zeros(frame_fi.shape, dtype=bool)
    roi_channel[0:50, :] = True
    roi_PDMS = np.zeros(frame_fi.shape, dtype=bool)
    roi_PDMS[-100:-1, 0:25] = True
    roi_PDMS[-100:-1, -25:-1] = True
    channel_pixels = frame_fi[roi_channel]
    pdms_pixels = frame_fi[roi_PDMS]

    dic_fi = {
        'frame': t,
        'FI': fi_mean, 'FIstd': fi_std, 'area': mother_area,
        'coord_x': mother_coordx, 'coord_y': mother_coordy,
        'FI2': fi2_mean, 'FI2std': fi2_std, 'area2': mother2_area,
        'coord2_x': mother2_coordx, 'coord2_y': mother2_coordy,
        'FIchannel': np.mean(channel_pixels), 'FIchannelstd': np.std(channel_pixels),
        'FIpdms': np.mean(pdms_pixels), 'FIpdmsstd': np.std(pdms_pixels),
    }
    return dic_fi
    
# main loop
def analyse_tifs():
    """Run ``frame_extract`` on frames ``0 .. T-1`` and return one table row per frame.

    ``T`` is read from the notebook-level variable of that name.
    """
    result_columns = [
        'frame', 'FI', 'FIstd', 'area', 'coord_x', 'coord_y',
        'FI2', 'FI2std', 'area2', 'coord2_x', 'coord2_y',
        'FIchannel', 'FIchannelstd', 'FIpdms', 'FIpdmsstd',
    ]
    per_frame = [frame_extract(frame_index) for frame_index in range(T)]
    return pd.DataFrame(per_frame, columns=result_columns)

def background_correction(FIchannel, FIpdms, pdms_only = False):
    """Return the background signal divided by its own mean.

    The background is ``FIpdms`` when ``pdms_only`` is True, otherwise the
    element-wise mean of ``FIchannel`` and ``FIpdms``.
    """
    # todo: automatically detect high variation or buildup in feed channel
    if pdms_only:
        reference = FIpdms
    else:
        stacked = np.stack([FIchannel, FIpdms])
        reference = np.mean(stacked, axis=0)
    return reference / np.mean(reference)

def comp_cutoff(area, diff_crit, length_crit):
    """Return the first index of the first long run of nearly constant area.

    A run is a stretch of consecutive entries of ``np.diff(area)`` whose
    absolute value is below ``diff_crit``. Runs shorter than ``length_crit``
    are ignored.

    Parameters
    ----------
    area : array-like
        Area per frame.
    diff_crit : number
        Upper bound (exclusive) on the absolute frame-to-frame change.
    length_crit : int
        Minimum run length, counted in entries of the difference vector.

    Returns
    -------
    int or None
        Index into ``np.diff(area)`` at which the first qualifying run starts,
        or None if no run satisfies the criteria.
    """
    area_diff = np.diff(area)
    diff_condition = np.abs(area_diff) < diff_crit
    condition_labeled, num_features = ndimage.label(diff_condition)

    # length of every run; entry 0 counts the unlabelled positions
    run_lengths = np.bincount(condition_labeled.ravel(), minlength=num_features + 1)
    long_runs = np.flatnonzero(run_lengths[1:] >= length_crit) + 1
    if long_runs.size == 0:
        # no region that satisfies criteria
        return None

    # labels increase along the vector, so the lowest label is the earliest run
    return np.flatnonzero(condition_labeled == long_runs[0])[0]

# Dataset 1
- input: tif videos cropped to separate channels, with one folder each for brightfield (BF), GFP fluorescence intensity (FI) and the segmentation output from ilastik (SEG)

In [3]:
# input
# dataset root folder; the processing cell below reads its BF subfolder,
# open_3ch reads FI and SEG, and the per-channel tables go to its CSV subfolder
folder_path = '../191120_split' 
# frame indices for which frame_extract saves a verification plot (empty: none)
verify_frames = []

- process videos and output csv table for each channel

In [None]:
# iterate over all tifs in folder_path, assume subfolder structure (BF, FI and SEG) and specific filenames
BF_folder_path = os.path.join(folder_path, 'BF')
# glob returns files in no guaranteed order; sort so that 'id' is reproducible
BF_list = sorted(glob.glob(os.path.join(BF_folder_path, '*.tif')))

# make sure the output folder exists before the analysis starts
csv_out_folder = os.path.join(folder_path, 'CSV')
os.makedirs(csv_out_folder, exist_ok=True)

# first iterate over BF tifs, then open corresponding FI and SEG
interrupt = 1000  # stop once more than this many files have been processed
for i, path_BF in enumerate(BF_list, start=1):
    if i > interrupt:
        sys.exit( 0 )
    print('\n%i out of %i' % (i, len(BF_list)))
    # file name without folders, for both '\\' and '/' separators
    bf_file_name = path_BF.replace('\\', '/').rsplit('/', 1)[-1]
    sample_name = '191120' + bf_file_name[4:-7]
    # frame_extract/analyse_tifs read these notebook-level variables
    im_fi, im_bf, mask_bin, T = open_3ch(folder_path, path_BF)
    # execute main process
    df = analyse_tifs()
    df['name'] = str(sample_name)
    df['id'] = i
    # save CSV
    out_path = os.path.join(csv_out_folder, str(sample_name)) + '.csv'
    df.to_csv(out_path, index=False)

- open the csv tables of the individual channels, process them and summarize them in a single csv table

In [17]:
# folder with the per-channel csv tables and the list of tables found in it
csv_folder_path = '../191120_proc/5_data/CSV'
csv_pattern = os.path.join(csv_folder_path, '*.csv')
csv_files = list(glob.glob(csv_pattern))

In [18]:
# add time vector (extracted from metadata in separate script, index shift issue with this specific dataset) to dataframe
df_time = pd.read_csv('data/191120_time_vectors.csv')
# the last entry of each time vector is dropped
t_pre = df_time['t_pre'].values[:-1]
t_post = df_time['t_post'].values[:-1]

for path in csv_files:
    # file name without folders, for both '\\' and '/' separators
    csv_name = path.replace('\\', '/').rsplit('/', 1)[-1]
    # characters 10-11 of the file name hold the position number
    pos = int(csv_name[10:12])
    table = pd.read_csv(path, skiprows=0)
    # positions below 9 use t_pre, all others t_post
    if pos < 9:
        table['t'] = t_pre
    else:
        table['t'] = t_post
    # the table is written back to the same file
    table.to_csv(path, index=False)

In [None]:
# Add sep=, to csv files to open them easily in Excel
#print('sep=, added to the following files:')
#for file_name in csv_files:
#    with open(file_name, 'r') as fin:
#        content = fin.read().splitlines(True)
#    with open(file_name, 'w') as fout:
#        if content[0] != 'sep=,\n':
#            print(file_name)
#            content.insert(0,'sep=,\n')
#        fout.writelines(content)

In [None]:
# columns of the summary table
trace_columns = ['id', 'name', 'frame', 't', 'FI_corr', 'switch', 'background_CV', 'area', 'area2']
# per-channel tables that pass the background check (DataFrame.append no
# longer exists in pandas 2, so they are collected and concatenated once)
clean_tables = []
# loop over CSVs
for file_name in csv_files:
    # load data
    name = file_name.replace('\\', '/').rsplit('/', 1)[-1].split(sep='.')[0][7:]
    df = pd.read_csv(file_name, skiprows=0)

    # frame index from which the second mother cell is used (None: never)
    cutoff = comp_cutoff(df['area'].values, diff_crit=50, length_crit=15)
    df['switch'] = cutoff

    corr = background_correction(df['FIchannel'].values, df['FIpdms'].values)

    # coefficient of variation of the corrected channel background
    background_cv = stats.variation(df['FIchannel'].values/corr)
    df['background_CV'] = background_cv

    # clean FI signal
    # combine first/second mother cell
    if cutoff is not None:
        df['FI_corr'] = np.concatenate([df['FI'].values[:cutoff], df['FI2'].values[cutoff:]])
    else:
        df['FI_corr'] = df['FI'].values
    # background correction
    df['FI_corr'] = df['FI_corr'].values/corr

    # drop timepoint 151
    df = df[df['frame'] != 151]

    # add rows if background is ok (CV of channel below 0.04)
    if background_cv < 0.04:
        clean_tables.append(df[trace_columns])

if clean_tables:
    df_clean = pd.concat(clean_tables)
else:
    df_clean = pd.DataFrame(columns=trace_columns)
df_clean.to_csv('../191120_proc/5_data/191120_tracesX.csv', index=False)

# Dataset 2
- input: tif videos cropped to separate channels, with one subfolder each (3 in total) for brightfield (BF), GFP fluorescence intensity (FI) and the segmentation output from ilastik (SEG)

In [3]:
# input
# dataset root folder; the processing cell below reads its BF subfolder,
# open_3ch reads FI and SEG, and the per-channel tables go to its CSV subfolder
folder_path = '../200114_proc/4_gchannel_split' 
# frame indices for which frame_extract saves a verification plot (empty: none)
verify_frames = []

- process videos and output csv table for each channel

In [None]:
# iterate over all tifs in folder_path, assume subfolder structure (BF, FI and SEG) and specific filenames
BF_folder_path = os.path.join(folder_path, 'BF')
# glob returns files in no guaranteed order; sort so that 'id' is reproducible
# (traces are later excluded by id)
BF_list = sorted(glob.glob(os.path.join(BF_folder_path, '*.tif')))

# make sure the output folder exists before the analysis starts
csv_out_folder = os.path.join(folder_path, 'CSV')
os.makedirs(csv_out_folder, exist_ok=True)

# first iterate over BF tifs, then open corresponding FI and SEG
interrupt = 1000  # stop once more than this many files have been processed
for i, path_BF in enumerate(BF_list, start=1):
    if i > interrupt:
        sys.exit( 0 )
    print('\n%i out of %i' % (i, len(BF_list)))
    # file name without folders, for both '\\' and '/' separators
    bf_file_name = path_BF.replace('\\', '/').rsplit('/', 1)[-1]
    sample_name = '200114' + bf_file_name[4:-7]
    # this dataset is flipped along axis 1 while loading;
    # frame_extract/analyse_tifs read these notebook-level variables
    im_fi, im_bf, mask_bin, T = open_3ch(folder_path, path_BF, flip_upside_down=True)
    # execute main process
    df = analyse_tifs()
    df['name'] = str(sample_name)
    df['id'] = i
    # save CSV
    out_path = os.path.join(csv_out_folder, str(sample_name)) + '.csv'
    df.to_csv(out_path, index=False)

- open the csv tables of the individual channels, process them and summarize them in a single csv table
- for this dataset create one csv table containing all channels selected for BF plus one csv table containing channels selected for BF and FI

In [12]:
csv_folder_path = '../200114_proc/4_gchannel_split/CSV_select' # CSV_select folder for channels selected for FI
csv_files = glob.glob(os.path.join(csv_folder_path, '*.csv'))
if not csv_files:
    # fail with a clear message instead of an IndexError further down
    raise FileNotFoundError('No csv files found in ' + csv_folder_path)

# check that all the FI vectors are the same length
l = []
for path in csv_files:
    table = pd.read_csv(path, skiprows=0)
    l.append(len(table['FI']))
if len(set(l)) == 1:
    print("All fine")
else:
    print("Unequal length")

# time vector: one entry per frame of the first table, in steps of 1/6
t = np.array(range(l[0]))*1/6

All fine


In [None]:
# columns of the summary table
trace_columns = ['id', 'name', 'frame', 't', 'FI_corr', 'switch', 'background_CV', 'area', 'area2']
# per-channel tables (DataFrame.append no longer exists in pandas 2, so they
# are collected and concatenated once)
clean_tables = []
# loop over CSVs
for file_name in csv_files:
    print(file_name)
    # load data
    name = file_name.replace('\\', '/').rsplit('/', 1)[-1].split(sep='.')[0][7:]
    df = pd.read_csv(file_name, skiprows=0)
    df['t'] = t
    # frame index from which the second mother cell is used (None: never)
    cutoff = comp_cutoff(df['area'].values, diff_crit=50, length_crit=15)
    df['switch'] = cutoff

    # for this dataset only the PDMS region is used as background
    corr = background_correction(df['FIchannel'].values, df['FIpdms'].values, pdms_only=True)

    # coefficient of variation of the corrected channel background
    background_cv = stats.variation(df['FIchannel'].values/corr)
    df['background_CV'] = background_cv

    # clean FI signal
    # combine first/second mother cell
    if cutoff is not None:
        df['FI_corr'] = np.concatenate([df['FI'].values[:cutoff], df['FI2'].values[cutoff:]])
    else:
        df['FI_corr'] = df['FI'].values
    # background correction
    df['FI_corr'] = df['FI_corr'].values/corr

    # drop timepoint 136
    df = df[df['frame'] != 136]

    # no background filter here: every table is kept
    clean_tables.append(df[trace_columns])

if clean_tables:
    df_clean = pd.concat(clean_tables)
else:
    df_clean = pd.DataFrame(columns=trace_columns)
df_clean.to_csv('../200114_proc/200114_traces_select.csv', index=False)

In [11]:
# exclude some more traces from selection
exclude = [17, 20, 40, 53, 3, 51]
df = pd.read_csv('../200114_proc/200114_traces_select.csv')
# keep only the rows whose id is not in the exclusion list
keep_rows = ~df['id'].isin(exclude)
df = df[keep_rows]
df.to_csv('../200114_proc/200114_traces_select_new.csv', index=False)

In [None]:
csv_folder_path = '../200114_proc/4_gchannel_split/CSV' # all channels

# columns of the summary table
trace_columns = ['id', 'name', 'frame', 't', 'FI_corr', 'switch', 'background_CV', 'area', 'area2']
# per-channel tables (DataFrame.append no longer exists in pandas 2, so they
# are collected and concatenated once)
clean_tables = []
# loop over CSVs
csv_files_s = glob.glob(os.path.join(csv_folder_path, '*.csv'))
for file_name in csv_files_s:
    print(file_name)
    # load data
    name = file_name.replace('\\', '/').rsplit('/', 1)[-1].split(sep='.')[0][7:]
    df = pd.read_csv(file_name, skiprows=0)
    # t is the time vector computed in the length-check cell
    df['t'] = t
    # frame index from which the second mother cell is used (None: never)
    cutoff = comp_cutoff(df['area'].values, diff_crit=50, length_crit=15)
    df['switch'] = cutoff

    # for this dataset only the PDMS region is used as background
    corr = background_correction(df['FIchannel'].values, df['FIpdms'].values, pdms_only=True)

    # coefficient of variation of the corrected channel background
    background_cv = stats.variation(df['FIchannel'].values/corr)
    df['background_CV'] = background_cv

    # clean FI signal
    # combine first/second mother cell
    if cutoff is not None:
        df['FI_corr'] = np.concatenate([df['FI'].values[:cutoff], df['FI2'].values[cutoff:]])
    else:
        df['FI_corr'] = df['FI'].values
    # background correction
    df['FI_corr'] = df['FI_corr'].values/corr

    # drop timepoint 136
    df = df[df['frame'] != 136]

    # no background filter here: every table is kept
    clean_tables.append(df[trace_columns])

if clean_tables:
    df_clean = pd.concat(clean_tables)
else:
    df_clean = pd.DataFrame(columns=trace_columns)
df_clean.to_csv('../200114_proc/200114_traces_all.csv', index=False)

# Dataset 3
- control experiment, with aTc added
- acquisition freeze after t216, discard later timepoints

In [5]:
# input
# dataset root folder; the processing cell below reads its BF subfolder,
# open_3ch reads FI and SEG, and the per-channel tables go to its CSV subfolder
folder_path = '../200617_channels' 
# frame indices for which frame_extract saves a verification plot (empty: none)
verify_frames = []

- process videos and output csv table for each channel
- (rename segmentation suffix)

In [12]:
# iterate over all tifs in folder_path, assume subfolder structure (BF, FI and SEG) and specific filenames
BF_folder_path = os.path.join(folder_path, 'BF')
# glob returns files in no guaranteed order; sort so that 'id' is reproducible
BF_list = sorted(glob.glob(os.path.join(BF_folder_path, '*.tif')))

# make sure the output folder exists before the analysis starts
csv_out_folder = os.path.join(folder_path, 'CSV')
os.makedirs(csv_out_folder, exist_ok=True)

# first iterate over BF tifs, then open corresponding FI and SEG
interrupt = 1000  # stop once more than this many files have been processed
for i, path_BF in enumerate(BF_list, start=1):
    if i > interrupt:
        sys.exit( 0 )
    print('\n%i out of %i' % (i, len(BF_list)))
    # file name without folders, for both '\\' and '/' separators
    bf_file_name = path_BF.replace('\\', '/').rsplit('/', 1)[-1]
    # NOTE(review): the '200114' prefix is the one used for Dataset 2 and does
    # not match this dataset's folder (200617). It is kept because it ends up
    # in the csv file names and the 'name' column - confirm before changing.
    sample_name = '200114' + bf_file_name[4:-7]
    # frame_extract/analyse_tifs read these notebook-level variables
    im_fi, im_bf, mask_bin, T = open_3ch(folder_path, path_BF, flip_upside_down=False)
    # execute main process
    df = analyse_tifs()
    df['name'] = str(sample_name)
    df['id'] = i
    # save CSV
    out_path = os.path.join(csv_out_folder, str(sample_name)) + '.csv'
    df.to_csv(out_path, index=False)


1 out of 60
No mother cell found for file 20011417_02_gch4 frame 218 108
2 out of 60
No 2nd mother cell found for file 20011417_03_gch7 frame 218
3 out of 60
No 2nd mother cell found for file 20011417_03_gch9 frame 218
4 out of 60

5 out of 60
No mother cell found for file 20011417_04_gch5 frame 218 15
6 out of 60
No 2nd mother cell found for file 20011417_05_gch2 frame 218
7 out of 60
No 2nd mother cell found for file 20011417_06_gch11 frame 132
8 out of 60
No 2nd mother cell found for file 20011417_06_gch2 frame 218
9 out of 60
No mother cell found for file 20011417_08_gch1 frame 218 217
10 out of 60
No 2nd mother cell found for file 20011417_09_gch1 frame 183
11 out of 60
No 2nd mother cell found for file 20011417_09_gch7 frame 218
12 out of 60
No 2nd mother cell found for file 20011417_10_gch11 frame 180
13 out of 60
No mother cell found for file 20011417_10_gch1 frame 218 197
14 out of 60
No 2nd mother cell found for file 20011417_10_gch3 frame 218
15 out of 60
No 2nd mother cell

- open the csv tables of the individual channels, process them and summarize them in a single csv table
- cutoff after t216

In [30]:
csv_folder_path = '../200617_channels/CSV' # folder with the per-channel csv tables
csv_files = glob.glob(os.path.join(csv_folder_path, '*.csv'))
if not csv_files:
    # fail with a clear message instead of an IndexError further down
    raise FileNotFoundError('No csv files found in ' + csv_folder_path)

# check that all the FI vectors are the same length
l = []
for path in csv_files:
    table = pd.read_csv(path, skiprows=0)
    l.append(len(table['FI']))
if len(set(l)) == 1:
    print("All fine")
else:
    print("Unequal length")

# time vector: one entry per frame of the first table, in steps of 1/6
t = np.array(range(l[0]))*1/6

All fine


In [45]:
# columns of the summary table
trace_columns = ['id', 'name', 'frame', 't', 'FI_corr', 'switch', 'background_CV', 'area', 'area2']
# per-channel tables that pass the background check (DataFrame.append no
# longer exists in pandas 2, so they are collected and concatenated once)
clean_tables = []
# loop over CSVs
for file_name in csv_files:
    # load data
    name = file_name.replace('\\', '/').rsplit('/', 1)[-1].split(sep='.')[0][7:]
    df = pd.read_csv(file_name, skiprows=0)

    # frame index from which the second mother cell is used (None: never)
    cutoff = comp_cutoff(df['area'].values, diff_crit=50, length_crit=15)
    df['switch'] = cutoff

    corr = background_correction(df['FIchannel'].values, df['FIpdms'].values)

    # coefficient of variation of the corrected channel background
    background_cv = stats.variation(df['FIchannel'].values/corr)
    df['background_CV'] = background_cv

    # clean FI signal
    # combine first/second mother cell
    if cutoff is not None:
        df['FI_corr'] = np.concatenate([df['FI'].values[:cutoff], df['FI2'].values[cutoff:]])
    else:
        df['FI_corr'] = df['FI'].values
    # background correction
    df['FI_corr'] = df['FI_corr'].values/corr

    df['t'] = t

    # cut off last three timepoints (frames 216, 217 and 218)
    df = df[~df['frame'].isin([216, 217, 218])]

    # add rows if background is ok (CV of channel below 0.04)
    if background_cv < 0.04:
        clean_tables.append(df[trace_columns])

if clean_tables:
    df_clean = pd.concat(clean_tables)
else:
    df_clean = pd.DataFrame(columns=trace_columns)
df_clean.to_csv('../200617_channels/200617_traces.csv', index=False)



# Combine datasets for publication

In [17]:
# summary table of each dataset, keyed by the dataset number written to 'key'
paths = {1:'data/191120_traces.csv', 2:'data/200114_traces_select_new.csv', 3:'data/200617_traces.csv'}
# DataFrame.append no longer exists in pandas 2: collect and concatenate once
tables = []
for key, path in paths.items():
    df = pd.read_csv(path)
    df['key'] = key
    tables.append(df)
df_comb = pd.concat(tables, ignore_index=True)

df_comb.to_csv('data/dataset_s1.csv', index=False)

# Dataset 4 (control experiment repeat)
- input: tif videos cropped to separate channels, with one folder each for brightfield (BF), GFP fluorescence intensity (FI) and the segmentation output from ilastik (SEG)

In [7]:
# input
# dataset root folder; the processing cell below reads its BF subfolder,
# open_3ch reads FI and SEG, and the per-channel tables go to its CSV subfolder
folder_path = '../200721_split' 
# frame indices for which frame_extract saves a verification plot (empty: none)
verify_frames = []

- process videos and output csv table for each channel

In [4]:
# iterate over all tifs in folder_path, assume subfolder structure (BF, FI and SEG) and specific filenames
BF_folder_path = os.path.join(folder_path, 'BF')
# glob returns files in no guaranteed order; sort so that 'id' is reproducible
BF_list = sorted(glob.glob(os.path.join(BF_folder_path, '*.tif')))

# make sure the output folder exists before the analysis starts
csv_out_folder = os.path.join(folder_path, 'CSV')
os.makedirs(csv_out_folder, exist_ok=True)

# first iterate over BF tifs, then open corresponding FI and SEG
interrupt = 1000  # stop once more than this many files have been processed
for i, path_BF in enumerate(BF_list, start=1):
    if i > interrupt:
        sys.exit( 0 )
    print('\n%i out of %i' % (i, len(BF_list)))
    # file name without folders, for both '\\' and '/' separators
    bf_file_name = path_BF.replace('\\', '/').rsplit('/', 1)[-1]
    # NOTE(review): the '191120' prefix is the one used for Dataset 1 and does
    # not match this dataset's folder (200721). It is kept because it ends up
    # in the csv file names and the 'name' column - confirm before changing.
    sample_name = '191120' + bf_file_name[4:-7]
    # frame_extract/analyse_tifs read these notebook-level variables
    im_fi, im_bf, mask_bin, T = open_3ch(folder_path, path_BF)
    # execute main process
    df = analyse_tifs()
    df['name'] = str(sample_name)
    df['id'] = i
    # save CSV
    out_path = os.path.join(csv_out_folder, str(sample_name)) + '.csv'
    df.to_csv(out_path, index=False)


1 out of 19
No mother cell found for file 19112021_02_gch10 frame 296 294
2 out of 19
No 2nd mother cell found for file 19112021_02_gch11 frame 290
3 out of 19
No 2nd mother cell found for file 19112021_07_gch1 frame 2
4 out of 19
No 2nd mother cell found for file 19112021_07_gch3 frame 114
5 out of 19

6 out of 19

7 out of 19

8 out of 19

9 out of 19
No 2nd mother cell found for file 19112021_16_gch7 frame 296
10 out of 19
No 2nd mother cell found for file 19112021_17_gch10 frame 131
11 out of 19
No 2nd mother cell found for file 19112021_17_gch9 frame 15
12 out of 19

13 out of 19

14 out of 19

15 out of 19

16 out of 19

17 out of 19
No 2nd mother cell found for file 19112021_24_gch11 frame 83
18 out of 19
No 2nd mother cell found for file 19112021_26_gch11 frame 265
19 out of 19


- open the csv tables of the individual channels, process them and summarize them in a single csv table

In [8]:
csv_folder_path = '../200721_split/CSV' # folder with the per-channel csv tables
csv_files = glob.glob(os.path.join(csv_folder_path, '*.csv'))
if not csv_files:
    # fail with a clear message instead of an IndexError further down
    raise FileNotFoundError('No csv files found in ' + csv_folder_path)

# check that all the FI vectors are the same length
l = []
for path in csv_files:
    table = pd.read_csv(path, skiprows=0)
    l.append(len(table['FI']))
if len(set(l)) == 1:
    print("All fine")
else:
    print("Unequal length")

# time vector: one entry per frame of the first table, in steps of 1/6
t = np.array(range(l[0]))*1/6

All fine


In [None]:
# Add sep=, to csv files to open them easily in Excel
#print('sep=, added to the following files:')
#for file_name in csv_files:
#    with open(file_name, 'r') as fin:
#        content = fin.read().splitlines(True)
#    with open(file_name, 'w') as fout:
#        if content[0] != 'sep=,\n':
#            print(file_name)
#            content.insert(0,'sep=,\n')
#        fout.writelines(content)

In [10]:
# columns of the summary table
trace_columns = ['id', 'name', 'frame', 't', 'FI_corr', 'switch', 'background_CV', 'area', 'area2']
# per-channel tables that pass the background check (DataFrame.append no
# longer exists in pandas 2, so they are collected and concatenated once)
clean_tables = []
# loop over CSVs
for file_name in csv_files:
    # load data
    name = file_name.replace('\\', '/').rsplit('/', 1)[-1].split(sep='.')[0][7:]
    df = pd.read_csv(file_name, skiprows=0)

    # frame index from which the second mother cell is used (None: never)
    cutoff = comp_cutoff(df['area'].values, diff_crit=50, length_crit=15)
    df['switch'] = cutoff

    corr = background_correction(df['FIchannel'].values, df['FIpdms'].values)

    # coefficient of variation of the corrected channel background
    background_cv = stats.variation(df['FIchannel'].values/corr)
    df['background_CV'] = background_cv

    # clean FI signal
    # combine first/second mother cell
    if cutoff is not None:
        df['FI_corr'] = np.concatenate([df['FI'].values[:cutoff], df['FI2'].values[cutoff:]])
    else:
        df['FI_corr'] = df['FI'].values
    # background correction
    df['FI_corr'] = df['FI_corr'].values/corr

    df['t'] = t

    # add rows if background is ok (CV of channel below 0.04)
    if background_cv < 0.04:
        clean_tables.append(df[trace_columns])

if clean_tables:
    df_clean = pd.concat(clean_tables)
else:
    df_clean = pd.DataFrame(columns=trace_columns)
df_clean.to_csv('../200721_split/200721_traces.csv', index=False)

