### This Jupyter Notebook extracts information about experimental conditions from image file names and creates inventories of image files and ROIs within each file for later data analysis
#### Input: 
csv files containing the fluorescence values at each time point of individual images, generated by the ImageJ macro "getaverage.ijm". The file names specify the experimental conditions as fields separated by "_".
#### Output: two csv files:
1. batch_df_name.csv file: specifications of each image file. The user needs to review this file, add the column names and fill in any missing information manually. The reviewed file should be saved as a separate csv file, batch_df_name_copy.csv, which should contain the following columns:
   - File: filename
   - Date: date of the image
   - Dish: dish number
   - Cell: cell number
   - Stim: stimulation point number
   - C1: the channel specification, e.g. 488 mitoGCaMP
   - C0: the channel specification, e.g. 561 RCaMP
   - Treatment: the treatment or specific condition of the image. e.g. "Ctrl" or "APV".
2. batch_selection.csv file: list of each ROI within each image file with the following columns:  
   - File: filename
   - Channel: C0 for 561 RCaMP and C1 for 488 MitoGCaMP or ERGCaMP
   - Point: stimulation point number. This is usually 1 unless multiple points are stimulated at the same time within each image acquisition.
   - Selection: "sp" for spine ROI, "stim" for mito ROI and "bg" for background ROI.

In [1]:
#Import necessary packages
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

In [3]:
#Get file directory
import os
from os import listdir
from os.path import isfile, join
# Record the current working directory. The csv files written by the later
# cells use relative file names, so they are created in this directory.
cwd = os.getcwd()
cwd  # bare expression: the notebook echoes the path as the cell output

'C:\\Users\\fanr\\OneDrive - Max Planck Florida Institute for Neuroscience\\Codes\\mitoGCaMP\\manuscript'

In [11]:
# Import all csv files from the source folder.
# mypath: path of the source folder (an empty placeholder here)
mypath = ""

# Keep regular files only (sub-directories are skipped), and only those with
# a .csv extension, so stray files in the folder (e.g. desktop.ini,
# .DS_Store) are not handed to the later cells, which parse every entry as a
# csv file. os.listdir returns names in arbitrary order, so the list is
# sorted to make the inventories reproducible between runs.
onlyfiles = sorted(
    f for f in listdir(mypath)
    if isfile(join(mypath, f)) and f.lower().endswith('.csv')
)
onlyfiles

['052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell1_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell1_stim2_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell2_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell2_stim3_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell2_stim3_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell3_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell3_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell3_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell3_stim2_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell4_stim1_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell4_stim1_488mitoGCaMP6f_561RCaMP_unc.csv',
 '052625_dish1_cell4_stim2_488mitoGCaMP6f_561RCaMP_myrCN27_unc.csv',
 '052625_dish1_cell4_

In [15]:
# Specify the batch name. It is used as the prefix of both output files:
# <batch>_df_name.csv and <batch>_selection.csv.
batch = 'BATCH NAME'

In [17]:
# Save all image names into one csv file for further specification.
# Expected file name format:
#   Date_dish number_cell number_stimulation number_Channel1_Channel0_Treatment
# Each output row holds the full file name followed by its "_"-separated
# fields. The columns are left unnamed (0, 1, 2, ...); names with fewer
# fields than the longest one get empty trailing cells.

name_rows = []

for i in onlyfiles:
    # Strip the extension with splitext rather than a fixed 4-character
    # slice, so a name that does not end in a 3-letter extension is not
    # truncated in the middle of its last field.
    stem = os.path.splitext(i)[0]
    name_rows.append([i] + stem.split("_"))

df_name = pd.DataFrame(name_rows)

df_name.to_csv(batch+'_df_name.csv',index = False)

In [21]:
# Save all selections (ROIs) into one csv file.
# ROI column name format: Channel_Stim_Selection
# For every file, the first column of the csv is skipped and each remaining
# column name is split on "_" into Channel, Point and Selection. A message is
# printed for every stimulation point that lacks a 'stim', 'sp' or 'bg' ROI.
# Raises ValueError if a column name does not have exactly three fields.

selection_columns = ['File','Channel','Point','Selection']
required_rois = ['stim','sp','bg']
selection_rows = []

for i in onlyfiles:
    # join() builds the path with the platform's separator (the same helper
    # used when listing the folder) instead of a hard-coded backslash.
    df = pd.read_csv(join(mypath, i))
    df = df.iloc[:,1:]

    # Point -> set of ROI kinds found for it. A dict keeps first-seen order,
    # so messages are printed in the order the points appear in the file.
    rois_by_point = {}

    for colname in df.columns:
        fields = colname.split("_")
        if len(fields) != 3:
            # Fail with the offending file and column instead of an opaque
            # IndexError / column-count error further down.
            raise ValueError(
                f"{i}: column {colname!r} does not match "
                "Channel_Stim_Selection"
            )
        selection_rows.append([i] + fields)
        rois_by_point.setdefault(fields[1], set()).add(fields[2])

    # Print if any ROI is missing for a certain stimulation point.
    for n, found in rois_by_point.items():
        for s in required_rois:
            if s not in found:
                print(n+"_"+s +" missing in "+i )

# Build the table once from all rows rather than concatenating onto an empty
# frame inside the loop, which re-copies the accumulated data on every pass.
# The frame gets a fresh 0..n-1 index; the csv is unaffected (index=False).
df_selection = pd.DataFrame(selection_rows, columns=selection_columns)

df_selection.to_csv(batch+'_selection.csv',index = False)
