In [None]:
import fcsparser
import numpy as np
import os
import pandas as pd

In [None]:
def make_dir(path):
    '''
    Creates the directory path, including any missing parent directories.

    path: directory to create
    Does nothing when the directory already exists.
    Raises FileExistsError when path exists but is not a directory.
    '''
    # exist_ok=True avoids the gap between "check" and "create" in which
    # another process could create the directory and make makedirs fail.
    os.makedirs(path, exist_ok=True)
    
def filterFiles(path, extension):
    '''
    Walks path recursively and collects the files whose name ends with
    extension.

    path: root directory handed to os.walk
    extension: suffix a file name must end with to be selected

    Every selected name is printed, then the names are returned as a list.
    Only the bare file name is kept: the directory in which a file was found
    is not part of the returned value.
    '''
    matches = []
    for _root, _dirs, names in os.walk(path):
        for name in names:
            if name.endswith(extension):
                matches.append(name)

    for name in matches:
        print(name)
    return matches

def fcs_to_csv(directory, filenames=None, fcs_dir=None):
    '''
    Parses FCS files and saves each one as a csv file inside directory.

    directory: folder in which the csv files are written (created if missing)
    filenames: list of FCS file names to convert; when omitted, the
               module-level list `files` is used
    fcs_dir: folder that holds the FCS files; when omitted, the module-level
             `fcs_folder` is used

    Each csv keeps the name of its FCS file with the extension replaced by
    '.csv'.
    '''
    # Fall back to the notebook-level names so the existing call
    # fcs_to_csv(csv_folder) keeps working unchanged.
    if filenames is None:
        filenames = files
    if fcs_dir is None:
        fcs_dir = fcs_folder

    os.makedirs(directory, exist_ok=True)

    for name in filenames:
        path = os.path.join(fcs_dir, name)
        meta, data = fcsparser.parse(path, meta_data_only=False, reformat_meta=True)
        # Fixed 14-channel layout: pandas raises if the parsed table does not
        # have exactly this many columns.
        data.columns=['FSC-A','SSC-A','FL1-A','FL2-A','FL3-A','FL4-A','FSC-H','SSC-H','FL1-H','FL2-H','FL3-H','FL4-H','Width','Time']
        csv_name = os.path.splitext(name)[0] + '.csv'
        # Write to the folder the caller asked for, not to the global
        # csv_folder (which a later cell rebinds to another folder).
        data.to_csv(os.path.join(directory, csv_name), index=False)

In [None]:
# Folder that holds the input FCS files and the extension used to select them.
fcs_folder = './fcs/'
fcs_ext = '.fcs'
# Folder in which the raw (unfiltered) csv exports are stored.
csv_folder = './csv_data/Raw_csv'

make_dir(csv_folder)
# filterFiles prints every match and returns bare file names (no directory
# part); `files` is also read by fcs_to_csv.
files = filterFiles(fcs_folder, fcs_ext)

In [None]:
# Convert the FCS files to csv, then list every csv found under csv_folder.
# The listing is taken from disk, so it also includes csv files that were
# already in that folder before this cell ran.
fcs_to_csv(csv_folder)
files_csv = filterFiles(csv_folder, '.csv')

In [None]:
#defines dir
csv_correc = './csv_data/Corrected_csv/'

#creates dir
make_dir(csv_correc)

#gating thresholds: events whose FL3-A or FSC-H is below these values are discarded
MIN_FL3_A = 800
MIN_FSC_H = 11000

#one statistics row is collected per csv file; the stats df is built once after
#the loop so that it always has exactly one row per processed file
columns = ['label', 'mFSC-H', 'stdFSC-H', 'mFL3-A', 'stdFL3-A']
stats_rows = []

#for each csv file
for j, csv_name in enumerate(files_csv):

    #the label consists of the first three characters of the file name;
    #files sharing those characters overwrite each other's corrected csv
    label = csv_name[0:3]

    #creates a dataframe
    events = pd.read_csv(os.path.join(csv_folder, csv_name))

    #Apply the filter on whole columns at once. An event is rejected when
    #SSC-A is exactly 0 or when FL3-A / FSC-H is below its threshold.
    #Comparisons with NaN are False, so events with missing values are kept.
    rejected = (
        (events['SSC-A'] == 0)
        | (events['FL3-A'] < MIN_FL3_A)
        | (events['FSC-H'] < MIN_FSC_H)
    )

    #drop the rejected events and reset indices
    events = events[~rejected].reset_index(drop=True)

    #the name column holds the cell label
    events['name'] = label

    #Makes statistics and creates a column with the results.
    #np.std on the raw array is the population standard deviation (ddof=0);
    #std/mean is the coefficient of variation stored in the cv* columns.
    std_fsc_h = np.std(events['FSC-H'].to_numpy())
    mean_fsc_h = np.mean(events['FSC-H'].to_numpy())
    events['cvFSC-H'] = std_fsc_h / mean_fsc_h
    events['mFSC-H'] = mean_fsc_h
    std_fl3_a = np.std(events['FL3-A'].to_numpy())
    mean_fl3_a = np.mean(events['FL3-A'].to_numpy())
    events['cvFL3-A'] = std_fl3_a / mean_fl3_a
    events['mFL3-A'] = mean_fl3_a

    #Save the csv with the filter and statistics
    events.to_csv(csv_correc + label + '.csv', index=False)
    print('File ' + str(j+1) + ' out of ' + str(len(files_csv)) + ' Done')
    stats_rows.append([label, mean_fsc_h, std_fsc_h, mean_fl3_a, std_fl3_a])

#create stats df
stats_df = pd.DataFrame(stats_rows, columns=columns)

In [None]:
#Order the statistics by label, renumber the rows and save them in statistics.csv
stats_df = stats_df.sort_values('label').reset_index(drop=True)

csv_stats_dir = './csv_data/Statistics_csv/'
make_dir(csv_stats_dir)

stats_path = csv_stats_dir + 'statistics.csv'
stats_df.to_csv(stats_path, index=False)

In [None]:
# NOTE: this rebinds csv_folder, which the earlier cells use for the raw csv
# folder. Re-running those cells after this one, without re-running the
# configuration cell first, makes them work on Corrected_csv instead.
csv_folder = './csv_data/Corrected_csv/'
def merge_dfs(directory):
    '''
    Concatenates every csv file found under directory into a single table and
    saves it as ./csv_data/Merged/mergedData.csv

    directory: folder that holds the csv files to merge

    Raises ValueError when no csv file is found under directory.
    '''
    files = filterFiles(directory, '.csv')
    if not files:
        raise ValueError('No .csv files found in ' + str(directory))

    # Read from the folder that was passed in (not the global csv_folder) and
    # concatenate once: DataFrame.append was removed in pandas 2.0 and copied
    # the whole accumulated table on every iteration.
    frames = [pd.read_csv(os.path.join(directory, name)) for name in files]
    df = pd.concat(frames)

    csv_dir_final = './csv_data/Merged/'
    make_dir(csv_dir_final)
    df.to_csv(csv_dir_final + 'mergedData' + '.csv', index=False)
#merge_dfs(csv_folder)