### Prepare fcs files for deep learning
This is a small example for formatting data from fcs files into numpy array, and save the metaData, marker names and the numpy array into allData.obj file. Use the script as a template to prepare your own fcs files for deep learning. 

In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import rpy2 as rp
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
import os 
import rpy2.robjects as ro
import pickle
from collections import Counter
from pathlib import Path
import csv
import multiprocessing as mp
import matplotlib.pyplot as plt
import re

In [2]:
base_dir = Path('../../aging/data')
print(base_dir.resolve())
pickle_dir = base_dir/'ResultFiles'/'pickles'
pickle_dir.mkdir(exist_ok=True)
fcs_dir = base_dir/Path('ResultFiles/Flow_cytometry_result')

mfest_path = base_dir/Path('SDY420-DR40_Subject_2_Flow_cytometry_result.txt')
mfest = pd.read_csv(mfest_path, sep='\t')

prediction_target = 'Subject Age'

/home/ubuntu/a/aging/data


In [3]:
def fcs2pkl_filename(fcs_path):
    return pickle_dir/fcs_path.with_suffix('.pkl').name

def import_fcs(fcs_path):
    fcs_path = str(fcs_path)
    r = rp.robjects.r
    r_code = ("library(flowCore);"+
          "library(MetaCyto);"+
          "fn = '"+ fcs_path+ "'; "+
          "fcs = read.FCS(fn,truncate_max_range = FALSE);"+
          "expr = fcs@exprs;"+
          "markers = markerFinder(fcs);"+
          "colnames(expr) = markers;"+
          "expr = as.data.frame(expr);"
         )
    expr =  r(r_code)
    df = pandas2ri.rpy2py(expr)
    df.columns = pd.Series([re.sub('\s*/\s*','/',_) for _ in df.columns])
    df.drop(columns=['TIME'], inplace=True)
    return df

def write_dataframe(fcs_path):
    df = import_fcs(fcs_path)
    wf = fcs2pkl_filename(fcs_path)
    print(wf)
    df.to_pickle(wf)

def load_dataframe(fcs_path):
    pkl = fcs2pkl_filename(fcs_path)
    return pd.read_pickle(pkl)

def read_df_metadata(arguments):
    mfest_index = arguments[0]
    pkl_path = arguments[1].pkl
    df = pd.read_pickle(pkl_path)
    return {
        'i': mfest_index,
        'n_rows': int(df.shape[0]),
        'n_columns': int(df.shape[1]),
        'markers': list(df.columns),
    }

In [4]:
mfest['fcs'] = mfest.apply(lambda row: (fcs_dir/row['File Name']).resolve(), axis=1)
mfest['pkl'] = mfest.apply(lambda row: fcs2pkl_filename(fcs_dir/row['fcs']).resolve(), axis=1)
mfest['pkl_exists'] = mfest.apply(lambda row: row['pkl'].exists(), axis=1)

In [5]:
pool = mp.Pool()

../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-020_IFNa_B04.532468.pkl
../../aging/data/ResultFiles/pickles/RC4_080613_RC4_080613_RC4-080613_RC4_080613_11-068_IFNa_B04.532752.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-043_IL10_D06.532509.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-048_US_A08.532537.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-020_IL10_D04.532469.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-033_IL6_C05.532491.pkl
../../aging/data/ResultFiles/pickles/RC4_080613_RC4_080613_RC4-080613_RC4_080613_11-068_IL10_D04.532753.pkl
../../aging/data/ResultFiles/pickles/RC4-080813_weiqiupdate_RC4-080813_weiqiupdate_11-120_IL21_E05.532868.pkl
../../aging/data/ResultFiles/pickles/RC4_080613_RC4_080613_RC4-080613_RC4_080613_11-068_IL21_E04.532754.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_080513_11-020_IL21_E04.532470.pkl
../../aging/data/ResultFiles/pickles/RC4_080513_RC4_0

In [6]:
fcs_to_convert = mfest[~(mfest['pkl_exists'])]['fcs']
pool.map(write_dataframe, fcs_to_convert)
mfest['pkl_exists'] = mfest.apply(lambda row: row['pkl'].exists(), axis=1)

In [7]:
mfest['n_rows'] = pd.Series(dtype='int64')
mfest['n_columns'] = pd.Series(dtype='int64')
mfest['markers'] = pd.Series(dtype='object')

df_metadata = pool.map(read_df_metadata, mfest.iterrows())

for md in df_metadata:
    i = md['i']
    mfest.at[i,'n_rows'] = md['n_rows']
    mfest.at[i,'n_columns'] = md['n_columns']
    mfest.at[i,'markers'] = md['markers']

In [8]:
pool.close()
pool.terminate()

In [9]:
mfest.to_csv('fcs_metadata.csv')
mfest

Unnamed: 0,Subject Accession,Species,Race,Race Specify,Ethnicity,Strain,Gender,Age Event,Age Event Specify,Subject Age,...,File Info ID,File Detail,File Name,Original File Name,fcs,pkl,pkl_exists,n_rows,n_columns,markers
0,SUB137174,Homo sapiens,Black or African American,,Not Hispanic or Latino,,Female,Age at enrollment,,53,...,532468,Flow cytometry result,RC4_080513_RC4_080513_11-020_IFNa_B04.532468.fcs,RC4_080513_RC4_080513_11-020_IFNa_B04.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,41826.0,13.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, PST..."
1,SUB137174,Homo sapiens,Black or African American,,Not Hispanic or Latino,,Female,Age at enrollment,,53,...,532469,Flow cytometry result,RC4_080513_RC4_080513_11-020_IL10_D04.532469.fcs,RC4_080513_RC4_080513_11-020_IL10_D04.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,41872.0,13.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, PST..."
2,SUB137174,Homo sapiens,Black or African American,,Not Hispanic or Latino,,Female,Age at enrollment,,53,...,532470,Flow cytometry result,RC4_080513_RC4_080513_11-020_IL21_E04.532470.fcs,RC4_080513_RC4_080513_11-020_IL21_E04.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,30830.0,13.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, PST..."
3,SUB137174,Homo sapiens,Black or African American,,Not Hispanic or Latino,,Female,Age at enrollment,,53,...,532471,Flow cytometry result,RC4_080513_RC4_080513_11-020_IL6_C04.532471.fcs,RC4_080513_RC4_080513_11-020_IL6_C04.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,49194.0,13.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, PST..."
4,SUB137174,Homo sapiens,Black or African American,,Not Hispanic or Latino,,Female,Age at enrollment,,53,...,532472,Flow cytometry result,RC4_080513_RC4_080513_11-020_US_A04.532472.fcs,RC4_080513_RC4_080513_11-020_US_A04.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,74924.0,13.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, PST..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125,SUB147387,Homo sapiens,Not Specified,,Not Specified,,Not Specified,Not Specified,,99,...,533134,Flow cytometry result,s_3-control-2.533134.fcs,s_3-control-2.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,500000.0,15.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, CD6..."
1126,SUB147387,Homo sapiens,Not Specified,,Not Specified,,Not Specified,Not Specified,,99,...,533134,Flow cytometry result,s_3-control-2.533134.fcs,s_3-control-2.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,500000.0,15.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, CD6..."
1127,SUB147387,Homo sapiens,Not Specified,,Not Specified,,Not Specified,Not Specified,,99,...,533134,Flow cytometry result,s_3-control-2.533134.fcs,s_3-control-2.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,500000.0,15.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, CD6..."
1128,SUB147387,Homo sapiens,Not Specified,,Not Specified,,Not Specified,Not Specified,,99,...,533134,Flow cytometry result,s_3-control-2.533134.fcs,s_3-control-2.fcs,/home/ubuntu/a/aging/data/ResultFiles/Flow_cyt...,/home/ubuntu/a/aging/data/ResultFiles/pickles/...,True,500000.0,15.0,"[FSC-A, FSC-H, FSC-W, SSC-A, SSC-H, SSC-W, CD6..."
