# 01_DownSampling_celltype

## Import modules

In [1]:
#%pip install --upgrade pip

In [2]:
#%pip install bokeh

In [3]:
#%pip install Flowkit

In [4]:
import os
import bokeh
from bokeh.plotting import show
import matplotlib.pyplot as plt
import pandas as pd

import flowkit as fk

bokeh.io.output_notebook()
%matplotlib inline

_ = plt.ioff()

import session_info

In [5]:
# check version so users can verify they have the same version/API
fk.__version__

'1.1.1'

## Creating a Workspace

Let’s create a Workspace starting with a FlowJo 10 workspace file and some FCS files.

In [6]:
ls ../raw/

[0m[01;32m231211_Symphony_002_ACK_231208_231211_60.fcs[0m*
[01;32m231218_Symphony_011_ACK_231212_2311218_60.fcs[0m*
[01;32m231218_Symphony_016_ACK_231212_2311218_ACK.fcs[0m*
[01;32m231225_Symphony_021_ACK_231219_231225_60.fcs[0m*
[01;32m231225_Symphony_026_ACK_231219_231225_ACK.fcs[0m*
[01;32m240115_Symphony_001_033_BMX_240112_240115_1.fcs[0m*
[01;32m240115_Symphony_001_034_BMX_240112_240115_2.fcs[0m*
[01;32m240115_Symphony_001_035_BMX_240112_240115_3.fcs[0m*
[01;32m240115_Symphony_001_036_BMX_240112_240115_ACK_1.fcs[0m*
[01;32m240115_Symphony_001_037_BMX_240112_240115_ACK_2.fcs[0m*
[01;32m240115_Symphony_001_038_BMX_240112_240115_ACK_3.fcs[0m*
[01;32m240126_Symphony_040_BMX_240123_240126_1_002.fcs[0m*
[01;32m240126_Symphony_041_BMX_240123_240126_2_003.fcs[0m*
[01;32m240126_Symphony_042_BMX_240123_240126_3_004.fcs[0m*
[01;32m240126_Symphony_043_BMX_240123_240126_1_ACK_005.fcs[0m*
[01;32m240126_Symphony_044_BMX_240123_240126_2_ACK_006.fcs[0m*
[01;32m240

In [7]:
# setup some file paths for our data
base_dir = %pwd

sample_path = os.path.join(base_dir, "../raw")
wsp_path = os.path.join(base_dir, "../raw/JAXA_Symphony.wsp")

In [8]:
# Create a Workspace with the path to our WSP file and FCS files.
wsp = fk.Workspace(wsp_path, sample_path)

In [9]:
# look at a summary of the Workspace
wsp.summary()

Unnamed: 0_level_0,samples,loaded_samples,gates,max_gate_depth
group_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All Samples,17,17,17,8
ACK_Frozen,8,8,17,8
BMX_Frozen_6mm,3,3,17,8
BMX_Frozen_10mm,3,3,17,8
JAXA_Symphony_001,1,1,17,8
JAXA_Symphony_002,2,2,17,8
JAXA_Symphony_003,2,2,17,8
JAXA_Symphony_005,6,6,17,8
JAXA_Symphony_006,6,6,17,8
Kimtowel_Frozen,3,3,17,8


In [10]:
# get a list of sample groups
wsp.get_sample_groups()

['All Samples',
 'ACK_Frozen',
 'BMX_Frozen_6mm',
 'BMX_Frozen_10mm',
 'JAXA_Symphony_001',
 'JAXA_Symphony_002',
 'JAXA_Symphony_003',
 'JAXA_Symphony_005',
 'JAXA_Symphony_006',
 'Kimtowel_Frozen']

In [11]:
# From the summary, we can see all the "real" analysis is within the "All Samples" group
sample_group = "All Samples"

In [12]:
# get the sample IDs that are included in the group
sample_list = wsp.get_sample_ids(group_name=sample_group)
sample_list

['231211_Symphony_002_ACK_231208_231211_60.fcs',
 '231218_Symphony_011_ACK_231212_2311218_60.fcs',
 '231218_Symphony_016_ACK_231212_2311218_ACK.fcs',
 '231225_Symphony_021_ACK_231219_231225_60.fcs',
 '231225_Symphony_026_ACK_231219_231225_ACK.fcs',
 '240115_Symphony_001_033_BMX_240112_240115_1.fcs',
 '240115_Symphony_001_034_BMX_240112_240115_2.fcs',
 '240115_Symphony_001_035_BMX_240112_240115_3.fcs',
 '240115_Symphony_001_036_BMX_240112_240115_ACK_1.fcs',
 '240115_Symphony_001_037_BMX_240112_240115_ACK_2.fcs',
 '240115_Symphony_001_038_BMX_240112_240115_ACK_3.fcs',
 '240126_Symphony_040_BMX_240123_240126_1_002.fcs',
 '240126_Symphony_041_BMX_240123_240126_2_003.fcs',
 '240126_Symphony_042_BMX_240123_240126_3_004.fcs',
 '240126_Symphony_043_BMX_240123_240126_1_ACK_005.fcs',
 '240126_Symphony_044_BMX_240123_240126_2_ACK_006.fcs',
 '240126_Symphony_045_BMX_240123_240126_3_ACK_007.fcs']

### Retrieving Gate Components

In [13]:
# The gating hierarchy is retrieved per sample.
# This is due to FlowJo allowing variation in the gate tree among samples.
sample_id = sample_list[1]
print(wsp.get_gate_hierarchy(sample_id))

root
╰── Lymphocytes
    ╰── Single Cells
        ╰── Single Cells
            ╰── Alive
                ├── Basophils
                ├── CD45+
                │   ├── Lymphoid
                │   │   ├── NK cells
                │   │   ╰── NK1_1-
                │   │       ├── B cells
                │   │       ╰── T cells
                │   ╰── Myeloid
                │       ├── Eosinophils
                │       ├── Monocytes
                │       ╰── Neutrophils
                ├── CD45neg
                ╰── CD45pos


## Each cell type is isolated (Down-sampling)
eliminating poorly labeled cells

In [14]:
def DownSample(sample_list, gate_name, dir_output):
    i = 1
    for sample_id in sample_list:
        sample_results = wsp.get_gate_events(sample_id, gate_name=gate_name)
        sample_results['celltype'] = gate_name
        sample_results['group'] = sample_group
        sample_results['sample_id'] = sample_id
        sample_results.to_csv(dir_output+"Downsample_"+gate_name+"_"+sample_id+".csv", encoding="utf-8")

        if i == 1:
            df_concat = sample_results
        else:
            df_concat = pd.concat([df_concat, sample_results])
        print(i, ". Complete DownSampling: ", sample_id)
        i = i+1
    df_concat.to_csv(dir_output+"concat_"+gate_name+"_"+sample_group+".csv")

In [15]:
group_list = ['ACK_Frozen', 'BMX_Frozen_10mm', 'BMX_Frozen_6mm', 'Kimtowel_Frozen']
gate_names = ['Basophils', 'B cells', 'NK cells', 'T cells', 'Eosinophils', 'Monocytes', 'Neutrophils']
dir_output = "../data/DownSamples_celltype/"
%mkdir $dir_output

In [16]:
for sample_group in group_list:
    wsp.analyze_samples(sample_group, verbose=True)
    sample_list = wsp.get_sample_ids(group_name=sample_group)
    for gate_name in gate_names:
        DownSample(sample_list, gate_name, dir_output)

#### Processing gates for 8 samples (multiprocessing is enabled - 8 cpus) ####
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Lymphocytes
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Single Cells
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Single Cells
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Alive
231225_Symphony_026_ACK_231219_231225_ACK.fcs: processing gate Lymphocytes231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Basophils

231225_Symphony_026_ACK_231219_231225_ACK.fcs: processing gate Single Cells240115_Symphony_001_036_BMX_240112_240115_ACK_1.fcs: processing gate Lymphocytes
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate CD45+


240115_Symphony_001_036_BMX_240112_240115_ACK_1.fcs: processing gate Single Cells
240115_Symphony_001_037_BMX_240112_240115_ACK_2.fcs: processing gate Lymphocytes240115_Symphony_001_036_BMX_240112_240115_ACK_1.fcs: processing gate Single Cells


In [17]:
os.mkdir(dir_output+'concat/')
for gate_name in gate_names:
    df_ACK = pd.read_csv(dir_output+"concat_"+gate_name+"_ACK_Frozen.csv")
    df_BMX_10mm = pd.read_csv(dir_output+"concat_"+gate_name+"_BMX_Frozen_10mm.csv")
    df_BMX_6mm = pd.read_csv(dir_output+"concat_"+gate_name+"_BMX_Frozen_6mm.csv")
    df_Kimtowel = pd.read_csv(dir_output+"concat_"+gate_name+"_Kimtowel_Frozen.csv")
    df_concat = pd.concat([df_ACK, df_BMX_10mm, df_BMX_6mm, df_Kimtowel])
    df_concat.to_csv(dir_output+"concat/concat_"+gate_name+".csv")

In [18]:
def Concat(csv_list, dir_output):
    i = 1
    for csv_name in csv_list:
        df = pd.read_csv(csv_name, header=0)
        
        if i == 1:
            df_concat = df
        else:
            df_concat = pd.concat([df_concat, df])
        print(i, ". Complete Concatinate: ", csv_name)
        i = i+1
    df_concat.to_csv(dir_output+"concat.csv")

In [19]:
import os, glob
csv_list = glob.glob(dir_output+'concat/*')

In [20]:
csv_list

['../data/DownSamples_celltype/concat/concat_B cells.csv',
 '../data/DownSamples_celltype/concat/concat_Basophils.csv',
 '../data/DownSamples_celltype/concat/concat_Eosinophils.csv',
 '../data/DownSamples_celltype/concat/concat_Monocytes.csv',
 '../data/DownSamples_celltype/concat/concat_Neutrophils.csv',
 '../data/DownSamples_celltype/concat/concat_NK cells.csv',
 '../data/DownSamples_celltype/concat/concat_T cells.csv']

In [21]:
Concat(csv_list, dir_output=dir_output+'concat/')

1 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_B cells.csv
2 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_Basophils.csv
3 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_Eosinophils.csv
4 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_Monocytes.csv
5 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_Neutrophils.csv
6 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_NK cells.csv
7 . Complete Concatinate:  ../data/DownSamples_celltype/concat/concat_T cells.csv


## Downsampling of gated on CD45 positive cells

In [22]:
group_list = ['ACK_Frozen', 'BMX_Frozen_10mm', 'BMX_Frozen_6mm', 'Kimtowel_Frozen']
gate_names = ['CD45pos']
dir_output = "../data/DownSamples/"
%mkdir $dir_output

In [23]:
for sample_group in group_list:
    wsp.analyze_samples(sample_group, verbose=True)
    sample_list = wsp.get_sample_ids(group_name=sample_group)
    for gate_name in gate_names:
        DownSample(sample_list, gate_name, dir_output)

#### Processing gates for 8 samples (multiprocessing is enabled - 8 cpus) ####
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Lymphocytes
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Single Cells
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Single Cells
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Alive
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Basophils
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate CD45+
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate CD45neg
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate CD45pos
231225_Symphony_026_ACK_231219_231225_ACK.fcs: processing gate Lymphocytes
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Lymphoid
231225_Symphony_026_ACK_231219_231225_ACK.fcs: processing gate Single Cells
231218_Symphony_016_ACK_231212_2311218_ACK.fcs: processing gate Myeloid
231225_Symphony_026_ACK_231219_231225

In [24]:
os.mkdir(dir_output+'concat/')
for gate_name in gate_names:
    df_ACK = pd.read_csv(dir_output+"concat_"+gate_name+"_ACK_Frozen.csv")
    df_BMX_10mm = pd.read_csv(dir_output+"concat_"+gate_name+"_BMX_Frozen_10mm.csv")
    df_BMX_6mm = pd.read_csv(dir_output+"concat_"+gate_name+"_BMX_Frozen_6mm.csv")
    df_Kimtowel = pd.read_csv(dir_output+"concat_"+gate_name+"_Kimtowel_Frozen.csv")
    df_concat = pd.concat([df_ACK, df_BMX_10mm, df_BMX_6mm, df_Kimtowel])
    df_concat.to_csv(dir_output+"concat/concat_"+gate_name+".csv")

## Data shaping

In [25]:
df_cd45p = pd.read_csv('../data/DownSamples/concat/concat_CD45pos.csv')
df_cd45p.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,BUV395-A CD45,...,BV786-A Ly6C,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,celltype,group
0,0,2,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.468508,0.435227,0.47269,0.143135,0.144756,0.329818,3161.997367,...,3052.501691,2215.461119,936.170309,929.571744,920.134451,761.051003,879.530415,0.018907,CD45pos,ACK_Frozen
1,1,3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,3123.005344,...,1022.169682,934.52792,930.706597,924.471786,2217.410148,868.647453,966.170343,0.01891,CD45pos,ACK_Frozen
2,2,7,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.369099,0.320502,0.476163,0.296838,0.287299,0.37356,3018.692338,...,978.702126,958.861395,2136.135613,859.072528,809.957239,961.405746,1405.154676,0.018997,CD45pos,ACK_Frozen
3,3,8,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.44927,0.389286,0.488482,0.293847,0.281103,0.399821,3103.081661,...,3261.856554,984.384086,2307.147093,881.775708,764.697099,805.907786,1557.813709,0.01904,CD45pos,ACK_Frozen
4,4,9,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.31323,3004.757033,...,914.503155,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,CD45pos,ACK_Frozen


In [26]:
df_cd45p = df_cd45p.rename(columns={'Unnamed: 0':'cell_id'})
df_cd45p = df_cd45p.eval('id = sample_id.astype("string")+cell_id.astype("string")')
df_cd45p = df_cd45p.drop(columns=['Unnamed: 0.1', 'cell_id', 'celltype'])
df_cd45p.head()

  df_cd45p = df_cd45p.eval('id = sample_id.astype("string")+cell_id.astype("string")')


Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,BUV395-A CD45,DAPI-A Dead,BV421-A CD19,...,BV786-A Ly6C,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,group,id
0,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.468508,0.435227,0.47269,0.143135,0.144756,0.329818,3161.997367,928.554866,1064.025393,...,3052.501691,2215.461119,936.170309,929.571744,920.134451,761.051003,879.530415,0.018907,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs2
1,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,3123.005344,921.608884,1913.591939,...,1022.169682,934.52792,930.706597,924.471786,2217.410148,868.647453,966.170343,0.01891,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs3
2,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.369099,0.320502,0.476163,0.296838,0.287299,0.37356,3018.692338,1082.635498,1028.582432,...,978.702126,958.861395,2136.135613,859.072528,809.957239,961.405746,1405.154676,0.018997,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs7
3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.44927,0.389286,0.488482,0.293847,0.281103,0.399821,3103.081661,793.494761,503.895618,...,3261.856554,984.384086,2307.147093,881.775708,764.697099,805.907786,1557.813709,0.01904,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs8
4,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.31323,3004.757033,1016.718669,1728.83733,...,914.503155,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs9


In [27]:
df_celltype = pd.read_csv('../data/DownSamples_celltype/concat/concat.csv')
df_celltype.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,...,BV786-A Ly6C,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,celltype,group
0,0,0,3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,...,1022.169682,934.52792,930.706597,924.471786,2217.410148,868.647453,966.170343,0.01891,B cells,ACK_Frozen
1,1,1,9,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.31323,...,914.503155,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,B cells,ACK_Frozen
2,2,2,11,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.285836,0.273334,0.435879,0.113333,0.116311,0.314682,...,943.162209,965.5005,906.098652,928.433188,2087.500482,943.719518,917.054093,0.019121,B cells,ACK_Frozen
3,3,3,14,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.458968,0.422462,0.472522,0.115109,0.116781,0.316093,...,965.698768,940.840058,967.159426,950.68011,2017.978368,989.989169,863.467616,0.019169,B cells,ACK_Frozen
4,4,4,22,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.384545,0.354436,0.47364,0.167984,0.169063,0.348722,...,1025.059138,927.069397,822.327653,977.146915,2919.431163,897.925808,921.352504,0.019367,B cells,ACK_Frozen


In [28]:
df_celltype = df_celltype.rename(columns={'Unnamed: 0':'cell_id'})
df_celltype = df_celltype.eval('id = sample_id.astype("string")+cell_id.astype("string")')
df_celltype = df_celltype.drop(columns=['Unnamed: 0.2', 'Unnamed: 0.1', 'cell_id'])
df_celltype.head()

  df_celltype = df_celltype.eval('id = sample_id.astype("string")+cell_id.astype("string")')


Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,BUV395-A CD45,DAPI-A Dead,BV421-A CD19,...,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,celltype,group,id
0,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,3123.005344,921.608884,1913.591939,...,934.52792,930.706597,924.471786,2217.410148,868.647453,966.170343,0.01891,B cells,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs3
1,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.31323,3004.757033,1016.718669,1728.83733,...,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,B cells,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs9
2,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.285836,0.273334,0.435879,0.113333,0.116311,0.314682,2661.441658,1005.258989,1450.798343,...,965.5005,906.098652,928.433188,2087.500482,943.719518,917.054093,0.019121,B cells,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs11
3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.458968,0.422462,0.472522,0.115109,0.116781,0.316093,2935.940625,917.806938,1904.661097,...,940.840058,967.159426,950.68011,2017.978368,989.989169,863.467616,0.019169,B cells,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs14
4,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.384545,0.354436,0.47364,0.167984,0.169063,0.348722,3073.417631,937.891912,1868.156846,...,927.069397,822.327653,977.146915,2919.431163,897.925808,921.352504,0.019367,B cells,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs22


In [29]:
df = pd.merge(df_cd45p, df_celltype, how='left')
df.head()

Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,BUV395-A CD45,DAPI-A Dead,BV421-A CD19,...,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,group,id,celltype
0,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.468508,0.435227,0.47269,0.143135,0.144756,0.329818,3161.997367,928.554866,1064.025393,...,2215.461119,936.170309,929.571744,920.134451,761.051003,879.530415,0.018907,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs2,NK cells
1,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,3123.005344,921.608884,1913.591939,...,934.52792,930.706597,924.471786,2217.410148,868.647453,966.170343,0.01891,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs3,B cells
2,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.369099,0.320502,0.476163,0.296838,0.287299,0.37356,3018.692338,1082.635498,1028.582432,...,958.861395,2136.135613,859.072528,809.957239,961.405746,1405.154676,0.018997,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs7,Monocytes
3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.44927,0.389286,0.488482,0.293847,0.281103,0.399821,3103.081661,793.494761,503.895618,...,984.384086,2307.147093,881.775708,764.697099,805.907786,1557.813709,0.01904,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs8,Monocytes
4,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.31323,3004.757033,1016.718669,1728.83733,...,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs9,B cells


In [30]:
df

Unnamed: 0,sample_id,FSC-A,FSC-H,FSC-W,SSC-A,SSC-H,SSC-W,BUV395-A CD45,DAPI-A Dead,BV421-A CD19,...,BB515-A NK1_1,BB700-A SIRPa,APC-A Siglec F,R718-A MHCII,APC-Cy7-A CD3e,PE-Cy7-A FceR1,Time,group,id,celltype
0,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.468508,0.435227,0.472690,0.143135,0.144756,0.329818,3161.997367,928.554866,1064.025393,...,2215.461119,936.170309,929.571744,920.134451,761.051003,879.530415,0.018907,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs2,NK cells
1,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.409059,0.389471,0.464305,0.102905,0.106058,0.307402,3123.005344,921.608884,1913.591939,...,934.527920,930.706597,924.471786,2217.410148,868.647453,966.170343,0.018910,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs3,B cells
2,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.369099,0.320502,0.476163,0.296838,0.287299,0.373560,3018.692338,1082.635498,1028.582432,...,958.861395,2136.135613,859.072528,809.957239,961.405746,1405.154676,0.018997,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs7,Monocytes
3,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.449270,0.389286,0.488482,0.293847,0.281103,0.399821,3103.081661,793.494761,503.895618,...,984.384086,2307.147093,881.775708,764.697099,805.907786,1557.813709,0.019040,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs8,Monocytes
4,231218_Symphony_016_ACK_231212_2311218_ACK.fcs,0.349807,0.327736,0.452241,0.118176,0.121834,0.313230,3004.757033,1016.718669,1728.837330,...,952.751105,934.815031,917.780532,2017.982173,1012.125136,919.241522,0.019059,ACK_Frozen,231218_Symphony_016_ACK_231212_2311218_ACK.fcs9,B cells
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
936523,231225_Symphony_021_ACK_231219_231225_60.fcs,0.361491,0.367661,0.495106,0.182121,0.172837,0.375594,3313.593901,835.687103,934.061509,...,995.199593,950.952586,944.647145,1032.428495,1626.879408,1033.710522,0.998300,Kimtowel_Frozen,231225_Symphony_021_ACK_231219_231225_60.fcs31...,T cells
936524,231225_Symphony_021_ACK_231219_231225_60.fcs,0.231283,0.255159,0.451273,0.098846,0.097993,0.325847,3072.615518,1205.713607,1748.729526,...,995.217129,913.167101,929.778290,2651.114715,780.680006,1081.739542,0.998331,Kimtowel_Frozen,231225_Symphony_021_ACK_231219_231225_60.fcs31...,B cells
936525,231225_Symphony_021_ACK_231219_231225_60.fcs,0.218991,0.237735,0.451429,0.082036,0.080115,0.323032,3086.531216,1501.261503,1724.759645,...,1096.634360,1201.735868,1045.508463,1046.401109,2011.659089,1368.280734,0.998335,Kimtowel_Frozen,231225_Symphony_021_ACK_231219_231225_60.fcs31...,
936526,231225_Symphony_021_ACK_231219_231225_60.fcs,0.431169,0.468044,0.499803,0.087215,0.084184,0.326467,2956.201265,953.592817,1752.053980,...,954.666904,943.769408,948.487385,2395.042617,1014.800095,981.141923,0.998355,Kimtowel_Frozen,231225_Symphony_021_ACK_231219_231225_60.fcs31...,B cells


In [31]:
df['celltype'].isnull().sum()

65752

In [32]:
df.to_csv('../data/cd45pos_celltype.csv', index=False)