In [1]:
import os

from pathlib import Path
from multistim.io import prepare_cytometry_data

import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format='retina'

import matplotlib as mpl

# Figures get an opaque white background.
mpl.rcParams.update({"figure.facecolor": 'white'})

# seaborn "white" style; the axes face colour is an RGBA tuple with alpha 0,
# i.e. the axes patch itself is drawn fully transparent.
sns.set_theme(
    style="white",
    rc={"axes.facecolor": (0, 0, 0, 0)},
)

In [2]:
# Channel names handed to prepare_cytometry_data as `lineage_channels`.
# Order is significant and kept exactly; rows are grouped by the numeric part
# of the channel name (presumably the isotope mass — confirm with the panel).
lineage_channels = [
    # 113-115
    'In113Di', 'In115Di',
    # 139-149
    'La139Di', 'Ce140Di', 'Pr141Di', 'Nd142Di', 'Nd143Di', 'Nd144Di',
    'Nd145Di', 'Nd146Di', 'Sm147Di', 'Nd148Di', 'Sm149Di',
    # 150-159
    'Sm150Di', 'Eu151Di', 'Sm152Di', 'Eu153Di', 'Sm154Di',
    'Gd155Di', 'Gd156Di', 'Gd157Di', 'Gd158Di', 'Tb159Di',
    # 160-169
    'Gd160Di', 'Dy161Di', 'Dy162Di', 'Dy163Di', 'Dy164Di',
    'Ho165Di', 'Er166Di', 'Er167Di', 'Er168Di', 'Tm169Di',
    # 170-176
    'Er170Di', 'Yb171Di', 'Yb172Di', 'Yb173Di', 'Yb174Di',
    'Lu175Di', 'Yb176Di',
    # 191-193 (the same two channels are also passed as DNA_channels in gate_kwds)
    'Ir191Di', 'Ir193Di',
]

In [3]:
# Home directory as a plain string; later cells reuse `home_dir` to build paths.
home_dir = os.fspath(Path.home())

# Input locations: the directory holding the FCS files and the metadata CSV
# whose "fcs_file" column names the file for each row.
data_dir = os.path.join(home_dir, "Desktop/Data/MultiStim")
metadata_csv = os.path.join(home_dir, "Desktop/Data/MultiStim/meta_data_sub.csv")

# Options forwarded verbatim as `gate_kwds`. Of the gate_* switches only
# `gate_intact_cells` is enabled; the DNA channels are the two Ir channels.
# NOTE(review): exact semantics of each option live in multistim — confirm there.
gate_options = {
    'arcsinh': True,
    'cofactor': 5,
    'auto_channels': False,
    'gate_debris_removal': False,
    'gate_intact_cells': True,
    'gate_live_cells': False,
    'gate_center_offset_residual': False,
    'bead_normalization': False,
    'time_channel': ['Time'],
    'DNA_channels': ['Ir191Di', 'Ir193Di'],
}

# Per the outputs further down, the result is a dict of AnnData objects keyed
# by stimulus ('Basal', 'CD40L', 'IFNa2', 'IFNg').
adata = prepare_cytometry_data(
    data_dir=data_dir,
    metadata=metadata_csv,
    filename_col="fcs_file",
    lineage_channels=lineage_channels,
    num_workers=16,
    groupby="stimulus",
    down_sample=10000,
    gate_kwds=gate_options,
    verbose=True,
)

[2;36m[15:15:58][0m[2;36m [0m Reading and preprocessing file: R26/R26W3_19454_Hu_F_Basal.fcs      
[2;36m[15:15:58][0m[2;36m [0m Reading and preprocessing file: R28/R28W9_161011_Hu_F_Basal.fcs     
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R26/R26W7_200200_Hu_M_Basal.fcs     
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R28/R28W3_20283_Hu_M_Basal.fcs      
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R28/R28W1_20814_Hu_F_Basal.fcs      
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R28/R28W11_22181_Hu_F_Basal.fcs     
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R25/R25W7_2339_Hu_F_Basal.fcs       
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R18/R18W1_2369_Hu_M_Basal.fcs       
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R27/R27W3_24252_Hu_M_Basal.fcs      
[2;36m[15:15:59][0m[2;36m [0m Reading and preprocessing file: R28/R28

In [7]:
# Inspect the per-cell annotation table of the 'Basal' dataset as loaded
# (before the sample_id column and custom index are added below).
adata['Basal'].obs

Unnamed: 0,stimulus,species,donor,sex,fcs_file,tag
0,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9
1,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9
2,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9
3,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9
4,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9
...,...,...,...,...,...,...
1799995,Basal,Human,W3231,F,R25/R25W3_W3231_Hu_F_Basal.fcs,R25W3
1799996,Basal,Human,W3231,F,R25/R25W3_W3231_Hu_F_Basal.fcs,R25W3
1799997,Basal,Human,W3231,F,R25/R25W3_W3231_Hu_F_Basal.fcs,R25W3
1799998,Basal,Human,W3231,F,R25/R25W3_W3231_Hu_F_Basal.fcs,R25W3


In [8]:
import pickle

# Checkpoint the freshly loaded dict of AnnData objects next to the raw data.
# The path is built from `home_dir` (as every other path in this notebook is)
# instead of a hard-coded /Users/<name>/... location, so the cell also works
# under a different account or machine.
# NOTE: this snapshot is taken before the `sample_id` column / index changes
# made in the following cells.
# NOTE: only ever unpickle this file from a trusted location — pickle.load
# executes arbitrary code.
pickle_path = os.path.join(home_dir, "Desktop/Data/MultiStim/multistim.pkl")
with open(pickle_path, 'wb') as outfile:
    pickle.dump(adata, outfile)

In [10]:
# Give every cell a sample identifier "<donor>-<tag>" and use it as the basis
# for the obs index of each dataset.
for stim_adata in adata.values():
    # `.obs` is re-read from the AnnData on every statement, exactly as before.
    donor_str = stim_adata.obs['donor'].astype(str)
    tag_str = stim_adata.obs['tag'].astype(str)
    stim_adata.obs['sample_id'] = donor_str + "-" + tag_str

    # Index by sample_id while keeping it as a regular column (drop=False).
    stim_adata.obs.set_index('sample_id', drop=False, inplace=True)
    stim_adata.obs.index.name = "Cell_ID"

    # All cells of one sample share the same label at this point; let AnnData
    # de-duplicate them (see the "-1", "-2", ... suffixes in the output below).
    stim_adata.obs_names_make_unique()

In [11]:
# Display the dict of per-stimulus AnnData objects; `sample_id` should now be
# listed among the obs columns.
adata

{'Basal': AnnData object with n_obs × n_vars = 1800000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'CD40L': AnnData object with n_obs × n_vars = 1800000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'IFNa2': AnnData object with n_obs × n_vars = 1800000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'IFNg': AnnData object with n_obs × n_vars = 1800000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE'}

In [12]:
# Peek at the first rows to check the new Cell_ID index and sample_id column.
adata['Basal'].obs.head()

Unnamed: 0_level_0,stimulus,species,donor,sex,fcs_file,tag,sample_id
Cell_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
161011-R28W9,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9,161011-R28W9
161011-R28W9-1,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9,161011-R28W9
161011-R28W9-2,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9,161011-R28W9
161011-R28W9-3,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9,161011-R28W9
161011-R28W9-4,Basal,Human,161011,F,R28/R28W9_161011_Hu_F_Basal.fcs,R28W9,161011-R28W9


In [13]:
# Samples to keep, one "<donor>-<tag>" identifier per line (175 entries).
# The values match the `sample_id` column built earlier and are used by the
# next cell to filter every per-stimulus dataset.
sample_ids = [
    "161011-R28W9",
    "19454-R26W3",
    "200200-R26W7",
    "20283-R28W3",
    "20814-R28W1",
    "22181-R28W11",
    "2339-R25W7",
    "2369-R18W1",
    "24252-R27W3",
    "25112-R28W5",
    "2810-R16W7",
    "31191-R28W7",
    "3577-R20W1",
    "3606-R18W3",
    "3675-R24W7",
    "3LB2E-R27W7",
    "4558-R20W7",
    "4600-R21W11",
    "4704-R23W3",
    "4809-R17W3",
    "4903-R17W9",
    "4913-R18W5",
    "4951-R20W5",
    "4969-R23W9",
    "5040-R29W9",
    "5140-R23W1",
    "5293-R17W7",
    "5335-R24W9",
    "5376-R19W5",
    "5436-R20W9",
    "5440-R18W11",
    "5614-R18W7",
    "5656-R21W9",
    "5751-R19W11",
    "5826-R29W1",
    "6054-R21W3",
    "6093-R24W1",
    "6159-R23W11",
    "6296-R25W1",
    "6347-R24W3",
    "6386-R21W1",
    "6390-R29W7",
    "6453-R27W5",
    "6538-R23W5",
    "6543-R21W7",
    "671113-R30W9",
    "6871-R18W9",
    "6955-R19W1",
    "7079-R29W5",
    "7091-R27W9",
    "7149-R26W9",
    "7209-R23W7",
    "7308-R24W11",
    "7338-R30W5",
    "7393-R19W7",
    "7546-R17W5",
    "7584-R16W1",
    "7605-R19W3",
    "7641-R26W5",
    "7688-R26W1",
    "7718-R16W3",
    "7755-R17W1",
    "7766-R30W11",
    "7784-R19W9",
    "7826-R16W11",
    "7886-R16W5",
    "7905-R17W11",
    "7916-R16W9",
    "7932-R29W11",
    "7946-R21W5",
    "7962-R20W11",
    "7976-R24W5",
    "8009-R30W7",
    "82290-R26W11",
    "85210-R30W3",
    "97751-R29W3",
    "9819-R27W11",
    "9914-R27W1",
    "A7635-R32W11",
    "AGM132-R32W9",
    "D00522-R10W9",
    "D00562-R7W7",
    "D00602-R2W3",
    "D00612-R12W9",
    "D00642-R12W3",
    "D00682-R4W11",
    "D00692-R3W1",
    "D00732-R8W7",
    "D06022-R4W9",
    "D06122-R7W5",
    "D07052-R13W3",
    "D07062-R11W11",
    "D07102-R3W9",
    "D07112-R11W3",
    "D07122-R3W3",
    "D07132-R12W7",
    "D07142-R8W11",
    "D07162-R4W3",
    "D07172-R12W1",
    "D07212-R9W7",
    "D07222-R8W9",
    "D07242-R7W11",
    "D07262-R13W9",
    "D07282-R8W1",
    "D07292-R10W5",
    "D07322-R2W5",
    "D07332-R10W11",
    "D07352-R3W5",
    "D07362-R3W7",
    "D07372-R11W1",
    "D07382-R11W5",
    "D07401-R7W1",
    "D07412-R13W1",
    "D07422-R13W7",
    "D07432-R12W5",
    "D07442-R7W3",
    "D07472-R9W1",
    "D07482-R9W5",
    "D07492-R12W11",
    "D07502-R7W9",
    "D07552-R14W11",
    "D07602-R14W7",
    "D07652-R9W9",
    "D07672-R13W5",
    "D07692-R14W9",
    "D07702-R14W3",
    "D07742-R10W7",
    "D07762-R4W5",
    "D07772-R11W7",
    "D07782-R11W9",
    "D07812-R8W5",
    "D07892-R10W3",
    "D07902-R4W7",
    "D07932-R2W1",
    "D07942-R2W7",
    "D07962-R9W3",
    "D07972-R13W11",
    "D09321-R8W3",
    "D09331-R10W1",
    "D09361-R2W9",
    "D09441-R4W1",
    "D09521-R3W11",
    "D09561-R14W5",
    "D09571-R14W1",
    "MS1-R35W1",
    "MS2-R35W3",
    "MS3-R35W5",
    "MS4-R35W7",
    "MS5-R35W9",
    "MS6-R35W11",
    "O5124-R31W3",
    "O7289-R33W5",
    "O7581-R36W5",
    "O7622-R36W9",
    "O7634-R31W11",
    "O7637-R36W1",
    "O7661-R31W7",
    "O7732-R33W3",
    "O7734-R36W3",
    "O77394-R33W7",
    "O7760-R32W5",
    "O7761-R31W9",
    "O7768-R33W1",
    "O7771-R33W9",
    "O7778-R32W1",
    "O7793-R32W7",
    "O7876-R32W3",
    "O7931-R31W1",
    "O7944-R31W5",
    "W070515100685-R15W7",
    "W070515101208-R15W11",
    "W070515101352-R15W9",
    "W070515107458-R15W3",
    "W1281-R25W11",
    "W3231-R25W3",
]

In [18]:
# Restrict every per-stimulus dataset to cells whose sample_id is listed in
# `sample_ids`. Values are replaced in place in the existing dict (no keys are
# added or removed), and each subset is materialised with .copy() so it is a
# standalone object rather than a view of the unfiltered data.
for key, stim_adata in adata.items():
    keep_mask = stim_adata.obs['sample_id'].isin(sample_ids)
    adata[key] = stim_adata[keep_mask].copy()

In [19]:
from multistim.preprocessing._anndata import MultiAnnData

In [20]:
# Re-display the datasets after filtering to check the reduced n_obs.
adata

{'Basal': AnnData object with n_obs × n_vars = 1750000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'CD40L': AnnData object with n_obs × n_vars = 1750000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'IFNa2': AnnData object with n_obs × n_vars = 1750000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE',
 'IFNg': AnnData object with n_obs × n_vars = 1750000 × 51
     obs: 'stimulus', 'species', 'donor', 'sex', 'fcs_file', 'tag', 'sample_id'
     var: 'marker', '$PnB', '$PnE'}

In [21]:
# Wrap each filtered dataset in a MultiAnnData keyed on the `sample_id` column,
# then hand the listed obs columns to obs_to_sample.
# NOTE(review): presumably obs_to_sample moves these per-sample-constant columns
# into a sample-level table — confirm in multistim.preprocessing._anndata.
for key, stim_adata in adata.items():
    wrapped = MultiAnnData(stim_adata, sampleid="sample_id")
    adata[key] = wrapped
    wrapped.obs_to_sample(
        columns=["stimulus", "species", "donor", "sex", "fcs_file", "tag"],
    )

In [22]:
import mudata as md

In [23]:
# Prefix every observation name with its stimulus key ("Basal-", "CD40L-", ...)
# so that names stay distinct across datasets once they are combined.
#
# The guard makes the cell safe to re-run: the original unconditionally
# prepended the prefix, so executing it twice produced "Basal-Basal-..." names.
# The prefix is only applied when the index is not already fully prefixed.
for key in adata:
    prefix = f"{key}-"
    obs_names = adata[key].obs.index
    if not obs_names.str.startswith(prefix).all():
        adata[key].obs.index = prefix + obs_names

In [24]:
# Combine the per-stimulus datasets (dict keyed by stimulus) into one MuData.
# NOTE(review): axis=1 presumably declares that the datasets share variables
# while their observations differ — confirm against the mudata documentation.
mdata = md.MuData(adata, axis=1)

In [25]:
# Refresh the MuData container before writing it out.
mdata.update()

# Persist the combined object as an .h5mu file under the user's home directory.
h5mu_path = os.path.join(
    home_dir,
    "Documents/proj/multi-stim/data/ImmuneAtlas/ImmuneAtlas.h5mu",
)
md.write_h5mu(h5mu_path, mdata)