In [1]:
import pandas as pd
import scanpy as sc
import anndata as ad
from scipy.sparse import csr_matrix

# Pure

## Bulk

In [2]:
# Load the simulated mixture for the "pure" scenario: the raw count table and
# the coordinate table (both use their first CSV column as the index).
pure_count = pd.read_csv("../../s1/pure/simulated_mixture_raw_counts.csv", index_col=0)
pure_meta = pd.read_csv("../../s1/pure/simulated_mixture_coordinates.csv", index_col=0)

# obs: coordinate column x is stored as 'array_col' and y as 'array_row',
# indexed by the coordinate table's own index.
# NOTE(review): nothing here checks that pure_meta rows are in the same order
# as pure_count rows -- confirm upstream guarantees this.
pure_obs = pd.DataFrame(
    {'array_col': pure_meta.x, 'array_row': pure_meta.y},
    index=pure_meta.index,
)

# var: one row per count-table column, with the column label repeated in 'gene'.
pure_genes = pure_count.columns
pure_var = pd.DataFrame({'gene': pure_genes.tolist()}, index=pure_genes)

# Counts are converted to a CSR sparse matrix before being wrapped in AnnData.
pure_bulk = ad.AnnData(X=csr_matrix(pure_count.values), obs=pure_obs, var=pure_var)
pure_bulk.write("cell2loc_inputs/pure/simulated_pure_bulk.h5")

### MIST reference

In [3]:
# Read the MIST reference for the pure scenario: a count table and a metadata
# table (first CSV column used as the index in both).
MIST_count = pd.read_csv("../../s1/pure/MIST_reference_count.csv", index_col=0)
MIST_meta = pd.read_csv("../../s1/pure/MIST_reference_meta.csv", index_col=0)

# var is indexed by the count-table column labels and repeats them as 'gene'.
mist_genes = MIST_count.columns
mist_var = pd.DataFrame({'gene': mist_genes.tolist()}, index=mist_genes)

# The metadata table is used as obs unchanged; counts go in as a CSR matrix.
MIST_ref = ad.AnnData(
    X=csr_matrix(MIST_count.values),
    obs=MIST_meta,
    var=mist_var,
)
MIST_ref

AnnData object with n_obs × n_vars = 1051 × 19738
    obs: 'bio_celltype'
    var: 'gene'

In [4]:
# Save the pure-scenario MIST reference to disk. The write logs that the
# 'bio_celltype' obs column is stored as categorical.
mist_ref_path = "cell2loc_inputs/pure/MIST_ref.h5"
MIST_ref.write(mist_ref_path)

... storing 'bio_celltype' as categorical


### Internal reference

In [5]:
# Read the internal single-cell reference: count table and metadata table
# (first CSV column used as the index in both).
int_count = pd.read_csv("../../single_cell_ref/single_cell_ref_count_internal.csv", index_col=0)
int_meta = pd.read_csv("../../single_cell_ref/single_cell_ref_meta_internal.csv", index_col=0)

# var is indexed by the count-table column labels and repeats them as 'gene'.
int_genes = int_count.columns
int_var = pd.DataFrame({'gene': int_genes.tolist()}, index=int_genes)

# The metadata table is used as obs unchanged; counts go in as a CSR matrix.
int_ref = ad.AnnData(
    X=csr_matrix(int_count.values),
    obs=int_meta,
    var=int_var,
)
int_ref

AnnData object with n_obs × n_vars = 1794 × 19738
    obs: 'bio_celltype'
    var: 'gene'

In [6]:
# The same internal reference is saved into both scenario input folders,
# pure first and then infiltrated.
for int_ref_path in (
    "cell2loc_inputs/pure/internal_ref.h5",
    "cell2loc_inputs/infiltrated/internal_ref.h5",
):
    int_ref.write(int_ref_path)

... storing 'bio_celltype' as categorical


### External reference

In [7]:
# Read the external single-cell reference: count table and metadata table
# (first CSV column used as the index in both).
ext_count = pd.read_csv("../../single_cell_ref/single_cell_ref_count_external.csv", index_col=0)
ext_meta = pd.read_csv("../../single_cell_ref/single_cell_ref_meta_external.csv", index_col=0)

# var is indexed by the count-table column labels and repeats them as 'gene'.
ext_genes = ext_count.columns
ext_var = pd.DataFrame({'gene': ext_genes.tolist()}, index=ext_genes)

# The metadata table is used as obs unchanged; counts go in as a CSR matrix.
ext_ref = ad.AnnData(
    X=csr_matrix(ext_count.values),
    obs=ext_meta,
    var=ext_var,
)
ext_ref

AnnData object with n_obs × n_vars = 1515 × 19738
    obs: 'bio_celltype'
    var: 'gene'

In [8]:
# The same external reference is saved into both scenario input folders,
# pure first and then infiltrated.
for ext_ref_path in (
    "cell2loc_inputs/pure/external_ref.h5",
    "cell2loc_inputs/infiltrated/external_ref.h5",
):
    ext_ref.write(ext_ref_path)

... storing 'bio_celltype' as categorical


# Infiltrated

## Bulk

In [9]:
# Load the simulated mixture from the cancer_inf_0.1 folder: the raw count
# table and the coordinate table (first CSV column used as the index in both).
inf_count = pd.read_csv("../../s1/cancer_inf_0.1/simulated_mixture_raw_counts.csv", index_col=0)
inf_meta = pd.read_csv("../../s1/cancer_inf_0.1/simulated_mixture_coordinates.csv", index_col=0)

# obs: coordinate column x is stored as 'array_col' and y as 'array_row',
# indexed by the coordinate table's own index.
# NOTE(review): nothing here checks that inf_meta rows are in the same order
# as inf_count rows -- confirm upstream guarantees this.
inf_obs = pd.DataFrame(
    {'array_col': inf_meta.x, 'array_row': inf_meta.y},
    index=inf_meta.index,
)

# var: one row per count-table column, with the column label repeated in 'gene'.
inf_genes = inf_count.columns
inf_var = pd.DataFrame({'gene': inf_genes.tolist()}, index=inf_genes)

# Counts are converted to a CSR sparse matrix before being wrapped in AnnData.
inf_bulk = ad.AnnData(X=csr_matrix(inf_count.values), obs=inf_obs, var=inf_var)
inf_bulk.write("cell2loc_inputs/infiltrated/simulated_infiltrated_bulk.h5")

## MIST reference

In [11]:
# Read the MIST reference from the cancer_inf_0.1 folder: a count table and a
# metadata table (first CSV column used as the index in both).
# Note: this rebinds MIST_count / MIST_meta / MIST_ref, replacing the objects
# built earlier from the pure folder.
MIST_count = pd.read_csv("../../s1/cancer_inf_0.1/MIST_reference_count.csv", index_col=0)
MIST_meta = pd.read_csv("../../s1/cancer_inf_0.1/MIST_reference_meta.csv", index_col=0)

# var is indexed by the count-table column labels and repeats them as 'gene'.
mist_genes = MIST_count.columns
mist_var = pd.DataFrame({'gene': mist_genes.tolist()}, index=mist_genes)

# The metadata table is used as obs unchanged; counts go in as a CSR matrix.
MIST_ref = ad.AnnData(
    X=csr_matrix(MIST_count.values),
    obs=MIST_meta,
    var=mist_var,
)
MIST_ref

AnnData object with n_obs × n_vars = 891 × 19738
    obs: 'bio_celltype'
    var: 'gene'

In [12]:
# Save the infiltrated-scenario MIST reference to disk. The write logs that
# the 'bio_celltype' obs column is stored as categorical.
mist_ref_path = "cell2loc_inputs/infiltrated/MIST_ref.h5"
MIST_ref.write(mist_ref_path)

... storing 'bio_celltype' as categorical
