## References

Tutorial: https://docs.scvi-tools.org/en/stable/user_guide/notebooks/MultiVI_tutorial.html <br>
Paper: https://www.biorxiv.org/content/10.1101/2021.08.20.457057v2

## Dataset to prepare

### 1) RNA (scnRNA + Multiome-RNA)
* Read in data: post-CellBender, filtered as the previous HCA object, cell-type annotated
* Subset scnRNA: barcode x gene -> **`adata_rna.h5ad`**
* Subset MultiomeRNA: barcode x gene

### 2) ATAC (snATAC + Multiome-ATAC)
* Read in data: post-cellatac and filtered peaks and nuclei, `6reg-v2_ATACs_filtered.h5ad`
* Subset snATAC: barcode x peak -> **`adata_atac.h5ad`**
* Subset MultiomeATAC: barcode x peak

### 3) Concatenate Multiome RNA+ATAC
barcode x (gene+peak) -> **`adata_paired.h5ad`**

In [1]:
import scanpy as sc
import numpy as np
import pandas as pd
import anndata
import scipy

In [2]:
import session_info
session_info.show()

In [3]:
def downsize_cell(adata, groupby, n_down, seed):
   
    groups = adata.obs[groupby].cat.categories
    for group in groups:
        adata_group = adata[adata.obs[groupby].isin([group])]
        if len(adata_group)>n_down:
            np.random.seed(seed)
            rdcell = np.random.choice(adata_group.shape[0], size=n_down, replace=False)
            if group == groups[0]:
                down = adata_group[rdcell, :]
            else:
                down = down.concatenate(adata_group[rdcell, :], index_unique=None)
        else:
            if group == groups[0]:
                down = adata_group
            else:
                down = down.concatenate(adata_group, index_unique=None)
        del adata_group
    
    down.obs[groupby] = down.obs[groupby].astype('category')
    return down

## Read in data

**RNA**

In [4]:
rna=sc.read_h5ad('/nfs/team205/kk18/HeartAtlas/anndata/adult-heart_scnRNA-MultiRNA_raw_210720.h5ad')
rna.obs['modality_fine']=rna.obs['modality'].copy()

# add intra-batchkey: 'donor_cellnuc'
rna.obs['donor_cellnuc']=rna.obs['donor'].astype('str')+'_'+rna.obs['cell_or_nuclei'].astype('str')

# add modality to .var
rna.var['modality']='Gene Expression'

rna

AnnData object with n_obs × n_vars = 473643 × 31915
    obs: 'sangerID', 'modality', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality_fine', 'donor_cellnuc'
    var: 'gene_ids-0', 'gene_ids-1', 'modality'

In [5]:
rna.X.data[:10]

array([1., 1., 1., 1., 4., 1., 1., 1., 1., 1.], dtype=float32)

In [6]:
# set 'modality' in .obs as 'scnRNA' and 'Multiome'
rna.obs.replace({'modality':{
    '3prime_v3':'scnRNA',
    '3prime_v2':'scnRNA',
}},inplace=True)

rna.obs['modality'].value_counts()

scnRNA      443004
Multiome     30639
Name: modality, dtype: int64

**ATAC**

In [7]:
atac = sc.read_h5ad('/nfs/team205/kk18/data/6region_v2/ATAC/6reg-v2_ATACs_filtered.h5ad')

# add intra-batchkey: 'donor_cellnuc'
atac.obs['cell_or_nuclei']='nuclei'
atac.obs.rename(columns={'Donor':'donor'},inplace=True)
atac.obs['donor_cellnuc']=atac.obs['donor'].astype('str')+'_'+atac.obs['cell_or_nuclei'].astype('str')

# add modality to .var
atac.var['modality']='Peaks'

atac

AnnData object with n_obs × n_vars = 79208 × 102627
    obs: 'cellatac_clusters', 'cellatac_code', 'sangerID', 'dataset', 'donor', 'Region', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'barcode', 'oribarcode', 'cell_or_nuclei', 'donor_cellnuc'
    var: 'peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name', 'gene_id', 'tss_distance', 'ENCODE_blacklist', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'modality'
    layers: 'binary_raw'

In [8]:
atac.obs=atac.obs[['cellatac_clusters','cellatac_code', 'sangerID', 'dataset', 'donor', 'Region', 'barcode', 'oribarcode','donor_cellnuc']]
atac.var=atac.var[['modality','peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name','gene_id', 'tss_distance', 'ENCODE_blacklist',]]

# set 'modality' in .obs as 'snATAC' and 'Multiome'
atac.obs['modality']=atac.obs['dataset'].copy()
atac.obs.replace({'modality':{
    'Multiome_1':'Multiome',
    'Multiome_2':'Multiome',
    'LV':'snATAC',
    'AX':'snATAC',
    'SP':'snATAC',
    'RA':'snATAC',
    'LA':'snATAC',
    'RV':'snATAC',
}},inplace=True)
atac.obs['modality'].value_counts()

snATAC      48098
Multiome    31110
Name: modality, dtype: int64

In [9]:
atac.X.data[:10]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

## Generate `adata_rna.h5ad`

In [10]:
# subset scnRNA data
adata_rna=rna[rna.obs['modality']=='scnRNA']
adata_rna

View of AnnData object with n_obs × n_vars = 443004 × 31915
    obs: 'sangerID', 'modality', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality_fine', 'donor_cellnuc'
    var: 'gene_ids-0', 'gene_ids-1', 'modality'

In [11]:
# downsize adata_rna per 'cell_states'
adata_rna=downsize_cell(adata_rna,'cell_states',n_down=200, seed=123)
adata_rna.var=adata_rna.var[['modality']].copy()
adata_rna

AnnData object with n_obs × n_vars = 13822 × 31915
    obs: 'sangerID', 'modality', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality_fine', 'donor_cellnuc', 'batch'
    var: 'modality'

In [12]:
# save
adata_rna.write('/nfs/team205/kk18/data/6region_v2/MultiVI/adata_rna_downsized.h5ad')

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'sangerID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'age_group' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'region' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cell_or_nuclei' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'type' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor_cellnuc' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'modality' as categorical


## Generate `adata_atac.h5ad`

In [13]:
adata_atac=atac[atac.obs['modality']=='snATAC']
adata_atac

View of AnnData object with n_obs × n_vars = 48098 × 102627
    obs: 'cellatac_clusters', 'cellatac_code', 'sangerID', 'dataset', 'donor', 'Region', 'barcode', 'oribarcode', 'donor_cellnuc', 'modality'
    var: 'modality', 'peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name', 'gene_id', 'tss_distance', 'ENCODE_blacklist'
    layers: 'binary_raw'

In [14]:
# downsize adata_atac per 'cellatac_clusters'
adata_atac.obs['cellatac_clusters']=adata_atac.obs['cellatac_clusters'].astype('category')
adata_atac=downsize_cell(adata_atac,'cellatac_clusters',n_down=500, seed=123)
adata_atac.var=adata_atac.var[['modality']].copy()
adata_atac

Trying to set attribute `.obs` of view, copying.


AnnData object with n_obs × n_vars = 11170 × 102627
    obs: 'cellatac_clusters', 'cellatac_code', 'sangerID', 'dataset', 'donor', 'Region', 'barcode', 'oribarcode', 'donor_cellnuc', 'modality', 'batch'
    var: 'modality'
    layers: 'binary_raw'

In [15]:
adata_atac.write('/nfs/team205/kk18/data/6region_v2/MultiVI/adata_atac_downsized.h5ad')

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cellatac_code' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'sangerID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'dataset' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Region' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'barcode' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor_cellnuc' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'modality' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'modality' as categorical


## Generate `adata_paired.h5ad`

In [16]:
# multiome metadata
meta=pd.read_csv('/nfs/team205/kk18/data/cellatac/tic-1050/6reg-v2_Multiome_metadata.csv',index_col=0)
meta

Unnamed: 0,SangerID,SampleID,Protocol,Target,Donor,Region,Gender,Age,Type,cellranger_outs,Combined_ID
0,HCAHeart9508819,A24-HEA-1-FTM-3,ATAC,10000,3,LV,Male,55-60,DBD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508627_HCAHeart9508819
1,HCAHeart9508820,A35-HEA-4-FTM-2,ATAC,10000,7,RA,Male,60-65,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508628_HCAHeart9508820
2,HCAHeart9508821,A35-HEA-1-FTM-2,ATAC,10000,7,LV,Male,60-65,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508629_HCAHeart9508821
3,HCAHeart9508627,A24-HEA-1-FTM-3,RNA,10000,3,LV,Male,55-60,DBD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508627_HCAHeart9508819
4,HCAHeart9508628,A35-HEA-4-FTM-2,RNA,10000,7,RA,Male,60-65,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508628_HCAHeart9508820
5,HCAHeart9508629,A35-HEA-1-FTM-2,RNA,10000,7,LV,Male,60-65,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9508629_HCAHeart9508821
6,HCAHeart9845431,A39-HEA-1-FTM-4,RNA,10000,8,LV,Male,45-50,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9845431_HCAHeart9917173
7,HCAHeart9845432,A39-HEA-6-FTM-4,RNA,10000,8,AX,Male,45-50,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9845432_HCAHeart9917174
8,HCAHeart9845433,A39-HEA-3-FTM-4,RNA,10000,8,LA,Male,45-50,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9845433_HCAHeart9917175
9,HCAHeart9845434,A39-HEA-2-FTM-2,RNA,10000,8,RV,Male,45-50,DCD,/seq/illumina/cellranger-arc/cellranger-arc101...,HCAHeart9845434_HCAHeart9917176


**Multiome, RNA**

In [17]:
# subset Multiome-RNA
multiome_rna=rna[rna.obs['modality']=='Multiome']

# prepare GEM barcodes
multiome_rna.obs['barcode']=multiome_rna.obs.index.copy()
multiome_rna.obs['barcode']=[x.split('_')[1] for x in multiome_rna.obs.index]

# add Combined_ID
multiome_rna.obs.rename(columns={'sangerID':'rna_sangerID'},inplace=True)
multiome_rna.obs=multiome_rna.obs.merge(meta[['SangerID','Combined_ID']],how='left',left_on='rna_sangerID',right_on='SangerID')
multiome_rna.obs.drop(['SangerID'],axis=1,inplace=True)

# set index with fullbarcode: Combined_ID + barcodes
multiome_rna.obs['Combined_barcode']=multiome_rna.obs['Combined_ID'].astype('str')+ \
                                     '_'+ multiome_rna.obs['barcode'].astype('str')
multiome_rna.obs.set_index('Combined_barcode',inplace=True)

multiome_rna

Trying to set attribute `.obs` of view, copying.
AnnData expects .obs.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)


AnnData object with n_obs × n_vars = 30639 × 31915
    obs: 'rna_sangerID', 'modality', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality_fine', 'donor_cellnuc', 'barcode', 'Combined_ID'
    var: 'gene_ids-0', 'gene_ids-1', 'modality'

In [18]:
multiome_rna.obs.head()

Unnamed: 0_level_0,rna_sangerID,modality,donor,age_group,region,cell_or_nuclei,gender,type,cell_states,modality_fine,donor_cellnuc,barcode,Combined_ID
Combined_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
HCAHeart9508627_HCAHeart9508819_AAACATGCAGCAGGTA-1,HCAHeart9508627,Multiome,D3,55-60,LV,nuclei,Male,DBD,PC2_atria,Multiome,D3_nuclei,AAACATGCAGCAGGTA-1,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACATGCATAGCTGC-1,HCAHeart9508627,Multiome,D3,55-60,LV,nuclei,Male,DBD,vCM3,Multiome,D3_nuclei,AAACATGCATAGCTGC-1,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACAAGGTGCA-1,HCAHeart9508627,Multiome,D3,55-60,LV,nuclei,Male,DBD,DOCK4+aMØ,Multiome,D3_nuclei,AAACCAACAAGGTGCA-1,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACACTTAACG-1,HCAHeart9508627,Multiome,D3,55-60,LV,nuclei,Male,DBD,vCM3,Multiome,D3_nuclei,AAACCAACACTTAACG-1,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACAGCCGCTA-1,HCAHeart9508627,Multiome,D3,55-60,LV,nuclei,Male,DBD,vCM3,Multiome,D3_nuclei,AAACCAACAGCCGCTA-1,HCAHeart9508627_HCAHeart9508819


**Multiome, ATAC**

In [19]:
# subset Multiome-ATAC
multiome_atac=atac[atac.obs['modality']=='Multiome']

# add Combined_ID
multiome_atac.obs.rename(columns={'sangerID':'atac_sangerID'},inplace=True)
multiome_atac.obs=multiome_atac.obs.merge(meta[['SangerID','Combined_ID']],how='left',left_on='atac_sangerID',right_on='SangerID')
multiome_atac.obs.drop(['SangerID'],axis=1,inplace=True)

# set index with fullbarcode: Combined_ID + barcodes
multiome_atac.obs['Combined_barcode']=multiome_atac.obs['Combined_ID'].astype('str')+ \
                                     '_'+ multiome_atac.obs['barcode'].astype('str')
multiome_atac.obs.set_index('Combined_barcode',inplace=True)

multiome_atac

AnnData expects .obs.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)


AnnData object with n_obs × n_vars = 31110 × 102627
    obs: 'cellatac_clusters', 'cellatac_code', 'atac_sangerID', 'dataset', 'donor', 'Region', 'barcode', 'oribarcode', 'donor_cellnuc', 'modality', 'Combined_ID'
    var: 'modality', 'peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name', 'gene_id', 'tss_distance', 'ENCODE_blacklist'
    layers: 'binary_raw'

In [20]:
multiome_atac.obs.head()

Unnamed: 0_level_0,cellatac_clusters,cellatac_code,atac_sangerID,dataset,donor,Region,barcode,oribarcode,donor_cellnuc,modality,Combined_ID
Combined_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
HCAHeart9508627_HCAHeart9508819_AAACATGCAGCAGGTA-1,21,22,HCAHeart9508819,Multiome_1,D3,LV,AAACATGCAGCAGGTA-1,22-AAACATGCAGCAGGTA-1,D3_nuclei,Multiome,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACATGCATAGCTGC-1,13,22,HCAHeart9508819,Multiome_1,D3,LV,AAACATGCATAGCTGC-1,22-AAACATGCATAGCTGC-1,D3_nuclei,Multiome,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACAAGGTGCA-1,6,22,HCAHeart9508819,Multiome_1,D3,LV,AAACCAACAAGGTGCA-1,22-AAACCAACAAGGTGCA-1,D3_nuclei,Multiome,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACACTTAACG-1,13,22,HCAHeart9508819,Multiome_1,D3,LV,AAACCAACACTTAACG-1,22-AAACCAACACTTAACG-1,D3_nuclei,Multiome,HCAHeart9508627_HCAHeart9508819
HCAHeart9508627_HCAHeart9508819_AAACCAACAGCCGCTA-1,13,22,HCAHeart9508819,Multiome_1,D3,LV,AAACCAACAGCCGCTA-1,22-AAACCAACAGCCGCTA-1,D3_nuclei,Multiome,HCAHeart9508627_HCAHeart9508819


**Concatenate Multiome-RNA and Multiome-ATAC**

In [21]:
# take intersecting barcodes between 
barcodes_inter=list(set(multiome_rna.obs_names).intersection(multiome_atac.obs_names))
len(barcodes_inter)

30639

In [22]:
multiome_rna=multiome_rna[barcodes_inter,:]
multiome_atac=multiome_atac[barcodes_inter,:]
all(multiome_rna.obs_names==multiome_atac.obs_names)

True

In [23]:
adata_paired=anndata.AnnData(
    X=scipy.sparse.hstack([multiome_rna.X,multiome_atac.X]).tocsr(), # concatenate sparse matrix and convert to Compressed Sparse Row format
    obs=pd.concat([multiome_rna.obs[['Combined_ID','rna_sangerID','barcode','donor', 
                                     'age_group','region','cell_or_nuclei', 'gender', 'type', 'cell_states','modality','donor_cellnuc']], \
                   multiome_atac.obs[['atac_sangerID','cellatac_clusters','cellatac_code',]]
                  ],axis=1),
    var=pd.concat([multiome_rna.var[['modality','gene_ids-0', 'gene_ids-1']], \
                   multiome_atac.var[['modality','peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name','gene_id', 'tss_distance', 'ENCODE_blacklist']]
                  ],axis=0)
)
adata_paired

AnnData object with n_obs × n_vars = 30639 × 134542
    obs: 'Combined_ID', 'rna_sangerID', 'barcode', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality', 'donor_cellnuc', 'atac_sangerID', 'cellatac_clusters', 'cellatac_code'
    var: 'modality', 'gene_ids-0', 'gene_ids-1', 'peak_width', 'exon', 'gene', 'promoter', 'annotation', 'gene_name', 'gene_id', 'tss_distance', 'ENCODE_blacklist'

In [24]:
adata_paired.obs.head()

Unnamed: 0_level_0,Combined_ID,rna_sangerID,barcode,donor,age_group,region,cell_or_nuclei,gender,type,cell_states,modality,donor_cellnuc,atac_sangerID,cellatac_clusters,cellatac_code
Combined_barcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
HCAHeart9508627_HCAHeart9508819_CCGTTGCGTTAGACCA-1,HCAHeart9508627_HCAHeart9508819,HCAHeart9508627,CCGTTGCGTTAGACCA-1,D3,55-60,LV,nuclei,Male,DBD,EC10_CMC-like,Multiome,D3_nuclei,HCAHeart9508819,13,22
HCAHeart9845433_HCAHeart9917175_CTAGGACGTCACAGAC-1,HCAHeart9845433_HCAHeart9917175,HCAHeart9845433,CTAGGACGTCACAGAC-1,D8,45-50,LA,nuclei,Male,DCD,aCM1,Multiome,D8_nuclei,HCAHeart9917175,12,27
HCAHeart9508628_HCAHeart9508820_ACAACAACATCCTGAA-1,HCAHeart9508628_HCAHeart9508820,HCAHeart9508628,ACAACAACATCCTGAA-1,D7,60-65,RA,nuclei,Male,DCD,FB3,Multiome,D7_nuclei,HCAHeart9508820,10,23
HCAHeart9508628_HCAHeart9508820_CCTCAATGTTAGTACG-1,HCAHeart9508628_HCAHeart9508820,HCAHeart9508628,CCTCAATGTTAGTACG-1,D7,60-65,RA,nuclei,Male,DCD,aCM2,Multiome,D7_nuclei,HCAHeart9508820,4,23
HCAHeart9508628_HCAHeart9508820_CAATGACTCGGTTTGG-1,HCAHeart9508628_HCAHeart9508820,HCAHeart9508628,CAATGACTCGGTTTGG-1,D7,60-65,RA,nuclei,Male,DCD,aCM4,Multiome,D7_nuclei,HCAHeart9508820,11,23


In [25]:
# downsize adata_paired per 'cell_states'
adata_paired=downsize_cell(adata_paired,'cell_states',n_down=200, seed=123)
adata_paired.var=adata_paired.var[['modality']].copy()
adata_paired

AnnData object with n_obs × n_vars = 7517 × 134542
    obs: 'Combined_ID', 'rna_sangerID', 'barcode', 'donor', 'age_group', 'region', 'cell_or_nuclei', 'gender', 'type', 'cell_states', 'modality', 'donor_cellnuc', 'atac_sangerID', 'cellatac_clusters', 'cellatac_code', 'batch'
    var: 'modality'

In [26]:
# save
adata_paired.write('/nfs/team205/kk18/data/6region_v2/MultiVI/adata_paired_downsized.h5ad')

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'Combined_ID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'rna_sangerID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'barcode' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'age_group' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'region' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'type' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'donor_cellnuc' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'atac_sangerID' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'cellatac_code' as