In [1]:
import h5py
import os
import numpy as np
import pandas as pd
import anndata as ad
import scanpy as sc


In [2]:
# Load the 'RNA' dataset into memory and transpose it. The context manager
# closes the HDF5 handle as soon as the data has been copied out.
with h5py.File('rna.pbmc.h5', 'r') as hf:
    # hf['RNA'] raises KeyError when the dataset is absent; hf.get('RNA') would
    # return None and np.array(None).T would silently yield a 0-d object array.
    rna_mtx = np.array(hf['RNA']).T


In [3]:
# Tab-separated metadata table; it is passed as `obs` when the RNA AnnData is built.
rna_meta = pd.read_csv('rna.meta.csv',sep='\t')

In [4]:
# Gene-name file, read without a header row. dtype=str together with
# keep_default_na=False keeps every name as literal text, so tokens that pandas
# would otherwise parse as missing values (e.g. "NA", "NULL", "nan") survive.
rna_gene = pd.read_csv(
    'rna_gene_name.txt',
    header=None,
    dtype=str,
    keep_default_na=False,
)
# Upper-case the names held in column 0.
rna_gene[0] = rna_gene[0].str.upper()

In [5]:
# Display (rows, columns) of the gene-name table.
rna_gene.shape

(36601, 1)

In [6]:
# Number of distinct upper-cased gene names; compare with the row count of
# rna_gene to see whether upper-casing introduced collisions.
np.unique(rna_gene[0].to_numpy()).size

36601

In [7]:
from collections import Counter

# Tally every gene name, then print those that occur more than once
# (in first-occurrence order).
d = rna_gene[0].tolist()
occurrences = Counter(d)
print([name for name in occurrences if occurrences[name] > 1])

[]


In [8]:
# Assemble the RNA AnnData: the transposed matrix becomes X and the metadata
# table becomes obs.
rna_anndat = ad.AnnData(X=rna_mtx, obs=rna_meta)
# Label the variables with the upper-cased gene names from column 0.
rna_anndat.var_names = rna_gene[0]

In [9]:
# Keep a copy of X under layers['counts'] before the following cells
# normalise, log-transform and scale X.
rna_anndat.layers['counts'] = rna_anndat.X.copy()

In [10]:
# Normalise rna_anndat with target_sum=1e6. The result is not assigned, so the
# object is updated in place; rebuild rna_anndat before re-running this cell.
sc.pp.normalize_total(rna_anndat, target_sum=1e6)

In [11]:
# log1p-transform, then scale with values capped at max_value=10. Both calls
# update rna_anndat in place (the object gains uns['log1p'] and var 'mean'/'std'),
# so the order matters and the cell should not be re-run on already-processed data.
sc.pp.log1p(rna_anndat)
sc.pp.scale(rna_anndat, max_value=10)


In [12]:
# Display a summary of the processed RNA object.
rna_anndat

AnnData object with n_obs × n_vars = 10412 × 36601
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'seurat_annotations', 'RNA_snn_res.0.4', 'seurat_clusters', 'RNA_snn_res.0.6', 'RNA_snn_res.0.8', 'RNA_snn_res.1', 'RNA_snn_res.0.9'
    var: 'mean', 'std'
    uns: 'log1p'
    layers: 'counts'

In [14]:
# Load the 'ATAC' dataset into memory and transpose it. The context manager
# closes the HDF5 handle as soon as the data has been copied out.
with h5py.File('atac.pbmc.h5', 'r') as hf:
    # hf['ATAC'] raises KeyError when the dataset is absent; hf.get('ATAC') would
    # return None and np.array(None).T would silently yield a 0-d object array.
    atac_mtx = np.array(hf['ATAC']).T

# Tab-separated metadata table, used as `obs` for the ATAC AnnData.
atac_meta = pd.read_csv('atac.meta.csv',sep='\t')

# Gene-name file, read without a header row. dtype=str together with
# keep_default_na=False keeps every name as literal text, so tokens that pandas
# would otherwise parse as missing values (e.g. "NA", "NULL", "nan") survive.
atac_gene = pd.read_csv(
    'atac_gene_name.txt',
    header=None,
    dtype=str,
    keep_default_na=False,
)
# Upper-case the names held in column 0.
atac_gene[0] = atac_gene[0].str.upper()


In [15]:
# Assemble the ATAC AnnData: the transposed matrix becomes X and the metadata
# table becomes obs.
atac_anndat = ad.AnnData(X=atac_mtx, obs=atac_meta)
# Label the variables with the upper-cased gene names from column 0.
atac_anndat.var_names = atac_gene[0]

In [16]:
# Keep a copy of X under layers['counts'] before the following cell
# normalises, log-transforms and scales X.
atac_anndat.layers['counts'] = atac_anndat.X.copy()

In [17]:
# Same processing as applied to rna_anndat: normalise with target_sum=1e6,
# log1p-transform, then scale with values capped at max_value=10.
# All three calls update atac_anndat in place, so order matters and the cell
# should not be re-run on already-processed data.
sc.pp.normalize_total(atac_anndat, target_sum=1e6)
sc.pp.log1p(atac_anndat)
sc.pp.scale(atac_anndat, max_value=10)


In [18]:
# Display a summary of the processed ATAC object.
atac_anndat

AnnData object with n_obs × n_vars = 10412 × 18652
    obs: 'orig.ident', 'nCount_ATAC', 'nFeature_ATAC', 'seurat_annotations', 'nCount_ACTIVITY', 'nFeature_ACTIVITY', 'ATAC_snn_res.0.8', 'seurat_clusters'
    var: 'mean', 'std'
    uns: 'log1p'
    layers: 'counts'

In [19]:
# Write both processed objects as .h5ad files into the parent directory.
rna_anndat.write_h5ad('../PBMC_10X_GEX.h5ad')
atac_anndat.write_h5ad('../PBMC_10X_ATAC.h5ad')

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'orig.ident' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seurat_annotations' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'orig.ident' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seurat_annotations' as categorical


In [2]:
# Read every dataset whose name starts with 'ATAC', transpose each one and
# stack them along axis 0. The context manager closes the HDF5 handle once all
# chunks have been copied into memory.
with h5py.File('atac.pbmc.peak.h5', 'r') as hf:
    key_list = list(hf.keys())
    atac_key = [key for key in key_list if key.startswith('ATAC')]
    if not atac_key:
        # Fail with an explicit message instead of an IndexError on atac_key[0].
        raise KeyError("no dataset starting with 'ATAC' found in atac.pbmc.peak.h5")

    # NOTE(review): chunks are stacked in hf.keys() order. Confirm this matches
    # the row order of atac.meta.csv, especially if the chunk names are numbered
    # (e.g. 'ATAC10' vs 'ATAC2').
    # A single concatenate over all chunks avoids re-copying the growing matrix
    # once per chunk.
    atac_mtx = np.concatenate(
        [np.array(hf[key]).T for key in atac_key],
        axis=0,
    )


In [3]:
# Display the shape of the stacked peak matrix.
atac_mtx.shape

(10412, 106056)

In [4]:
# Tab-separated metadata table, used as `obs` for the peak-level AnnData.
atac_meta = pd.read_csv('atac.meta.csv',sep='\t')
# Peak-name file, read without a header row; column 0 becomes var_names later.
atac_peak = pd.read_csv('atac_peak_name.txt',header=None)


In [5]:
# Assemble the peak-level AnnData: the stacked matrix becomes X and the
# metadata table becomes obs.
atac_anndat = ad.AnnData(X=atac_mtx, obs=atac_meta)
# Label the variables with the peak names from column 0.
atac_anndat.var_names = atac_peak[0]

In [8]:
# Display a summary of the peak-level ATAC object.
atac_anndat

AnnData object with n_obs × n_vars = 10412 × 106056
    obs: 'orig.ident', 'nCount_ATAC', 'nFeature_ATAC', 'seurat_annotations', 'nCount_ACTIVITY', 'nFeature_ACTIVITY', 'ATAC_snn_res.0.8', 'seurat_clusters'

In [9]:
# Reload the RNA object from '../PBMC_10X_GEX.h5ad' for the obs-name comparison below.
rna = ad.read_h5ad('../PBMC_10X_GEX.h5ad')

In [10]:
# Check that both objects carry identical obs names in the same order.
# atac_anndat's obs names come from atac_meta (passed as `obs`), so this compares
# the metadata indices only; it does not by itself verify the row order of atac_mtx.
np.all(rna.obs_names == atac_anndat.obs_names)

True

In [11]:
# Write the peak-level ATAC object as an .h5ad file into the parent directory.
atac_anndat.write_h5ad('../PBMC_10X_ATAC.peak.h5ad')

  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'orig.ident' as categorical
  c.reorder_categories(natsorted(c.categories), inplace=True)
... storing 'seurat_annotations' as categorical
