In [1]:
import scanpy as sc
import anndata
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
import numpy as np
import os
import pandas as pd

In [2]:
# Folder that holds the exported inputs (counts.mtx, metadata.csv,
# gene_names.csv, DimRec.csv); the generated .h5ad file is written here too.
# Keep the trailing '/' so that file names can be appended to this prefix.
input_path = (
    '/project/gca/yuzhao1/work/final_RC2rna'
    '/velocity/epithelial/'
)

In [None]:
# Load the sparse count matrix stored in Matrix Market format.
# os.path.join is used instead of string concatenation so the path stays valid
# whether or not input_path ends with a path separator.
X = io.mmread(os.path.join(input_path, "counts.mtx"))

In [None]:
# Build the AnnData container from the count matrix.  The matrix is transposed
# first so that its rows line up with the per-cell metadata (obs) and its
# columns with the gene names (var) that are attached in a later cell; the
# transposed matrix is stored in CSR format.
# NOTE(review): presumably counts.mtx was exported as genes x cells -- confirm
# against the export script.
adata = anndata.AnnData(X=X.T.tocsr())

In [None]:
# Load the per-cell metadata table.  Later cells assign it to adata.obs and
# read its 'barcode', 'embedding1' and 'embedding2' columns.
# os.path.join keeps the path valid whether or not input_path ends with a
# path separator.
cell_meta = pd.read_csv(os.path.join(input_path, "metadata.csv"))

In [None]:
# Load the gene names: every line of the file (including the first one -- no
# header is skipped) becomes one entry, in file order.
# os.path.join keeps the path valid whether or not input_path ends with a
# path separator.
with open(os.path.join(input_path, "gene_names.csv"), 'r') as f:
    gene_names = f.read().splitlines()

In [None]:
# Attach the cell metadata as adata.obs, indexed by cell barcode, and label
# the variables (columns of X) with the gene names.
#
# The barcode index is built on a new frame *before* it is handed to AnnData:
#   * AnnData never receives the default integer index produced by read_csv,
#     so no implicit index conversion is needed on assignment;
#   * cell_meta itself is left unmodified (set_index returns a new frame).
# drop=False keeps 'barcode' available as a regular obs column as well, and the
# index keeps the name 'barcode'.
# Raises KeyError if the metadata has no 'barcode' column.
obs_table = cell_meta.set_index('barcode', drop=False)
obs_table.index = obs_table.index.astype(str)  # obs labels are expected to be strings
adata.obs = obs_table

# Use AnnData's public var_names setter rather than mutating adata.var.index
# in place through the accessor.
adata.var_names = gene_names

In [None]:
# Load the dimensional-reduction coordinates.
# os.path.join keeps the path valid whether or not input_path ends with a
# path separator.
DimRec = pd.read_csv(os.path.join(input_path, "DimRec.csv"))
# Label the rows with the cell barcodes.  The labels are assigned by position
# (nothing is matched on barcode here), so the CSV rows are assumed to be in
# the same order as adata.obs -- TODO confirm against the export script.
DimRec.index = adata.obs.index

In [None]:
# Register the embeddings in adata.obsm (per the original note these come from
# the harmony and harmony_umap reductions in Seurat).
# Every column of DimRec is stored as 'X_pca'.
adata.obsm['X_pca'] = DimRec.to_numpy()

# The 2-D coordinates are taken from the 'embedding1'/'embedding2' columns of
# adata.obs and stored under both keys; each key receives its own
# (n_cells, 2) array.
umap_columns = ['embedding1', 'embedding2']
for obsm_key in ('X_umap', 'X_harmony_umap'):
    adata.obsm[obsm_key] = np.vstack(
        [adata.obs[col].to_numpy() for col in umap_columns]
    ).T

In [None]:
# Sanity check: draw the UMAP coloured by 'anno1' (presumably an annotation
# column of adata.obs -- confirm).  frameon=False hides the axes frame and
# save=True asks scanpy to also write the figure to a file.
sc.pl.umap(
    adata,
    color=['anno1'],
    frameon=False,
    save=True,
)

In [None]:
# Save the assembled dataset in AnnData's .h5ad format next to the inputs.
# os.path.join keeps the path valid whether or not input_path ends with a
# path separator.
adata.write(os.path.join(input_path, 'adata_input.h5ad'))

In [None]:
# Reload the dataset from the file written above; adata now refers to the
# object read back from disk.
# os.path.join keeps the path valid whether or not input_path ends with a
# path separator.
adata = sc.read_h5ad(os.path.join(input_path, 'adata_input.h5ad'))