# 1.2 ArchR cell-by-peak matrix to scanpy

This notebook describes how to read the files exported from ArchR and assemble them into a cell-by-peak matrix stored as a Scanpy (AnnData) object.

## import

In [None]:
import scanpy as sc
import os
import pandas as pd

## Generate the cell-by-peak matrix in Scanpy's AnnData format

In [None]:
# - load the ArchR output
# Directory holding the three files read below: the count matrix (.mtx),
# the peak table (.csv) and the barcode table (.csv).
data_dir = './ArchR_files'

mtx_file = os.path.join(data_dir, 'pm_matrix.mtx')
peak_file = os.path.join(data_dir, 'pm_peak.csv')
barcode_file = os.path.join(data_dir, 'pm_barcode.csv')

adata = sc.read_mtx(mtx_file)
# For both tables the first row is the header and the first column becomes the index.
peak = pd.read_csv(peak_file, sep=',', header=0, index_col=0)
barcode = pd.read_csv(barcode_file, sep=',', header=0, index_col=0)

# - the peak feature should be in format of 'chrxxx_xxx_xxx'
# Build the names column-wise instead of looping over row positions.
# astype(str) keeps the concatenation working even when pandas parsed a
# column (e.g. purely numeric 'seqnames') as a non-string dtype.
# to_numpy() drops the Series index/name so only the plain values become the new index.
peak.index = (
    peak['seqnames'].astype(str)
    + '_' + peak['start'].astype(str)
    + '_' + peak['end'].astype(str)
).to_numpy()

# Transpose so that rows (obs) line up with the barcode table and columns (var)
# with the peak table -- presumably the .mtx is stored peaks x cells; confirm
# against the ArchR export step.
adata_atac = adata.T
adata_atac.obs = barcode
adata_atac.var = peak

In [None]:
# Show the assembled object's summary as a quick sanity check before saving.
adata_atac

In [None]:
# - save the result
save_dir = './ATAC_data'
# Create the output directory first so the write does not fail on a fresh checkout
# where './ATAC_data' does not exist yet; exist_ok makes re-running the cell safe.
os.makedirs(save_dir, exist_ok=True)
adata_atac.write(os.path.join(save_dir, 'adata_atac_raw.h5ad'))