In [13]:
import numpy as np
import pandas as pd
import scvelo as scv
import scanpy as sc
import anndata as ad

# get spliced and unspliced counts

In [14]:
# Load the per-plate loom files; `validate=False` is forwarded by
# sc.read_loom to the underlying loom reader.
loom_paths = (
    "../data/processed/plate1_fibroblast.loom",
    "../data/processed/plate2_fibroblast.loom",
)
adata_1, adata_2 = (sc.read_loom(path, validate=False) for path in loom_paths)

In [15]:
# Combine both plates into one AnnData object.
# NOTE(review): the result of scv.utils.merge is concatenated with each plate
# in turn, and every ad.concat call writes the 'plate' column again, so the
# final labels come from the last call only -- confirm this is intended.
merged = scv.utils.merge(adata_1, adata_2, copy=True)
merged_var = merged.var  # kept so the gene annotation can be restored below

adata = merged
for plate_adata in (adata_1, adata_2):
    adata = ad.concat([adata, plate_adata], join="inner", label='plate')

# Put the gene annotation captured before concatenation back on the result.
adata.var = merged_var

In [19]:
# Display the 'unspliced' layer of the combined object to check its shape and storage format.
adata.layers['unspliced']

<738x37315 sparse matrix of type '<class 'numpy.float64'>'
	with 3477095 stored elements in Compressed Sparse Row format>

In [4]:
# NOTE(review): an earlier draft summed spliced + unspliced into X first (kept
# commented out below). As written, 'total' is a copy of whatever X currently
# holds -- confirm which definition of "total" is wanted.
# adata.X = adata.layers['spliced'] + adata.layers['unspliced']

# Register the copy as a layer through item assignment. Attribute assignment
# (`adata.layers.total = ...`) only sets a Python attribute on the layers
# mapping, so the matrix would not be stored, subset or written with the object.
adata.layers['total'] = adata.X.copy()

# get c57 and cast allele counts

In [5]:
# Allele-resolved UMI count tables, one per strain; the first CSV column is used as the row index.
umi_c57_path = '../data/arme/umiSplit_c57.qc.csv'
umi_cast_path = '../data/arme/umiSplit_cast.qc.csv'

UMI_c57 = pd.read_csv(umi_c57_path, index_col=0)
UMI_cast = pd.read_csv(umi_cast_path, index_col=0)

# get velo params dataframe for alleles (from arme)

In [6]:
# Per-allele parameter tables; the first CSV column is used as the row index.
c57_params_path = "../data/arme/umiSplit_c57.qc.maxl.ci.wfilt1.csv"
cast_params_path = "../data/arme/umiSplit_cast.qc.maxl.ci.wfilt1.csv"

c57_genes_with_params = pd.read_csv(c57_params_path, index_col=0)
cast_genes_with_params = pd.read_csv(cast_params_path, index_col=0)

# get common cells (682)

In [7]:
# Cells present in both allele count tables (their columns) and in the loom-derived object.
common_cells = (
    UMI_c57.columns
    .intersection(UMI_cast.columns)
    .intersection(adata.obs.index)
)

In [8]:
# Keep only the shared cells. Public indexing + copy replaces the private
# `_inplace_subset_obs` helper, which is not part of the documented AnnData API.
adata = adata[common_cells].copy()

# get common genes (7439)

In [9]:
# Genes that have parameter values for both alleles (index of the arme tables)
# and whose accession also appears in the loom-derived object.
common_genes_with_param = cast_genes_with_params.index.intersection(c57_genes_with_params.index)
common_genes = common_genes_with_param.intersection(adata.var['Accession'])

# Select the matching genes with a boolean mask over `var` and public indexing,
# instead of the private `_inplace_subset_var` helper.
has_allele_params = adata.var['Accession'].isin(common_genes).to_numpy()
adata = adata[:, has_allele_params].copy()

In [10]:
# Attach the allele-specific counts as layers, explicitly aligned to `adata`.
# Layer assignment only checks the shape, so the tables must be put in the same
# gene and cell order as `adata` first; otherwise counts could silently end up
# on the wrong cell or gene. `.loc` raises a KeyError if any accession or cell
# of `adata` is missing from a table.
gene_ids = adata.var['Accession'].to_numpy()   # row labels of the UMI tables
cell_ids = adata.obs_names.to_numpy()          # column labels of the UMI tables

UMI_c57_with_common_genes = UMI_c57.loc[gene_ids, cell_ids]
UMI_cast_with_common_genes = UMI_cast.loc[gene_ids, cell_ids]

# Transpose from genes x cells to cells x genes, the orientation used by adata.
UMI_c57_T = UMI_c57_with_common_genes.T
UMI_cast_T = UMI_cast_with_common_genes.T

adata.layers['allele_c57'] = UMI_c57_T.to_numpy()
adata.layers['allele_cast'] = UMI_cast_T.to_numpy()

In [11]:
# Show the object summary (dimensions, obs/var columns, layers) before saving.
adata

AnnData object with n_obs × n_vars = 682 × 7439
    obs: 'plate'
    var: 'Accession', 'Chromosome', 'End', 'Start', 'Strand'
    layers: 'matrix', 'ambiguous', 'spanning', 'spliced', 'unspliced', 'total', 'allele_c57', 'allele_cast'

In [12]:
# Save the combined object to disk.
# NOTE(review): the file name hard-codes the cell and gene counts; it goes
# stale if the inputs or filters change.
output_path = '../data/processed/mus_musculus_682_cells_7439_genes.h5ad'
adata.write(output_path)