In [1]:
import numpy as np
import pandas as pd
from scipy.io import mmread
import os
import glob
import anndata
import tqdm
import zarr
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

from dredFISH.Utils import basicu
from dredFISH.Utils import powerplots

In [2]:
data_dir = '/bigstore/GeneralStorage/fangming/projects/visctx/data_dump/'

In [3]:
%%time
adatasub = anndata.read('../data_dump/P38_glut_genes_v3.h5ad')
adatasub

CPU times: user 190 ms, sys: 164 ms, total: 354 ms
Wall time: 353 ms


AnnData object with n_obs × n_vars = 5837 × 83
    obs: 'batch', 'n_genes', 'percent_mito', 'n_counts', 'leiden', 'Doublet', 'Doublet Score', 'cluster', 'Class_broad', 'sample', 'Age', 'subclass', 'layer', 'Type', 'Subclass'
    var: 'gene_ids', 'feature_types', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'Type_colors', 'cluster_colors', "dendrogram_['leiden']", 'hvg', 'layer_colors', 'leiden', 'neighbors', 'pca', 'sample_colors', 'umap'
    obsm: 'X_harmony', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

In [4]:
adatasub.obs['sample'].unique()

['P38_1a', 'P38_2a', 'P38_2b']
Categories (3, object): ['P38_1a', 'P38_2a', 'P38_2b']

In [5]:
ddir = '/bigstore/GeneralStorage/fangming/projects/visctx/data_dump/counts/'
pths = [
    os.path.join(ddir, "scRNAseq_Cheng2022_Cell_v1ctx/P38_nr_1_a/filtered_feature_bc_matrix/counts.h5ad"),
    os.path.join(ddir, "scRNAseq_Cheng2022_Cell_v1ctx/P38_nr_2_a/filtered_feature_bc_matrix/counts.h5ad"),
]
renames = [
    "-P38_1a-P38",
    "-P38_2a-P38",
]

adata = []
for f, rn in zip(pths, renames):
    adata_ = anndata.read(f)
    adata_.obs.index = np.char.add(adata_.obs.index.values.astype(str), rn)
    adata.append(adata_)
adata = anndata.concat(adata, merge='same')
adata

AnnData object with n_obs × n_vars = 18201 × 53801
    var: 'id', 'name'

In [6]:
cov = np.asarray(adata.X.sum(axis=1)).reshape(-1,)
medcov = np.median(cov)
adata.obs['cov'] = cov
adata.obs['covfactor'] = cov/medcov

In [7]:
tmp = adata[adatasub[adatasub.obs['sample'].isin(['P38_1a', 'P38_2a'])].obs.index].copy()
unq, inv, cnt = np.unique(tmp.var['name'].values, return_counts=True, return_inverse=True)
tmp = tmp[:, cnt[inv] == 1]
tmp.var = tmp.var.set_index('name')
tmp.obs = tmp.obs.join(adatasub.obs)
tmp

AnnData expects .var.index to contain strings, but got values like:
    ['4933401J01Rik', 'Gm26206', 'Xkr4', 'Gm18956', 'Gm37180']

    Inferred to be: categorical

  value_idx = self._prep_dim_index(value.index, attr)


AnnData object with n_obs × n_vars = 3751 × 53547
    obs: 'cov', 'covfactor', 'batch', 'n_genes', 'percent_mito', 'n_counts', 'leiden', 'Doublet', 'Doublet Score', 'cluster', 'Class_broad', 'sample', 'Age', 'subclass', 'layer', 'Type', 'Subclass'
    var: 'id'

In [11]:
tmp.var

Unnamed: 0_level_0,id
name,Unnamed: 1_level_1
4933401J01Rik,ENSMUSG00000102693
Gm26206,ENSMUSG00000064842
Xkr4,ENSMUSG00000051951
Gm18956,ENSMUSG00000102851
Gm37180,ENSMUSG00000103377
...,...
CAAA01205117.1,ENSMUSG00000094431
CAAA01098150.1,ENSMUSG00000094621
CAAA01064564.1,ENSMUSG00000098647
Vmn2r122,ENSMUSG00000096730


In [8]:
tmp.var
tmp.obs

Unnamed: 0,cov,covfactor,batch,n_genes,percent_mito,n_counts,leiden,Doublet,Doublet Score,cluster,Class_broad,sample,Age,subclass,layer,Type,Subclass
AAACCCAAGTGACACG-1-P38_1a-P38,8590.0,1.188598,P38,2931,0.001048,8590.0,0,False,0.010833,L2/3_B,Excitatory,P38_1a,P38,L2/3,L2/3/4,L2/3_B,L2/3
AAACGCTAGTACAGAT-1-P38_1a-P38,13659.0,1.889996,P38,4451,0.000952,13659.0,6,False,0.061252,L4_A,Excitatory,P38_1a,P38,L4,L2/3/4,L4_C,L4
AAACGCTAGTGAGGCT-1-P38_1a-P38,16269.0,2.251142,P38,4543,0.001475,16269.0,6,False,0.048147,L4_A,Excitatory,P38_1a,P38,L4,L2/3/4,L4_C,L4
AAACGCTGTATGGAAT-1-P38_1a-P38,6618.0,0.915733,P38,2678,0.001813,6618.0,3,False,0.013447,L4_B,Excitatory,P38_1a,P38,L4,L2/3/4,L4_B,L4
AAAGAACAGTAGTCCT-1-P38_1a-P38,12608.0,1.744569,P38,3781,0.000159,12607.0,6,False,0.029642,L4_A,Excitatory,P38_1a,P38,L4,L2/3/4,L4_C,L4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGATCTCTACTGCC-1-P38_2a-P38,8016.0,1.109174,P38,2824,0.000499,8016.0,3,False,0.009797,L4_B,Excitatory,P38_2a,P38,L4,L2/3/4,L4_B,L4
TTTGGAGTCACCTACC-1-P38_2a-P38,13765.0,1.904663,P38,3905,0.000363,13764.0,9,False,0.008601,L2/3_B,Excitatory,P38_2a,P38,L2/3,L2/3/4,L2/3_A,L2/3
TTTGGTTCAAGCTCTA-1-P38_2a-P38,14529.0,2.010378,P38,4180,0.000275,14528.0,10,False,0.097022,L2/3_C,Excitatory,P38_2a,P38,L2/3,L2/3/4,L2/3_C,L2/3
TTTGGTTCAGCTTTCC-1-P38_2a-P38,16777.0,2.321434,P38,4303,0.000596,16777.0,9,False,0.011502,L2/3_B,Excitatory,P38_2a,P38,L2/3,L2/3/4,L2/3_A,L2/3


In [10]:
pth_res = os.path.join(ddir, "P38_1a2a_glut.h5ad")
tmp.write(pth_res)