In [1]:
import scanpy as sc
import memento
import pandas as pd
import os
import numpy as np
os.chdir("/data/TAK_981")

Testing memento pipeline for single cond below

In [2]:
adata = sc.read_h5ad("h5ad/batch_corrected.h5ad")
adata = adata.raw.to_adata()
adata = adata[(adata.obs.annot == "Macrophages") &
              (adata.obs.condition.isin(["combo", "cd40"]))]
capture_rates = {
    "cd40-1": 0.2189,
    "cd40-2": 0.3719,
    "combo-1": 0.4790,
    "combo-2": 0.4405,
    "ctrl-1": 0.4015,
    "ctrl-2": 0.4047,
    "tak-1": 0.5386,
    "tak-2": 0.4268
}
adata.obs["capture_rate"] = adata.obs.sample_id.map(capture_rates).astype(int)*0.25

  adata.obs["capture_rate"] = adata.obs.sample_id.map(capture_rates).astype(int)*0.25


In [3]:
adata.X=adata.X.astype(np.float64)

In [4]:
memento.setup_memento(adata, q_column="capture_rate")

In [5]:
adata.obs["treat"]=adata.obs.condition.apply(lambda x: 0 if x == "combo" else 1)
sample_dict = {"combo-1": 1,
               "combo-2": 2,
               "cd40-1": 1,
               "cd40-2": 2}
adata.obs["samples"] = adata.obs.sample_id.map(sample_dict)

In [6]:
memento.create_groups(adata, label_columns=['treat', "samples"])

In [7]:
memento.compute_1d_moments(adata,
    min_perc_group=.7)

In [8]:
sample_meta = memento.get_groups(adata)

  df[col] = pd.to_numeric(df[col], errors='ignore')


In [9]:
sample_meta

Unnamed: 0,treat,samples
sg^1^1,1,1
sg^1^2,1,2
sg^0^1,0,1
sg^0^2,0,2


In [10]:
cov_df = pd.DataFrame(index=sample_meta.index)
cov_df['intercept'] = 1

In [11]:
cov_df

Unnamed: 0,intercept
sg^1^1,1
sg^1^2,1
sg^0^1,1
sg^0^2,1


In [13]:
memento.ht_1d_moments(
    adata,
    sample_meta,
    covariate=cov_df,
    num_boot=10000, 
    verbose=1,
    num_cpus=6)

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    6.3s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   19.0s
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:   39.4s
[Parallel(n_jobs=6)]: Done 788 tasks      | elapsed:  1.1min
[Parallel(n_jobs=6)]: Done 1238 tasks      | elapsed:  1.7min
[Parallel(n_jobs=6)]: Done 1788 tasks      | elapsed:  2.5min
[Parallel(n_jobs=6)]: Done 2438 tasks      | elapsed:  3.3min
[Parallel(n_jobs=6)]: Done 3188 tasks      | elapsed:  4.3min
[Parallel(n_jobs=6)]: Done 4038 tasks      | elapsed:  5.5min
[Parallel(n_jobs=6)]: Done 4988 tasks      | elapsed:  6.7min
[Parallel(n_jobs=6)]: Done 6038 tasks      | elapsed:  8.1min
[Parallel(n_jobs=6)]: Done 7188 tasks      | elapsed:  9.7min
[Parallel(n_jobs=6)]: Done 7252 out of 7252 | elapsed:  9.8min finished


In [16]:
result_1d = memento.get_1d_ht_result(adata)

In [17]:
result_1d

Unnamed: 0,gene,tx,de_coef,de_se,de_pval,dv_coef,dv_se,dv_pval
0,Mrpl15,treat,-0.255742,0.065364,0.000587,0.540113,0.320241,0.083092
1,Mrpl15,samples,0.254088,0.058966,0.000489,-0.524852,0.328978,0.097890
2,Lypla1,treat,0.073614,0.059518,0.217478,-0.098217,0.370938,0.767123
3,Lypla1,samples,-0.056645,0.056703,0.321868,-0.085346,0.311654,0.769523
4,Tcea1,treat,-0.253917,0.043384,0.000106,-0.221141,0.192181,0.200380
...,...,...,...,...,...,...,...,...
14499,mt-Nd6,samples,0.576249,0.093821,0.000282,-0.230162,0.228455,0.365263
14500,mt-Cytb,treat,0.079799,0.033791,0.017498,-0.072573,0.078220,0.360964
14501,mt-Cytb,samples,0.451033,0.035193,0.000003,-0.761556,0.091243,0.000252
14502,ENSMUSG00000095041,treat,0.028533,0.065870,0.664834,0.052893,0.191264,0.784422


In [18]:
result_1d.query('de_coef > 0').sort_values('de_pval').head(10)

Unnamed: 0,gene,tx,de_coef,de_se,de_pval,dv_coef,dv_se,dv_pval
5669,Iqgap1,samples,0.335956,0.025472,4.205047e-09,0.288808,0.052675,0.000119
13136,Mpp7,treat,0.664216,0.038883,1.330476e-08,-0.239944,0.066345,0.002035
5109,Emp1,samples,0.743887,0.045511,2.181975e-08,-0.452856,0.085847,7.7e-05
8171,Eef2,samples,0.593481,0.024282,2.928492e-08,-0.092435,0.051348,0.075092
11931,Pam16,samples,0.534071,0.069201,3.650575e-08,0.60208,0.709544,0.329767
7811,Rps12,samples,0.542171,0.017785,6.59476e-08,-0.162447,0.046012,0.00113
14311,Rps4x,samples,0.383193,0.018173,7.629596e-08,-0.139676,0.043036,0.0019
8415,Lrp1,samples,0.472033,0.032949,7.805913e-08,0.013278,0.071303,0.855314
7791,Map3k5,samples,0.886393,0.068054,9.169106e-08,0.836793,0.302723,0.006999
6864,Cotl1,treat,0.2993,0.024353,9.784982e-08,-0.158382,0.062313,0.011799


In [5]:
comp = ["ctrlvcd40", "ctrlvtak", "ctrlvcombo", "takvcombo", "takvcd40", "cd40vcombo"]
celltypes = ["Epithelial", "Macrophages", "T-Cells"]

In [None]:
for i in celltypes:
    for n in comp:
        treat = n.split('v')
        ad_treat = adata[(adata.condition.isin(treat)) & (adata.obs.annot == i)]
        ad_treat.obs.treat = ad_treat.obs["condition"].apply(lambda x: 0 if x == treat[0] else 1)