In [1]:
import time

In [2]:
import os

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "2"


import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import scanpy as sc
import anndata as ad

import logging
from experiments_utils import load_scvi_model_if_exists

import matplotlib.pyplot as plt

%matplotlib inline
import torch.nn as nn
import torch
import logging
import matplotlib.pyplot as plt

from scvi.dataset import GeneExpressionDataset
from scvi.inference import UnsupervisedTrainer
from scvi.models import VAE, SCSphereFull

from experiments_utils import get_eb_full


N_EPOCHS = 300



In [5]:
data_path = "/data"
save_path = os.path.join(data_path, "annds_sparse")

In [6]:
ann_paths = os.listdir(save_path)
ann_paths = [os.path.join(save_path, path) for path in ann_paths]

In [7]:
do_sph_deep = True

mdl_sph_kwargs = dict(
#     n_genes=dataset.nb_genes,
#     n_batches=dataset.n_batches,
    n_latent=11,
    do_depth_reg=do_sph_deep,
    constant_pxr=True,
    cell_specific_px=False,
    scale_norm="softmax",
    library_nn=True,
    dropout_rate=0.0,
    deep_architecture=do_sph_deep,
)
trainer_sph_kwargs = dict(
    train_library=True,
    k=25,
    loss_type="IWELBO",
    batch_size=1024,
    weight_decay=1e-4,
    optimizer_type="adam",
    test_indices=[],
)

In [8]:
mdl_iw_kwargs = dict(
    n_latent=10,
    dropout_rate=0.0,
    decoder_dropout_rate=0.0,
    reconstruction_loss="nb",
    dispersion="gene",
    n_layers=1,
    use_batch_norm=False,
    use_weight_norm=False,
    use_layer_norm=True,
    with_activation=nn.ReLU(),
)
trainer_iw_kwargs = dict(
    train_library=True,
    k=25,
    batch_size=1024,
    weight_decay=1e-4,
    loss_type="IWELBO",
    optimizer_type="adam",
    test_indices=[],
)

In [13]:
# best_genes_smartseq2 = best_genes.copy()

In [15]:
pairs = [
    ("b", "monocyte"),
    ("b", "dc"),
    ("b", "pdc"),
    ("monocyte", "pdc"),
    ("dc", "pdc"),
    ("monocyte", "dc"),
]

In [16]:
def get_preds(idx_a, idx_b, trainer_iw):  
    if len(idx_a) >= 1000:
        idx_a = np.random.choice(idx_a, size=np.minimum(len(idx_a), 1000), replace=False)
    if len(idx_b) >= 1000:
        idx_b = np.random.choice(idx_b, size=np.minimum(len(idx_b), 1000), replace=False)
    iw_props_eb = get_eb_full(
        trainer=trainer_iw,
        idx_a=idx_a,
        idx_b=idx_b,
        offset=None,
        delta=None,
        filter_cts=True,
#         coef_truncate=0.5,
        n_samples=30000,
        do_batch_specific="separation",
        posterior_chunks=200,
    )

    res_loop = (
        pd.DataFrame(iw_props_eb)
        .assign(
#             lfc_gt=metadatab.logFC.values,
#             padj=metadatab["adj.P.Val"].values,
#             pval=metadatab["P.Value"].values,
#             GS=final_ann.var.index,
        )
    )
    return res_loop

## Quantitative analysis

In [22]:
ann_paths_arr = np.array(ann_paths) #[[0, 3, 5, 6]]
ann_paths_arr

array(['/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/Drop-Seq_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/Seq-Well_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/10Xv2_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/10Xv3_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/inDrop_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/Smart-Seq2_annd.h5ad',
       '/data/yosef2/users/pierreboyeau/data/PbmcBench/Inter-dataset/PbmcBench/annds_sparse/CEL-Seq_annd.h5ad'],
      dtype='<U104')

In [23]:
import itertools

groups = []
for r in np.arange(1, 6):
# for r in [2, 3, 4]:
    all_gps = list(itertools.combinations(ann_paths_arr, r))
    all_gps = [list(gp) for gp in all_gps]
    print(len(all_gps))
    groups += all_gps
    

7
21
35
35
21


### Run experiment per se

In [25]:
# n_total = len(groups[-1])
n_total = 7

In [26]:
n_total

7

In [27]:
pairs = [
    ("b", "monocyte"),
    ("b", "dc"),
    ("b", "pdc"),
    ("monocyte", "pdc"),
    ("dc", "pdc"),
    ("monocyte", "dc"),
]

##### Baselines

In [28]:
gp = groups[30]
sub_ann_paths = gp

In [29]:
experiment_path = "./exp/pbmc_multibatch_b"


In [30]:
pairs = [
    ("b", "monocyte"),
    ("b", "dc"),
    ("b", "pdc"),
    ("monocyte", "pdc"),
    ("dc", "pdc"),
    ("monocyte", "dc"),
]

In [31]:
import rpy2.robjects as ro

In [32]:
ro.r["library"]("BiocParallel")
ro.r("BiocParallel::register(BiocParallel::MulticoreParam(workers=40))")

0,1
MulticoreParam,[RTYPES.S4SXP]
SnowParam,[RTYPES.S4SXP]
SerialParam,[RTYPES.S4SXP]


In [33]:
def filter_indices(my_indices, my_batches):
    new_indices = []
    unique_batches = np.unique(my_batches)
    for batch in unique_batches:
        _indices = my_indices[my_batches == batch]
        if _indices.shape[0] >= 500:
            _indices = np.random.choice(_indices, size=500, replace=True)
        new_indices += list(_indices)
    new_indices = np.array(new_indices)
    return new_indices

In [34]:
def loop_procedure_baselines(sub_ann_paths):
    annds = []
    ann_names = []
    for ann_path in sub_ann_paths:
        ann_name = ann_path.split("/")[-1].split("_")[0]
        ann_names.append(ann_name)
    ann_names_pad = ann_names + (n_total - len(ann_names)) * [None]
    print(ann_names_pad)
    final_ann = adata[adata.obs.batch.isin(ann_names)]

    tagger = "_".join(ann_names)
    iw_filename = os.path.join(experiment_path, "{}_genes_from_full.pt".format(tagger))
    print(iw_filename)
    print("Dataset properties")
    print(final_ann)

    batch_indices, batch_cats = pd.factorize(final_ann.obs.final_batch)
    lbl_indices, lbl_cats = pd.factorize(final_ann.obs.x)
    dataset = GeneExpressionDataset()
    dataset.populate_from_data(
        X=np.asarray(final_ann.X.astype(float).todense()), 
        batch_indices=batch_indices.astype(float),
        labels=lbl_indices,
        cell_types=lbl_cats
    )
    print(dataset.X.min(), dataset.X.max())
    ## B / Monocytes
    np.random.seed(0)
    name_to_idx = {
        "b": np.where(final_ann.obs.x == "B cell")[0], 
        "monocyte": np.where(final_ann.obs.x.str.contains("monocyte"))[0],
        "dc": np.where(final_ann.obs.x == "Dendritic cell")[0],
        "pdc": np.where(final_ann.obs.x == "Plasmacytoid dendritic cell")[0],
    }
    res_loop_full = pd.DataFrame()
    for label_a, label_b in pairs:
        idx_a = name_to_idx[label_a]
        idx_b = name_to_idx[label_b]
        if (len(idx_a) == 0) or (len(idx_b) == 0):
            continue
        batches_a = dataset.batch_indices[idx_a].squeeze()
        batches_b = dataset.batch_indices[idx_b].squeeze()
        ba = np.unique(batches_a)
        bb = np.unique(batches_b)
        if len(np.intersect1d(ba, bb)) == 0:
            continue

        from R_interop import all_predictionsB

        idx_a = filter_indices(idx_a, batches_a)
        idx_b = filter_indices(idx_b, batches_b)

        TEST_INDICES = np.concatenate([idx_a, idx_b])
        print(TEST_INDICES.shape)

        x_test = dataset.X[TEST_INDICES, :]
        y_test = np.array([0] * len(idx_a) + [1] * len(idx_b))
        batch_test = dataset.batch_indices[TEST_INDICES, :].squeeze()
        x_test[:, -1] = 1.0 + x_test[:, -1]

        DIR_PATH = "exp/pbmc_multibatch_c/"
        # os.makedirs(DIR_PATH)
        filename = "temp_experiment.pickle"

        delta = 0.3
        PATH_TO_SCRIPTS = "MYPATHTOSCRIPTS"

        data_path = os.path.join(DIR_PATH, "data.npy")
        labels_path = os.path.join(DIR_PATH, "labels.npy")
        batches_path = os.path.join(DIR_PATH, "batch_indices.npy")

        np.save(data_path, x_test.squeeze().astype(int))
        np.savetxt(labels_path, y_test.squeeze())
        np.savetxt(batches_path, batch_test.squeeze())
        try:
            other_predictions = all_predictionsB(
                filename=filename,
                data_path=data_path,
                labels_path=labels_path,
                path_to_scripts=PATH_TO_SCRIPTS,
                lfc_threshold=delta,
                batches=batches_path,
                do_edger=False
            )
        except:
            continue

        df_other = pd.DataFrame()
        for _, row in other_predictions.iterrows():
            vals = row.to_dict()
            vals = {**dict(GS=final_ann.var.index), **row.to_dict()}

            dfi = (
                pd.DataFrame(vals)
            )
            df_other = df_other.append(dfi)
        df_other = df_other.assign(
            label_a=label_a, 
            label_b=label_b,
            n_batches=len(sub_ann_paths),
            batch0=ann_names_pad[0],
            batch1=ann_names_pad[1],
            batch2=ann_names_pad[2],
            batch3=ann_names_pad[3],
            batch4=ann_names_pad[4],
            batch5=ann_names_pad[5],
            batch6=ann_names_pad[6],
            nb=len(np.intersect1d(ba, bb))
        )
        res_loop_full = res_loop_full.append(df_other)
    return res_loop_full

In [35]:
experiment_path = "./exp/pbmc_multibatch_baselinesPROPER_SUBFILTER_part2_fourplus"

os.makedirs(experiment_path)

# df_path = os.path.join(experiment_path, "results_save_quantitative.pickle")
df_path = os.path.join(experiment_path, "results_save_quantitative_genes_from_full2.pickle")

In [36]:
from tqdm.auto import tqdm

In [37]:
change_d = dict(
    label_a="category",
    label_b="category",
    GS="category",
    batch0="category",
    batch1="category",
    batch2="category",
    batch3="category",
    batch4="category",
    batch5="category",
    batch6="category",
    n_batches=np.int16,
#     nb=np.int16,
)

In [38]:
# [idx for idx, gpi in enumerate(groups) if gpi == gp]

In [None]:
res_qt = pd.DataFrame()
for gp in tqdm(groups[63:]):
    res_ = loop_procedure_baselines(gp)
    res_qt = res_qt.append(res_).astype(change_d)
    res_qt.to_pickle(df_path)
#     break

  0%|          | 0/56 [00:00<?, ?it/s]

['Drop-Seq', 'Seq-Well', '10Xv2', '10Xv3', None, None, None]
./exp/pbmc_multibatch_baselinesPROPER_SUBFILTER_part2_fourplus/Drop-Seq_Seq-Well_10Xv2_10Xv3_genes_from_full.pt
Dataset properties
View of AnnData object with n_obs × n_vars = 77689 × 3000
    obs: 'x', 'batch', 'final_batch', 'batchb', 'annotation', 'transfer_rf', 'transfer_boost'
    var: 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'hvg'


INFO:scvi.dataset.dataset:Remapping labels to [0,N]
INFO:scvi.dataset.dataset:Remapping batch_indices to [0,N]


0.0 34710.0
(4000,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2989 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 62, 2.1%
LFC < 0 (down)     : 117, 3.9%
outliers [1]       : 0, 0%
low counts [2]     : 2607, 87%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2989 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 33, 1.1%
LFC < -0.50 (down) : 79, 2.6%
outliers [1]       : 0, 0%
low counts [2]     : 2781, 93%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2836,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2964 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 26, 0.88%
LFC < 0 (down)     : 278, 9.4%
outliers [1]       : 0, 0%
low counts [2]     : 1378, 46%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2964 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 12, 0.4%
LFC < -0.50 (down) : 181, 6.1%
outliers [1]       : 0, 0%
low counts [2]     : 2182, 74%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2306,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2917 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 31, 1.1%
LFC < 0 (down)     : 75, 2.6%
outliers [1]       : 0, 0%
low counts [2]     : 2598, 89%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2917 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 17, 0.58%
LFC < -0.50 (down) : 46, 1.6%
outliers [1]       : 0, 0%
low counts [2]     : 2429, 83%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2306,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2974 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 76, 2.6%
LFC < 0 (down)     : 60, 2%
outliers [1]       : 0, 0%
low counts [2]     : 2363, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2974 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 58, 2%
LFC < -0.50 (down) : 31, 1%
outliers [1]       : 0, 0%
low counts [2]     : 2478, 83%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(1142,)





                                                                              
Done!





                                                                              
Done!







   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 249, 8.8%
LFC < 0 (down)     : 26, 0.92%
outliers [1]       : 0, 0%
low counts [2]     : 1922, 68%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results









   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 145, 5.1%
LFC < -0.50 (down) : 15, 0.53%
outliers [1]       : 0, 0%
low counts [2]     : 2416, 85%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2836,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2978 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 27, 0.91%
LFC < 0 (down)     : 270, 9.1%
outliers [1]       : 0, 0%
low counts [2]     : 2481, 83%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2978 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 13, 0.44%
LFC < -0.50 (down) : 151, 5.1%
outliers [1]       : 0, 0%
low counts [2]     : 2366, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
['Drop-Seq', 'Seq-Well', '10Xv2', 'inDrop', None, None, None]
./exp/pbmc_multibatch_baselinesPROPER_SUBFILTER_part2_fourplus/Drop-Seq_Seq-Well_10Xv2_inDrop_genes_from_full.pt
Dataset properties
View of AnnData object with n_obs × n_vars = 76764 × 3000
    obs: 'x', 'batch', 'final_batch', 'batchb', 'annotation', 'transfer_rf', 'transfer_boost'
    var: 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'hvg'


INFO:scvi.dataset.dataset:Remapping labels to [0,N]
INFO:scvi.dataset.dataset:Remapping batch_indices to [0,N]


0.0 34710.0
(4000,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2990 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 66, 2.2%
LFC < 0 (down)     : 116, 3.9%
outliers [1]       : 0, 0%
low counts [2]     : 2608, 87%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2990 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 30, 1%
LFC < -0.50 (down) : 77, 2.6%
outliers [1]       : 0, 0%
low counts [2]     : 2376, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2627,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2970 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 26, 0.88%
LFC < 0 (down)     : 260, 8.8%
outliers [1]       : 0, 0%
low counts [2]     : 2187, 74%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2970 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 13, 0.44%
LFC < -0.50 (down) : 164, 5.5%
outliers [1]       : 0, 0%
low counts [2]     : 2532, 85%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2408,)





                                                                              
Done!





                                                                              
Done!







   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2917 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 34, 1.2%
LFC < 0 (down)     : 83, 2.8%
outliers [1]       : 0, 0%
low counts [2]     : 2316, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results









   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2917 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 18, 0.62%
LFC < -0.50 (down) : 47, 1.6%
outliers [1]       : 0, 0%
low counts [2]     : 2259, 77%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2408,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2975 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 76, 2.6%
LFC < 0 (down)     : 60, 2%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2975 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 59, 2%
LFC < -0.50 (down) : 36, 1.2%
outliers [1]       : 0, 0%
low counts [2]     : 2248, 76%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(1035,)





                                                                              
Done!





                                                                              
Done!







   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 242, 8.5%
LFC < 0 (down)     : 28, 0.99%
outliers [1]       : 0, 0%
low counts [2]     : 2142, 75%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results









   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 139, 4.9%
LFC < -0.50 (down) : 18, 0.63%
outliers [1]       : 0, 0%
low counts [2]     : 2361, 83%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2627,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2977 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 18, 0.6%
LFC < 0 (down)     : 226, 7.6%
outliers [1]       : 0, 0%
low counts [2]     : 2192, 74%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2977 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 10, 0.34%
LFC < -0.50 (down) : 140, 4.7%
outliers [1]       : 0, 0%
low counts [2]     : 2481, 83%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
['Drop-Seq', 'Seq-Well', '10Xv2', 'Smart-Seq2', None, None, None]
./exp/pbmc_multibatch_baselinesPROPER_SUBFILTER_part2_fourplus/Drop-Seq_Seq-Well_10Xv2_Smart-Seq2_genes_from_full.pt
Dataset properties
View of AnnData object with n_obs × n_vars = 76662 × 3000
    obs: 'x', 'batch', 'final_batch', 'batchb', 'annotation', 'transfer_rf', 'transfer_boost'
    var: 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'hvg'


INFO:scvi.dataset.dataset:Remapping labels to [0,N]
INFO:scvi.dataset.dataset:Remapping batch_indices to [0,N]


0.0 34710.0
(4000,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2987 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 61, 2%
LFC < 0 (down)     : 113, 3.8%
outliers [1]       : 0, 0%
low counts [2]     : 2374, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2987 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 33, 1.1%
LFC < -0.50 (down) : 78, 2.6%
outliers [1]       : 0, 0%
low counts [2]     : 2779, 93%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2627,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2970 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 26, 0.88%
LFC < 0 (down)     : 259, 8.7%
outliers [1]       : 0, 0%
low counts [2]     : 2188, 74%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2970 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 12, 0.4%
LFC < -0.50 (down) : 154, 5.2%
outliers [1]       : 0, 0%
low counts [2]     : 2360, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2306,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2916 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 34, 1.2%
LFC < 0 (down)     : 75, 2.6%
outliers [1]       : 0, 0%
low counts [2]     : 2654, 91%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2916 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 16, 0.55%
LFC < -0.50 (down) : 36, 1.2%
outliers [1]       : 0, 0%
low counts [2]     : 1, 0.034%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(2306,)





                                                                              
Done!





                                                                              
Done!









-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2974 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 77, 2.6%
LFC < 0 (down)     : 59, 2%
outliers [1]       : 0, 0%
low counts [2]     : 2363, 79%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results











-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2974 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 51, 1.7%
LFC < -0.50 (down) : 25, 0.84%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"     "batchX3"    
(933,)





                                                                              
Done!





                                                                              
Done!







   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0 (up)       : 249, 8.8%
LFC < 0 (down)     : 29, 1%
outliers [1]       : 0, 0%
low counts [2]     : 2142, 75%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results









   function: y = a/x + b, and a local regression fit was automatically substituted.
   specify fitType='local' or 'mean' to avoid this message next time.



-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)






out of 2841 with nonzero total read count
adjusted p-value < 0.05
LFC > 0.50 (up)    : 144, 5.1%
LFC < -0.50 (down) : 16, 0.56%
outliers [1]       : 0, 0%
low counts [2]     : 2416, 85%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results






[1] "condtX1"
[1] "(Intercept)" "condtX1"     "batchX1"     "batchX2"    
(2627,)





                                                                              
Done!







In [None]:
res_loop

##### DGMs

In [56]:
def loop_procedure(sub_ann_paths):
    annds = []
    ann_names = []
    for ann_path in sub_ann_paths:
        ann_name = ann_path.split("/")[-1].split("_")[0]
        ann_names.append(ann_name)
    ann_names_pad = ann_names + (n_total - len(ann_names)) * [None]
    print(ann_names_pad)
    final_ann = adata[adata.obs.batch.isin(ann_names)]    
    
    tagger = "_".join(ann_names)
    iw_filename = os.path.join(experiment_path, "{}_genes_from_full.pt".format(tagger))
    sph_filename = os.path.join(experiment_path, "{}_genes_from_full_sph.pt".format(tagger))
    print(iw_filename)
    
    print("Dataset properties")
    print(final_ann)
    
    batch_indices, batch_cats = pd.factorize(final_ann.obs.final_batch)
    lbl_indices, lbl_cats = pd.factorize(final_ann.obs.x)
    dataset = GeneExpressionDataset()
    dataset.populate_from_data(
        X=np.asarray(final_ann.X.astype(float).todense()), 
        batch_indices=batch_indices.astype(float),
        labels=lbl_indices,
        cell_types=lbl_cats
    )
    print(dataset.X.min(), dataset.X.max())

    #################
    # SCPHERE
    sph_start = time.time()
    mdl_sph = SCSphereFull(n_genes=dataset.nb_genes, n_batches=dataset.n_batches, **mdl_sph_kwargs)
    trainer_sph = UnsupervisedTrainer(
        model=mdl_sph, gene_dataset=dataset, **trainer_sph_kwargs
    )
    lr_sph = trainer_sph.find_lr()
    logging.info("Using learning rate {}".format(lr_sph))
    mdl_sph = SCSphereFull(n_genes=dataset.nb_genes, n_batches=dataset.n_batches, **mdl_sph_kwargs)
    trainer_sph = UnsupervisedTrainer(
        model=mdl_sph, gene_dataset=dataset, **trainer_sph_kwargs
    )
    trainer_sph.train(n_epochs=250, lr=lr_sph)
    train_sph_time = time.time() - sph_start
    mdl_sph.eval()
    # Empirical experiments show that without dropout regularization, models will overfit in terms of reconstruction error after a while
    # eval mode does not work as well as train mode

    torch.save(mdl_sph.state_dict(), sph_filename)

    #######################
    
    # IW model
    iw_start = time.time()
    mdl_iw = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches, **mdl_iw_kwargs)
    trainer_iw = UnsupervisedTrainer(
        model=mdl_iw, gene_dataset=dataset, **trainer_iw_kwargs
    )
    lr_iw = trainer_iw.find_lr()
    mdl_iw = VAE(n_input=dataset.nb_genes, n_batch=dataset.n_batches, **mdl_iw_kwargs)
    trainer_iw = UnsupervisedTrainer(
        model=mdl_iw, gene_dataset=dataset, **trainer_iw_kwargs
    )
    mdl_iw, train_iw = load_scvi_model_if_exists(mdl_iw, filename=iw_filename)
    if train_iw:
        logging.info("Using learning rate {}".format(lr_iw))
        trainer_iw.train(n_epochs=250, lr=lr_iw)
    train_iw_time = time.time() - iw_start
    mdl_iw.eval()
    torch.save(mdl_iw.state_dict(), iw_filename)
    
    # SCPHERE
    
    
    ## B / Monocytes
    np.random.seed(0)
    name_to_idx = {
        "b": np.where(final_ann.obs.x == "B cell")[0], 
        "monocyte": np.where(final_ann.obs.x.str.contains("monocyte"))[0],
        "dc": np.where(final_ann.obs.x == "Dendritic cell")[0],
        "pdc": np.where(final_ann.obs.x == "Plasmacytoid dendritic cell")[0],
    }
    res_loop_full = pd.DataFrame()
    for label_a, label_b in pairs:
        idx_a = name_to_idx[label_a]
        idx_b = name_to_idx[label_b]
        if (len(idx_a) == 0) or (len(idx_b) == 0):
            continue
        ba = np.unique(dataset.batch_indices[idx_a].squeeze())
        bb = np.unique(dataset.batch_indices[idx_b].squeeze())
        if len(np.intersect1d(ba, bb)) == 0:
            continue
        cell_type_start = time.time()
        res_loop_iw = (
            get_preds(idx_a, idx_b, trainer_iw)
            .assign(
                label_a=label_a, 
                label_b=label_b,
                GS=final_ann.var.index,
                n_batches=len(sub_ann_paths),
                batch0=ann_names_pad[0],
                batch1=ann_names_pad[1],
                batch2=ann_names_pad[2],
                batch3=ann_names_pad[3],
                batch4=ann_names_pad[4],
                batch5=ann_names_pad[5],
                batch6=ann_names_pad[6],
                nb=len(np.intersect1d(ba, bb)),
                train_time=train_iw_time,
                algorithm="scvi-lvm",
            )
        )
        res_loop_full = res_loop_full.append(res_loop_iw.assign(inference_time=time.time() - cell_type_start))

        cell_type_start = time.time()
        res_loop_sph = (
            get_preds(idx_a, idx_b, trainer_sph)
            .assign(
                label_a=label_a, 
                label_b=label_b,
                GS=final_ann.var.index,
                n_batches=len(sub_ann_paths),
                batch0=ann_names_pad[0],
                batch1=ann_names_pad[1],
                batch2=ann_names_pad[2],
                batch3=ann_names_pad[3],
                batch4=ann_names_pad[4],
                batch5=ann_names_pad[5],
                batch6=ann_names_pad[6],
                nb=len(np.intersect1d(ba, bb)),
                train_time=train_sph_time,
                algorithm="scPhere-lvm",
            )
        )
        res_loop_full = res_loop_full.append(res_loop_sph.assign(inference_time=time.time() - cell_type_start))
    return res_loop_full

In [57]:
experiment_path = "./exp/pbmc_multibatch"

# os.makedirs(experiment_path)

# df_path = os.path.join(experiment_path, "results_save_quantitative.pickle")
df_path = os.path.join(experiment_path, "results_save_quantitative_genes_from_full_with_scphere_30plus.pickle")

In [None]:
from tqdm.auto import tqdm

In [59]:
change_d = dict(
    label_a="category",
    label_b="category",
    GS="category",
    batch0="category",
    batch1="category",
    batch2="category",
    batch3="category",
    batch4="category",
    batch5="category",
    batch6="category",
    n_batches=np.int16,
#     nb=np.int16,
)

In [60]:
torch.cuda.empty_cache()

In [None]:
res_qt

In [62]:
# groups

In [63]:
# [idx for idx in range(len(groups)) if groups[idx] == gp]

In [65]:
faulty_gps = []

In [None]:
res_qt = pd.DataFrame()
for gp in tqdm(groups[30:]):
    try:
        res_ = loop_procedure(gp)
        res_qt = res_qt.append(res_).astype(change_d)
        res_qt.to_pickle(df_path)
    except Exception as e:
        faulty_gps.append(gp)
        print(e)
#         pass
#         break
#     break

  0%|          | 0/89 [00:00<?, ?it/s]

['Drop-Seq', 'Seq-Well', 'inDrop', None, None, None, None]
./exp/pbmc_multibatch/Drop-Seq_Seq-Well_inDrop_genes_from_full.pt
Dataset properties
View of AnnData object with n_obs × n_vars = 56972 × 3000
    obs: 'x', 'batch', 'final_batch', 'batchb', 'annotation', 'transfer_rf', 'transfer_boost'
    var: 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'hvg'


INFO:scvi.dataset.dataset:Remapping labels to [0,N]
INFO:scvi.dataset.dataset:Remapping batch_indices to [0,N]
INFO:root:Using Deep architecture ...
INFO:root:Using deep architecture
INFO:root:Unique optim


0.0 34710.0


INFO:root:Problem with automatic LR choice, using 1e-4 as default
INFO:root:Using learning rate 0.0001
INFO:root:Using Deep architecture ...
INFO:root:Using deep architecture
INFO:root:Unique optim



training:   0%|          | 0/250 [00:00<?, ?it/s][A
training:   1%|          | 2/250 [00:06<12:41,  3.07s/it][A
training:   2%|▏         | 4/250 [00:12<12:32,  3.06s/it][A
training:   2%|▏         | 5/250 [00:18<16:38,  4.08s/it][A
training:   3%|▎         | 7/250 [00:24<14:41,  3.63s/it][A
training:   3%|▎         | 8/250 [00:31<17:20,  4.30s/it][A
training:   4%|▎         | 9/250 [00:37<19:17,  4.80s/it][A
training:   4%|▍         | 11/250 [00:43<16:25,  4.13s/it][A
training:   5%|▍         | 12/250 [00:50<18:13,  4.60s/it][A
training:   5%|▌         | 13/250 [00:56<19:55,  5.05s/it][A
training:   6%|▌         | 14/250 [01:02<20:55,  5.32s/it][A
training:   6%|▋         | 16/250 [01:08<17:12,  4.41s/it][A
training:   7%|▋         | 17/250 [01:15<18:43,  4.82s/it][A
training:   7%|▋         | 18/250 [01:21<19:55,  5.15s/it][A
training:   8%|▊         | 19/250 [01:27<21:12,  5.51s/it][A
training:   8%|▊         | 20/250 [01:33<21:49,  5.69s/it][A
training:   9%|▉      

INFO:root:Normal parameterization of the library
INFO:root:Scale decoder with Softmax normalization
INFO:root:Unique optim
INFO:root:Automatic LR choice 0.0012915496650148857
INFO:root:Normal parameterization of the library
INFO:root:Scale decoder with Softmax normalization
INFO:root:Pop A 1000 & Pop B 1000
INFO:root:Using mode separation
INFO:root:Filtering observations: Keeping (288,) cells from original 320 sample size
INFO:root:Using 35 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[18050  3935  6791  2246  2137 15935 16882  2324  2141  4062 12063 12129
  2317  2522  3909  2566 13655 16217  2218 19022 12115 14905  7080  2302
 18895  2233  2396 12078 18323  2418  6724 17481 12202 12224 16965 16044
 12324  3876 12060 16591  3905 19637  3918  2387  4071 18595 12260  4006
  6937 12270 15866  2269  2197  2247  2411  6959 16846  3955  4151  6908
  6787 12116 17340  2130 16789 19025 17618 17681 15124 12167  2113  4012
 16728  3886  6752 13524  6858  3903 12263  6712  4029  2553 18099  2132
  3896  6999 15268  2294  4018 15392 18656 16856  2546  7081 17824 12068
 14772 12228  4073  2327  7060 14367  6949  2148  6801  2391  6897 16290
 12037  2147  6885 12164 12082  4115 14414  2395  4111  6788  6890  3879
  2288 17920 14697  6923 18033 19096  2223  4109 12251 12155  2253 18365
  6777  6753  3949  6804  7008  7065 17559 19265 14442  7051 17575 16452
 16845  7062 13775  2564  3898 12038 16148  2242 19483 15007 15452 14587
 19612 17898  7003 15799  2574  7038 12133 12236 14

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 288])
torch.Size([10080, 288])
log_pz -22.044174194335938 -9.572366714477539
log_px_zs -2913.7724609375 -60.4764404296875
log_px -1201.668212890625 -67.41157531738281
log_qz_x -229.61260986328125 2.138530731201172
log_probas -61.78156280517578 2.990109443664551
tensor([0])


INFO:root:ESS: 505.23552636237883
INFO:root:Filtering observations: Keeping (314,) cells from original 349 sample size
INFO:root:Using 32 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[18504  5594  5641  3750 12560 14217  9449 14047  3820  8384 17302 16105
 12778 14735  9226 12564  1993  3514 12833  9479 13425  5591 18567  3501
 18345 19594 12909 19341 18443 15072  1995 17343 16067 15735 18375 13933
  9416  3523 19380  1941  2028 15516  9312 12581  6459  9451 16709 18605
  1858 19732 19159 14221 12708 15739 15342 13594 16958 17413  1872 14319
 15260 16790  3614  9440 13465  3693 16144  1875  3674 17047 18930 18304
 16830  9432  9225  6465  3594 16508  9321 15856  9373 16137 16116 15623
  5598 15449  6610 16673 14420  9573  3679  9480 18740 17716 16908  6478
 18370 13917  6525  9805  3832  3708 18288 12598 17808  9222  9458 17713
 18249  1888  3825 16680  1918 19740 16357 13616 16348 12753 17282 18835
  9223  6379  6571 13719  2053 13520 15227 12741  2063 17057 15463  5566
 15267 18276  9407 18109 18203 16800  9816  3774 14439 12544 15518  1894
 18973 15591 16305 14904 15522  6480 13438 12845 15194 19522 18960  3646
  5628  9524  3831 12563 12848  3723 16512 12892 17

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 314])
torch.Size([10048, 314])
log_pz -22.4487247467041 -9.86590576171875
log_px_zs -4792.8857421875 -33.38092803955078
log_px -1596.33642578125 -38.65690994262695
log_qz_x -248.4459686279297 2.7390894889831543
log_probas -42.68992614746094 3.9526071548461914
tensor([0])


INFO:root:ESS: 244.94280761457992


Data inferred offsets: 0.000246381777105853 0.00025797824491746724
using offset: 0.00025797824491746724


INFO:root:LFC MEAN in batch 0: 0.018808284774422646
INFO:root:Filtering observations: Keeping (302,) cells from original 336 sample size
INFO:root:Using 34 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[35236 31228 36207 20748 38047 21658 34403 22176 26242 31258 31154 34539
 21093 20321 23703 34196 34417 23707 21985 34223 30946 31177 34450 34259
 23892 26529 34155 26328 31070 21617 31027 34555 26336 34377 26421 31234
 22623 31125 20514 31235 23895 34524 34126 37190 22172 20881 31031 34243
 20015 34327 20904 23677 37072 21761 34490 34552 34141 23616 20343 36498
 31120 31221 38035 23801 22597 20565 30995 23672 34507 31176 23720 21294
 26422 34464 26274 23756 31172 22727 22817 26435 34422 21982 22207 36848
 34508 37319 35191 23754 34194 34328 34300 31008 34481 26373 26477 20693
 26399 26218 34406 34236 31085 38130 36079 26541 31263 22880 37603 30978
 34553 34188 36365 21741 36390 34118 31015 35132 37303 26250 30992 31188
 23838 34255 35827 34424 34522 37460 31117 26414 34112 34174 26227 31173
 35503 23820 34131 31237 26503 20571 31209 34442 36988 34102 26360 23875
 38112 26560 31143 26277 34415 20098 26318 23814 26381 34361 23830 23624
 31183 26270 26183 26464 35782 23669 26504 31007 31

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 302])
torch.Size([10268, 302])
log_pz -22.79201889038086 -9.849386215209961
log_px_zs -3107.4443359375 -54.301212310791016
log_px -1052.9320068359375 -58.48404312133789
log_qz_x -159.007080078125 1.0350136756896973
log_probas -69.82776641845703 2.8019790649414062
tensor([1])


INFO:root:ESS: 716.2640874959517
INFO:root:Filtering observations: Keeping (279,) cells from original 310 sample size
INFO:root:Using 36 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[37270 37781 25986 21435 35406 37853 36972 28677 28482 31745 21006 21649
 33945 22391 37026 20690 23311 20356 37612 21472 19943 20010 23355 26067
 19921 25870 23308 25988 20971 22063 20207 20887 20868 26077 35767 20059
 23450 37610 21310 35311 22466 21546 22321 28717 20663 21724 28783 21035
 37952 21116 23363 28768 22896 21230 31674 36254 22895 35345 36324 22472
 21326 20229 26101 36703 28442 36672 36953 19937 31615 35424 37926 22460
 23491 23550 36895 23430 31820 22451 23259 22934 21992 22844 37746 25941
 23476 28490 21375 21105 31626 20695 37015 36412 28566 36750 25853 21873
 20089 21750 37538 20418 21250 21292 37360 37452 31604 20520 20246 31769
 20665 20607 23317 36040 31507 28648 28511 23230 22169 31767 22518 23287
 26068 25932 21521 36843 20057 31560 28426 35464 21645 31784 21422 25937
 28778 28454 20699 35295 36470 21899 35344 23509 20625 20055 20185 20701
 37618 28670 31774 35427 21572 36617 21597 20458 25944 19862 35795 35705
 23289 25851 25959 21141 38111 36959 36474 28574 22

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 279])
torch.Size([10044, 279])
log_pz -23.305891036987305 -9.63528823852539
log_px_zs -6296.79296875 -33.29524612426758
log_px -1342.8193359375 -38.610870361328125
log_qz_x -275.76458740234375 1.6813337802886963
log_probas -47.08793258666992 4.090021133422852
tensor([1])


INFO:root:ESS: 240.40716585612358


Data inferred offsets: 0.00023447509738616653 0.0002663393970578909
using offset: 0.0002663393970578909


INFO:root:LFC MEAN in batch 1: 0.03834908455610275
INFO:root:Filtering observations: Keeping (309,) cells from original 344 sample size
INFO:root:Using 33 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[44939 53028 38685 54479 52989 44864 53201 54596 39455 45053 44816 52869
 52777 44779 53083 53147 49570 54155 54608 49758 53141 56735 53060 54110
 45042 52771 49584 44993 52954 56796 49725 54842 40508 44937 54878 40022
 42219 56450 53108 53180 42247 42199 49724 44791 49646 42097 53120 49730
 53091 42196 55255 44687 54135 44738 44737 49746 55056 44900 53181 40354
 42358 53045 49690 55448 53026 49548 53099 42293 49627 44793 42346 40954
 53221 52788 38579 54248 38575 39520 52775 44846 52905 53188 42233 44838
 56311 54829 52858 42315 42366 55116 39321 44999 45013 52964 44968 38503
 39303 56467 53002 52881 44850 55293 53215 44991 44930 49576 53218 44824
 54959 53056 49780 56092 49504 39313 52759 42229 45045 56890 52784 53015
 53008 56954 53107 38577 44832 56161 42239 53153 49666 40612 49596 56792
 44704 44712 52856 53159 44854 52928 54194 39769 38708 52953 53226 55576
 44885 49722 42224 52935 42235 45075 53012 49774 56408 45009 52770 53166
 42092 42286 42362 42150 44963 49799 52809 44978 52

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 309])
torch.Size([10197, 309])
log_pz -22.504180908203125 -9.6475248336792
log_px_zs -2413.3779296875 -59.46380615234375
log_px -1176.6451416015625 -64.43849182128906
log_qz_x -228.52325439453125 2.3234992027282715
log_probas -42.14110565185547 2.416177749633789
tensor([2])


INFO:root:ESS: 1079.2427919633114
INFO:root:Filtering observations: Keeping (306,) cells from original 341 sample size
INFO:root:Using 33 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[39660 44631 41906 44601 47229 40105 39539 50163 56006 44565 54977 41079
 56319 52614 44408 55327 56541 56653 52608 38266 40397 47217 44515 47542
 55669 47202 39073 41029 44469 56545 43672 41204 47298 50330 39681 52704
 54107 38280 39388 56847 50082 39404 41778 50315 50254 55292 38848 41195
 47124 44442 55474 46969 56002 47287 56579 47179 52544 54233 55070 38968
 38414 50296 47082 56908 41149 52591 38233 41065 44532 40072 41988 54012
 54584 40132 56399 50426 40035 55613 50471 40462 41750 41763 41776 40924
 50219 39951 54922 46970 40419 54192 55267 40155 47158 44468 38472 42027
 38547 41816 54552 55855 38939 40255 56575 47599 39027 40071 56916 56812
 38574 47561 40495 56923 54028 47020 47580 47071 47061 44535 52569 56464
 54825 44561 39808 44557 50090 39807 50097 50399 55179 52629 50055 54371
 38528 39722 39148 41119 55545 39901 47011 44496 38849 39926 50459 40716
 44571 40017 54453 56462 50294 41799 52683 52563 55989 38494 47309 44541
 47233 46942 43618 56344 54582 50063 41757 53705 44

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 306])
torch.Size([10098, 306])
log_pz -21.34146499633789 -9.639908790588379
log_px_zs -6333.0751953125 -33.145416259765625
log_px -1778.1881103515625 -38.6156120300293
log_qz_x -358.23480224609375 4.081642150878906
log_probas -43.70832824707031 3.0191965103149414
tensor([2])


INFO:root:ESS: 521.8089932490524


Data inferred offsets: 0.00020048878213856374 0.00017743538046488538
using offset: 0.00020048878213856374


INFO:root:LFC MEAN in batch 2: 0.04091579467058182
INFO:root:DELTA VALUE: 0.3179265897190183
INFO:root:Using mode coefficient 0.6
INFO:root:Pop A 1000 & Pop B 1000
INFO:root:Using mode separation
INFO:root:Filtering observations: Keeping (304,) cells from original 338 sample size
INFO:root:Using 33 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 2219 12030  2443  2572  6995  3934 19216  2552  2428  6909  2570 16982
  6828 14107  6849 12203  2247  2302 12317  6899  2558 12253 14870 14252
  4055  6784  3928 17819  2235 13473  2569  6996 12095  4140 14676  2506
 17657  3959 15007  2252  6774 17340  6934  2470 12076  7053  2376 18581
  7058 16481 12136 14456  7906 17517 17025  6898  3947  6698 12036  2478
 12037  2184  4074 12123  2190 17142 12195  2309  6791  6702 16443  2280
 14511 19591 17672  4144 18508 12011  7049  3953  7083  6803 12292  6805
 18384  6707  2555 12057  2294  2488  3920  2298  4137 12262 12252 12151
  6982 17739 12210  2385  2323  2502  6892 14154  4158  2293  4042  2121
  3906 11979  3905  6960  2209 16324 13943  6854 12085 12105 13867  4111
  3879  2343  4163  6985 12103  7074 12303  2398  3880 19353 12247 18167
 17603  6792  6754  3911  2380  4008  2122 17490  2232 12244 17135 17520
  3883  6908  3915  3895 15405  4047 18391 12021 18698 18241 12137 17792
 13411 14220  4037  7038 12072  4071 14781 17021  6

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 304])
torch.Size([10032, 304])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -3036.677978515625 -56.33503723144531
log_px -1375.2940673828125 -61.0022087097168
log_qz_x -62.814674377441406 5.1325273513793945
log_probas -27.087989807128906 2.384063482284546
tensor([0])


INFO:root:ESS: 590.0070582965068
INFO:root:Filtering observations: Keeping (312,) cells from original 347 sample size
INFO:root:Using 33 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 5556 17302  6460 16171 16036  5641 13784 14362  6555 12627  9473 15795
 15260  6411 13616  3693  3713 15345  3137  2029 19571 16813 17608  3569
 16879 18506 19337  6433 16986 18819 18287 19207 14372 12987 18120  9209
 14264 13224  9507  3811  6622 13542 17516 16463 13742 16612 14468 15761
 14331  3547  3847 12750  3538  6499 19069 13456  9215  5635 12626 16111
 12877  6516 18044  3628 14695 18432 19396 12803 15865 14659 12742 14268
 18503  2005  3833  3163 12798  9543 17740 18315 16348 19468 15606 13973
  9277 14946 12619 19727 16330 19424 13490  5632  9331 18094 17336 17752
  3546 13779 15995 18399  6543  1942 13680 19517  5623 13416 14462 16653
 16094 17699 16415 19771 16512 14054 16010  6377 16336  5610 14565  6588
 15953 17374 18424 16363 14868 14173 14806 13944 17957  2080 14557 12668
  3151  3668  1927 17406 12556 19529 19176  9324  3803  3587  9575  9316
 16941  9379 13447  3747  6623 18342 12709 19593 15987  9175  5629 18528
 19262 16359  9828 17965 16821 12871  1901  6454 12

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 312])
torch.Size([10296, 312])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -5730.96484375 -29.63066291809082
log_px -1804.646240234375 -32.799705505371094
log_qz_x -85.05693817138672 5.031487464904785
log_probas -29.676490783691406 5.066580772399902
tensor([0])


INFO:root:ESS: 75.46494130416922


Data inferred offsets: 0.00016811621317174285 0.0001602591539267451
using offset: 0.00016811621317174285


INFO:root:LFC MEAN in batch 0: 0.05490058287978172
INFO:root:Filtering observations: Keeping (313,) cells from original 348 sample size
INFO:root:Using 32 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[26323 35237 26264 23843 30967 26454 36980 30968 20508 35319 34533 26412
 26211 31150 37346 23763 20060 23630 34338 34106 36250 36764 34317 31054
 23803 31082 34245 37319 22399 34189 26408 26260 26368 35377 34158 34535
 26324 23847 20276 31201 23897 26401 34258 20073 34117 37421 26346 26419
 31162 23641 37156 34540 23791 31262 23846 21031 30918 34110 32203 20507
 26553 22865 26354 34508 34282 36998 31036 35184 22248 31210 21771 34248
 34118 26277 23729 34160 23656 34179 20571 22717 20581 23703 20735 34121
 20895 34170 35994 31145 34250 23824 26439 22045 23861 26175 36556 26250
 26245 34553 34349 26358 26457 35103 31059 34215 26208 23701 35693 23614
 31229 26444 31098 22050 34271 26433 26296 34144 34565 20538 26440 23769
 26329 22720 36174 26198 26378 19845 31066 34223 31115 30999 26441 22412
 31134 34181 31062 21874 34333 22773 35614 26304 31153 26223 31092 21709
 23739 20343 34471 31107 20067 30972 23666 26231 31055 37337 26252 26484
 30954 20779 34309 30981 34224 26416 20824 31057 26

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 313])
torch.Size([10016, 313])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -46648.1796875 -65.73119354248047
log_px -4979.9716796875 -69.54814910888672
log_qz_x -51.01958084106445 5.226874351501465
log_probas -43.41456985473633 3.7201807498931885
tensor([1])


INFO:root:ESS: 315.2610172636633
INFO:root:Filtering observations: Keeping (271,) cells from original 302 sample size
INFO:root:Using 37 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[20710 36927 35983 37115 23519 37079 20624 28563 37781 20500 31663 36056
 31583 36221 33978 21618 20420 33995 31570 20821 25882 35322 31717 31571
 23433 36638 36497 20010 28547 21353 28609 22556 25916 21033 23290 28456
 25999 28528 28479 23232 37867 28756 19799 38189 20679 23275 34084 35763
 31637 31792 19953 36123 21305 23338 37207 28483 20558 31588 31668 37235
 25942 31485 22157 25960 20669 21315 28489 31689 36220 28718 33963 35133
 35427 23277 36587 35630 23419 36974 23414 26040 20526 37270 35159 23395
 38084 37076 36302 23418 36069 32045 37833 20030 19804 37818 26081 23340
 26071 22532 35295 36351 25962 36375 35100 32046 23398 21843 31825 31653
 31775 33926 20103 37284 36152 33868 33900 31519 36575 22513 37067 35471
 23259 34040 31618 23423 28532 31748 28597 20023 20778 23530 31791 36655
 20705 19810 22468 21444 35705 28522 31501 36548 25877 25854 34047 28730
 28516 23438 26023 37775 20582 22350 31484 22269 37486 34000 22361 38022
 20482 22168 34058 37723 28596 31499 25965 33897 35

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 271])
torch.Size([10027, 271])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -131966.65625 -76.67637634277344
log_px -5602.65478515625 -81.04261016845703
log_qz_x -72.33055114746094 5.020333290100098
log_probas -106.39749908447266 5.17855167388916
tensor([1])


INFO:root:ESS: 57.445251463119625


Data inferred offsets: 0.00011642104509519414 0.00011261690087849277
using offset: 0.00011642104509519414


INFO:root:LFC MEAN in batch 1: 0.06202050298452377
INFO:root:Filtering observations: Keeping (282,) cells from original 314 sample size
INFO:root:Using 36 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[53032 44732 42311 53181 49731 49640 53017 42170 42122 40508 44759 54376
 44953 49525 42176 39383 49744 42273 52790 42277 42160 56781 44904 55758
 52828 52833 55045 52929 42240 44989 39594 45040 45033 53123 39793 49717
 44810 53067 49645 41056 53055 49541 55000 49490 55821 49562 40085 49569
 41344 44882 55844 41019 44875 44857 49687 52798 44954 53045 53107 44924
 53071 49482 42317 44862 55530 49755 40100 53001 52878 42208 40079 49539
 53046 53143 44912 45063 40912 55098 53044 45066 49600 39400 41238 50829
 54292 38980 44870 52877 44696 52855 44982 52775 39685 52764 39741 52812
 56311 49659 49590 53113 44739 44853 49736 53085 55939 49553 39645 44804
 45053 49779 49766 49686 53053 44864 52927 55771 44987 39507 49797 56824
 49712 42294 56831 53087 53127 44911 54097 49688 44835 53051 42195 49588
 40697 54821 42194 54527 44838 42199 49614 53163 42104 42224 52757 50839
 41240 44794 49521 44706 39255 54395 53135 49596 49761 49594 53134 44807
 40628 49572 44909 52970 53137 49656 44920 41265 42

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 282])
torch.Size([10152, 282])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -2904.72314453125 -54.787071228027344
log_px -1759.9412841796875 -57.643863677978516
log_qz_x -49.389915466308594 5.041563987731934
log_probas -40.6209602355957 3.0841004848480225
tensor([2])


INFO:root:ESS: 428.171922853499
INFO:root:Filtering observations: Keeping (315,) cells from original 351 sample size
INFO:root:Using 32 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[42002 40550 50409 41155 43586 38515 53738 41997 54537 44442 55930 54193
 54512 47058 41969 54028 38209 44553 54655 53766 41980 41783 53710 50365
 38338 43644 53715 56497 41929 54151 39360 50033 40793 41243 46963 54247
 55061 55597 39951 44575 41991 39115 55753 47039 52648 41233 41841 44425
 55565 55781 56073 54288 47078 52585 52685 53980 55074 43602 53864 55010
 54432 40571 50366 54075 38304 44466 47162 50045 46223 47583 40631 39496
 52739 44448 39224 55439 56350 56231 39064 52707 39070 54964 39162 40125
 50226 50083 38900 39797 52625 47169 50216 50158 38930 39806 47598 55802
 47284 52673 50392 54551 39467 53234 56120 40475 43592 47063 54098 41793
 41990 42013 39066 41977 44495 50202 38315 53889 53912 38285 40803 38232
 41973 41694 39962 44629 38621 42004 38282 47018 38804 47152 44571 38757
 50466 41150 41752 44586 56923 38613 44641 40918 50344 47031 54665 39233
 47574 44582 56230 56812 53676 56330 41143 42030 41807 47538 52598 55774
 53933 41927 50255 40222 39280 56580 38437 40788 54

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 315])
torch.Size([10080, 315])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -5455.365234375 -63.97245788574219
log_px -1494.616455078125 -68.3093490600586
log_qz_x -61.27598571777344 4.889876365661621
log_probas -50.762939453125 3.23616623878479
tensor([2])


INFO:root:ESS: 524.9149247587695


Data inferred offsets: 0.00012702996609732504 0.00012161921476945285
using offset: 0.00012702996609732504


INFO:root:LFC MEAN in batch 2: 0.05255758762359619
INFO:root:DELTA VALUE: 0.5946075769187634
INFO:root:Using mode coefficient 0.6
INFO:root:Pop A 1000 & Pop B 418
INFO:root:Using mode separation
INFO:root:Filtering observations: Keeping (310,) cells from original 345 sample size
INFO:root:Using 49 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[12179  2573  4130 12224 18117  7072 16789 18866  2289  3896  2525  6767
  2217  7088 11977  6970 16408  2453  2469 12146 12300  2504  4024 19223
  7000  6846 17649  6950 12267  6737 14425  6867  4088  2520  2164  3953
  6803  2208 18895 19109  6967  3967 18543  6949 15816  6868  3910  7043
  2283 14534 15529 16988 18863 12168 17041 17388 11993  2522 16983  2412
  4142 12296  3887 12014 14015 13872 14832 12276  2260  2448 18874 14026
  2203 19333  2446 15630  4010  6971  4164  6787  4089  2329 12077  4154
 18050 15459  4053  6711  2338 12007  7030  7035 17458 13426  6698 12049
  6945 18529 18255  4012  6991  6890  2130 10197  2104  6974 16949 18272
  6822  2553 19026  7025 13597 12088 18595  7068  6898  6731  6996  4078
  2116  3987 11996  2555  7060  6932  2570 11979  7024 12180 12087 19261
  4067 16425  2498  6814  4119 12258  2529  2397  4157 17401 19280  4054
  6779  6795 17042  4065  4052 12046  4019  6994  6882 12051 18967  6885
  6734  4140  6727 13629 14511 12064  6998  2367  2

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 310])
torch.Size([15190, 310])
log_pz -24.35669708251953 -9.59765625
log_px_zs -2741.902587890625 -58.411231994628906
log_px -1206.744873046875 -63.990501403808594
log_qz_x -201.1815643310547 1.1331943273544312
log_probas -46.67830276489258 3.089967727661133
tensor([0])


INFO:root:ESS: 1320.2887491944102
INFO:root:Filtering observations: Keeping (188,) cells from original 209 sample size
INFO:root:Using 80 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 2577  2578  2579  2580  2581  2583  2584  2585  2586  2587  2588  2589
  2590  2592  2593  2595  2596  2597  2598  2599  2601  2603  2606  2608
  2609  2612  2613  2615  2616  2618  2619  2620  2621  2622  2623  2624
  2625  3482  3578  3634  3702  6632  6633  6634  6635  6636  6637  6638
  6639  6640  6641  6642  6643  6644  6645  6646  6647  6648  6649  6650
  6651  6652  6653  6654  6655  6656  6657  6658  6659  6660  6661  6662
  6663  6664  6665  6666  6667  6668  6669  6670  6671  6672  6673  6674
  6675  6676  6677  6678  6679  6680  6681  6682  9434  9872  9873  9874
  9875  9876  9877  9878  9879  9880  9881  9882  9883  9884  9885  9886
  9887  9888  9889  9890  9891  9892  9893  9894  9895  9896  9897  9898
  9900  9901  9902  9904  9905 12855 13114 13115 13116 13117 13118 13119
 13120 13122 13123 13124 13125 13126 13127 13128 13129 13130 13131 13132
 13133 13134 13135 13136 13137 13138 13139 13140 16642 16758 16838 16855
 16981 17016 17092 17162 17352 17495 17574 17707 17

  0%|          | 0/2 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 188])
torch.Size([15040, 188])
log_pz -20.689619064331055 -9.973600387573242
log_px_zs -3953.451171875 -245.77999877929688
log_px -2201.472900390625 -253.85336303710938
log_qz_x -360.96136474609375 7.456825256347656
log_probas -95.55873107910156 7.375105857849121
tensor([0])


INFO:root:ESS: 28.061671909763742


Data inferred offsets: 0.00029265944613143834 7.374584820354357e-05
using offset: 0.00029265944613143834


INFO:root:LFC MEAN in batch 0: 0.05613591521978378
INFO:root:Filtering observations: Keeping (301,) cells from original 335 sample size
INFO:root:Using 50 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[31072 23760 23875 21441 26208 34170 23722 23745 30956 21972 37487 37959
 26528 35359 20657 26413 36912 34257 22082 30978 26502 26472 34483 31197
 23621 35222 20184 26382 21237 31177 31028 37704 31044 23896 23753 35266
 30940 22400 26303 31162 31058 37303 21928 20430 35450 34369 35907 36752
 21617 20507 36585 22601 23757 31101 26215 30987 26384 22484 23687 21104
 23654 31071 19949 23793 22042 37749 21563 37912 26261 34212 30941 31076
 34235 36831 38043 34316 34339 34446 23717 36799 37949 34520 34325 23790
 23671 26341 21674 23733 34538 23846 23818 34158 23792 26510 20240 34288
 23798 26246 22717 34221 38024 20164 22440 34519 34114 34413 21322 31237
 32200 34457 26177 20241 26262 31073 34498 20015 31037 26226 34514 34150
 34199 37353 35555 34303 38065 35686 31191 26326 23824 35473 20311 34508
 22235 21027 34191 23713 23895 34240 35788 23781 26209 34273 26500 26419
 23617 21582 34125 26355 31160 26358 26200 37171 37549 22278 31034 23791
 35891 31106 31263 34367 23613 34473 35985 26428 26

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 301])
torch.Size([15050, 301])
log_pz -23.90810775756836 -9.805625915527344
log_px_zs -2764.24169921875 -53.67127227783203
log_px -1176.7852783203125 -59.56808090209961
log_qz_x -230.19171142578125 2.150341272354126
log_probas -50.74142837524414 3.250265121459961
tensor([1])


INFO:root:ESS: 780.3577456047083
INFO:root:Filtering observations: Keeping (188,) cells from original 209 sample size
INFO:root:Using 80 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[19818 20014 20031 20157 20268 20338 20671 20750 20883 20924 20975 21002
 21057 21068 21092 21153 21205 21233 21299 21438 21675 21766 21848 21941
 22138 22929 22946 22958 23220 23316 23372 23440 26102 26103 26104 26105
 26106 26107 26108 26109 26110 26111 26112 26113 26114 26115 26116 26117
 26118 26119 26120 26121 26122 26123 26124 26125 26126 26127 26128 26129
 26130 26131 26132 26133 26134 26135 26136 26137 26138 26139 26140 26141
 26142 26143 26144 26145 26146 26147 26148 26149 26150 26151 26152 28650
 29017 29018 29019 29020 29021 29022 29023 29024 29025 29026 29027 29028
 29029 29030 29031 29032 29033 29034 29035 29036 29037 29038 29039 29040
 29041 29042 29043 29045 29046 29047 29049 29050 30949 31000 31081 31249
 31795 31956 31957 31958 31959 31960 31961 31962 31963 31964 31965 31966
 31967 31968 31969 31970 31971 31972 31973 31974 31975 31976 31977 31978
 31979 31980 31981 31982 34571 34572 34574 34575 34577 34579 34580 34581
 34582 34583 34584 34586 34587 34588 34589 34590 34

  0%|          | 0/2 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 188])
torch.Size([15040, 188])
log_pz -22.469736099243164 -9.723311424255371
log_px_zs -3599.49853515625 -246.34259033203125
log_px -2200.01123046875 -254.01400756835938
log_qz_x -398.6534423828125 7.381208419799805
log_probas -59.585811614990234 6.484046936035156
tensor([1])


INFO:root:ESS: 75.46073028833


Data inferred offsets: 0.00026089064776897437 9.756525905686432e-05
using offset: 0.00026089064776897437


INFO:root:LFC MEAN in batch 1: 0.06758348643779755
INFO:root:DELTA VALUE: 0.09476598442240906
INFO:root:Using mode coefficient 0.6
INFO:root:Pop A 1000 & Pop B 418
INFO:root:Using mode separation
INFO:root:Filtering observations: Keeping (288,) cells from original 320 sample size
INFO:root:Using 53 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 4057 17461 16099  3878 17517  2225  6945  2414 15166  4150 12014 17254
  7040  6790  2285  4068 14511  2145 15657 15350 12290  3998  4156 15501
 13389 18656  7026 12306  2498 12244  2531  4070  2296 15722  2143 18806
 16926  2544 18176  7068  2315  4144  6941  4025  2211 15527 11994  2108
 14114  2172 18967  2300 12093 12081  2557 19031  2420 12322  2191 17388
 17227 12099 13479  7002  4019  2179  7012  2182 11983  6839 15751 17819
 16942  2457 12005  6867 12121  4091  6737 12223  6729 12118 19608  2216
  6980 18498  2573  6942  6924 16965  2256 13489  4148 15209  6731  7080
  4132  3937 12067  2329 13495  2542 15033  3909  3986 14508  2576 12009
 12101 12145  2238 12051 13939  2283 19540 12138  7032 19282 12260  2378
  7086  2380  2146  6832 12038 12042  6777 17854  2549 17443 17792  3979
 12044  2408  3920  6815 12318  4022 16655  4043 19067 12185 18023 14456
 12315  2316 18796  6862  2223 18976 19011  6842 16339  6916  6878 13545
  7061 14980  3968  3948  2219  7056 13386 19541  2

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 288])
torch.Size([15264, 288])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -41278.77734375 -60.00580596923828
log_px -1762.1282958984375 -63.5880241394043
log_qz_x -58.42041015625 4.987053871154785
log_probas -31.82977867126465 3.1711857318878174
tensor([0])


INFO:root:ESS: 575.0784791943859
INFO:root:Filtering observations: Keeping (188,) cells from original 209 sample size
INFO:root:Using 80 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 2578  2579  2580  2581  2583  2584  2585  2586  2587  2588  2589  2592
  2593  2594  2595  2596  2597  2598  2599  2601  2603  2606  2608  2609
  2613  2615  2616  2617  2618  2619  2620  2622  2623  2624  2625  3482
  3578  3634  3702  6632  6633  6634  6635  6636  6637  6638  6639  6640
  6641  6642  6643  6644  6645  6646  6647  6648  6649  6650  6651  6652
  6653  6654  6655  6656  6657  6658  6659  6660  6661  6662  6663  6664
  6665  6666  6667  6668  6669  6670  6671  6672  6673  6674  6675  6676
  6677  6678  6679  6680  6681  6682  9434  9872  9873  9874  9875  9876
  9877  9878  9879  9880  9881  9882  9883  9884  9885  9886  9887  9888
  9889  9890  9891  9892  9893  9894  9895  9896  9897  9898  9900  9901
  9902  9904  9905 10149 12008 12059 12140 12148 12308 12855 13114 13115
 13116 13117 13118 13119 13120 13121 13122 13123 13124 13125 13126 13127
 13128 13129 13130 13131 13132 13133 13134 13135 13136 13137 13138 13139
 13140 16758 16838 16855 16981 17016 17092 17162 17

  0%|          | 0/2 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 188])
torch.Size([15040, 188])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -7201.71533203125 -246.86956787109375
log_px -2299.905517578125 -254.2324676513672
log_qz_x -50.73216247558594 5.218379020690918
log_probas -214.2942657470703 5.558601379394531
tensor([0])


INFO:root:ESS: 128.45796969780534


Data inferred offsets: 0.00012745041749440134 8.366366964764891e-05
using offset: 0.00012745041749440134


INFO:root:LFC MEAN in batch 0: 0.10406170040369034
INFO:root:Filtering observations: Keeping (303,) cells from original 337 sample size
INFO:root:Using 50 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[23827 30939 34565 31101 21972 34422 26252 34209 20173 23613 30982 19965
 26347 23881 30976 19904 31188 23755 26236 34173 36801 31228 34240 26297
 34536 21832 20051 26435 31102 36906 34219 21490 34179 20982 31246 31019
 31032 23790 31013 20789 26170 34263 20430 20347 23897 37813 34356 26196
 35994 26558 20141 26294 23742 34121 20703 31037 23629 26470 26498 26556
 26233 37072 35215 34567 36492 34292 35172 21761 26553 30947 34528 20637
 20812 36980 34365 22832 26471 34249 23648 31133 22412 20241 26516 26185
 26463 31238 34448 30998 26409 31040 31066 30970 34411 34396 34097 34143
 37405 26530 37343 23875 26223 23730 23829 31043 34399 34144 38163 31156
 20605 35907 26464 21567 26429 23706 34523 26296 26520 23721 34223 30930
 23850 26210 20276 26264 31193 31126 26430 23870 31220 26366 20881 34525
 34108 35432 34535 34169 35118 37847 38076 34136 26510 34323 34093 34374
 35960 36078 34301 34478 35132 22719 26351 21713 23752 26238 36190 31241
 26324 30996 19801 35980 26261 35759 30933 26370 34

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 303])
torch.Size([15150, 303])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -4080.48193359375 -74.63328552246094
log_px -1593.4442138671875 -78.0679702758789
log_qz_x -57.79295349121094 5.024168968200684
log_probas -35.14990997314453 3.987196207046509
tensor([1])


INFO:root:ESS: 405.7830520843283
INFO:root:Filtering observations: Keeping (188,) cells from original 209 sample size
INFO:root:Using 80 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[19934 20014 20031 20157 20192 20268 20338 20528 20671 20750 20883 20924
 20975 21002 21057 21068 21098 21153 21205 21233 21299 21438 21661 21766
 21790 21848 21941 22138 22565 22929 22946 23220 23316 23372 23440 26102
 26103 26104 26105 26106 26107 26108 26109 26110 26111 26112 26113 26114
 26115 26116 26117 26118 26119 26120 26121 26122 26123 26124 26125 26126
 26127 26128 26129 26130 26131 26132 26133 26134 26135 26136 26137 26138
 26139 26140 26141 26142 26143 26144 26145 26146 26147 26148 26149 26150
 26151 26152 28650 29017 29018 29019 29020 29021 29022 29023 29024 29025
 29026 29027 29028 29029 29030 29031 29032 29033 29034 29035 29036 29037
 29038 29039 29040 29041 29042 29043 29045 29046 29047 29049 29050 29283
 30949 31000 31081 31089 31249 31795 31956 31957 31958 31959 31960 31961
 31962 31963 31964 31965 31966 31967 31968 31969 31970 31971 31972 31973
 31974 31975 31976 31977 31978 31979 31980 31981 31982 34572 34573 34574
 34575 34577 34578 34579 34580 34581 34582 34583 34

  0%|          | 0/2 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 188])
torch.Size([15040, 188])
log_pz -3.0313475131988525 -3.0313475131988525
log_px_zs -9021.240234375 -247.9725341796875
log_px -2299.039306640625 -254.80380249023438
log_qz_x -48.68255615234375 5.43459415435791
log_probas -266.3778076171875 7.2132110595703125
tensor([1])


INFO:root:ESS: 36.63599798856177


Data inferred offsets: 0.00013124934048391879 8.419648511335253e-05
using offset: 0.00013124934048391879


INFO:root:LFC MEAN in batch 1: 0.10823715478181839
INFO:root:DELTA VALUE: 0.4160331746114834
INFO:root:Using mode coefficient 0.6
INFO:root:Pop A 1000 & Pop B 306
INFO:root:Using mode separation
INFO:root:Filtering observations: Keeping (294,) cells from original 327 sample size
INFO:root:Using 35 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 2203  6949 19558 18648 19447 12269  6864 17400 12283 14570 16982  6962
 11976  2424  2385 17481  4125  2413  4132  6971  4053 19704 17041  2559
 14116  6940 12310 15657 19264 12138  3985 12320 17021 15993 15191 18168
  3890 12208  2219  7080 12219 12229  2519 17245 12083 12021 16645 18508
 12282 14433 17395  4159  6859 18967 13949  4088 18089 12216 13388 13381
 12128 19333 12052 12288  6755  2461 18595  2117 15898 16448 16099 15802
  6997 13654 12322  2359 12258  4101  2220  7009  4091  6917  6704  6945
  2291  4001 13451  2487 12104 18752  2215  4096 18314  6705  3993 19353
 14163  2355  6774 14465  7083  6813  2509 16997  7067 17770  2530  2179
  6713  6941  2114 13764  4124 17167 12256  6826 13506  7077  6744  2467
 12020 18884  2136 19308 12233  2295 12087 12080 12272  4100  2113  2457
  2470 17517  2241 14189  2160  2397  2217  2183  4114 18023  4070  4024
 19681 17887  6701 16912 12278 18906 17042 19000  6823 14220 15935 17622
  2522  2381  2553 13706 12106 12169  6735 17840  2

  0%|          | 0/3 [00:00<?, ?it/s]

INFO:root:Using 5000 samples for log-evidence estimation ...
INFO:root:... done!
INFO:root:px reweight
INFO:root:Step 3: Compute scales from original batches


torch.Size([1, 294])
torch.Size([10290, 294])
log_pz -24.99059295654297 -9.898506164550781
log_px_zs -4082.760986328125 -71.755859375
log_px -1725.403564453125 -78.61653900146484
log_qz_x -391.15472412109375 5.792505264282227
log_probas -51.568389892578125 3.9127540588378906
tensor([0])


INFO:root:ESS: 212.37276307227356
INFO:root:Filtering observations: Keeping (91,) cells from original 102 sample size
INFO:root:Using 110 posterior samples per cell
INFO:root:Step 1: Getting posterior samples
INFO:root:Step 2: Compute overall importance weights


[ 3194  3196  3197  3198  3199  3200  3201  3202  3203  3204  3205  3206
  3207  3208  3210  3211  3212  3213  3214  3215  3216  3217  3218  3219
  3220  3221  3850  3851  3852  3853  3854  3855  3856  3857  3858  3859
  3860  3861  3862  3863  3864  3865  3866  3867  3868  3869  3870  3871
  3872  3873  3874  3875  6708  6709  6769  6831  6894  6957  6979  7039
  7045  7055  9899  9903 13924 14099 14146 14329 14537 15460 15661 16056
 16063 16782 16793 16868 16925 17049 17241 17521 17765 18256 18571 18642
 18654 18961 19300 19328 19490 19601 19651]
None


  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
res_qt

In [None]:
res_qt.label_a.unique()