In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import plotly as py
import pandas as pd
from chart_studio.plotly import plot, iplot

# from plotly.offline import init_notebook_mode, iplot
from tqdm import tqdm_notebook

from scvi.dataset import PbmcDataset
from scvi.models import VAE, IAVAE
from scvi.inference import UnsupervisedTrainer
from scvi.utils import (
    demultiply,
    make_dir_if_necessary,
    predict_de_genes,
    save_fig,
    save_pickle,
    load_pickle,
    compute_hdi
)
from scvi_utils import (
    estimate_de_proba,
    estimate_lfc_density,
    estimate_lfc_mean,
    train_model,
    multi_train_estimates,
)
from R_interop import all_predictions, all_de_predictions


# ---- Notebook-wide configuration -------------------------------------------------
# Number of training epochs (forwarded as `n_epochs` in train_fn_params).
N_EPOCHS = 200
# LFC threshold forwarded as `delta` to the DE prediction helpers.
DELTA = 0.5
# SIZES = [5, 10, 20, 30, 50, 100]
MODE = "cloud"
# Number of cells sampled per population in the competitor experiments.
SIZE = 100
SIZES = [SIZE]
N_SIZES = len(SIZES)

# Significance level / desired FDR used when calling DE genes.
Q0 = 5e-2
N_TRAININGS = 5
# Number of random cell picks per experiment.
N_PICKS = 10

# Seed the global NumPy RNG; every np.random call below depends on it.
np.random.seed(42)

# Location of the external R scripts. The historical default is kept, but it can now be
# overridden through the CONQUER_SCRIPTS_PATH environment variable so that the notebook is
# not tied to one machine's absolute path.
PATH_TO_SCRIPTS = os.environ.get(
    "CONQUER_SCRIPTS_PATH", "/home/ubuntu/conquer_comparison/scripts"
)
# Directory (relative to the working directory) where arrays and pickles are cached.
DIR_PATH = "lfc_estimates/pbmc"
make_dir_if_necessary(DIR_PATH)

# Labels of the two cell populations that are compared throughout the notebook.
label_a = 0
label_b = 4

In [None]:
import chart_studio.plotly as py

# Credentials are read from the environment so that no secret is stored in the notebook.
# NOTE(review): the API key that used to be hard-coded on this line has been exposed in the
# notebook history and should be revoked/regenerated.
_chart_studio_user = os.environ.get("CHART_STUDIO_USERNAME")
_chart_studio_key = os.environ.get("CHART_STUDIO_API_KEY")
if _chart_studio_user and _chart_studio_key:
    py.sign_in(_chart_studio_user, _chart_studio_key)
else:
    print(
        "CHART_STUDIO_USERNAME / CHART_STUDIO_API_KEY are not set; skipping Chart Studio sign-in."
    )

# Import Dataset

In [None]:
# Load the PBMC dataset used throughout the notebook.
dataset = PbmcDataset()

# Distinct label values and the number of cells carrying each of them.
unique_elements, counts_elements = np.unique(
    dataset.labels.squeeze(), return_counts=True
)

# Pairing the counts with dataset.cell_types assumes that every cell type occurs at least
# once and that cell_types is ordered by label value -- TODO confirm.
df = pd.DataFrame(dict(counts=counts_elements, cell_types=dataset.cell_types))
fig = px.scatter(df, y="counts", x="cell_types")
fig.show()
# Number of genes; reused below to size result arrays and to draw gene subsets.
n_genes = dataset.nb_genes

In [None]:
print("Cell types: ", dataset.cell_types)
print('Gene names: ', dataset.gene_names)

# Index the DE metadata by its 'ENSG' column and reorder the rows to follow
# dataset.gene_names, so that row i of microarray_info refers to gene i of the dataset.
microarray_info = dataset.de_metadata.set_index('ENSG')
microarray_info = microarray_info.loc[dataset.gene_names]

display(dataset.de_metadata.head())
# DataFrame.info() writes its report to stdout and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
dataset.de_metadata.info()

In [None]:
# NOTE: `labels` is only referenced by the commented-out selection below; the name is
# rebound later in the notebook.
n_examples = len(dataset)
labels = dataset.labels.squeeze()
# interesting_indices = np.where((labels == label_a) | (labels == label_b))[0]
# TEST_INDICES = np.random.permutation(interesting_indices)[:800]
# Hold out 3000 cells chosen at random (draws from the globally seeded NumPy RNG).
TEST_INDICES = np.random.permutation(len(dataset))[:3000]

# Expression matrix and 1-D label vector restricted to the held-out cells.
x_test, y_test = dataset.X[TEST_INDICES, :], dataset.labels[TEST_INDICES, :].squeeze()

data_path = os.path.join(DIR_PATH, 'data.npy')
labels_path = os.path.join(DIR_PATH, 'labels.npy')

# The matrix is densified and cast to int before being stored in NumPy binary format.
np.save(
    data_path,
    np.array(x_test.todense()).squeeze().astype(int)
)
# The labels are written as plain text by np.savetxt, despite the '.npy' file extension.
np.savetxt(
    labels_path,
    y_test.squeeze()
)

In [None]:
# Same export as for the held-out split, but for every cell of the dataset.
x_all, y_all = dataset.X, dataset.labels.squeeze()

data_all_path = os.path.join(DIR_PATH, 'data_all.npy')
labels_all_path = os.path.join(DIR_PATH, 'labels_all.npy')

# Densifies the complete expression matrix before saving it in NumPy binary format.
np.save(
    data_all_path,
    np.array(x_all.todense()).squeeze().astype(int)
)
# Plain-text label file (np.savetxt), despite the '.npy' extension; the MAST class below
# reads it back with np.loadtxt and R's read.table.
np.savetxt(
    labels_all_path,
    y_all.squeeze()
)

## Train parameters

In [None]:
# Early-stopping / learning-rate-schedule options. In the visible part of this notebook they
# are only referenced from a commented-out entry of train_params["base"].
EARLY_STOPPING_KWARGS = {
    "early_stopping_metric": "elbo_ratio_loss",
    "save_best_state_metric": "elbo_ratio_loss",
    "patience": 20,
    "threshold": 0,
    "reduce_lr_on_plateau": True,
    "lr_patience": 10,
    "lr_factor": 0.2,
}

In [None]:
# Constructor arguments for each model variant ("iaf" entries are used with IAVAE, "mf"
# entries with VAE further down).
mdl_params = {
    "iaf": {"n_hidden": 128, "n_layers": 1, "do_h": True, "n_latent": 10, "t": 4},
    "mf": {"n_hidden": 128, "n_layers": 1, "n_latent": 10},
    "iaf_k5": {"n_hidden": 128, "n_layers": 1, "do_h": True, "n_latent": 10, "t": 4},
    "mf_k5": {"n_hidden": 128, "n_layers": 1, "n_latent": 10},
}

# Trainer arguments for each variant. "base" deliberately carries no test_indices; its
# optional test_indices / frequency / early_stopping_kwargs settings stay disabled.
train_params = {
    "iaf": {"ratio_loss": True, "test_indices": TEST_INDICES},
    "mf": {"ratio_loss": True, "test_indices": TEST_INDICES},
    "base": {"ratio_loss": True},
    "iaf_k5": {
        "ratio_loss": True,
        "test_indices": TEST_INDICES,
        "k_importance_weighted": 5,
    },
    "mf_k5": {
        "ratio_loss": True,
        "test_indices": TEST_INDICES,
        "k_importance_weighted": 5,
    },
}

# Every variant is trained with the same schedule; each key gets its own dict object.
train_fn_params = {
    variant: {"n_epochs": N_EPOCHS, "lr": 1e-2}
    for variant in ("iaf", "mf", "base", "iaf_k5", "mf_k5")
}

# Competitors


In [None]:
SIZE

In [None]:
# Predictions of the competing methods, computed through the R interop helpers on the
# full-data export (data_all_path / labels_all_path); the pickle path is handed to the
# helper as `filename`.
other_predictions = all_predictions(
    filename=os.path.join(DIR_PATH, "all_predictions_all_data2.pickle"),
    n_genes=n_genes,
    n_picks=N_PICKS,
    sizes=[SIZE],
#     data_path=data_path,
#     labels_path=labels_path,
    data_path=data_all_path,
    labels_path=labels_all_path,
    path_to_scripts=PATH_TO_SCRIPTS,
    label_a=label_a,
    label_b=label_b,
    all_nature=False
)

# Post-process the raw predictions with significance level Q0 and LFC threshold DELTA.
# Later cells read an "is_de" entry per method -- presumably added here; TODO confirm.
other_predictions = all_de_predictions(
    other_predictions, significance_level=Q0, delta=DELTA
)

In [None]:
# NOTE: `mast_res` is only created by the MAST fallback cell further down in this notebook;
# on a fresh kernel that cell must be executed before this one.
other_predictions["mast"] = mast_res

In [None]:
other_predictions.keys()

In [None]:
# Just in case MAST does not work

import rpy2.robjects as ro
import rpy2.robjects.numpy2ri
import warnings
import numpy as np
from rpy2.rinterface import RRuntimeWarning
import scipy.sparse
import pandas as pd


class MAST(object):
    """Thin rpy2 wrapper that runs the R package MAST on two clusters of cells.

    The count matrix and the labels are loaded twice: once in Python (to draw the random
    cell subsets) and once in the embedded R session (to fit the hurdle model).
    """

    def __init__(self, A, B, data, labels, cluster):
        """
        A: number of cells in the first cluster
        B: number of cells in the second cluster
        data: path to the count matrix saved with np.save (read with np.load / npyLoad)
        labels: path to the plain-text label file (read with np.loadtxt / read.table)
        cluster: list that tells which cluster to test ex. (0, 4)
        """
        self.A = A
        self.B = B
        self.data = data
        self.labels = labels
        self.cluster = cluster
        warnings.filterwarnings("ignore", category=RRuntimeWarning)
        rpy2.robjects.numpy2ri.activate()
        ro.r["library"]("RcppCNPy")
        ro.r["library"]("MAST")
        ro.r["library"]("BiocParallel")
        ro.r("BiocParallel::register(BiocParallel::MulticoreParam())")

        self.X_train = np.load(self.data)
        self.c_train = np.loadtxt(self.labels)

        # Load the same files on the R side. These build exactly the commands that the
        # previous slice-and-concatenate string construction produced.
        ro.r('fmat <- npyLoad("{}", "integer")'.format(self.data))
        ro.r('cmat <- read.table("{}")'.format(self.labels))
        ro.r("cmat$V2 <- factor(cmat$V1)")

    def fit(self, return_fc=False):
        """Fit MAST on a random subset of both clusters.

        return_fc: accepted for backward compatibility; it is not used by this method.

        Returns a DataFrame with columns "pval" and "lfc", indexed by the integer parsed
        from each R `primerid` and sorted by that index.
        """
        # np.random.choice draws with replacement by default, so the mask below may select
        # fewer than A + B distinct cells.
        set_a = np.where(self.c_train == self.cluster[0])[0]
        subset_a = np.random.choice(set_a, self.A)
        set_b = np.where(self.c_train == self.cluster[1])[0]
        subset_b = np.random.choice(set_b, self.B)

        stochastic_set = np.hstack((subset_a, subset_b))

        # 1-D boolean mask over all cells (vectorized; replaces a per-row `in` scan).
        f = np.isin(np.arange(self.X_train.shape[0]), stochastic_set)

        nr, nc = f[:, np.newaxis].shape
        f_r = ro.r.matrix(f[:, np.newaxis], nrow=nr, ncol=nc)
        ro.r.assign("f_", f_r)
        ro.r("f <- as.integer(rownames(cmat[f_,]))")

        ro.r("local_fmat <- log2(fmat[f, ] + 1)")
        ro.r("local_cmat <- cmat[f, ]")
        ro.r("local_cmat$V3 <- factor(local_cmat$V1)")

        ro.r("sca <- FromMatrix(t(data.frame(local_fmat)), data.frame(local_cmat$V3))")
        ro.r("zlmCond <- zlm(~local_cmat.V3, sca)")

        # The tested coefficient is named after the non-reference factor level. R orders
        # numeric levels increasingly and uses the first as reference, so the contrast is
        # the larger of the two labels; for cluster=(0, 4) this is 'local_cmat.V34', the
        # value that used to be hard-coded.
        # NOTE(review): assumes numeric cluster labels -- confirm before using other types.
        contrast = "local_cmat.V3{:g}".format(max(self.cluster))

        ro.r("summaryCond <- summary(zlmCond, doLRT='{}')".format(contrast))
        ro.r("summaryDt <- summaryCond$datatable")
        ro.r(
            """fcHurdle <- merge(
                summaryDt[contrast=='{contrast}' & component=='H',.(primerid, `Pr(>Chisq)`)],
                    #hurdle P values
                summaryDt[contrast=='{contrast}' & component=='logFC', .(primerid, coef, ci.hi, ci.lo)],
                by='primerid') #logFC coefficients""".format(contrast=contrast)
        )

        # primerid values are parsed by dropping their first character and reading the
        # rest as an integer.
        index = [int(elem[1:]) for elem in list(ro.r("fcHurdle$primerid"))]
        p_value = list(ro.r("""fcHurdle$'Pr(>Chisq)'"""))
        coeff = list(ro.r("fcHurdle$coef"))
        data = pd.DataFrame(dict(pval=p_value, lfc=coeff), index=index).sort_index()
        return data


all_nature = False

# Result arrays: axis 0 indexes the (single) size, axis 1 the random pick, axis 2 the gene.
lfcs_mast = np.zeros((1, N_PICKS, n_genes))
var_lfcs_mast = np.zeros((1, N_PICKS, n_genes))
pvals_mast = np.zeros((1, N_PICKS, n_genes))
# `tqdm_notebook` is the progress bar imported at the top of the notebook; the bare `tqdm`
# name used here before is only imported by a later cell, so it failed on a fresh kernel.
for (size_ix, size) in enumerate(tqdm_notebook([SIZE])):
    for exp in range(N_PICKS):
        if all_nature:
            # NOTE(review): NMASTcpm and normalized_means are not defined in the visible
            # part of the notebook; this branch is unreachable while all_nature is False.
            mast_inference = NMASTcpm(
                A=size,
                B=size,
                data=data_all_path,
                labels=labels_all_path,
                normalized_means=normalized_means,
                delta=DELTA,
                cluster=(label_a, label_b),
                path_to_scripts=PATH_TO_SCRIPTS,
            )
            res_df = mast_inference.fit()
            # DataFrame.info() prints by itself and returns None.
            res_df.info()
            var_lfcs_mast[size_ix, exp, :] = res_df["varLogFC"].values
            lfcs_mast[size_ix, exp, :] = res_df["logFC"].values

        else:
            mast_inference = MAST(
                A=size,
                B=size,
                data=data_all_path,
                labels=labels_all_path,
                cluster=(label_a, label_b),
            )
            res_df = mast_inference.fit(return_fc=True)
            lfcs_mast[size_ix, exp, :] = res_df["lfc"].values
        pvals_mast[size_ix, exp, :] = res_df["pval"].values
# `var_lfc` is left unsqueezed (and all zeros when all_nature is False); a later cell
# squeezes it when it is read.
mast_res = dict(
    lfc=lfcs_mast.squeeze(), pval=pvals_mast.squeeze(), var_lfc=var_lfcs_mast
)

# Experiments

In [None]:
def train_or_load(filepath, my_mdl_class, my_mdl_params, my_train_params, my_train_fn_params):
    """Return the pickled result stored at `filepath`, training and caching it if absent.

    When the file is missing, `train_model` is run on the notebook-level `dataset` with the
    given model class and parameter dicts, and its return value is pickled to `filepath`.
    """
    # Cache hit: reuse the stored result without training.
    if os.path.exists(filepath):
        return load_pickle(filepath)

    trained = train_model(
        mdl_class=my_mdl_class,
        dataset=dataset,
        mdl_params=my_mdl_params,
        train_params=my_train_params,
        train_fn_params=my_train_fn_params,
    )
    save_pickle(trained, filepath)
    return trained

In [None]:
# Train (or load from cache) N_TRAININGS IAF and MF models; the loop bound previously
# repeated the value of N_TRAININGS as a bare literal. After the loop the names
# mdl_iaf / trainer_iaf / mdl_mf / trainer_mf refer to the models of the last iteration.
for i in range(N_TRAININGS):
    mdl_iaf, trainer_iaf = train_or_load(
        os.path.join(DIR_PATH, "iaf_mdl_{}final1.pickle".format(i)),
        IAVAE,
        mdl_params["iaf"],
        train_params["base"],
        train_fn_params["base"],
    )

    mdl_mf, trainer_mf = train_or_load(
        os.path.join(DIR_PATH, "mf_mdl_{}final1.pickle".format(i)),
        VAE,
        mdl_params["mf"],
        train_params["base"],
        train_fn_params["base"],
    )

# Move the last pair of models to the GPU (requires a CUDA device).
mdl_iaf.cuda()
mdl_mf.cuda()
# Keeps the cell from echoing the return value of the last .cuda() call.
print()

## Microarray systematic comparison

In [None]:
other_predictions["mast"]["lfc"].shape

In [None]:
lfcs_gt = - microarray_info.BDC_logFC

In [None]:
import seaborn as sns
sns.set()

# First pick of each competing method against the microarray LFC. MAST predictions are
# negated here, as in the R-squared computations below.
plt.scatter(lfcs_gt, -other_predictions["mast"]["lfc"][0], label="mast")
plt.scatter(lfcs_gt, other_predictions["deseq2"]["lfc"][0], label="deseq2")
plt.scatter(lfcs_gt, other_predictions["edger"]["lfc"][0], label="edger")

# Axis labels so that the figure can be read on its own.
plt.xlabel("Ground Truth LFC")
plt.ylabel("Predicted LFC")
plt.legend()

In [None]:
other_predictions["mast"]["lfc"][np.isnan(other_predictions["mast"]["lfc"])] = 0.0

In [None]:
from sklearn.metrics import r2_score
import statsmodels.api as sm

def get_r2(preds, gt):
    """Return the R-squared of an ordinary-least-squares fit of `gt` on `preds`.

    `preds` is handed to statsmodels as the only regressor; no constant column is added.
    """
    return sm.OLS(gt, preds).fit().rsquared


# One R-squared value per row of each method's "lfc" array. MAST predictions are negated
# before being compared with lfcs_gt; DESeq2 and edgeR are used as they are.
r2_mast = np.array([get_r2(-pred, lfcs_gt) for pred in other_predictions["mast"]["lfc"]])
r2_deseq2 = np.array([get_r2(pred, lfcs_gt) for pred in other_predictions["deseq2"]["lfc"]])
r2_edger = np.array([get_r2(pred, lfcs_gt) for pred in other_predictions["edger"]["lfc"]])

In [None]:
print(r2_mast.mean())
print(r2_deseq2.mean())
print(r2_edger.mean())

In [None]:
def subsampled_posterior(post, indices):
    """Point the sampler of `post`'s data loader at `indices` and return `post`.

    The posterior is modified in place; the returned object is the one passed in.
    """
    sampler = post.data_loader.sampler
    sampler.indices = indices
    return post

def compute_lfc(my_trainer, my_idx_a, my_idx_b, n_samples=1000, importance_sampling=False):
    """Sample log2 fold changes between two groups of cells from a trained model.

    my_trainer: trainer whose `train_set` posterior is re-pointed at each group in turn.
    my_idx_a, my_idx_b: cell indices of the two groups, used as sampler indices as-is.
    n_samples: forwarded to `get_latents`.
    importance_sampling: when True, the softmax of the flattened "log_probas" of each group
        is passed to `demultiply` as weights; otherwise no weights are passed.

    Returns log2(scales_a) - log2(scales_b), computed on the arrays returned by `demultiply`.
    """
    if importance_sampling:
        # `softmax` was never imported in this notebook, so this branch used to raise a
        # NameError. NOTE(review): assumes "log_probas" can be converted to a NumPy array.
        from scipy.special import softmax

    # The same posterior object is reused for both groups, so group A must be fully sampled
    # before the sampler is re-pointed at group B.
    post_a = subsampled_posterior(my_trainer.train_set, my_idx_a)
    outputs_a = post_a.get_latents(n_samples=n_samples, other=True, device="cpu")
    scales_a, weights_a = outputs_a["scale"], outputs_a["log_probas"]
    scales_a = scales_a.reshape((-1, dataset.nb_genes)).numpy()

    post_b = subsampled_posterior(my_trainer.train_set, my_idx_b)
    outputs_b = post_b.get_latents(n_samples=n_samples, other=True, device="cpu")
    scales_b, weights_b = outputs_b["scale"], outputs_b["log_probas"]
    scales_b = scales_b.reshape((-1, dataset.nb_genes)).numpy()

    if importance_sampling:
        weights_a = softmax(weights_a.reshape((-1)))
        weights_b = softmax(weights_b.reshape((-1)))
    else:
        weights_a = None
        weights_b = None
    scales_a, scales_b = demultiply(
        arr1=scales_a, arr2=scales_b, factor=3, weights_a=weights_a, weights_b=weights_b
    )

    lfc = np.log2(scales_a) - np.log2(scales_b)
    return lfc

In [None]:
from tqdm import tqdm

# Accumulators for the per-pick summaries. They used to be commented out, so on a fresh
# kernel the first `.append` below raised a NameError.
means_mf = []
means_iaf = []
medians_mf = []
medians_iaf = []

# NOTE(review): the loop starts at 1, so the "*_mdl_0final1.pickle" models are not
# evaluated here -- confirm whether model 0 should be included.
for i in range(1, 5):
    print(i)
    # One random draw (with replacement, the np.random.choice default) of SIZE cells per
    # population; the same cells are reused for all N_PICKS repetitions of this model.
    idx_a = np.where(y_all == label_a)[0]
    idx_b = np.where(y_all == label_b)[0]
    idx_a = np.random.choice(idx_a, SIZE)
    idx_b = np.random.choice(idx_b, SIZE)

    mdl_iaf, trainer_iaf = train_or_load(
        os.path.join(DIR_PATH, "iaf_mdl_{}final1.pickle".format(i)),
        IAVAE,
        mdl_params["iaf"],
        train_params["base"],
        train_fn_params["base"],
    )
    mdl_mf, trainer_mf = train_or_load(
        os.path.join(DIR_PATH, "mf_mdl_{}final1.pickle".format(i)),
        VAE,
        mdl_params["mf"],
        train_params["base"],
        train_fn_params["base"],
    )
    for _ in tqdm(range(N_PICKS)):
        lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=500)
        lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=500)

        # Per-gene summaries over the sampled LFCs (axis 0).
        means_mf.append(lfc_mf.mean(0))
        means_iaf.append(lfc_iaf.mean(0))
        medians_mf.append(np.median(lfc_mf, 0))
        medians_iaf.append(np.median(lfc_iaf, 0))

save_pickle(means_mf, os.path.join(DIR_PATH, "means_mf.pickle"))
save_pickle(means_iaf, os.path.join(DIR_PATH, "means_iaf.pickle"))
save_pickle(medians_mf, os.path.join(DIR_PATH, "medians_mf.pickle"))
save_pickle(medians_iaf, os.path.join(DIR_PATH, "medians_iaf.pickle"))

In [None]:
# Reload the summaries pickled by the previous cell and stack each list into one array
# (one row per appended estimate).
means_mf = np.array(load_pickle(os.path.join(DIR_PATH, "means_mf.pickle")))
means_iaf = np.array(load_pickle(os.path.join(DIR_PATH, "means_iaf.pickle")))
medians_mf = np.array(load_pickle(os.path.join(DIR_PATH, "medians_mf.pickle")))
medians_iaf = np.array(load_pickle(os.path.join(DIR_PATH, "medians_iaf.pickle")))

In [None]:
# R-squared of each median-LFC estimate against the microarray LFC (the means loaded above
# are not used here).
r2_mf = np.array([get_r2(pred, lfcs_gt) for pred in medians_mf])
r2_iaf = np.array([get_r2(pred, lfcs_gt) for pred in medians_iaf])

print("mf", r2_mf.mean())
print("iaf", r2_iaf.mean())

In [None]:
from scvi.utils import has_lower_mean

has_lower_mean(r2_iaf, r2_mf)

In [None]:
# Render the mean R-squared of every method as a one-row LaTeX table: values are rounded to
# three decimals and wrapped in "$ ... $"; escape=False keeps the dollar signs unescaped.
print(
    pd.Series(
        [
            r2_deseq2.mean(),
            r2_mast.mean(),
            r2_edger.mean(),
            r2_mf.mean(),
            r2_iaf.mean(),
        ],
        index=["DESeq2", "MAST", "edgeR", "MF", "IAF"],
    )
    .to_frame("RSquared")
    .T.round(3)
    .applymap(lambda x: "$ {} $".format(x))
    .to_latex(escape=False)
)

In [None]:
def get_coef(preds, gt):
    """Return the slope of an ordinary-least-squares fit of `gt` on `preds`.

    `preds` is the only regressor (no constant is added), so the fit has one coefficient.
    It is selected by position rather than through the auto-generated name `x1`, so the
    function also works when `preds` carries its own name or when statsmodels returns the
    parameters as a plain array.
    """
    model = sm.OLS(gt, preds).fit()
    return np.asarray(model.params)[0]

# Regression slope of every estimate against the microarray LFC. As for the R-squared
# values, MAST predictions are negated and the scVI variants use the median estimates.
coef_mf = np.array([get_coef(pred, lfcs_gt) for pred in medians_mf])
coef_iaf = np.array([get_coef(pred, lfcs_gt) for pred in medians_iaf])
coef_mast = np.array([get_coef(-pred, lfcs_gt) for pred in other_predictions["mast"]["lfc"]])
coef_deseq2 = np.array([get_coef(pred, lfcs_gt) for pred in other_predictions["deseq2"]["lfc"]])
coef_edger = np.array([get_coef(pred, lfcs_gt) for pred in other_predictions["edger"]["lfc"]])



# Mean slope per method, printed in the order MF, IAF, MAST, DESeq2, edgeR.
print(coef_mf.mean())
print(coef_iaf.mean())
print(coef_mast.mean())
print(coef_deseq2.mean())
print(coef_edger.mean())

## Microarray BIS

In [None]:
def subsampled_posterior(post, indices):
    """Restrict `post` to `indices` by overwriting its sampler's indices; return `post`.

    NOTE: this notebook defines this function more than once with the same body.
    """
    loader_sampler = post.data_loader.sampler
    setattr(loader_sampler, "indices", indices)
    return post

def compute_lfc(my_trainer, my_idx_a, my_idx_b, n_samples=1000, importance_sampling=False):
    """Sample log2 fold changes between two groups of held-out cells.

    Unlike the earlier definition in this notebook, this version works on the trainer's
    `test_set` posterior, and `my_idx_a` / `my_idx_b` are positions into TEST_INDICES (they
    are translated with TEST_INDICES[...] before being used as sampler indices).

    n_samples: forwarded to `get_latents`.
    importance_sampling: when True, the softmax of the flattened "log_probas" of each group
        is passed to `demultiply` as weights; otherwise no weights are passed.

    Returns log2(scales_a) - log2(scales_b), computed on the arrays returned by `demultiply`.
    """
    if importance_sampling:
        # `softmax` was never imported in this notebook, so this branch used to raise a
        # NameError. NOTE(review): assumes "log_probas" can be converted to a NumPy array.
        from scipy.special import softmax

    # The same posterior object serves both groups: sample group A before re-pointing it.
    post_a = subsampled_posterior(my_trainer.test_set, TEST_INDICES[my_idx_a])
    outputs_a = post_a.get_latents(n_samples=n_samples, other=True, device="cpu")
    scales_a, weights_a = outputs_a["scale"], outputs_a["log_probas"]
    scales_a = scales_a.reshape((-1, dataset.nb_genes)).numpy()

    post_b = subsampled_posterior(my_trainer.test_set, TEST_INDICES[my_idx_b])
    outputs_b = post_b.get_latents(n_samples=n_samples, other=True, device="cpu")
    scales_b, weights_b = outputs_b["scale"], outputs_b["log_probas"]
    scales_b = scales_b.reshape((-1, dataset.nb_genes)).numpy()

    if importance_sampling:
        weights_a = softmax(weights_a.reshape((-1)))
        weights_b = softmax(weights_b.reshape((-1)))
    else:
        weights_a = None
        weights_b = None
    scales_a, scales_b = demultiply(
        arr1=scales_a, arr2=scales_b, factor=3, weights_a=weights_a, weights_b=weights_b
    )

    lfc = np.log2(scales_a) - np.log2(scales_b)
    return lfc

In [None]:
# 0 / 4

In [None]:
# First 70 held-out cells of label 0 and of label 4, as positions into y_test / TEST_INDICES
# (no random sampling here).
idx_a = np.where(y_test == 0)[0][:70]
idx_b = np.where(y_test == 4)[0][:70]
# idx_a = np.where(y_test == 0)[0][:70]
# idx_b = np.where(y_test == 4)[0][:70]

In [None]:
# LFC samples for the selected cells; uses whichever trainers are currently bound to
# trainer_mf / trainer_iaf (rebound by several earlier cells).
lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=500)
lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=500)

In [None]:
lfcs_gt = - microarray_info.BDC_logFC
# lfcs_gt = - microarray_info.CD_logFC

In [None]:
# Compare mean LFC estimates with the microarray LFC on 500 randomly chosen genes.
random_genes = np.random.permutation(n_genes)[:500]
lfc_ground_truth = lfcs_gt[random_genes]
mean_mf = lfc_mf.mean(0)[random_genes]
mean_iaf = lfc_iaf.mean(0)[random_genes]
# HDI-based error bars (compute_hdi with credible_interval=0.95) were disabled in the
# original cell and are left out here as well.


fig = go.Figure()
# Traces are named so that the legend identifies the two models (they used to appear as
# "trace 0" / "trace 1").
trace_mf = go.Scatter(
    x=lfc_ground_truth,
    y=mean_mf,
    mode="markers",
    name="MF",
)

trace_iaf = go.Scatter(
    x=lfc_ground_truth,
    y=mean_iaf,
    mode="markers",
    name="IAF",
)
# Dashed y = x reference line.
trace_gt = go.Scatter(
    x=[-3, 3],
    y=[-3, 3],
    mode="lines",
    line=dict(color="black", width=4, dash="dash"),
    showlegend=False,
)
fig.add_traces([trace_mf, trace_iaf, trace_gt])
fig.update_xaxes(title_text="Ground Truth LFC")
fig.update_yaxes(title_text="Predicted LFC")
fig.show()

In [None]:
# LFC arrays of the competing methods with NaNs replaced by 0. MAST and edgeR are negated
# (which yields new arrays); DESeq2 is taken as-is.
mast_predictions = other_predictions["mast"]
lfcs_mast = -mast_predictions["lfc"]

lfcs_mast[np.isnan(lfcs_mast)] = 0.0

# No negation here, so lfcs_deseq2 is the very object stored in other_predictions: the NaN
# fill below is applied to other_predictions["deseq2"]["lfc"] too.
lfcs_deseq2 = other_predictions["deseq2"]["lfc"]
lfcs_deseq2[np.isnan(lfcs_deseq2)] = 0.0

lfcs_edger = -other_predictions["edger"]["lfc"]
lfcs_edger[np.isnan(lfcs_edger)] = 0.0

In [None]:
# Naive baseline: log2 ratio of the per-gene mean raw counts of the two groups.
h_a = x_test[idx_a].mean(axis=0)
h_b = x_test[idx_b].mean(axis=0)
# A zero mean gives log2(0) = -inf, and -inf - (-inf) gives NaN.
lfc_baseline = np.array(np.log2(h_a) - np.log2(h_b))
# Infinite values are clipped to [-5, 5]; the remaining NaNs are set to 0.
lfc_baseline = np.clip(lfc_baseline, a_min=-5, a_max=5).squeeze()
lfc_baseline[np.isnan(lfc_baseline)] = 0.0

In [None]:
import statsmodels.api as sm

# (prediction vector, display name) pairs to regress against the microarray LFC.
mdls = [
    (-lfc_mf.mean(0), "MF"),
    (-lfc_iaf.mean(0), "IAF"),
#     (-lfcs_mast[-1, :], "MAST"),
#     (-lfcs_deseq2[-1, :], "DESeq2"),
#     (lfcs_edger[-1, :], "EdgeR"),
#     (lfc_baseline, "Baseline"),
]


reg_results = dict()
for preds, name in mdls:
    y = preds
    X = lfcs_gt
#     X = sm.add_constant(X)

    model = sm.OLS(y, X).fit()
    display(name, model.summary())
    # X is the only regressor, so the slope is the first (and only) parameter. It is read
    # by position: the previous attribute lookup `params.CD_logFC` fails when lfcs_gt is
    # built from the BDC_logFC column, as it is in the cell above.
    reg_results[name] = dict(
        rsquared=model.rsquared_adj,
        coef=np.asarray(model.params)[0],
    )

## Microarray

In [None]:
from sklearn.preprocessing import StandardScaler

# Keep the new_n_genes genes with the largest variance, as estimated by StandardScaler
# (fitted with with_mean=False) on the full count matrix.
new_n_genes = 110
std_scaler = StandardScaler(with_mean=False)
std_scaler.fit(dataset.X.astype(np.float64))
# argsort is ascending, so the order is reversed before the first new_n_genes are taken.
subset_genes = np.argsort(std_scaler.var_)[::-1][:new_n_genes]

# subset_genes = np.arange(n_genes)

### BDT : size 100

In [None]:
def bdt_densities(
    filename, mdl_class, dataset, mdl_params, train_params, train_fn_params, sz=SIZE
):
    """Run `estimate_lfc_density` for a single sample size and return that size's result.

    The comparison is fixed to label 0 versus label 4, with N_PICKS picks and 100 samples.
    The entry for `sz` is extracted from the returned mapping and squeezed. Note that the
    default of `sz` is bound to the value of SIZE when this function is defined.
    """
    densities_by_size = estimate_lfc_density(
        filename=filename,
        mdl_class=mdl_class,
        dataset=dataset,
        mdl_params=mdl_params,
        train_params=train_params,
        train_fn_params=train_fn_params,
        sizes=[sz],
        n_picks=N_PICKS,
        label_a=0,
        label_b=4,
        n_samples=100,
    )
    return densities_by_size[sz].squeeze()


# LFC samples from the mean-field model (VAE) at the default size SIZE.
lfcs_mf = bdt_densities(
    filename=os.path.join(DIR_PATH, "bdt100MF_new2.pickle"),
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
)

# LFC samples from the inverse-autoregressive-flow model (IAVAE) at the default size SIZE.
lfcs_ia = bdt_densities(
    filename=os.path.join(DIR_PATH, "bdt100IAF_new2.pickle"),
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
)

# lfcs_iwia = estimate_lfc_density(
#     IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_k5"],
#     train_params=train_params["iaf_k5"],
#     train_fn_params=train_fn_params["iaf_k5"],
#     sizes=[SIZE],
#     n_picks=1,
#     label_a=label_a,
#     label_b=label_b
# )[SIZE].squeeze()

# lfcs_iwmf = estimate_lfc_density(
#     IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["mf_k5"],
#     train_params=train_params["mf_k5"],
#     train_fn_params=train_fn_params["mf_k5"],
#     sizes=[SIZE],
#     n_picks=1,
#     label_a=label_a,
#     label_b=label_b
# )[SIZE].squeeze()

In [None]:
lfcs_ia_100_all = lfcs_ia.reshape((-1, n_genes))
lfcs_mf_100_all = lfcs_mf.reshape((-1, n_genes))

In [None]:
from plotly.subplots import make_subplots

# Restrict the LFC samples to the high-variance gene subset. The two assignments used to be
# swapped (the MF name received the IAF samples and vice versa), which mislabeled both
# panels of the figure and every later use of these arrays.
lfcs_mf_est = lfcs_mf.reshape((-1, n_genes))[:, subset_genes]
lfcs_ia_est = lfcs_ia.reshape((-1, n_genes))[:, subset_genes]
lfcs_mf_est_100 = lfcs_mf_est.copy()
lfcs_ia_est_100 = lfcs_ia_est.copy()
# NOTE: rebinds the global lfcs_gt to the gene subset; a later cell resets it to all genes.
lfcs_gt = - microarray_info.BDC_logFC[subset_genes]

print(lfcs_mf_est.shape)
print(lfcs_ia_est.shape)
print(lfcs_gt.shape)

# One panel per model, with shared axes so that the two are directly comparable.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Mean Field", "Inverse Autoregressive Flows"),
    shared_xaxes=True,
    shared_yaxes=True,
)


def add_plot(fig, lfcs_est_m, lfcs_est_err, row, col):
    """Add a marker trace of estimated LFCs, with symmetric error bars, to one subplot.

    The x values are the notebook-level `lfcs_gt`; `lfcs_est_m` gives the y values and
    `lfcs_est_err` the error-bar half-lengths. The trace goes to cell (row, col) of `fig`.
    Returns None.
    """
    error_bars = dict(type="data", array=lfcs_est_err, visible=True)
    markers = go.Scatter(
        x=lfcs_gt,
        y=lfcs_est_m,
        error_y=error_bars,
        mode="markers",
    )
    fig.add_trace(markers, row=row, col=col)


# Draw both panels the same way: estimates with +/- 2 standard deviations, a dashed y = x
# reference line and an x-axis title. The reference line used to span [-5, 5] in the first
# panel but [-3, 3] in the second although the axes are shared, and only the first panel
# had an x-axis title.
for panel_col, panel_lfcs in ((1, lfcs_mf_est), (2, lfcs_ia_est)):
    add_plot(fig, panel_lfcs.mean(0), 2.0 * panel_lfcs.std(0), row=1, col=panel_col)
    fig.add_trace(
        go.Scatter(
            x=[-5, 5],
            y=[-5, 5],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
        ),
        row=1,
        col=panel_col,
    )
    fig.update_xaxes(title_text="Ground Truth LFC", row=1, col=panel_col)

# The y axis is shared, so it is titled once on the left panel.
fig.update_yaxes(title_text="Predicted LFC", row=1, col=1)


fig.update_layout(
    height=600, width=1000, title_text="LFC estimation for {} sample cells B cells/DT cells".format(SIZE)
)
# iplot(fig, filename="pbmc_microarray_lfc_with_uncertainty_{}cells_BDT".format(SIZE), sharing="private")
fig.show()

### BDT Other techniques

In [None]:
lfcs_gt = -microarray_info.BDC_logFC

#### Computations

In [None]:
# Up to 100 held-out cells per population, drawn without replacement via a permutation.
where_a = np.where(y_test == label_a)[0]
where_b = np.where(y_test == label_b)[0]
idx_a = np.random.permutation(where_a)[:100]
idx_b = np.random.permutation(where_b)[:100]

# Naive baseline: log2 ratio of the per-gene mean raw counts of the two groups.
h_a = x_test[idx_a].mean(axis=0)
h_b = x_test[idx_b].mean(axis=0)
# A zero mean gives log2(0) = -inf, and -inf - (-inf) gives NaN.
lfc_baseline = np.array(np.log2(h_a) - np.log2(h_b))
# Infinite values are clipped to [-5, 5]; the remaining NaNs are set to 0.
lfc_baseline = np.clip(lfc_baseline, a_min=-5, a_max=5).squeeze()
lfc_baseline[np.isnan(lfc_baseline)] = 0.0

In [None]:
# Same preparation as in the "Microarray BIS" section, plus MAST standard deviations taken
# as the square root of the stored "var_lfc" array.
mast_predictions = other_predictions["mast"]
lfcs_mast = -mast_predictions["lfc"]
stds_mast = np.sqrt(mast_predictions["var_lfc"].squeeze())

lfcs_mast[np.isnan(lfcs_mast)] = 0.0
stds_mast[np.isnan(stds_mast)] = 0.0

# No negation here, so the NaN fill below also modifies other_predictions["deseq2"]["lfc"].
lfcs_deseq2 = other_predictions["deseq2"]["lfc"]
lfcs_deseq2[np.isnan(lfcs_deseq2)] = 0.0

lfcs_edger = -other_predictions["edger"]["lfc"]
lfcs_edger[np.isnan(lfcs_edger)] = 0.0

In [None]:
import statsmodels.api as sm

# (prediction vector, display name) pairs. For the competitors the last pick (row -1) is
# used. NOTE(review): MF, IAF, MAST and DESeq2 are negated here while EdgeR and Baseline
# are not -- confirm that the sign conventions are the intended ones.
mdls = [
    (-lfcs_mf_100_all.mean(0), "MF"),
    (-lfcs_ia_100_all.mean(0), "IAF"),
    (-lfcs_mast[-1, :], "MAST"),
    (-lfcs_deseq2[-1, :], "DESeq2"),
    (lfcs_edger[-1, :], "EdgeR"),
    (lfc_baseline, "Baseline"),
]

# The design matrix (intercept + microarray LFC) is the same for every fit, so it is built
# once instead of on every iteration.
X = sm.add_constant(microarray_info.BDC_logFC)

reg_results = dict()
for preds, name in mdls:
    y = preds

    model = sm.OLS(y, X).fit()
    display(name, model.summary())
    # Adjusted R-squared and the slope on BDC_logFC, keyed by method name.
    reg_results[name] = dict(rsquared=model.rsquared_adj, coef=model.params.BDC_logFC)

#### Plots

In [None]:
reg_results.keys()

In [None]:
"{0:.2f}, {1:.2f}".format(1000.01022, 0.0100020)

In [None]:
layout = go.Layout(title_text="LFC point Predictions")
fig = go.Figure(layout=layout)

# All traces are restricted to the high-variance gene subset. The legend entries report the
# regression statistics stored in reg_results.
fig.add_traces(
    [
        go.Scatter(
            x=lfcs_gt[subset_genes],
            y=lfcs_mf_est.mean(0),
            mode="markers",
            name="MF @R^2 : {0:.2f}, Slope: {1:.2f}".format(
                reg_results["MF"]["rsquared"], reg_results["MF"]["coef"]
            ),
        ),
        go.Scatter(
            x=lfcs_gt[subset_genes],
            y=lfcs_deseq2[-1, subset_genes],
            mode="markers",
            name="DESeq2 @R^2 : {0:.2f}, Slope: {1:.2f}".format(
                reg_results["DESeq2"]["rsquared"], reg_results["DESeq2"]["coef"]
            ),
        ),
        go.Scatter(
            x=lfcs_gt[subset_genes],
            # lfc_baseline covers all genes; it must be subset like x, otherwise x and y
            # have different lengths and the points do not refer to the same genes.
            y=lfc_baseline[subset_genes],
            mode="markers",
            name="Baseline @R^2 : {0:.2f}, Slope: {1:.2f}".format(
                reg_results["Baseline"]["rsquared"], reg_results["Baseline"]["coef"]
            ),
        ),
        # Dashed y = x reference line.
        go.Scatter(
            x=[-5, 5],
            y=[-5, 5],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
            name="Reference",
            showlegend=False
        ),
    ]
)


#         go.Scatter(
#             x=lfcs_gt[subset_genes],
#             y=lfcs_edger[-1, subset_genes],
#             mode="markers",
#             name="EdgeR",
#             text=[
#                 "R^2 : {0:.2f}, Slope: {1:.2f}".format(
#                     reg_results["EdgeR"]["rsquared"], reg_results["EdgeR"]["coef"]
#                 )
#             ],
#         ),

#         go.Scatter(
#             x=lfcs_gt[subset_genes],
#             y=lfcs_mast[-1, subset_genes],
#             #             error_y=dict(type="data", array=2.0*stds_mast, visible=True),
#             mode="markers",
#             name="MAST",
#             text=[
#                 "R^2 : {0:.2f}, Slope: {1:.2f}".format(
#                     reg_results["MAST"]["rsquared"], reg_results["MAST"]["coef"]
#                 )
#             ],
#         ),

fig.show()
# iplot(fig, filename="pbmc_microarray_diags", sharing="private")

**When all genes are taken into account, scVI predicts LFC clearly better than its competitors.**

## Overlap

Venn diagram of the overlap between the sets of genes predicted as differentially expressed by each method.

In [None]:
# DE probabilities for both models, computed with identical settings. `delta` used to be
# passed for the IAF model only, leaving the MF model on the helper's default; both calls
# now pass DELTA (0.5, the value the IAF call used) explicitly.
# NOTE(review): `filename` looks like a results cache -- if so, delete the stale MF file
# for this change to take effect; confirm against estimate_de_proba.
de_probas_mf = estimate_de_proba(
    filename=os.path.join(DIR_PATH, "de_probas_mfNEW.npy"),
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
    sizes=[SIZE],
    n_trainings=1,
    delta=DELTA,
    n_picks=1,
    label_a=label_a,
    n_samples=300,
    label_b=label_b
).squeeze()

de_probas_iaf = estimate_de_proba(
    filename=os.path.join(DIR_PATH, "de_probas_iafNEW.npy"),
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
    sizes=[SIZE],
    n_trainings=1,
    delta=DELTA,
    n_picks=1,
    label_a=label_a,
    n_samples=300,
    label_b=label_b
).squeeze()

In [None]:
# NOTE(review): the FDR-based calls computed on the next two lines are overwritten right
# below by a plain 0.5 threshold on the DE probabilities, so only the threshold-based calls
# are used downstream -- confirm which decision rule is intended.
is_pred_de_mf = predict_de_genes(de_probas_mf, desired_fdr=Q0)
is_pred_de_iaf = predict_de_genes(de_probas_iaf, desired_fdr=Q0)

is_pred_de_mf = de_probas_mf >= 0.5
is_pred_de_iaf = de_probas_iaf >= 0.5

In [None]:
print(other_predictions["deseq2"]["pval"].shape, other_predictions["deseq2"]["lfc"].shape)

In [None]:
# DE calls of the competing methods for the first pick (row 0 of each "is_de" array).
is_pred_deseq2 = other_predictions["deseq2"]["is_de"][0]
is_pred_edger = other_predictions["edger"]["is_de"][0]
is_pred_mast = other_predictions["mast"]["is_de"][0]

In [None]:
plt.hist(de_probas_iaf)

In [None]:
from matplotlib_venn import venn3, venn3_circles


# NOTE: this rebinds the global name `labels` (previously the cell labels) to the gene
# positions 0..n_genes-1, which are then filtered by each method's DE mask.
labels = np.arange(n_genes)
de_genes_scvi = set(labels[is_pred_de_mf])
de_genes_scvi_iaf = set(labels[is_pred_de_iaf])

de_genes_deseq2 = set(labels[is_pred_deseq2])
de_genes_edger = set(labels[is_pred_edger])
de_genes_mast = set(labels[is_pred_mast])

# venn_diagram = venn3(subsets=[de_genes_scvi, de_genes_deseq2, de_genes_mast])
# plt.show()
# Only the IAF, MAST and edgeR sets are drawn; the MF and DESeq2 sets are built but unused.
venn_diagram = venn3(
    subsets=[de_genes_scvi_iaf, de_genes_mast, de_genes_edger],
    set_labels=["IAF", "MAST", "EdgeR"],
)
plt.show()

In [None]:
# Bounding boxes (x0, y0, x1, y1) of the three Venn circles, for redrawing them in plotly.
x0 = 3*[0]
x1 = 3*[0]
y0 = 3*[0]
y1 = 3*[0]

labels = []
colors = ["red", "green", "blue"]

for i in range(3):
    r = venn_diagram.radii[i]
    cx, cy = venn_diagram.centers[i]
    x0[i] = cx - r
    x1[i] = cx + r
    y0[i] = cy - r
    y1[i] = cy + r

# Position and text of the set names. Missing (None) labels are skipped instead of raising
# an AttributeError.
labels_x, labels_y, labels_text = [], [], []
for annotation in venn_diagram.set_labels:
    if annotation is None:
        continue
    x, y = annotation.get_position()
    text = annotation.get_text()
    labels_x.append(x)
    labels_y.append(y)
    labels_text.append(text)

# Position and text of the region counts. matplotlib-venn stores None for regions that are
# not drawn (e.g. empty intersections), so those entries are skipped as well.
ann_x, ann_y, ann_text = [], [], []
for annotation in venn_diagram.subset_labels:
    if annotation is None:
        continue
    x, y = annotation.get_position()
    text = annotation.get_text()
    ann_x.append(x)
    ann_y.append(y)
    ann_text.append(text)

In [None]:
# Redraw the Venn diagram in plotly from the geometry extracted in the previous cell.
fig = go.Figure()

# One semi-transparent circle per set, defined by its bounding box in data coordinates.
shapes = []
for i in range(3):
    shape = go.layout.Shape(
        type="circle",
        xref="x",
        yref="y",
        x0=x0[i],
        y0=y0[i],
        x1=x1[i],
        y1=y1[i],
        fillcolor=colors[i],
        line_color=colors[i],
        opacity=0.3,
    )
    shapes.append(shape)


# Text-only traces carry the region counts and the set names.
trace_subsets = go.Scatter(x=ann_x, y=ann_y, text=ann_text, mode="text", showlegend=False)
trace_sets = go.Scatter(x=labels_x, y=labels_y, text=labels_text, mode="text", showlegend=False)
fig.add_traces([trace_subsets, trace_sets])
# scaleanchor / scaleratio keep both axes on the same scale so the circles stay round.
fig.update_layout(
    shapes=shapes,
    xaxis=dict(showgrid=False, zeroline=False),
    yaxis=dict(scaleanchor="x", scaleratio=1, showgrid=False, zeroline=False),
    width=800,
    height=800,
)
fig.show()
# `iplot` comes from chart_studio.plotly (imported at the top of the notebook) --
# presumably this uploads the figure and therefore needs the Chart Studio sign-in; confirm.
iplot(fig, filename="pbmc_venn", sharing="private")

## FDR and FNR (sanity check)

In [None]:
# def get_fnr_fdr(y_true, y_pred):
#     return dict(
#         fnr=(y_true * (~y_pred)).sum() / (y_true).sum(),
#         fdr=((~y_true) * (y_pred)).sum() / (y_pred).sum(),
#     )


# res_df = pd.DataFrame(
#     dict(
#         MF=get_fnr_fdr(is_pred_de_mf, is_significant_de),
#         DESeq2=get_fnr_fdr(is_pred_deseq2, is_significant_de),
#         EdgeR=get_fnr_fdr(is_pred_edger, is_significant_de),
#         MAST=get_fnr_fdr(is_pred_mast, is_significant_de),
#     )
# ).T

# res_df.plot.bar()

## PR Curves (sanity check)

In [None]:
# from sklearn.metrics import precision_recall_curve

# preds_mf = de_probas_mf
# preds_iaf = de_probas_iaf
# preds_deseq2 = -other_predictions['deseq2']['pval'][0, :]
# preds_edger = -other_predictions['edger']['pval'][0, :]
# preds_mast = -other_predictions['mast']['pval'][0, :]

In [None]:
# dataset.de_metadata.info()

In [None]:
# is_significant_de = (dataset.de_metadata["BDC_adj.P.Val"] <= Q0) 
# * (dataset.de_metadata["BDC_logFC"].abs() >= DELTA)

In [None]:
# from sklearn.metrics import precision_recall_curve, average_precision_score

# def plot_pr(fig, preds, y_true, name):
#     average_precision = average_precision_score(y_true, preds)
#     preds[np.isnan(preds)] = np.min(preds[~np.isnan(preds)])
#     precs, recs, _ = precision_recall_curve(y_true=y_true, probas_pred=preds)
#     fig.add_trace(
#         go.Scatter(
#             x=recs,
#             y=precs,
#             name=name+'@AP: {0:0.2f}'.format(average_precision)
#         )
#     )
#     return
# layout = go.Layout(
#     title='Precision Recall Curves',
#     xaxis=dict(title='Recall'),
#     yaxis=dict(title='Precision'),
#     width=800,
#     height=600,
# )
# fig = go.Figure(layout=layout)
# plot_pr(fig=fig, preds=preds_mf, y_true=is_significant_de, name='MF')
# plot_pr(fig=fig, preds=preds_iaf, y_true=is_significant_de, name='IAF')
# plot_pr(fig=fig, preds=preds_deseq2, y_true=is_significant_de, name='DESeq2')
# plot_pr(fig=fig, preds=preds_edger, y_true=is_significant_de, name='EdgeR')
# plot_pr(fig=fig, preds=preds_mast, y_true=is_significant_de, name='MAST')

# iplot(fig, filename="pbmc_microarray_pr_curves", sharing="private")

## LFC VS Means

In [None]:
# Train a fresh MF and a fresh IAF model by calling train_model directly (no pickle cache,
# unlike train_or_load). This rebinds mdl_mf / trainer_mf / mdl_iaf / trainer_iaf.
mdl_mf, trainer_mf = train_model(
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["base"],
    train_fn_params=train_fn_params["base"],
)

mdl_iaf, trainer_iaf = train_model(
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["base"],
    train_fn_params=train_fn_params["base"],
)

In [None]:
def subsampled_posterior(post, indices):
    """Restrict `post` to the given cell indices and return it.

    The `indices` attribute of ``post.data_loader.sampler`` is overwritten in
    place, and the object returned is `post` itself (not a copy). Any other
    reference to the same posterior therefore sees the new subset too.
    """
    sampler = post.data_loader.sampler
    sampler.indices = indices
    return post


def sample_random_indices(sz):
    """Draw `sz` positions from each of the two classes of `y_test`.

    Positions where ``y_test == 0`` are sampled first, then positions where
    ``y_test == 1``. Both draws use ``np.random.choice`` with its defaults,
    i.e. with replacement, so a position may appear more than once.

    Returns
    -------
    tuple
        ``(idx_a, idx_b)``: the sampled positions for class 0 and class 1.
    """
    draws = []
    for class_value in (0, 1):
        candidates = np.where(y_test == class_value)[0]
        draws.append(np.random.choice(candidates, size=sz))
    return tuple(draws)


def compute_lfc(my_trainer, my_idx_a, my_idx_b, n_samples=1000, importance_sampling=False):
    """Sample log2 fold changes between two groups of test cells.

    Parameters
    ----------
    my_trainer
        Trainer whose ``test_set`` posterior is queried.
    my_idx_a, my_idx_b
        Positions into ``TEST_INDICES`` selecting the cells of each group.
    n_samples : int
        Forwarded to ``get_latents`` for each group.
    importance_sampling : bool
        When True, the flattened ``"log_probas"`` outputs are passed through
        ``softmax`` and handed to ``demultiply`` as weights; otherwise the
        weights are None.

    Returns
    -------
    The difference ``log2(scales_a) - log2(scales_b)`` of the arrays returned
    by ``demultiply``.
    """

    def _sample_group(cell_positions):
        # subsampled_posterior edits my_trainer.test_set in place, so one
        # group must be fully sampled before the next one is selected.
        posterior = subsampled_posterior(
            my_trainer.test_set, TEST_INDICES[cell_positions]
        )
        outputs = posterior.get_latents(n_samples=n_samples, other=True, device="cpu")
        scales, log_probas = outputs["scale"], outputs["log_probas"]
        return scales.reshape((-1, dataset.nb_genes)).numpy(), log_probas

    scales_a, weights_a = _sample_group(my_idx_a)
    scales_b, weights_b = _sample_group(my_idx_b)

    if not importance_sampling:
        weights_a = weights_b = None
    else:
        weights_a = softmax(weights_a.reshape((-1)))
        weights_b = softmax(weights_b.reshape((-1)))

    scales_a, scales_b = demultiply(
        arr1=scales_a,
        arr2=scales_b,
        factor=3,
        weights_a=weights_a,
        weights_b=weights_b,
    )
    return np.log2(scales_a) - np.log2(scales_b)


In [None]:
# Number of positions drawn per class by sample_random_indices.
sz = 50

# Statement order matters here: each line below consumes the global NumPy RNG.
# First 100 entries of a random permutation of the gene indices; used to index
# the per-gene arrays in the HDI error-bar figure further down.
random_genes = np.random.permutation(n_genes)[:100]
idx_a, idx_b = sample_random_indices(sz)
# Same sampled cells for both models, 500 posterior samples requested each.
lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=500)
lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=500)

In [None]:
# Row positions of the cells whose dataset label equals 0 (first axis of the
# np.where result).
pop_a_indices = np.where(dataset.labels == 0)[0]

In [None]:
# Per-gene summaries of the sampled LFCs: mean over the first axis and the
# interval returned by compute_hdi at credible_interval=0.95.
mean_mf, mean_iaf = lfc_mf.mean(axis=0), lfc_iaf.mean(axis=0)
hdis_mf, hdis_iaf = (
    compute_hdi(lfc_samples, credible_interval=0.95)
    for lfc_samples in (lfc_mf, lfc_iaf)
)

# Mean of dataset.X over the label-0 cells, flattened with squeeze().
means = np.array(dataset.X[pop_a_indices].mean(axis=0)).squeeze()

In [None]:
# A gene is called DE (1) when at least half of its sampled LFCs satisfy
# |LFC| >= DELTA, otherwise 0. DELTA is the LFC threshold set in the config
# cell; it replaces the re-typed 0.5 so the two cannot drift apart.
is_de_mf = ((np.abs(lfc_mf) >= DELTA).mean(0) >= 0.5).astype(int)
is_de_iaf = ((np.abs(lfc_iaf) >= DELTA).mean(0) >= 0.5).astype(int)

In [None]:
# The three per-gene arrays are plotted against each other below, so their
# shapes are printed side by side for a quick visual check.
print(*(per_gene.shape for per_gene in (means, mean_mf, is_de_mf)))

In [None]:
# MF model: per-gene mean sampled LFC (y) against the mean of dataset.X over
# label-0 cells (x, log scale). Markers are colored by the is_de_mf call.
fig = go.Figure()
trace = go.Scatter(
    x=means,
    y=mean_mf,
    mode="markers",
    marker_color=is_de_mf,
)

# A log axis has no position for x = 0, so guide lines anchored at x = 0 are
# not drawn as intended. Span them over the strictly positive range of the
# data instead of a hard-coded [0, 100].
positive_means = means[means > 0]
guide_x = [positive_means.min(), positive_means.max()]
guide_line = dict(color="red", width=4)
trace_gt0 = go.Scatter(
    x=guide_x,
    y=[DELTA, DELTA],
    mode="lines",
    line=guide_line,
    showlegend=False,
)
trace_gt1 = go.Scatter(
    x=guide_x,
    y=[-DELTA, -DELTA],
    mode="lines",
    line=guide_line,
    showlegend=False,
)
fig.add_traces([trace, trace_gt0, trace_gt1])
fig.update_layout(
    title="MF: mean LFC vs. mean expression",
    xaxis=dict(type="log", title="Mean of dataset.X over label-0 cells"),
    yaxis=dict(title="Mean sampled LFC (MF)"),
)
fig.show()

In [None]:
# IAF model: per-gene mean sampled LFC (y) against the mean of dataset.X over
# label-0 cells (x, log scale). Markers are colored by the is_de_iaf call.
fig = go.Figure()
trace = go.Scatter(
    x=means,
    y=mean_iaf,
    mode="markers",
    marker_color=is_de_iaf,
)

# A log axis has no position for x = 0, so guide lines anchored at x = 0 are
# not drawn as intended. Span them over the strictly positive range of the
# data instead of a hard-coded [0, 100].
positive_means = means[means > 0]
guide_x = [positive_means.min(), positive_means.max()]
guide_line = dict(color="red", width=4)
trace_gt0 = go.Scatter(
    x=guide_x,
    y=[DELTA, DELTA],
    mode="lines",
    line=guide_line,
    showlegend=False,
)
trace_gt1 = go.Scatter(
    x=guide_x,
    y=[-DELTA, -DELTA],
    mode="lines",
    line=guide_line,
    showlegend=False,
)
fig.add_traces([trace, trace_gt0, trace_gt1])
# The previous yaxis type "" is not an accepted axis type, so the y axis is
# left on its default. The x axis shows observed means, not a posterior
# quantity, and is titled accordingly.
fig.update_layout(
    title="IAF: mean LFC vs. mean expression",
    xaxis=dict(type="log", title="Mean of dataset.X over label-0 cells"),
    yaxis=dict(title="Mean sampled LFC (IAF)"),
)
fig.show()

In [None]:
# For the genes in random_genes: mean sampled LFC of each model with
# asymmetric error bars running from the lower to the upper bound returned by
# compute_hdi (columns 0 and 1 of hdis_*).
fig = go.Figure()
trace_mf = go.Scatter(
    x=means[random_genes],
    y=mean_mf[random_genes],
    mode="markers",
    name="MF",  # without a name the legend only shows "trace 0"
    error_y=dict(
        type="data",
        symmetric=False,
        array=(hdis_mf[:, 1] - mean_mf)[random_genes],
        arrayminus=(mean_mf - hdis_mf[:, 0])[random_genes],
    ),
)

trace_iaf = go.Scatter(
    x=means[random_genes],
    y=mean_iaf[random_genes],
    mode="markers",
    name="IAF",
    error_y=dict(
        type="data",
        symmetric=False,
        array=(hdis_iaf[:, 1] - mean_iaf)[random_genes],
        arrayminus=(mean_iaf - hdis_iaf[:, 0])[random_genes],
    ),
)

fig.add_traces([trace_mf, trace_iaf])
fig.update_layout(
    title="Mean LFC with HDI bounds for a random subset of genes",
    xaxis=dict(type="log", title="Mean of dataset.X over label-0 cells"),
    yaxis=dict(title="Mean sampled LFC"),
)

fig.show()

## Predictions comparison

In [None]:
# First entry of each method's "is_de" predictions in other_predictions.
is_pred_deseq2, is_pred_edger, is_pred_mast = (
    other_predictions[method]["is_de"][0] for method in ("deseq2", "edger", "mast")
)

In [None]:
# Model calls: a gene is predicted DE when its DE probability is at least 0.5.
is_pred_mf, is_pred_iaf = (
    de_probas >= 0.5 for de_probas in (de_probas_mf, de_probas_iaf)
)

In [None]:
from matplotlib_venn import venn3

In [None]:
# Display the shape of the DESeq2 predictions as the cell output.
is_pred_deseq2.shape

In [None]:
# Integer gene identifiers 0..n_genes-1, indexed by the is_pred_* arrays in the
# Venn diagram cell below.
genes = np.arange(n_genes)

In [None]:
# Overlap of the genes predicted DE by MAST, DESeq2 and the MF model.
# NOTE(review): indexing `genes` with each is_pred_* array presumes boolean
# masks of length n_genes -- confirm for the other_predictions entries.
venn_diagram = venn3(
    subsets=[
        set(genes[predicted_mask])
        for predicted_mask in (is_pred_mast, is_pred_deseq2, is_pred_mf)
    ],
    set_labels=["MAST", "DESeq2", "scVI"],
)

## Concordance

In [None]:
from sklearn.metrics import precision_recall_curve

# Per-gene scores, ranked from highest to lowest further down: the models' DE
# probabilities as they are, and the negated first row of each "pval" array
# for the other methods.
preds_mf, preds_iaf = de_probas_mf, de_probas_iaf
preds_deseq2, preds_edger, preds_mast = (
    -other_predictions[method]['pval'][0, :] for method in ('deseq2', 'edger', 'mast')
)

In [None]:
# One shape per line, in the order MF, IAF, DESeq2, EdgeR, MAST.
print(
    *(
        scores.shape
        for scores in (preds_mf, preds_iaf, preds_deseq2, preds_edger, preds_mast)
    ),
    sep="\n",
)

### K best

In [None]:
K = 100


def get_K_best(preds):
    """Return the indices of the K highest scores in `preds`, best first.

    `preds` is negated and argsorted, so position 0 of the result is the index
    of the largest score. At most K indices are returned.
    """
    descending_order = np.argsort(-preds)
    return descending_order[:K]

# Top-K gene indices of every method, each ordered from best to worst score.
best_mf, best_iaf, best_deseq2, best_edger, best_mast = map(
    get_K_best, (preds_mf, preds_iaf, preds_deseq2, preds_edger, preds_mast)
)

def get_aucc_couple(best1, best2, k_val=None):
    """Area under the concordance curve (AUCC) between two rankings.

    For every k in 1..k_val the concordance is the number of distinct items
    shared by the first k entries of `best1` and of `best2`. The curve,
    anchored at (0, 0), is integrated with the trapezoidal rule and divided by
    ``k_val ** 2 / 2``. Two identical rankings without repeated entries
    therefore score exactly 1.0, and two disjoint rankings score 0.0.

    Parameters
    ----------
    best1, best2 : array-like
        Item indices ordered from best to worst.
    k_val : int, optional
        Depth up to which the rankings are compared. When omitted, the
        notebook-level ``K`` is used, looked up at call time.

    Returns
    -------
    float
        The normalised area.

    Raises
    ------
    ValueError
        If `k_val` is smaller than 1 (the normalisation would divide by zero).
    """
    if k_val is None:
        k_val = K
    if k_val < 1:
        raise ValueError("k_val must be at least 1, got {}".format(k_val))

    # k runs up to and including k_val, so the last ranked item contributes.
    concordances = np.array(
        [len(np.intersect1d(best1[:k], best2[:k])) for k in range(1, k_val + 1)]
    )
    # Trapezoidal rule over k = 0..k_val with concordance(0) = 0: interior
    # points count fully, the final point counts for half.
    area = concordances.sum() - concordances[-1] / 2
    return area / (k_val * k_val / 2)

# Normalisation check: a ranking compared with itself yields the highest score
# get_aucc_couple can return. Calling the function exercises the real code
# path instead of a re-typed copy of its formula.
print(get_aucc_couple(best_mf, best_mf))

methods = [
    best_mf,
    best_iaf,
    best_deseq2,
    best_edger,
    best_mast,
]
# NOTE(review): `labels` is also bound to dataset.labels.squeeze() near the top
# of the notebook; this rebinds it to the method names used by the heatmap.
labels = [
    "MF",
    "IAF",
    "DESeq2",
    "EdgeR",
    "MAST",
]

# Pairwise AUCC between methods; the diagonal stays at 1 from np.eye.
concs_mat = np.eye(len(methods))
# Only the upper triangle is computed and then mirrored. The loop indices are
# deliberately not named idx_a / idx_b: those names hold the sampled cell
# indices passed to compute_lfc earlier in the notebook.
for i_method, method_a in enumerate(tqdm_notebook(methods)):
    for j_method in range(i_method + 1, len(methods)):
        aucc = get_aucc_couple(method_a, methods[j_method])
        concs_mat[i_method, j_method] = aucc
        concs_mat[j_method, i_method] = aucc

In [None]:
# import plotly.figure_factory as ff


# ff.create_dendrogram(X=concs_mat, labels=labels)

In [None]:
# X = np.random.rand(10, 1)
# names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark', 'Alice', 'Charlie', 'Rob', 'Lisa', 'Lily']
# fig = ff.create_dendrogram(X, orientation='left', labels=names)
# fig.update_layout(width=800, height=800)
# fig.show()

In [None]:
import plotly.figure_factory as ff

# Pairwise concordance between methods. Cells are annotated with values
# rounded to two decimals; by default every float is printed at full precision.
fig = ff.create_annotated_heatmap(
    concs_mat,
    x=labels,
    y=labels,
    annotation_text=np.round(concs_mat, 2).tolist(),
    colorscale='Viridis',
    showscale=True,
)
fig.show()

### Significance

In [None]:
# Full rankings (every gene, highest score first). These rebind the best_*
# names that previously held only the top-K indices.
best_mf = np.argsort(-preds_mf)
best_iaf = np.argsort(-preds_iaf)
best_deseq2 = np.argsort(-preds_deseq2)
best_edger = np.argsort(-preds_edger)
best_mast = np.argsort(-preds_mast)

methods = [
    best_mf,
    best_iaf,
    best_deseq2,
    best_edger,
    best_mast,
]
# NOTE(review): `labels` is also bound to dataset.labels.squeeze() near the top
# of the notebook; this rebinds it to the method names used by the heatmap.
labels = [
    "MF",
    "IAF",
    "DESeq2",
    "EdgeR",
    "MAST",
]

# Pairwise AUCC over the whole ranking (k_val=n_genes); the diagonal stays at
# 1 from np.eye.
concs_mat = np.eye(len(methods))
# Only the upper triangle is computed and then mirrored. The loop indices are
# deliberately not named idx_a / idx_b: those names hold the sampled cell
# indices passed to compute_lfc earlier in the notebook.
for i_method, method_a in enumerate(tqdm_notebook(methods)):
    for j_method in range(i_method + 1, len(methods)):
        aucc = get_aucc_couple(method_a, methods[j_method], k_val=n_genes)
        concs_mat[i_method, j_method] = aucc
        concs_mat[j_method, i_method] = aucc

In [None]:
import plotly.figure_factory as ff

# Pairwise concordance between methods over the full ranking. Cells are
# annotated with values rounded to two decimals; by default every float is
# printed at full precision.
fig = ff.create_annotated_heatmap(
    concs_mat,
    x=labels,
    y=labels,
    annotation_text=np.round(concs_mat, 2).tolist(),
    colorscale='Viridis',
    showscale=True,
)
fig.show()