In [None]:
import numpy as np
import os
import torch
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import plotly as py
import pandas as pd
from chart_studio.plotly import plot, iplot

# from plotly.offline import init_notebook_mode, iplot
from tqdm import tqdm_notebook

from scvi.dataset import (
    PowSimSynthetic,
    LatentLogPoissonDataset,
    SignedGamma,
    GeneExpressionDataset,
)
from scvi.models import VAE, IAVAE
from scvi.inference import UnsupervisedTrainer
from scvi.utils import (
    demultiply,
    make_dir_if_necessary,
    predict_de_genes,
    save_fig,
    load_pickle,
    save_pickle,
    has_lower_mean,
    softmax,
    compute_hdi
)
from scvi_utils import (
    estimate_de_proba,
    estimate_lfc_density,
    estimate_lfc_mean,
    multi_train_estimates,
    train_model
)
from R_interop import all_predictions, all_de_predictions


# --- Experiment configuration ---------------------------------------------
N_EPOCHS = 200  # default n_epochs used in train_fn_params
DELTA = 0.5  # |LFC| threshold used to label a gene as truly DE
SIZES = [5, 10, 20, 30, 50, 100]  # passed as `sizes` to the benchmark helpers
SIZE = 100
N_SIZES = len(SIZES)
DO_CLOUD = True
Q0 = 5e-2  # target FDR / significance level for DE calls
N_TRAININGS = 5  # passed as `n_trainings` to multi_train_estimates
N_PICKS = 10  # passed as `n_picks` to the benchmark helpers
n_genes = 1000  # number of simulated genes (re-assigned from the dataset later on)
FREQUENCY = 1  # passed as `frequency` in train_params
# Seed both generators: the simulation below draws from numpy and from torch.
np.random.seed(42)
torch.manual_seed(42)

# NOTE(review): absolute, machine-specific paths; the notebook only runs
# unchanged on a host with this exact layout.
PATH_TO_SCRIPTS = "/home/ubuntu/conquer_comparison/scripts"
DIR_PATH = "lfc_estimates/lognormal2"
DF_PATH = "/home/ubuntu/scVI/scvi/dataset/kolodziejczk_param.csv"
make_dir_if_necessary(DIR_PATH)

# Generate Dataset

In [None]:
    import chart_studio.plotly as py

py.sign_in("pierreboyeau", "2wvdnWZ2Qut1zD07ADVy")

## Constructing mu and sigma

In [None]:
# Draw n_genes rows from the parameter table and use their "means" column as
# baseline expression levels.
selected = pd.read_csv(DF_PATH).sample(n_genes)
means = selected["means"].values

# Cap the baseline means at 1000.
means[means >= 1000] = 1000
go.Figure([go.Histogram(x=means)]).show()

# One log-fold-change per gene and per population (two columns).
lfc_sampler = SignedGamma(dim=2, proba_pos=0.5)
lfcs = lfc_sampler.sample(n_genes).numpy()
# NOTE(review): np.random.choice samples WITH replacement by default, so fewer
# than 300 distinct genes may end up with a zero LFC. Confirm whether
# replace=False was intended (changing it alters the simulated dataset).
non_de_genes = np.random.choice(n_genes, size=300)
lfcs[non_de_genes, :] = 0.0
go.Figure([go.Histogram(x=lfcs[:, 0])]).show()

# Per-population log2 means: baseline shifted by the population's LFC.
log2_mu0 = lfcs[:, 0] + np.log2(means)
log2_mu1 = lfcs[:, 1] + np.log2(means)

# Convert from log2 to natural log: ln(x) = log2(x) / log2(e).
loge_mu0 = log2_mu0 / np.log2(np.e)
loge_mu1 = log2_mu1 / np.log2(np.e)

In [None]:
# DEMO
# Small 100x100 illustration of the covariance structure: a rank-one term
# a a^T plus a random positive diagonal, shown as a heatmap.
# NOTE: this cell draws from the global numpy RNG, so running or skipping it
# changes every random quantity generated afterwards.
a = (2.0 * np.random.random(size=(100, 1)) - 1).astype(float)
# sigma = 2.0*a.dot(a.T) + (1.0 + 0.5*(2.0*np.random.random(100)-1.0)) * np.eye(100)
sigma = 0.5 * a.dot(a.T) + (1.0 + 0.5 * (2.0 * np.random.random(100) - 1.0)) * np.eye(
    100
)

fig = go.Figure(data=go.Heatmap(z=sigma))
fig.show()

In [None]:
# Covariance of population 0: rank-one term plus a random positive diagonal,
# scaled by 0.1.
a = (2.0 * np.random.random(size=(n_genes, 1)) - 1).astype(float)
sigma = 2.0 * a.dot(a.T) + 0.5 * (
    1.0 + 0.5 * (2.0 * np.random.random(n_genes) - 1.0)
) * np.eye(n_genes)
sigma0 = 0.1 * sigma

# sigma = 0.5 *a.dot(a.T) + (1.0 + 0.5*(2.0*np.random.random(n_genes)-1.0)) * np.eye(n_genes)
# sigma0 = 0.05*sigma

# A second, independent covariance is drawn here ...
a = (2.0 * np.random.random(size=(n_genes, 1)) - 1).astype(float)
sigma = 2.0 * a.dot(a.T) + 0.5 * (
    1.0 + 0.5 * (2.0 * np.random.random(n_genes) - 1.0)
) * np.eye(n_genes)
sigma1 = 0.1 * sigma
# ... but it is immediately discarded: both populations share sigma0.
# NOTE(review): the draw above is dead code value-wise, yet it still advances
# the numpy RNG, so deleting it would change all later random draws.
sigma1 = sigma0

# sigma1 = sigma

# u, s, vh = np.linalg.svd(sigma)
# perturbations = s.min() + (s.max() - s.min()) * np.random.random(len(s))
# sigma1 = u @ (np.diag(perturbations)) @ vh

In [None]:
# Draw 5000 latent log-expression vectors per population from a multivariate
# normal centred on the population's natural-log means.
h0 = torch.distributions.MultivariateNormal(
    loc=torch.tensor(loge_mu0), covariance_matrix=torch.tensor(sigma0)
).sample((5000,))
h1 = torch.distributions.MultivariateNormal(
    loc=torch.tensor(loge_mu1), covariance_matrix=torch.tensor(sigma1)
).sample((5000,))

# Stack both populations: rows 0..4999 come from h0, rows 5000..9999 from h1.
h = torch.cat([h0, h1])

In [None]:
# Poisson counts with rate exp(h).
x_obs = torch.distributions.Poisson(rate=h.exp()).sample()
# is_zi = np.random.random(x_obs.shape) >= 0.9
# Zero inflation: an entry is dropped either with probability exp(-0.5 * count)
# or, independently, with probability 0.4.
is_zi = (np.random.random(x_obs.shape) <= np.exp(-0.5 * x_obs.numpy())) | (
    np.random.random(x_obs.shape) <= 0.4
)
print(is_zi.mean())
x_obs[is_zi] = 0.0
# Population labels follow the stacking order of h (first 5000 rows -> 0).
labels = torch.zeros((10000, 1))
labels[5000:] = 1

# Remove cells whose counts are all zero.
# NOTE(review): h is not filtered with the same mask, so if any cell is removed
# here, h no longer lines up row-for-row with x_obs / labels.
not_null_cell = x_obs.sum(1) != 0
x_obs = x_obs[not_null_cell]
labels = labels[not_null_cell]

# Distribution of the per-gene mean count.
trace1 = go.Histogram(x=x_obs.mean(0))
fig = go.Figure(data=[trace1])
fig.show()

In [None]:
# Count histograms for a handful of individual genes (column indices below).
for _gene_idx in (500, 20, 28, 100):
    _ = plt.hist(x_obs[:, _gene_idx], bins=100)
    plt.show()

In [None]:
# Snapshot of the freshly simulated LFCs, taken before `lfcs` may be replaced
# by the values stored in the cached dataset.
lfcs0 = lfcs.copy()

In [None]:
# assert False
# The dataset is built once and cached on disk; later runs reuse the pickle.
dataset_path = os.path.join(DIR_PATH, "dataset.pickle")
if os.path.exists(dataset_path):
    # Cached run: the stored LFCs replace the ones simulated in this session.
    dataset = load_pickle(dataset_path)
    lfcs = dataset.lfc
else:
    # First run: wrap the simulated counts and labels, attach the ground-truth
    # LFCs and persist the result.
    dataset = GeneExpressionDataset()
    dataset.populate_from_data(X=x_obs.numpy(), labels=labels.numpy())
    dataset.lfc = lfcs
    save_pickle(data=dataset, filename=dataset_path)

In [None]:
# Sanity check: the LFCs in use (possibly loaded from the cache) must equal the
# ones simulated in this session. Reduced to a single boolean instead of
# displaying the whole element-wise comparison matrix.
np.array_equal(lfcs, lfcs0)

In [None]:
# Empirical ground-truth LFC: per-gene mean of log2(exp(h0)) - log2(exp(h1))
# over the pairs returned by demultiply.
# NOTE(review): this is "population 0 minus population 1", whereas the histogram
# below shows lfcs[:, 1] - lfcs[:, 0] (opposite sign).
h0_bis, h1_bis = demultiply(h0, h1, factor=6)
lfc_orig = h0_bis.exp().log2() - h1_bis.exp().log2()
lfc_gt = lfc_orig.mean(0)
lfc_gt = lfc_gt.numpy()

# is_significant_de = (np.abs(lfc_orig) >= DELTA).numpy().mean(0) >= 0.5
# A gene is truly DE when its two simulated LFCs differ by at least DELTA.
is_significant_de = np.abs(lfcs[:, 0] - lfcs[:, 1]) >= DELTA
# From here on n_genes is the dataset's gene count.
n_genes = dataset.nb_genes
trace1 = go.Histogram(x=lfcs[:, 1] - lfcs[:, 0])
fig = go.Figure(data=[trace1])
# save_fig(fig, filename="powsimR_properties", do_cloud=DO_CLOUD)
# fig.show()
# Uploads the figure through chart_studio (requires the sign-in cell).
iplot(fig, filename="lognormal2_properties")

In [None]:
# Hold out 2000 randomly chosen cells as the test set.
n_examples = len(dataset)
TEST_INDICES = np.random.permutation(n_examples)[:2000]

x_test, y_test = dataset.X[TEST_INDICES, :], dataset.labels[TEST_INDICES, :].squeeze()
data_path = os.path.join(DIR_PATH, "data.npy")
labels_path = os.path.join(DIR_PATH, "labels.npy")
means_path = os.path.join(DIR_PATH, "means.npy")

# Files handed to all_predictions below (data_path / labels_path / means_path).
np.save(data_path, x_test.squeeze().astype(int))
# NOTE(review): TEST_INDICES index the dataset rows, while h still holds every
# simulated cell; the two only line up if no all-zero cell was filtered out.
np.save(means_path, h[TEST_INDICES].exp())
# NOTE(review): written as plain text despite the ".npy" extension.
np.savetxt(labels_path, y_test.squeeze())

## Train parameters

In [None]:
# Early-stopping settings, forwarded as `early_stopping_kwargs` by the
# train_params entries that enable monitoring.
EARLY_STOPPING_KWARGS = {
    "early_stopping_metric": "elbo_ratio_loss",
    "save_best_state_metric": "elbo_ratio_loss",
    "patience": 20,
    "threshold": 0,
    "reduce_lr_on_plateau": True,
    "lr_patience": 10,
    "lr_factor": 0.2,
}

In [None]:
# Model constructor kwargs, keyed by configuration name. Each entry is unpacked
# into the model class by the training helpers.
# Only "mf" and "iaf" are used by the active experiment cells; the other
# entries are referenced from commented-out experiments only.
mdl_params = dict(
    iaf_b=dict(n_hidden=128, n_layers=2, do_h=False, n_latent=12, t=3, dropout_rate=0.2),
    mf_b=dict(n_hidden=128, n_layers=1, n_latent=5, dropout_rate=0.1),

    
    iaf=dict(n_hidden=128, n_layers=1, do_h=True, n_latent=10, t=4, dropout_rate=0.2),
    iaf_res=dict(
        n_hidden=128,
        n_layers=1,
        do_h=True,
        n_latent=10,
        t=4,
        dropout_rate=0.2,
        n_blocks_encoder=1,
        res_connection_decoder=False,
    ),
    mf=dict(n_hidden=128, n_layers=1, n_latent=10, dropout_rate=0.2),
    mf_skip=dict(
        n_hidden=128,
        n_layers=1,
        n_latent=10,
        dropout_rate=0.2,
        n_blocks=1,
        decoder_do_last_skip=True,
    ),
    mf_skip2=dict(
        n_hidden=128,
        n_layers=1,
        n_latent=10,
        dropout_rate=0.2,
        n_blocks=2,
        decoder_do_last_skip=True,
    ),
    iaf_at=dict(
        n_hidden=128, n_layers=1, do_h=False, n_latent=10, t=2, dropout_rate=0.2
    ),
    mf_at=dict(n_hidden=128, n_layers=1, n_latent=5, dropout_rate=0.1),
    iaf_k5=dict(n_hidden=128, n_layers=1, do_h=True, n_latent=10, t=4),
    iaf_skip=dict(
        n_hidden=128,
        n_layers=1,
        do_h=True,
        n_latent=10,
        t=4,
        dropout_rate=0.2,
        n_blocks=1,
        decoder_do_last_skip=True,
    ),
    iaf_skip2=dict(
        n_hidden=128,
        n_layers=1,
        do_h=True,
        n_latent=10,
        t=4,
        dropout_rate=0.2,
        n_blocks=2,
        decoder_do_last_skip=True,
    ),
    mf_k5=dict(n_hidden=128, n_layers=1, n_latent=10),
)
# Trainer kwargs, keyed by configuration name.
# Three flavours exist:
#   * plain      - ratio loss on the fixed test split, no monitoring
#                  (early stopping is deliberately left out for "iaf" / "mf");
#   * monitored  - plain + periodic evaluation and early stopping;
#   * importance-weighted - monitored + k_importance_weighted=5.
_plain_trainer_kwargs = dict(ratio_loss=True, test_indices=TEST_INDICES)
_monitored_trainer_kwargs = dict(
    _plain_trainer_kwargs,
    frequency=FREQUENCY,
    early_stopping_kwargs=EARLY_STOPPING_KWARGS,
)
_iw_trainer_kwargs = dict(
    _plain_trainer_kwargs,
    k_importance_weighted=5,
    single_backward=False,
    frequency=FREQUENCY,
    early_stopping_kwargs=EARLY_STOPPING_KWARGS,
)
# Every entry gets its own (shallow) copy so entries can be edited independently.
train_params = {
    "iaf": dict(_plain_trainer_kwargs),
    "base": dict(_monitored_trainer_kwargs),
    "iaf_b": dict(_monitored_trainer_kwargs),
    "mf": dict(_plain_trainer_kwargs),
    "iaf_k5": dict(_iw_trainer_kwargs),
    "mf_k5": dict(_iw_trainer_kwargs),
}
# Arguments of the trainer's train() call, keyed by configuration name.
# Every configuration trains for N_EPOCHS at lr=1e-2, except "base" which
# runs for 600 epochs.
train_fn_params = {
    config_name: dict(n_epochs=N_EPOCHS, lr=1e-2)
    for config_name in ("iaf", "base", "iaf_b", "mf", "iaf_k5", "mf_k5")
}
train_fn_params["base"] = dict(n_epochs=600, lr=1e-2)

# Compute competitors' scores

In [None]:
# Show which cached artefacts already exist in the experiment directory.
os.listdir(DIR_PATH)

In [None]:
# Run the R-based competitors on the exported test split.
# The results are later indexed as other_predictions[<method>][<field>] with
# methods "edger", "deseq2", "mast" and fields such as "lfc" and "pval".
# NOTE(review): `filename` looks like a results cache - confirm in R_interop.
other_predictions = all_predictions(
    filename=os.path.join(DIR_PATH, "other_predictions_double_check.pickle"),
    n_genes=n_genes,
    n_picks=N_PICKS,
    sizes=SIZES,
    data_path=data_path,
    labels_path=labels_path,
    normalized_means=means_path,
    delta=DELTA,
    path_to_scripts=PATH_TO_SCRIPTS,
)

# Derive DE calls at significance level Q0 and LFC threshold DELTA; the
# resulting "is_de" field is read further down.
other_predictions = all_de_predictions(
    other_predictions, significance_level=Q0, delta=DELTA
)

Check sign of LFC 

In [None]:
# Which fields are available for a competitor?
other_predictions["edger"].keys()

In [None]:
# The LFC array is indexed below as [size, pick, gene].
other_predictions["edger"]["lfc"].shape

In [None]:
from scvi.utils import plot_identity

# Ground-truth LFC against each competitor's estimate (last sample size, last
# pick). Points along the identity line mean the sign conventions agree.
for _method in ("edger", "deseq2", "mast"):
    plt.scatter(lfc_gt, other_predictions[_method]["lfc"][-1, -1, :])
    plot_identity()
    plt.show()

In [None]:
# Flip the sign of the edgeR and MAST LFC estimates.
# WARNING: this cell is not idempotent - executing it a second time flips the
# signs back. Run it exactly once per kernel session.
other_predictions["edger"]["lfc"] = -other_predictions["edger"]["lfc"]
other_predictions["mast"]["lfc"] = -other_predictions["mast"]["lfc"]

# Experiments

In [None]:
# Show which result pickles already exist before launching the experiments.
os.listdir(DIR_PATH)

In [None]:
# TODO: redo experiments, already done for MF (called 2)

In [None]:
# Mean-field VAE: N_TRAININGS trainings, N_PICKS picks for every sample size in
# SIZES, comparing label 0 against label 1. The result is a DataFrame (tagged
# here with algorithm="MF") whose columns experiment / training / sample_size /
# gene / de_proba are used by the analysis cells below.
# NOTE(review): `filename` looks like a results cache - confirm in scvi_utils.
res_mf = multi_train_estimates(
    filename=os.path.join(DIR_PATH, "res_mf_final1_high_lr_epochs.pickle"),
#     filename=os.path.join(DIR_PATH, "res_mf_final1_high_lr_epochs3.pickle"),
#         filename=os.path.join(DIR_PATH, "res_mf.pickle"),
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
    sizes=SIZES,
    n_trainings=N_TRAININGS,
    n_picks=N_PICKS,
    n_samples=500,
    label_a=0,
    normalized_means=h.exp(),
    label_b=1,
).assign(algorithm="MF")

# res_mf_skip = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_mf_skip_final1_high_lr_epochs.pickle"),
#     #     filename=os.path.join(DIR_PATH, "res_mf.pickle"),
#     mdl_class=VAE,
#     dataset=dataset,
#     mdl_params=mdl_params["mf_skip"],
#     train_params=train_params["mf"],
#     train_fn_params=train_fn_params["mf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="MF")

# res_mf_skip2 = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_mf_skip2_final1_high_lr_epochs.pickle"),
#     #     filename=os.path.join(DIR_PATH, "res_mf.pickle"),
#     mdl_class=VAE,
#     dataset=dataset,
#     mdl_params=mdl_params["mf_skip2"],
#     train_params=train_params["mf"],
#     train_fn_params=train_fn_params["mf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="MF")



# Inverse-autoregressive-flow VAE (IAVAE): same protocol as the mean-field run
# (N_TRAININGS trainings, N_PICKS picks per sample size, label 0 vs label 1),
# tagged with algorithm="IAF".
# NOTE(review): `filename` looks like a results cache - confirm in scvi_utils.
res_iaf = multi_train_estimates(
    filename=os.path.join(DIR_PATH, "res_iaf_final1_high_lr_epochs.pickle"),
#     filename=os.path.join(DIR_PATH, "res_iaf_final1_high_lr_epochs3.pickle"),
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
    sizes=SIZES,
    n_trainings=N_TRAININGS,
    n_picks=N_PICKS,
    n_samples=500,
    normalized_means=h.exp(),
    label_a=0,
    label_b=1,
).assign(algorithm="IAF")

# res_iafk5 = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iafk5_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="IAF K5")

# res_iaf_res = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_res_at_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_res"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="IAF AT")

# res_iaf_at = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf__at_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_at"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="IAF AT")

# res_iaf_skip = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_skip_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_skip"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="IAF SKIP")

# res_iaf_skip2 = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_skip2_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_skip2"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
# ).assign(algorithm="IAF SKIP")
# res_iaf_is = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_is_final1_high_lr_epochs.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1,
#     importance_sampling=True
# ).assign(algorithm="IAF IS")


# res_mf = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_mf_at.pickle"),
#     mdl_class=VAE,
#     dataset=dataset,
#     mdl_params=mdl_params["mf_at"],
#     train_params=train_params["mf"],
#     train_fn_params=train_fn_params["mf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1
# ).assign(algorithm="MF")

# res_iaf = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_at.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_at"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1
# ).assign(algorithm="IAF")

In [None]:
# Distribution of the IAF DE probabilities at the smallest (5) and largest
# (100) sample sizes.
res_iaf.loc[res_iaf.sample_size == 5, "de_proba"].hist()
plt.show()
res_iaf.loc[res_iaf.sample_size == 100, "de_proba"].hist()

In [None]:
def algos_comparison(my_df, key1, other_keys, key_values="error"):
    """Tell whether algorithm ``key1`` beats every algorithm in ``other_keys``.

    :param my_df: DataFrame with an ``"algorithm"`` column and a ``key_values``
        column holding the metric to compare (lower is better).
    :param key1: name of the candidate algorithm.
    :param other_keys: names of the algorithms to compare against.
    :param key_values: name of the metric column.
    :return: ``True`` when ``has_lower_mean`` reports a lower mean for ``key1``
        against every competitor, ``False`` otherwise. A ``ValueError`` raised
        by ``has_lower_mean`` counts as "not better". With no competitor the
        result is ``True`` (the previous implementation crashed in that case
        because it returned a variable that is only bound inside the loop).
    """
    vals_key1 = my_df.loc[my_df["algorithm"] == key1, key_values].values
    for key2 in other_keys:
        vals_other = my_df.loc[my_df["algorithm"] == key2, key_values].values
        try:
            key1_better = has_lower_mean(vals_key1, vals_other)
        except ValueError:
            # The comparison could not be carried out: do not claim a win.
            return False
        if not key1_better:
            return False
    return True

## FDR / Power Control and PR Curves

In [None]:
def train_model(
    mdl_class, dataset, mdl_params: dict, train_params: dict, train_fn_params: dict
):
    """Build a model, wrap it in an ``UnsupervisedTrainer`` and train it.

    :param mdl_class: model class; called as
        ``mdl_class(dataset.nb_genes, n_batch=dataset.n_batches, **mdl_params)``
    :param dataset: dataset the model is trained on
    :param mdl_params: extra keyword arguments for the model constructor
    :param train_params: keyword arguments for ``UnsupervisedTrainer``
    :param train_fn_params: keyword arguments for the trainer's ``train`` call
    :return: ``(model, trainer)`` after training
    """
    model = mdl_class(dataset.nb_genes, n_batch=dataset.n_batches, **mdl_params)
    trainer = UnsupervisedTrainer(model, dataset, **train_params)
    # Debug output: indices used by the test-set sampler.
    held_out_indices = trainer.test_set.data_loader.sampler.indices
    print(held_out_indices)
    trainer.train(**train_fn_params)
    # Debug output: losses recorded by the trainer.
    print(trainer.train_losses)
    return model, trainer

### FDR and TPR Control

#### Posterior Expected FDR

TODO: Compare flows with MF on the same 5 cells, and compare the posterior expected (PE) FDR with the true FDR.
Show that the FDR is better estimated with flows, which is a very nice result.

In the paper, it is fine to use two decision rules. It is also fine to say that the PE FDR is over-conservative.
State that this will be investigated in a future paper.



In [None]:
def get_fdr(probas):
    """Rank genes by DE probability and compute the posterior expected FDR.

    :param probas: 1-D array-like of per-gene DE probabilities.
    :return: ``(cumulative_fdr, sorted_genes)`` where ``sorted_genes`` holds the
        gene indices ordered by decreasing probability and ``cumulative_fdr[k]``
        is the mean of ``1 - proba`` over the ``k + 1`` top-ranked genes.
    """
    probas = np.asarray(probas)
    sorted_genes = np.argsort(-probas)
    sorted_pgs = probas[sorted_genes]
    # Running mean of the posterior probability of being a false discovery.
    cumulative_fdr = (1.0 - sorted_pgs).cumsum() / (1.0 + np.arange(len(sorted_pgs)))
    return cumulative_fdr, sorted_genes


def get_fdr_gt(my_sorted_genes):
    """Ground-truth FDR obtained when calling the top-k ranked genes, for every k.

    Relies on the notebook globals ``n_genes`` and ``is_significant_de``
    (boolean ground truth, one entry per gene).

    :param my_sorted_genes: gene indices ordered from most to least likely DE.
    :return: array of length ``n_genes``; entry ``k`` is the number of non-DE
        genes among the first ``k + 1`` ranked genes divided by ``k + 1``.
    """
    is_not_de = ~is_significant_de
    already_called = np.zeros(n_genes, dtype=bool)
    n_false_discoveries = 0
    fdr_k = []
    # A running count replaces the former rebuild of the full prediction vector
    # at every k (quadratic in n_genes); the values are identical.
    for k in range(n_genes):
        if k < len(my_sorted_genes):
            gene = my_sorted_genes[k]
            # A gene listed twice is only counted once.
            if not already_called[gene]:
                already_called[gene] = True
                n_false_discoveries += int(is_not_de[gene])
        fdr_k.append(n_false_discoveries / (k + 1))
    return np.array(fdr_k)

**Sample-size effects**

In [None]:
# probas_5 = res_mf.loc[
#     lambda x: (x.experiment == 3) & (x.training == 0) & (x.sample_size == 5), "de_proba"
# ].values
# probas_100 = res_mf.loc[
#     lambda x: (x.experiment == 3) & (x.training == 0) & (x.sample_size == 100),
#     "de_proba",
# ].values

# cumulative_fdr_5, sorted_genes_5 = get_fdr(probas_5)
# fdr_k = get_fdr_gt(sorted_genes_5)

# cumulative_fdr_100, sorted_genes_100 = get_fdr(probas_100)
# fdr_k_100 = get_fdr_gt(sorted_genes_100)

# fig = go.Figure(
#     [
#         go.Scatter(y=cumulative_fdr_5, name="Posterior Expected FDR"),
#         go.Scatter(y=fdr_k, name="Ground-Truth FDR"),
#     ]
# )

# iplot(fig, sharing="private", filename="logpoisson_pe_fdr5")

# fig = go.Figure(
#     [
#         go.Scatter(y=cumulative_fdr_100, name="Posterior Expected FDR"),
#         go.Scatter(y=fdr_k_100, name="Ground-Truth FDR"),
#     ]
# )

# fig.show()
# # iplot(fig, sharing="private", filename="logpoisson_pe_fdr100")

**IAF vs MF**

In [None]:
def _one_small_run(frame):
    """Rows of experiment 2, training 0, at sample size 5."""
    return (frame.experiment == 2) & (frame.training == 0) & (frame.sample_size == 5)


probas_mf = res_mf.loc[_one_small_run, "de_proba"].values
probas_iaf = res_iaf.loc[_one_small_run, "de_proba"].values

# Posterior expected FDR and matching ground-truth FDR along each ranking.
cumulative_fdr_mf, sorted_genes_mf = get_fdr(probas_mf)
fdr_mf = get_fdr_gt(sorted_genes_mf)

cumulative_fdr_iaf, sorted_genes_iaf = get_fdr(probas_iaf)
fdr_iaf = get_fdr_gt(sorted_genes_iaf)

for _curve, _label in (
    (cumulative_fdr_mf, "PE FDR MF"),
    (cumulative_fdr_iaf, "PE FDR IAF"),
    (fdr_mf, "True MF"),
    (fdr_iaf, "True IAF"),
):
    plt.plot(_curve, label=_label)
plt.legend()
plt.show()

all_res = pd.concat([res_mf, res_iaf], ignore_index=True)

def apply_fdr_compute(my_df):
    """Group-wise wrapper around ``get_fdr``.

    :param my_df: DataFrame with a ``"de_proba"`` column (one group of runs).
    :return: Series with two entries, ``"cum_pefdr"`` (posterior expected FDR
        curve) and ``"sorted_genes"`` (the corresponding gene ranking).
    """
    pefdr_curve, ranking = get_fdr(my_df["de_proba"].values)
    return pd.Series({"cum_pefdr": pefdr_curve, "sorted_genes": ranking})


# One row per (experiment, training, sample_size, algorithm): posterior expected
# FDR curve, gene ranking and the matching ground-truth FDR curve.
pfdr_study_all = (
    all_res.groupby(["experiment", "training", "sample_size", "algorithm"])
    .apply(apply_fdr_compute)
    .reset_index()
)
pfdr_study_all.loc[:, "fdr_gt"] = pfdr_study_all.loc[:, "sorted_genes"].apply(
    get_fdr_gt
)

pfdr_study_all.loc[:, "diff"] = pfdr_study_all["cum_pefdr"] - pfdr_study_all["fdr_gt"]
pfdr_study_all.loc[:, "L2error"] = pfdr_study_all["diff"].apply(np.linalg.norm)
# NOTE(review): despite its name this column is the maximum SIGNED difference,
# neither an L1 norm nor a max-abs error - confirm which metric is intended.
pfdr_study_all.loc[:, "L1error"] = pfdr_study_all["diff"].apply(np.max)

my_key = "L1error"

# Per sample size, is one algorithm better than the other on `my_key`?
gped = pfdr_study_all.groupby("sample_size")
fdr_mf_better = gped.apply(
    algos_comparison, key1="MF", other_keys=["IAF"], key_values=my_key
)
fdr_iaf_better = gped.apply(
    algos_comparison, key1="IAF", other_keys=["MF"], key_values=my_key
)

res_table = (
    pfdr_study_all.groupby(["sample_size", "algorithm"])[my_key]
    .mean()
    .round(3)
    .reset_index()
)

res_table.loc[res_table["algorithm"] == "MF", "err_better"] = fdr_mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "err_better"] = fdr_iaf_better.values

# LaTeX formatting. The metric column is switched to object dtype first so that
# strings are not written into a float column, and the flag column is reduced
# to a clean boolean mask.
is_better = res_table["err_better"].eq(True)
res_table[my_key] = res_table[my_key].astype(object)
# Raw string: "\m" is not a valid escape sequence in a regular string literal.
res_table.loc[is_better, my_key] = res_table.loc[is_better, my_key].apply(
    lambda x: r"\mathbf{{ {} }}".format(x)
)
res_table[my_key] = res_table[my_key].apply(lambda x: "$ {} $".format(x))

res_table.pivot(index="algorithm", columns="sample_size", values=[my_key]).T

In [None]:
# NOTE(review): without an aggregation this only displays the GroupBy object's
# repr; it looks like a leftover from interactive exploration.
pfdr_study_all.groupby("sample_size")

#### FDR vals

In [None]:
# False: call a gene DE when de_proba >= 0.5. True: let predict_de_genes pick
# the genes for a target FDR of Q0.
CONTROL_ALPHA = False


def fdr_fnr(my_df, control_alpha=True):
    """True FDR / FNR of the DE calls of a single run.

    Relies on the notebook globals ``n_genes``, ``is_significant_de`` (ground
    truth, one entry per gene), ``predict_de_genes`` and ``Q0``.

    :param my_df: rows of one run, with ``gene`` and ``de_proba`` columns and
        exactly one row per gene.
    :param control_alpha: if True the calls come from
        ``predict_de_genes(de_proba, desired_fdr=Q0)``; otherwise a gene is
        called DE when ``de_proba >= 0.5``.
    :return: Series with ``fdr``, ``fnr`` and ``alpha`` (the smallest
        probability among called genes, or 0.5 for the fixed threshold).
        When no gene is called, ``fdr`` is 0.0 (no discovery, hence no false
        discovery) and, under ``control_alpha``, ``alpha`` is NaN.
    """
    my_df = my_df.sort_values("gene")
    assert len(my_df) == n_genes
    de_probas = my_df.de_proba.values
    if control_alpha:
        is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)
        called_probas = de_probas[is_pred_de]
        # An empty selection has no minimum; report NaN instead of crashing.
        alpha = called_probas.min() if called_probas.size > 0 else np.nan
    else:
        is_pred_de = de_probas >= 0.5
        alpha = 0.5
    n_called = is_pred_de.sum()
    n_false_positives = ((1.0 - is_significant_de) * is_pred_de).sum()
    # Guard against 0 / 0 when nothing is called.
    true_fdr = n_false_positives / n_called if n_called > 0 else 0.0
    n_positives = is_significant_de.sum()
    true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
    return pd.Series(dict(fdr=true_fdr, fnr=true_fnr, alpha=alpha))


# True FDR / FNR of every individual run (one row per experiment, training and
# sample size), computed separately for each algorithm.
_RUN_KEYS = ["experiment", "training", "sample_size"]

fdr_fnr_mf = res_mf.groupby(_RUN_KEYS).apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
fdr_fnr_mf = fdr_fnr_mf.reset_index().assign(algorithm="MF")

fdr_fnr_iaf = res_iaf.groupby(_RUN_KEYS).apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
fdr_fnr_iaf = fdr_fnr_iaf.reset_index().assign(algorithm="IAF")
# fdr_fnr_iaf_at = (
#     res_iaf_at.groupby(["experiment", "training", "sample_size"])
#     .apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
#     .reset_index()
#     .assign(algorithm="IAF")
# )
# fdr_fnr_iaf_res = (
#     res_iaf_res.groupby(["experiment", "training", "sample_size"])
#     .apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
#     .reset_index()
#     .assign(algorithm="IAF")
# )
# fdr_fnr_iaf_is = (
#     res_iaf_is.groupby(["experiment", "training", "sample_size"])
#     .apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
#     .reset_index()
#     .assign(algorithm="IAF")
# )
# fdr_fnr_iafk5 = (
#     res_iafk5.groupby(["experiment", "training", "sample_size"])
#     .apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
#     .reset_index()
#     .assign(algorithm="IAF")
# )

# fdr_fnr_iaf_skip = (
#     res_iaf_skip.groupby(["experiment", "training", "sample_size"])
#     .apply(fdr_fnr, control_alpha=CONTROL_ALPHA)
#     .reset_index()
#     .assign(algorithm="IAF")
# )

# Stack the per-run metrics of both algorithms.
df = pd.concat([fdr_fnr_mf, fdr_fnr_iaf], ignore_index=True)

# One box plot per metric, split by sample size and coloured by algorithm.
for _metric, _title in (
    ("fdr", "Control on False Discovery Rate"),
    ("fnr", "Control on False Negative Rate"),
):
    fig = px.box(
        df, x="sample_size", y=_metric, color="algorithm", title=_title
    )
    fig.show()

In [None]:
# Mean FDR / FNR per algorithm and training at the largest sample size.
# The columns are selected with a list: selecting several columns of a GroupBy
# with a bare tuple (["fdr", "fnr"] without the inner brackets) is no longer
# supported by recent pandas releases.
df.loc[lambda x: x.sample_size == 100].groupby(["algorithm", "training"])[
    ["fdr", "fnr"]
].mean()

Other algorithms

In [None]:
##

In [None]:
# ['deseq2', 'edger', 'mast']


def get_fdr_fnr(y_pred, y_true):
    """False discovery and false negative rates of boolean DE calls.

    The two metrics used to be stored under each other's name; they now match
    the definitions used by ``fdr_fnr``:

    * ``fdr`` = called-but-not-DE genes / called genes (0.0 when nothing is called)
    * ``fnr`` = DE-but-not-called genes / DE genes

    :param y_pred: (n_sz, n_picks, n_genes) boolean predictions
    :param y_true: (n_genes,) boolean ground truth
    :return: dict with ``"fnr"`` and ``"fdr"`` arrays of shape (n_sz, n_picks)
    """
    y_pred = np.asarray(y_pred, dtype=bool)
    y_true = np.asarray(y_true, dtype=bool)
    # Unpacking enforces the documented 3-D layout.
    n_sz, n_picks, _ = y_pred.shape

    n_called = y_pred.sum(axis=-1)
    n_false_pos = (y_pred & ~y_true).sum(axis=-1)
    # Entries with no called gene keep the 0.0 they are initialised with.
    fdrs = np.divide(
        n_false_pos,
        n_called,
        out=np.zeros((n_sz, n_picks)),
        where=n_called > 0,
    )

    n_missed = (~y_pred & y_true).sum(axis=-1)
    fnrs = n_missed / y_true.sum()
    return dict(fnr=fnrs, fdr=fdrs)


# Shape check of the competitors' p-value arrays.
for _method in ("mast", "deseq2", "edger"):
    print(other_predictions[_method]["pval"].shape)

# Boolean DE calls of each competitor ...
is_de_mast = other_predictions["mast"]["is_de"]
is_de_deseq2 = other_predictions["deseq2"]["is_de"]
is_de_edger = other_predictions["edger"]["is_de"]

# ... scored against the ground truth.
res_mast = get_fdr_fnr(is_de_mast, y_true=is_significant_de)
res_deseq2 = get_fdr_fnr(is_de_deseq2, y_true=is_significant_de)
res_edger = get_fdr_fnr(is_de_edger, y_true=is_significant_de)

In [None]:
# res_mast

In [None]:
def _first_run_at_100(frame):
    """Boolean mask: experiment 0, training 0, sample size 100."""
    return (
        (frame.experiment == 0) & (frame.training == 0) & (frame.sample_size == 100)
    )


# Per-gene predictions of a single run for each algorithm, stacked.
preds_mf = res_mf[_first_run_at_100(res_mf)]
preds_iaf = res_iaf[_first_run_at_100(res_iaf)]

preds = pd.concat([preds_mf, preds_iaf], ignore_index=True)
preds.head()

#### Tables

Importance sampling performs worse.

In [None]:
# df = df[df.training == 0]

In [None]:
def algos_comparison(my_df, key1, other_keys, key_values="error"):
    """Tell whether algorithm ``key1`` beats every algorithm in ``other_keys``.

    NOTE(review): this repeats the definition given earlier in the notebook;
    the two copies are kept identical, consider keeping only one.

    :param my_df: DataFrame with an ``"algorithm"`` column and a ``key_values``
        column holding the metric to compare (lower is better).
    :param key1: name of the candidate algorithm.
    :param other_keys: names of the algorithms to compare against.
    :param key_values: name of the metric column.
    :return: ``True`` when ``has_lower_mean`` reports a lower mean for ``key1``
        against every competitor, ``False`` otherwise. A ``ValueError`` raised
        by ``has_lower_mean`` counts as "not better". With no competitor the
        result is ``True`` (the previous implementation crashed in that case
        because it returned a variable that is only bound inside the loop).
    """
    vals_key1 = my_df.loc[my_df["algorithm"] == key1, key_values].values
    for key2 in other_keys:
        vals_other = my_df.loc[my_df["algorithm"] == key2, key_values].values
        try:
            key1_better = has_lower_mean(vals_key1, vals_other)
        except ValueError:
            # The comparison could not be carried out: do not claim a win.
            return False
        if not key1_better:
            return False
    return True


# Per sample size, which algorithm is better on each metric?
gped = df.groupby("sample_size")
fdr_mf_better = gped.apply(
    algos_comparison, key1="MF", other_keys=["IAF"], key_values="fdr"
)
fdr_iaf_better = gped.apply(
    algos_comparison, key1="IAF", other_keys=["MF"], key_values="fdr"
)

fnr_mf_better = gped.apply(
    algos_comparison, key1="MF", other_keys=["IAF"], key_values="fnr"
)
fnr_iaf_better = gped.apply(
    algos_comparison, key1="IAF", other_keys=["MF"], key_values="fnr"
)

# Mean metrics per (sample size, algorithm). The columns are selected with a
# list: a bare tuple of column names is no longer accepted by recent pandas.
res_table = (
    df.groupby(["sample_size", "algorithm"])[["fdr", "fnr"]]
    .mean()
    .round(3)
    .reset_index()
)

res_table.loc[res_table["algorithm"] == "MF", "fdr_better"] = fdr_mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "fdr_better"] = fdr_iaf_better.values
res_table.loc[res_table["algorithm"] == "MF", "fnr_better"] = fnr_mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "fnr_better"] = fnr_iaf_better.values

# LaTeX formatting: bold the winning entries, then wrap every value in $...$.
for _metric in ("fdr", "fnr"):
    _is_better = res_table[_metric + "_better"].eq(True)
    # Object dtype first, so that strings are not written into a float column.
    res_table[_metric] = res_table[_metric].astype(object)
    # Raw string: "\m" is not a valid escape sequence in a regular literal.
    res_table.loc[_is_better, _metric] = res_table.loc[_is_better, _metric].apply(
        lambda x: r"\mathbf{{ {} }}".format(x)
    )
    res_table[_metric] = res_table[_metric].apply(lambda x: "$ {} $".format(x))

res_table.pivot(index="algorithm", columns="sample_size", values=["fdr", "fnr"]).T

In [None]:
# Full table: one row per (metric, sample size), one column per algorithm.
# (Filter on sample_size with .isin([...]) beforehand to show a subset.)
res_table.pivot(index="algorithm", columns="sample_size", values=["fdr", "fnr"]).T

In [None]:
# LaTeX source of the table restricted to sample sizes 20, 50 and 100.
# escape=False keeps the "$ ... $" / "\mathbf" markup of the cells intact.
print(
    res_table.loc[lambda x: x["sample_size"].isin([20, 50, 100])]
    .pivot(index="algorithm", columns="sample_size", values=["fdr", "fnr"])
    .T.to_latex(escape=False)
)

In [None]:
# LaTeX source of the FDR table for sample sizes 5, 20 and 100.
# The cells already contain LaTeX markup ("$ \mathbf{ ... } $"), so escaping
# must be disabled; with escape=True the "$" and "\" characters are escaped and
# the markup is printed literally.
print(
    res_table.pivot(index="algorithm", columns="sample_size", values="fdr")
    .loc[:, [5, 20, 100]]
    .to_latex(escape=False)
)

### PR Curves

#### PR Curve

In [None]:
selected_training = 2


def _pr_curve_run(frame):
    """Boolean mask: experiment 2, the selected training, sample size 100."""
    return (
        (frame.experiment == 2)
        & (frame.training == selected_training)
        & (frame.sample_size == 100)
    )


# DE probabilities of that run, ordered by gene so that they line up with the
# per-gene ground truth.
preds_md = res_mf.loc[_pr_curve_run].sort_values("gene")["de_proba"]

preds_iaf = res_iaf.loc[_pr_curve_run].sort_values("gene")["de_proba"]

# preds_iaf_res = res_iaf_res.loc[
#     lambda x: (x.experiment == 2)
#     & (x.training == selected_training)
#     & (x.sample_size == 100)
# ].sort_values("gene")["de_proba"]

# preds_iafk5 = res_iafk5.loc[
#     lambda x: (x.experiment == 0)
#     & (x.training == selected_training)
#     & (x.sample_size == 100)
# ].sort_values("gene")["de_proba"]

# preds_iaf_skip = res_iaf_skip.loc[
#     lambda x: (x.experiment == 0)
#     & (x.training == selected_training)
#     & (x.sample_size == 100)
# ].sort_values("gene")["de_proba"]

# preds_iaf_is = res_iaf_is.loc[
#     lambda x: (x.experiment == 0) & (x.training == selected_training) & (x.sample_size == 100)
# ].sort_values("gene")["de_proba"]

In [None]:
# The p-value array is indexed below as [size, pick, gene].
other_predictions["deseq2"]["pval"].shape

In [None]:
from sklearn.metrics import precision_recall_curve

# Turn p-values into scores (higher = more likely DE) for the last sample size
# and the first pick.
# NOTE(review): only the DESeq2 scores receive the tiny Gaussian jitter; it is
# commented out for edgeR and MAST - confirm this asymmetry is intended.
preds_deseq2 = 1.0 - other_predictions["deseq2"]["pval"][-1, 0, :] + 1e-10*np.random.randn(n_genes)
preds_edger = 1.0 - other_predictions["edger"]["pval"][-1, 0, :] #+ 1e-10*np.random.randn(n_genes)
preds_mast = 1.0 - other_predictions["mast"]["pval"][-1, 0, :] #+ 1e-10*np.random.randn(n_genes)

# preds_deseq2 = 1.0 - other_predictions['deseq2']['pval'][:]
# preds_edger = 1.0 - other_predictions['edger']['pval'][:]
# preds_mast = 1.0 - other_predictions['mast']['pval'][:]

In [None]:
# Fraction of missing scores per method (the DESeq2 line used to be printed
# twice, and the values were not labelled).
for _name, _scores in (
    ("MF", preds_md),
    ("IAF", preds_iaf),
    ("DESeq2", preds_deseq2),
    ("EdgeR", preds_edger),
    ("MAST", preds_mast),
):
    print(_name, np.isnan(_scores).mean())

In [None]:
# Per-run metrics of both algorithms, with recall = 1 - FNR.
df = pd.concat([fdr_fnr_mf, fdr_fnr_iaf], ignore_index=True)
df["recall"] = 1.0 - df["fnr"]

# Mean +/- 2 standard deviations of the recall at sample size 100.
recall_ranges = df.loc[lambda x: x.sample_size == 100].groupby(["algorithm"])["recall"]
_recall_mean = recall_ranges.mean()
_recall_spread = 2.0 * recall_ranges.std()

min_recalls = _recall_mean - _recall_spread
max_recalls = _recall_mean + _recall_spread
display(min_recalls, max_recalls)


In [None]:
from scipy.stats import mannwhitneyu

# One-sided Mann-Whitney U test: is the IAF recall greater than the MF recall?
# NOTE(review): the test pools the runs of every sample size, while the ranges
# computed above only use sample_size == 100 - confirm this is intended.
stat, pval = mannwhitneyu(
    df.loc[lambda x: x.algorithm=="IAF", "recall"],
    df.loc[lambda x: x.algorithm=="MF", "recall"],
    alternative="greater"
)

print(stat, pval)

In [None]:
# Precision = 1 - FDR; mean +/- 2 standard deviations at sample size 100.
df["precision"] = 1.0 - df["fdr"]
precision_ranges = df.loc[lambda x: x.sample_size == 100].groupby(["algorithm"])["precision"]
_precision_mean = precision_ranges.mean()
_precision_spread = 2.0 * precision_ranges.std()

min_precisions = _precision_mean - _precision_spread
# A precision cannot exceed 1: cap the upper bound.
max_precisions = (_precision_mean + _precision_spread).clip(upper=1.0)
display(min_precisions, max_precisions)

In [None]:
from scipy.stats import mannwhitneyu

# One-sided Mann-Whitney U test: is the IAF precision greater than the MF one?
# NOTE(review): the test pools the runs of every sample size, while the ranges
# computed above only use sample_size == 100 - confirm this is intended.
stat, pval = mannwhitneyu(
    df.loc[lambda x: x.algorithm=="IAF", "precision"],
    df.loc[lambda x: x.algorithm=="MF", "precision"],
    alternative="greater"
)

print(stat, pval)


In [None]:
from sklearn.metrics import precision_score, recall_score

# Point estimates of precision and recall when a gene is called DE at score >= 0.5.
called_de_iaf = preds_iaf >= 0.5
called_de_mf = preds_md >= 0.5

prec_iaf = precision_score(is_significant_de, called_de_iaf)
rec_iaf = recall_score(is_significant_de, called_de_iaf)
prec_mf = precision_score(is_significant_de, called_de_mf)
rec_mf = recall_score(is_significant_de, called_de_mf)

In [None]:
# Opacity of the shaded rectangles drawn on the precision-recall figure.
opacity = 0.6

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score


def plot_pr(fig, preds, y_true, name):
    """Add the precision-recall curve of one method to ``fig``.

    Args:
        fig: plotly figure; a ``go.Scatter`` trace is appended to it in place.
        preds: 1-D array-like of scores (higher means more likely positive).
            NaN scores are replaced by the lowest non-NaN score. The caller's
            array is left untouched.
        y_true: binary ground-truth labels aligned with ``preds``.
        name: legend label; the average precision is appended as ``@AP: x.xx``.

    Returns:
        None.

    Raises:
        ValueError: if every score is NaN, so no replacement value exists.
    """
    # Work on a float copy: imputing NaNs must not leak into the caller's data.
    scores = np.array(preds, dtype=float)
    nan_mask = np.isnan(scores)
    if nan_mask.any():
        if nan_mask.all():
            raise ValueError("plot_pr: all scores are NaN for '{}'".format(name))
        scores[nan_mask] = scores[~nan_mask].min()

    # NaNs are imputed before BOTH metrics so the reported AP matches the plotted
    # curve (scikit-learn rejects NaN scores).
    average_precision = average_precision_score(y_true, scores)
    # The score array is passed positionally because its keyword name differs
    # between scikit-learn releases.
    precs, recs, _ = precision_recall_curve(y_true, scores)
    fig.add_trace(
        go.Scatter(
            x=recs, y=precs, name=name + "@AP: {0:0.2f}".format(average_precision)
        )
    )
    return


# Empty 800x600 canvas with labelled axes for the precision-recall comparison.
layout = go.Layout(
    title="Precision Recall Curves",
    xaxis={"title": "Recall"},
    yaxis={"title": "Precision"},
    width=800,
    height=600,
)
fig = go.Figure(layout=layout)

# One curve per method, added in legend order.
pr_curves = [
    ("MF", preds_md),
    ("IAF", preds_iaf),
    # ("IAF RES", preds_iaf_res),
    # ("IAF SKIP", preds_iaf_skip),
    # ("IAFK5", preds_iafk5),
    # ("IAF IS", preds_iaf_is),
    ("DESeq2", preds_deseq2),
    ("EdgeR", preds_edger),
    ("MAST", preds_mast),
]
for curve_name, curve_preds in pr_curves:
    plot_pr(fig=fig, preds=curve_preds, y_true=is_significant_de, name=curve_name)


# trace = go.Scatter(
#     y=(prec_mf, prec_iaf),
#     x=(rec_mf, rec_iaf),
#     marker=dict(size=2 * [14], color=["blue", "red"]),
#     showlegend=False,
#     mode="markers"
# )
# fig.add_trace(trace)

# One shaded rectangle per variational family, spanning its recall band (x) and
# precision band (y) computed in the cells above.
layouts = [
    go.layout.Shape(
        type="rect",
        x0=min_recalls[algo],
        y0=min_precisions[algo],
        x1=max_recalls[algo],
        y1=max_precisions[algo],
        line={"color": color, "width": 2},
        fillcolor=color,
        opacity=opacity,
    )
    for algo, color in (("MF", "blue"), ("IAF", "red"))
]

fig.update_layout(shapes=layouts)
# Zoom on the upper-right corner of the PR plane.
zoom_range = [0.5, 1.01]
fig.update_xaxes(range=zoom_range)
fig.update_yaxes(range=zoom_range)
fig.show()
# Also upload the figure through chart_studio's iplot.
iplot(fig, filename="lognormal_pr_curves4", sharing="private")

#### MAP

In [None]:
def do_ap(my_df):
    """Average precision of one group's DE probabilities.

    Rows are ordered by ``gene`` before scoring against the global
    ``is_significant_de`` labels (presumably indexed by gene — TODO confirm).

    Returns:
        pd.Series with a single ``AP`` entry.
    """
    probas_by_gene = my_df.sort_values("gene").de_proba
    return pd.Series({"AP": average_precision_score(is_significant_de, probas_by_gene)})


# Average precision is computed once per (experiment, training, sample_size) run.
ap_group_keys = ["experiment", "training", "sample_size"]

ap_mf = res_mf.groupby(ap_group_keys).apply(do_ap).reset_index().assign(algorithm="MF")
ap_iaf = res_iaf.groupby(ap_group_keys).apply(do_ap).reset_index().assign(algorithm="IAF")

# Disabled variants, kept for reference:
# ap_iafk5 = res_iaf_skip.groupby(ap_group_keys).apply(do_ap).reset_index().assign(algorithm="IAF K5")
# ap_iaf_is = res_iaf_is.groupby(ap_group_keys).apply(do_ap).reset_index().assign(algorithm="IAF IS")

all_ap = pd.concat([ap_mf, ap_iaf], ignore_index=True)

# Distribution of AP per sample size, one box per algorithm.
px.box(all_ap, x="sample_size", y="AP", color="algorithm")

In [None]:
# For every sample size, print has_lower_mean(AP of IAF, AP of MF).
for sz in SIZES:
    ap_at_size = all_ap.loc[all_ap.sample_size == sz]
    res = has_lower_mean(
        ap_at_size.loc[ap_at_size.algorithm == "IAF", "AP"],
        ap_at_size.loc[ap_at_size.algorithm == "MF", "AP"],
    )
    print(res)

In [None]:
# all_ap.groupby(["algorithm", "sample_size"]).agg(dict(AP=["mean", "std"]))

## Diagonal Curve

In [None]:
# lfc_gt = -(lfcs[:, 1] - lfcs[:, 0])

In [None]:
# Peek at the first rows of the MF results table.
res_mf.head()

In [None]:
selected_training = 0
# 120 distinct gene indices, sorted.
subsample_genes = np.sort(np.random.permutation(n_genes)[:120])


def select_lfc_estimates(results):
    """Extract the plotted LFC estimates from one algorithm's results table.

    Keeps experiment 0, training ``selected_training``, sample size 100 and the
    genes in ``subsample_genes``; orders rows by gene; then adds the 99% HDI
    error-bar lengths and the ground-truth LFC of the selected genes.
    (Swap the hdi99 columns for hdi64 below to plot the 64% intervals instead.)
    """
    keep = (
        (results.experiment == 0)
        & (results.training == selected_training)
        & (results.sample_size == 100)
        & (results.gene.isin(subsample_genes))
    )
    picked = results.loc[keep].sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    return picked.assign(
        err_minus=lambda x: x.lfc_mean - x.hdi99_low,
        err_pos=lambda x: x.hdi99_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
    )


lfcs_mf = select_lfc_estimates(res_mf)
lfcs_ia = select_lfc_estimates(res_iaf)
lfcs_iak5 = select_lfc_estimates(res_iafk5)

all_lfcs = pd.concat([lfcs_mf, lfcs_ia, lfcs_iak5], ignore_index=True)

In [None]:
# Estimated vs ground-truth LFC, one colour per algorithm, with asymmetric
# error bars taken from the err_pos / err_minus columns.
fig = px.scatter(
    all_lfcs,
    x="lfc_gt",
    y="lfc_mean",
    color="algorithm",
    error_y="err_pos",
    error_y_minus="err_minus",
)

# Dashed identity line y = x between -3 and 3.
fig.add_trace(
    go.Scatter(
        x=[-3, 3],
        y=[-3, 3],
        mode="lines",
        line=dict(color="black", width=4, dash="dash"),
        showlegend=False,
    )
)

fig.show()
# iplot(fig, sharing="private", filename="logpoisson2_diagonal")

In [None]:
# IAF estimates obtained with sample size 5 versus sample size 100
# (same experiment, training and genes as the cell above).
hdi_columns = ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]

lfcs_a, lfcs_b = (
    res_iaf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == n_used)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[hdi_columns]
    .assign(
        # 99% HDI error bars (use the hdi64 columns for 64% intervals).
        err_minus=lambda x: x.lfc_mean - x.hdi99_low,
        err_pos=lambda x: x.hdi99_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
        legend=str(n_used),
    )
    for n_used in (5, 100)
)

all_lfcs = pd.concat([lfcs_a, lfcs_b], ignore_index=True)

fig = px.scatter(
    all_lfcs,
    x="lfc_gt",
    y="lfc_mean",
    color="legend",
    error_y="err_pos",
    error_y_minus="err_minus",
)

# Dashed identity line y = x between -3 and 3.
identity_line = go.Scatter(
    x=[-3, 3],
    y=[-3, 3],
    mode="lines",
    line={"color": "black", "width": 4, "dash": "dash"},
    showlegend=False,
)
fig.add_trace(identity_line)

fig.show()

## Study of LFC errors

In [None]:
def compute_l2_err(diff):
    """Average error over the last axis, ignoring NaNs.

    The per-element error is ``0.5 * sqrt(diff ** 2)``, i.e. half the absolute
    difference; it is then NaN-averaged along the last axis.
    """
    squared = diff ** 2
    return np.nanmean(0.5 * squared ** (0.5), axis=-1)


def l2_err_competitor(vals: np.ndarray, other: np.ndarray = None):
    """Tabulate a competitor's LFC error per sample size and pick.

    Args:
        vals: LFC estimates. NaN entries are counted as an estimate of 0.0.
            The array is NOT modified; a NaN-free copy is used internally.
        other: optional reference values subtracted from ``vals``; when None
            the error is computed on ``vals`` itself.

    Returns:
        pd.DataFrame with one row per (sample size, pick) and columns
        ``experiment`` (the pick index), ``training`` (always 0),
        ``sample_size`` and ``error``.

    Raises:
        AssertionError: if the per-(size, pick) error array does not have
            shape ``(N_SIZES, N_PICKS)``.
    """
    # np.where builds a new array, so the caller's data keeps its NaNs
    # (the previous in-place assignment silently overwrote them).
    filled = np.where(np.isnan(vals), 0.0, vals)
    diff = filled if other is None else filled - other
    res = compute_l2_err(diff)
    assert res.shape == (N_SIZES, N_PICKS)
    data = [
        dict(
            experiment=pick,
            training=0,
            sample_size=size,
            error=res[size_ix, pick],
        )
        for (size_ix, size) in enumerate(SIZES)
        for pick in range(N_PICKS)
    ]
    return pd.DataFrame(data)


# LFC error tables of the three competitors, each tagged with its display name.
lfcs_errs_deseq2, lfcs_errs_edger, lfcs_errs_mast = (
    l2_err_competitor(other_predictions[method_key]["lfc"], other=lfc_gt).assign(
        algorithm=method_label
    )
    for method_key, method_label in (
        ("deseq2", "DESeq2"),
        ("edger", "EdgeR"),
        ("mast", "MAST"),
    )
)

In [None]:
def pd_l2_err(my_df):
    """LFC error of one group of rows against the global ``lfc_gt``.

    Rows are ordered by ``gene``; the error is the NaN-ignoring mean of
    ``0.5 * sqrt((lfc_mean - lfc_gt) ** 2)``.

    Returns:
        pd.Series with a single ``error`` entry.
    """
    estimates = my_df.sort_values("gene")["lfc_mean"]
    residuals = estimates - lfc_gt
    return pd.Series({"error": np.nanmean(0.5 * (residuals ** 2) ** (0.5))})


# LFC error of the MF results, one row per (experiment, sample_size, training, algorithm).
# NOTE(review): unlike the IAF frame below, the algorithm label is whatever res_mf.algorithm
# holds — confirm it is "MF", since later cells filter on that exact string.
lfcs_errs_mf = (
    res_mf.groupby(["experiment", "sample_size", "training", "algorithm"])
    .apply(pd_l2_err)
    .reset_index()
)

# Same for the IAF results; the algorithm column is then overwritten with "IAF".
lfcs_errs_iaf = (
    res_iaf.groupby(["experiment", "sample_size", "training", "algorithm"])
    .apply(pd_l2_err)
    .reset_index()
    .assign(algorithm="IAF")
)

In [None]:
# Stack the error tables of the two variational families and the three competitors.
all_errs = pd.concat(
    [lfcs_errs_mf, lfcs_errs_iaf, lfcs_errs_deseq2, lfcs_errs_edger, lfcs_errs_mast],
    ignore_index=True,
)

# Distribution of the LFC error per sample size, one box per algorithm.
px.box(all_errs, x="sample_size", y="error", color="algorithm")

### Tables

**Skip connections in the decoder are without doubt beneficial**

In [None]:
def algos_comparison(my_df, key1, other_keys):
    """Tell whether ``key1`` wins the ``has_lower_mean`` test against every other algorithm.

    Args:
        my_df: frame with ``algorithm`` and ``error`` columns.
        key1: label of the algorithm under test.
        other_keys: labels of the algorithms it is compared against.

    Returns:
        bool: True when ``has_lower_mean(errors of key1, errors of key2)`` is
        truthy for every ``key2`` in ``other_keys`` (vacuously True when
        ``other_keys`` is empty), False otherwise.
    """
    vals_key1 = my_df.loc[my_df["algorithm"] == key1, "error"].values
    # all() stops at the first failed comparison, and returns the aggregated flag
    # rather than the result of whichever comparison happened to run last.
    return all(
        has_lower_mean(vals_key1, my_df.loc[my_df["algorithm"] == key2, "error"].values)
        for key2 in other_keys
    )


# One comparison per sample size.
gped = all_errs.groupby("sample_size")
# True for a sample size only when MF beats all three competitors AND IAF does too.
# NOTE(review): the name says "or" but the two results are combined with a logical AND —
# confirm which one is intended.
mf_or_iaf_better = gped.apply(
    algos_comparison, key1="MF", other_keys=["DESeq2", "EdgeR", "MAST"]
) & gped.apply(algos_comparison, key1="IAF", other_keys=["DESeq2", "EdgeR", "MAST"])
# MF beats every other algorithm, IAF included.
mf_better = gped.apply(
    algos_comparison, key1="MF", other_keys=["IAF", "DESeq2", "EdgeR", "MAST"]
)
# IAF beats every other algorithm, MF included.
iaf_better = gped.apply(
    algos_comparison, key1="IAF", other_keys=["MF", "DESeq2", "EdgeR", "MAST"]
)

In [None]:
# Mean / std of the error per (sample_size, algorithm), plus the LaTeX cell text.
# Named aggregation is used because passing a renaming dict to SeriesGroupBy.agg
# is rejected by recent pandas versions.
res_table = (
    all_errs.groupby(["sample_size", "algorithm"])
    .error.agg(err_mean="mean", err_std="std")
    .reset_index()
    .assign(
        displayed=lambda x: x.err_mean.map("{:.3f}".format),
        is_better=False,
        one_of_best=False,
    )
)
# The flag Series are indexed by sample_size in sorted order, which matches the
# row order produced by the groupby above, so positional assignment lines up.
res_table.loc[res_table["algorithm"] == "MF", "is_better"] = mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "is_better"] = iaf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "one_of_best"] = mf_or_iaf_better.values
res_table.loc[res_table["algorithm"] == "MF", "one_of_best"] = mf_or_iaf_better.values


# A star marks the "one of the best" entries.
res_table.loc[lambda x: x.one_of_best, "displayed"] = (
    res_table.loc[lambda x: x.one_of_best, "displayed"] + "^*"
)
# Bold marks the single best entry. Raw string: "\m" is not a valid escape sequence.
res_table.loc[lambda x: x.is_better, "displayed"] = res_table.loc[
    lambda x: x.is_better, "displayed"
].apply(lambda x: r"\mathbf{{ {} }}".format(x))

# Wrap every cell in LaTeX math delimiters.
res_table.loc[:, "displayed"] = res_table.loc[:, "displayed"].apply(
    lambda x: "$ {} $".format(x)
)

In [None]:
# Algorithms as rows (in the listed order), sample sizes as columns, formatted errors as cells.
res_table.pivot(index="algorithm", columns="sample_size", values="displayed").loc[
    ["DESeq2", "EdgeR", "MAST", "MF", "IAF"], #[20, 50, 100]
]

In [None]:
# Same table restricted to sample sizes 5, 20 and 100, printed as LaTeX.
# escape=False keeps the math markup of the cells intact.
print(
    res_table.pivot(index="algorithm", columns="sample_size", values="displayed")
    .loc[["DESeq2", "EdgeR", "MAST", "MF", "IAF"], [5, 20, 100]]
    .to_latex(escape=False)
)

In [None]:
# List the model configurations available in mdl_params.
mdl_params.keys()

Test whether the poorer performance of the models for large numbers of cells is linked to mixing factors

## Coverage

In [None]:
# Train one model per variational family on the same dataset, each with its own
# "mf" / "iaf" entry of mdl_params, train_params and train_fn_params.
mdl_mf, trainer_mf = train_model(
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
)

mdl_iaf, trainer_iaf = train_model(
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
)

In [None]:
# Training losses of both models on a log scale; the first 10 entries are skipped.
plt.plot(trainer_mf.train_losses[10:])
plt.plot(trainer_iaf.train_losses[10:])
plt.yscale("log")

In [None]:
def subsampled_posterior(post, indices):
    """Point the sampler of ``post``'s data loader at ``indices``.

    The sampler is modified in place and the very same ``post`` object is
    returned; no copy is made.
    """
    sampler = post.data_loader.sampler
    sampler.indices = indices
    return post


def sample_random_indices(sz):
    """Draw ``sz`` positions from each of the two populations of ``y_test``.

    Positions where the global ``y_test`` equals 0 (first output) and 1 (second
    output) are sampled with ``np.random.choice``, i.e. with replacement.

    Returns:
        (idx_a, idx_b): two index arrays of length ``sz``.
    """
    idx_a, idx_b = (
        np.random.choice(np.where(y_test == label)[0], size=sz) for label in (0, 1)
    )
    return idx_a, idx_b


def compute_lfc(my_trainer, my_idx_a, my_idx_b, n_samples=1000, importance_sampling=False):
    """Sample posterior log2 fold changes between two groups of test cells.

    Args:
        my_trainer: trainer whose ``test_set`` posterior is sub-sampled. Its
            sampler indices are overwritten as a side effect.
        my_idx_a: positions (into ``TEST_INDICES``) of the cells of population A.
        my_idx_b: positions (into ``TEST_INDICES``) of the cells of population B.
        n_samples: number of samples requested from ``get_latents`` for each
            population. This value is now forwarded; it used to be ignored in
            favour of a hard-coded 1000.
        importance_sampling: when True, the ``log_probas`` returned by
            ``get_latents`` are soft-maxed and passed to ``demultiply`` as
            weights; otherwise no weights are used.

    Returns:
        Array of ``log2(scale_a) - log2(scale_b)`` with one column per gene.
    """

    def _population_scales(cell_idx):
        # The test-set posterior is shared, so each population must be fully
        # sampled before the sampler is pointed at the next one.
        post = subsampled_posterior(my_trainer.test_set, TEST_INDICES[cell_idx])
        outputs = post.get_latents(n_samples=n_samples, other=True, device="cpu")
        # Flatten all leading axes into one: rows are draws, columns are genes.
        scales = outputs["scale"].reshape((-1, dataset.nb_genes)).numpy()
        return scales, outputs["log_probas"]

    scales_a, weights_a = _population_scales(my_idx_a)
    scales_b, weights_b = _population_scales(my_idx_b)

    if importance_sampling:
        weights_a = softmax(weights_a.reshape((-1)))
        weights_b = softmax(weights_b.reshape((-1)))
    else:
        weights_a = None
        weights_b = None
    # NOTE(review): demultiply presumably pairs draws of A with draws of B
    # (factor=3) — confirm against scvi.utils.
    scales_a, scales_b = demultiply(
        arr1=scales_a, arr2=scales_b, factor=3, weights_a=weights_a, weights_b=weights_b
    )

    return np.log2(scales_a) - np.log2(scales_b)


def compute_lfc_gt(my_idx_a, my_idx_b):
    """Ground-truth log2 fold change between two groups of test cells.

    For every pair (cell i of A, cell j of B) the per-gene difference of
    ``log2(exp(h))`` is taken, then averaged over all pairs.
    NOTE(review): ``h`` presumably holds natural-log expression levels of shape
    (cells, genes) — confirm where it is defined.

    Args:
        my_idx_a: positions (into ``TEST_INDICES``) of the cells of population A.
        my_idx_b: positions (into ``TEST_INDICES``) of the cells of population B.

    Returns:
        1-D CPU tensor with one value per gene.
    """
    h_test = h[TEST_INDICES]
    h_a_gt = h_test[my_idx_a].exp().log2()
    h_b_gt = h_test[my_idx_b].exp().log2()
    # Broadcasting builds the (n_cells_a, n_cells_b, n_genes) table of pairwise
    # differences in one step; the gene count comes from the data instead of a
    # hard-coded 1000. The cast mirrors the default-dtype CPU buffer that the
    # table was previously written into.
    pairwise = (h_a_gt[:, None, :] - h_b_gt[None, :, :]).to(
        device="cpu", dtype=torch.get_default_dtype()
    )
    return pairwise.mean((0, 1))


# Tail percentages; each one is turned into the nominal level 2 * q / 100 below.
CREDIBLE_LEVELS = [5, 10, 15, 20]


def get_coverage(lfc_pred, lfc_gt):
    """Calibration error of the HDIs computed from ``lfc_pred``.

    For each ``q`` in ``CREDIBLE_LEVELS`` an interval is requested from
    ``compute_hdi`` at level ``2 * q / 100``, and the fraction of genes whose
    ground truth falls inside it is printed next to that level.

    Args:
        lfc_pred: posterior LFC samples passed to ``compute_hdi``.
        lfc_gt: ground-truth LFC per gene (must expose ``.numpy()``).

    Returns:
        Mean over levels of ``0.5 * (level - observed coverage) ** 2``.
    """
    truth = lfc_gt.numpy()
    calibration_errs = []
    for q in CREDIBLE_LEVELS:
        level = 2 * q / 100.0
        bounds = compute_hdi(lfc_pred, level)
        inside = (truth >= bounds[:, 0]) & (truth <= bounds[:, 1])
        mean_cov = inside.mean()
        print(level, mean_cov)
        calibration_errs.append(((level - mean_cov) ** 2.0) * 0.5)
    return np.mean(calibration_errs)

In [None]:
from tqdm import tqdm

# Start from empty lists so the cell runs on a fresh kernel and gives the same
# result when re-run (their initialisation used to be commented out).
errs_iaf = []
# errs_iaf_is = []
errs_mf = []

sz = 25
# 10 repetitions: draw `sz` cells per population, sample LFCs with both models
# and record the calibration error of their HDIs.
for _ in tqdm(range(10)):
    idx_a, idx_b = sample_random_indices(sz)
    lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=300)
    lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=300, importance_sampling=False)
#     lfc_iaf_is = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=2000, importance_sampling=True)
    lfc_ground_truth = compute_lfc_gt(idx_a, idx_b)
    print("IAF")
    errs_iaf.append(get_coverage(lfc_iaf, lfc_ground_truth))
#     errs_iaf_is.append(get_coverage(lfc_iaf_is, lfc_ground_truth))
    print("MF")
    errs_mf.append(get_coverage(lfc_mf, lfc_ground_truth))

In [None]:
# Overlaid histograms of the calibration errors collected for each model.
plt.hist(errs_iaf, label="IAF", alpha=0.5)
# plt.hist(errs_iaf_is, label="IAF IS", alpha=0.5)
plt.hist(errs_mf, label='MF', alpha=0.5)
plt.legend()

# has_lower_mean test of IAF against MF, followed by the two mean errors.
print(has_lower_mean(samp_a=errs_iaf, samp_b=errs_mf))
print(np.mean(errs_iaf))
print(np.mean(errs_mf))

new FDR control (not very convincing!)

In [None]:
# sz = 100

# idx_a, idx_b = sample_random_indices(sz)
# lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=5000)
# lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=5000)

# probas_mf = np.abs(lfc_mf >= 0.5).mean(0)
# probas_iaf = np.abs(lfc_iaf >= 0.5).mean(0)

# lfc_ground_truth = compute_lfc_gt(idx_a, idx_b).numpy()
# is_significant_de_local = lfc_ground_truth >= 0.5

# def get_fdr(probas):
#     sorted_genes = np.argsort(-probas)
#     sorted_pgs = probas[sorted_genes]
#     cumulative_fdr = (1.0 - sorted_pgs).cumsum() / (1.0 + np.arange(len(sorted_pgs)))
#     d = (cumulative_fdr <= 5e-2).sum() - 1
#     return cumulative_fdr, sorted_genes


# def get_fdr_gt(is_de, my_sorted_genes):
#     fdr_k = []
#     for k in range(n_genes):
#         predictions = np.zeros(n_genes)
#         predictions[my_sorted_genes[: (k + 1)]] = 1
#         fdr = ((~is_de) * predictions).sum() / (k + 1)
#         fdr_k.append(fdr)
#     return np.array(fdr_k)

# cumulative_fdr, sorted_genes = get_fdr(probas_mf)
# fdr_gt = get_fdr_gt(is_significant_de_local, sorted_genes)

# d = (cumulative_fdr <= 5e-2).sum() - 1
# print(fdr_gt[d])

# plt.plot(fdr_gt)
# plt.plot(cumulative_fdr)

# cumulative_fdr, sorted_genes = get_fdr(probas_iaf)
# fdr_gt = get_fdr_gt(is_significant_de_local, sorted_genes)

# d = (cumulative_fdr <= 5e-2).sum() - 1
# print(fdr_gt[d])


# plt.plot(fdr_gt)
# plt.plot(cumulative_fdr)

Diagonal

In [None]:
sz = 50

# random_genes = np.random.permutation(n_genes)[:100]

# Draw 100 genes (np.random.choice samples with replacement by default) among
# the 900 genes with the largest absolute ground-truth LFC.
random_genes = np.argsort(-np.abs(lfc_gt))[:900]
random_genes = np.random.choice(random_genes, 100)
idx_a, idx_b = sample_random_indices(sz)
lfc_mf = compute_lfc(trainer_mf, idx_a, idx_b, n_samples=2000)
lfc_iaf = compute_lfc(trainer_iaf, idx_a, idx_b, n_samples=2000)
lfc_ground_truth = compute_lfc_gt(idx_a, idx_b).numpy()[random_genes]

# Point estimates are posterior MEDIANS. The names are kept because the figure
# below reads them; the mean-based values that used to be assigned first were
# overwritten immediately and have been removed.
mean_mf = np.median(lfc_mf, 0)[random_genes]
mean_iaf = np.median(lfc_iaf, 0)[random_genes]
# 95% HDIs of the selected genes.
hdis_mf = compute_hdi(lfc_mf, credible_interval=0.95)[random_genes]
hdis_iaf = compute_hdi(lfc_iaf, credible_interval=0.95)[random_genes]

def _hdi_error_bars(center, hdis):
    """Asymmetric plotly error bars reaching from ``center`` to the HDI bounds."""
    return {
        "type": "data",
        "symmetric": False,
        "array": hdis[:, 1] - center,
        "arrayminus": center - hdis[:, 0],
    }


fig = go.Figure()
# The two models are shifted by -/+0.001 on the x axis so their error bars do not overlap.
trace_mf = go.Scatter(
    x=lfc_ground_truth - 0.001,
    y=mean_mf,
    mode="markers",
    error_y=_hdi_error_bars(mean_mf, hdis_mf),
    name="MF",
)

trace_iaf = go.Scatter(
    x=lfc_ground_truth + 0.001,
    y=mean_iaf,
    mode="markers",
    error_y=_hdi_error_bars(mean_iaf, hdis_iaf),
    name="IAF",
)
# Dashed identity line y = x between -6 and 8.
trace_gt = go.Scatter(
    x=[-6, 8],
    y=[-6, 8],
    mode="lines",
    line={"color": "black", "width": 4, "dash": "dash"},
    showlegend=False,
)
fig.add_traces([trace_mf, trace_iaf, trace_gt])
fig.show()

In [None]:
# iplot(fig, filename="diagonal_logpoisson_new_low_LFC", sharing="private")

In [None]:
# Index of the gene at position 10 when genes are sorted by decreasing |ground-truth LFC|.
# NOTE(review): the chained assignment also overwrites `random_genes` with this single
# index — confirm this is intended.
idx = random_genes = np.argsort(-np.abs(lfc_gt))[10]

# Posterior LFC samples of that gene under both models.
fig = go.Figure()
fig.add_traces([
    go.Histogram(x=lfc_mf[:, idx], name="MF"),
    go.Histogram(x=lfc_iaf[:, idx], name="IAF"),
])

fig.show()

In [None]:



# Same comparison with matplotlib (first histogram MF, second IAF); the vertical
# line marks the ground-truth LFC of gene `idx`.
plt.hist(lfc_mf[:, idx], alpha=0.5, bins=100)
plt.hist(lfc_iaf[:, idx], alpha=0.5, bins=100)
plt.axvline(compute_lfc_gt(idx_a, idx_b).numpy()[idx])
plt.show()


## Volcano

In [None]:
selected_training = 0

# DE probabilities of a single run (experiment 0, the selected training, sample
# size 100), ordered by gene, for MF ...
preds_md = (
    res_mf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
    ]
    .sort_values("gene")["de_proba"]
    .values
)

# ... and for IAF.
preds_iaf = (
    res_iaf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
    ]
    .sort_values("gene")["de_proba"]
    .values
)

# preds_iaf_at = res_iaf_is.loc[
#     lambda x: (x.experiment == 0) & (x.training == selected_training) & (x.sample_size == 100)
# ].sort_values("gene")["de_proba"].values

In [None]:
# Plot a random subset of 300 genes to keep the figure readable.
subsampled_genes = np.random.permutation(n_genes)[:300]

fig = go.Figure(
    layout=go.Layout(
        yaxis=dict(title="Estimated probability of DE"),
        xaxis=dict(title="Ground-Truth LFC"),
    )
)
# One named trace per model so the legend says which point cloud is which.
# (Alternative y axis: np.log10(preds + 1e-12), with vertical guides at LFC = -0.5 and 0.5.)
fig.add_traces(
    [
        go.Scatter(
            x=lfc_gt[subsampled_genes],
            y=preds_md[subsampled_genes],
            mode="markers",
            name="MF",
        ),
        go.Scatter(
            x=lfc_gt[subsampled_genes],
            y=preds_iaf[subsampled_genes],
            mode="markers",
            name="IAF",
        ),
    ]
)

fig.show()

In [None]:
# DE probabilities at sample size 100 with no filter on experiment or training,
# ordered by gene.
# NOTE(review): these arrays hold one entry per matching row, while the KDE cells
# below pair them with lfc_gt — confirm the lengths agree.
preds_md = (
    res_mf.loc[lambda x: (x.sample_size == 100)].sort_values("gene")["de_proba"].values
)

preds_iaf = (
    res_iaf.loc[lambda x: (x.sample_size == 100)].sort_values("gene")["de_proba"].values
)

In [None]:
# preds_iaf.shape

In [None]:
import seaborn as sns

# 2-D kernel density of (ground-truth LFC, MF DE probability).
ax = sns.kdeplot(lfc_gt, preds_md, cut=1, cmap="Reds", shade=True, shade_lowest=False)

In [None]:
# 2-D kernel density of (ground-truth LFC, IAF DE probability).
ax = sns.kdeplot(lfc_gt, preds_iaf, cut=1, cmap="Blues", shade=True, shade_lowest=False)

### Proportion of detected genes that have an absolute LFC above 0.5

In [None]:
# List the fields stored for edgeR.
other_predictions["edger"].keys()

In [None]:
def counter(my_df):
    """Share of all genes that are called DE although not truly DE.

    A gene is called DE when its ``de_proba`` is at least 0.5; the result is the
    mean, over genes ordered by ``gene``, of "called DE and not
    ``is_significant_de``".

    Raises:
        AssertionError: if the group does not hold exactly ``n_genes`` rows.
    """
    called_de = (my_df.sort_values("gene")["de_proba"] >= 0.5).values
    assert called_de.shape == (n_genes,)
    false_calls = called_de & (~is_significant_de)
    return false_calls.mean()

# NOTE: preds_md / preds_iaf are rebound here to DataFrames (all runs at sample
# size 100); earlier cells used the same names for 1-D arrays.
preds_md = res_mf.loc[lambda x: (x.sample_size == 100)]
preds_iaf = res_iaf.loc[lambda x: (x.sample_size == 100)]

# One value per (experiment, training) run for the two variational families.
mf_de_genes_not_sig = preds_md.groupby(["experiment", "training"]).apply(counter).values
iaf_de_genes_not_sig = preds_iaf.groupby(["experiment", "training"]).apply(counter).values
# Competitors: a gene is called DE when its p-value is <= 0.05; the mean is taken over axis 1.
# NOTE(review): pval[-1] presumably selects the largest sample size, leaving (pick, gene) — confirm.
edger_de_genes_not_sig = ((other_predictions["edger"]["pval"][-1] <= 0.05) & (~is_significant_de)).mean(1)
mast_de_genes_not_sig = ((other_predictions["mast"]["pval"][-1] <= 0.05) & (~is_significant_de)).mean(1)
deseq2_de_genes_not_sig = ((other_predictions["deseq2"]["pval"][-1] <= 0.05) & (~is_significant_de)).mean(1)

In [None]:
# (display name, per-run values) for every method.
couples = [
    ("MF", mf_de_genes_not_sig), 
    ("IAF", iaf_de_genes_not_sig), 
    ("edgeR", edger_de_genes_not_sig), 
    ("MAST", mast_de_genes_not_sig), 
    ("DESeq2", deseq2_de_genes_not_sig), 
]

# One row per method: the mean over runs, rounded to 3 decimals and wrapped in
# LaTeX math delimiters; rows are then put in the presentation order.
res = []
for key, vals in couples:
    res.append({"Algorithm": key, "Portion": "$ {} $".format(round(vals.mean(), 3))})
my_df = pd.DataFrame(res).set_index("Algorithm").loc[["DESeq2", "edgeR", "MAST", "MF", "IAF"]]

In [None]:
# Unrounded mean for DESeq2.
deseq2_de_genes_not_sig.mean()

In [None]:
# has_lower_mean test of the IAF values against the MF values.
has_lower_mean(iaf_de_genes_not_sig, mf_de_genes_not_sig)

In [None]:
# Print the table as LaTeX; escape=False keeps the "$ ... $" cells intact.
print(my_df.to_latex(escape=False))

# DEBUG

## Uncertainty

Remarks:

The problem is linked to the fact that when you condition on fewer samples, the posterior LFC is sharper

I see several possible solutions:
- Voting strategy when you have many samples
- Modification of the decision rule
- Use posterior predicted
- dataset is too easy ==> Add complexity
- base decision making on credible intervals as previously
- 3 ways classification: Upregulated, downregulated, non DE

In [None]:
from scvi_utils import train_model
from scvi.utils import compute_hdi
from plotly import graph_objects

In [None]:
# Trainer settings for the debugging runs: ratio loss, evaluation every epoch,
# early stopping and LR reduction both driven by "elbo_ratio_loss".
# NOTE(review): the training cell that follows reads mdl_params / train_params /
# train_fn_params rather than these my_* dictionaries — confirm which set is intended.
my_train_params = dict(
    ratio_loss=True,
    test_indices=TEST_INDICES,
    frequency=1,
    early_stopping_kwargs={
        "early_stopping_metric": "elbo_ratio_loss",
        "save_best_state_metric": "elbo_ratio_loss",
        "patience": 20,
        "threshold": 0,
        "reduce_lr_on_plateau": True,
        "lr_patience": 10,
        "lr_factor": 0.2,
    },
)

# Model hyper-parameters.
my_mdl_params = {
    "n_hidden": 128,
    "n_layers": 1,
    "do_h": True,
    "n_latent": 10,
    "t": 4,
    "n_blocks": 2,
    "dropout_rate": 0.2,
    "decoder_do_last_skip": True,
}

# Arguments of the training call itself.
my_train_fn_params = {"n_epochs": 200, "lr": 0.01}

In [None]:
# Retrain both models; this rebinds mdl_mf / trainer_mf / mdl_iaf / trainer_iaf.
# NOTE(review): the my_* parameter dictionaries defined just above are not used
# here — confirm that the "mf" / "iaf" entries of mdl_params are the intended ones.
mdl_mf, trainer_mf = train_model(
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
)

mdl_iaf, trainer_iaf = train_model(
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
)

In [None]:
# Train / test "elbo_ratio_loss" histories of IAF then MF; the first 5 entries are skipped.
plt.plot(trainer_iaf.history["elbo_ratio_loss_train_set"][5:])
plt.plot(trainer_iaf.history["elbo_ratio_loss_test_set"][5:])
plt.plot(trainer_mf.history["elbo_ratio_loss_train_set"][5:])
plt.plot(trainer_mf.history["elbo_ratio_loss_test_set"][5:])

In [None]:
# Draw 10 posterior samples on the test set and unpack latents, labels and scales.
# NOTE(review): `trainer` is not defined in the nearby cells (only trainer_mf and
# trainer_iaf are) — confirm which trainer this refers to.
outputs = trainer.test_set.get_latents(n_samples=10, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
labels = labels.squeeze().numpy()

In [None]:
from sklearn.manifold import TSNE

# NOTE(review): the line computing z_transfo is commented out and `clusters` is not
# defined in the nearby cells, so this cell relies on values left in the kernel —
# confirm before running on a fresh kernel.
# z_transfo = TSNE().fit_transform(z.mean(0))
# Embedding coloured by label ...
sc = plt.scatter(z_transfo[:, 0], z_transfo[:, 1], c=labels.squeeze())
plt.show()
# ... then coloured by cluster, with the cells of idx_a (blue) and idx_b (red) overlaid.
sc = plt.scatter(z_transfo[:, 0], z_transfo[:, 1], c=clusters)
plt.scatter(z_transfo[idx_a, 0], z_transfo[idx_a, 1], c="blue")
plt.scatter(z_transfo[idx_b, 0], z_transfo[idx_b, 1], c="red")
# plt.colormaps(sc)

**Vanilla**

In [None]:
# NOTE(review): `trainer` is not defined in the nearby cells — confirm which trainer is meant.
outputs = trainer.test_set.get_latents(n_samples=10, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]

# Based on distances in latent

# Positions of the cells of each population.
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]

# 3000 cells per population, drawn with replacement.
idx_a = np.random.choice(where_a, size=3000)
idx_b = np.random.choice(where_b, size=3000)

# Flatten the leading axes into one: rows are draws, columns are genes.
scales_a = scales[:, idx_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, idx_b, :].reshape((-1, dataset.nb_genes)).numpy()

scales_a, scales_b = demultiply(scales_a, scales_b, 3)

lfc = np.log2(scales_a) - np.log2(scales_b)
# Despite the name, these are the 5th / 95th percentiles of the LFC samples, one row per gene.
hdis = np.array([np.percentile(lfc, q=5, axis=0), np.percentile(lfc, q=95, axis=0)]).T

In [None]:
# Interval bounds of the first three genes.
hdis[:3]

In [None]:
# Density histograms of gene 150: `lfc` samples vs `lfc_orig`.
# NOTE(review): lfc_orig is not defined in the nearby cells — confirm where it comes from.
gene_idx = 150
plt.hist(lfc[:, gene_idx], alpha=0.2, density=True)
plt.hist(lfc_orig[:, gene_idx], alpha=0.2, density=True)

In [None]:
# Interval bounds of the first three genes.
hdis[:3]

In [None]:
# Same comparison for gene 0.
gene_idx = 0
plt.hist(lfc[:, gene_idx], alpha=0.2, density=True)
plt.hist(lfc_orig[:, gene_idx], alpha=0.2, density=True)

In [None]:
# Shape of the "lfc_orig falls inside the interval" mask.
((hdis[:, 0] <= lfc_orig.numpy()) & (lfc_orig.numpy() <= hdis[:, 1])).shape

In [None]:
# The mask itself: True where lfc_orig lies between the two interval bounds.
((hdis[:, 0] <= lfc_orig.numpy()) & (lfc_orig.numpy() <= hdis[:, 1]))

In [None]:
# Interval bounds of the first three genes.
hdis[:3]

In [None]:
# Only the last expression of a cell is displayed: hdis[:5] is evaluated and discarded.
hdis[:5]
lfc_orig[2]

In [None]:
# Asymmetric error bars reaching from the mean LFC to the two interval bounds.
errs_y = graph_objects.scatter.ErrorY(
    array=hdis[:, 1] - lfc.mean(0), arrayminus=lfc.mean(0) - hdis[:, 0]
)

# Mean estimated LFC vs ground truth for every gene, with the dashed identity line.
fig = go.Figure()
fig.add_traces(
    [
        go.Scatter(x=lfc_gt, y=lfc.mean(0), error_y=errs_y, mode="markers"),
        go.Scatter(
            x=[-3, 3],
            y=[-3, 3],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
            showlegend=False,
        ),
    ]
)
fig.show()

# Fraction of genes whose reference value falls inside the interval, first
# against lfc_gt, then against lfc_orig.
print(((hdis[:, 0] <= lfc_gt) & (lfc_gt <= hdis[:, 1])).mean())
print(((hdis[:, 0] <= lfc_orig.numpy()) & (lfc_orig.numpy() <= hdis[:, 1])).mean())

**Adjusted**

In [None]:
# outputs = trainer.train_set.get_latents(
#     n_samples=500, other=True, device="cpu"
# )
# Reuses `outputs` and `sz` from earlier cells (the get_latents call above is commented out).
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# Keep `sz` cells per population, drawn with replacement.
where_a = where_a[np.random.choice(len(where_a), size=sz)]
where_b = where_b[np.random.choice(len(where_b), size=sz)]
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()

# Row-wise LFC of the flattened draws (no demultiply here), then the 50% HDI.
lfc = np.log2(scales_a) - np.log2(scales_b)
hdis = compute_hdi(lfc, credible_interval=0.5)

In [None]:
# hdis = np.array([np.percentile(lfc, q=25, axis=0), np.percentile(lfc, q=75, axis=0)]).T

In [None]:
# Use the unadjusted intervals as they are.
new_hdi = hdis

In [None]:
# Shrink each side of the interval towards the posterior mean by 1 / sqrt(sz).
# The lower bound uses the lower half-width and the upper bound the upper
# half-width; the two were previously swapped, which distorts asymmetric intervals.
lfc_center = lfc.mean(0)
shrink = 1.0 / np.sqrt(sz)
lower_halfwidth = shrink * (lfc_center - hdis[:, 0])
upper_halfwidth = shrink * (hdis[:, 1] - lfc_center)
new_hdi = np.array([lfc_center - lower_halfwidth, lfc_center + upper_halfwidth]).T

In [None]:
gene_idx = 182

# DE probability per gene: share of LFC samples with absolute value >= 0.5.
de_probas = (np.abs(lfc) >= 0.5).mean(0)
is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)

# Realised FDR: predicted DE but not truly DE, over all predicted DE
# (undefined when no gene is predicted DE).
true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
n_positives = is_significant_de.sum()
# Realised FNR: truly DE but not predicted, over all truly DE.
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

# LFC samples of one gene, its ground truth (vertical line) and its DE probability (title).
plt.hist(lfc[:, gene_idx])
plt.axvline(x=lfc_gt[gene_idx], color="black")
plt.title(de_probas[gene_idx])

In [None]:
# Asymmetric error bars reaching from the mean LFC to the bounds stored in new_hdi.
errs_y = graph_objects.scatter.ErrorY(
    array=new_hdi[:, 1] - lfc.mean(0), arrayminus=lfc.mean(0) - new_hdi[:, 0]
)

# Mean estimated LFC vs ground truth for every gene, with the dashed identity line.
fig = go.Figure()
fig.add_traces(
    [
        go.Scatter(x=lfc_gt, y=lfc.mean(0), error_y=errs_y, mode="markers"),
        go.Scatter(
            x=[-3, 3],
            y=[-3, 3],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
            showlegend=False,
        ),
    ]
)
fig.show()

# Fraction of genes whose ground truth falls inside new_hdi.
print(((new_hdi[:, 0] <= lfc_gt) & (lfc_gt <= new_hdi[:, 1])).mean())

**Idea adjusted experiment**

In [None]:
from tqdm import tqdm

In [None]:
# Coverage of percentile intervals for several sample sizes and levels,
# repeated 20 times per sample size. One record per (sample size, repetition,
# level) is appended to `res`. Reads `outputs`, `dataset` and `lfc_gt` from
# earlier cells.
res = []
for sz in tqdm([5, 10, 25, 50]):
    for exp in range(20):
        z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
        where_a = np.where(labels == 0)[0]
        where_b = np.where(labels == 1)[0]
        # Sampling is with replacement (np.random.choice default).
        where_a = where_a[np.random.choice(len(where_a), size=sz)]
        where_b = where_b[np.random.choice(len(where_b), size=sz)]
        scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
        scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
        lfc = np.log2(scales_a) - np.log2(scales_b)
        for level in [5, 10, 15, 20]:
            # NOTE(review): the percentile is rescaled by sqrt(level), not by
            # sqrt(sz) as in the "Adjusted" cell above, and is not monotone in
            # `level` -- confirm this is the intended adjustment.
            new_level = (50.0 - level) / np.sqrt(level)
            hdis = np.array(
                [
                    np.percentile(lfc, q=new_level, axis=0),
                    np.percentile(lfc, q=100 - new_level, axis=0),
                ]
            ).T
            # Fraction of genes whose ground-truth LFC falls inside the interval.
            score = ((hdis[:, 0] <= lfc_gt) & (lfc_gt <= hdis[:, 1])).mean()
            res.append(dict(level=level, sample_size=sz, experiment=exp, score=score))

#### Sum-related technique

Take log ratios of means as f

In [None]:
n_cells = 50

In [None]:
# LFC defined on population means: for each population, resample the draws of
# every selected cell independently, average the scales over cells, then take log2.
outputs = trainer_iaf.test_set.get_latents(n_samples=500, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# n_cells cells per label, drawn with replacement (np.random.choice default).
where_a = where_a[np.random.choice(len(where_a), size=n_cells)]
where_b = where_b[np.random.choice(len(where_b), size=n_cells)]
# Kept 3-D here (first axis: draws, second: selected cells, third: genes).
scales_a = scales[:, where_a, :]  # .reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :]  # .reshape((-1, dataset.nb_genes)).numpy()

NEW_N_SAMPLES = 3000
n_samples = scales_a.shape[0]

# For each cell, pick NEW_N_SAMPLES draw indices (with replacement) on its own,
# so the draws of different cells are combined independently.
new_scales_a = torch.zeros((NEW_N_SAMPLES, n_cells, n_genes))
for i in range(n_cells):
    idx_samp = np.random.choice(a=n_samples, size=NEW_N_SAMPLES)
    new_scales_a[:, i, :] = scales_a[idx_samp, i, :]
# Average over the cell axis -> (NEW_N_SAMPLES, n_genes).
new_scales_a = new_scales_a.mean(1)

# Same procedure for population b.
new_scales_b = torch.zeros((NEW_N_SAMPLES, n_cells, n_genes))
for i in range(n_cells):
    idx_samp = np.random.choice(a=n_samples, size=NEW_N_SAMPLES)
    new_scales_b[:, i, :] = scales_b[idx_samp, i, :]
new_scales_b = new_scales_b.mean(1)

log_scales_a = new_scales_a.log2()
log_scales_b = new_scales_b.log2()

In [None]:
# LFC draws of the population means, one column per gene.
lfc = (log_scales_a - log_scales_b).numpy()
# Per-gene 5th and 95th percentiles, i.e. a central 90% interval; one (low, high) row per gene.
hdis = np.array([np.percentile(lfc, q=5, axis=0), np.percentile(lfc, q=95, axis=0)]).T

In [None]:
# Ground-truth quantities for the test cells; `h` and `TEST_INDICES` come from earlier cells.
h_test = h[TEST_INDICES]
# h_a_gt = h_test[where_a]
# h_b_gt = h_test[where_b]

# Uses ALL test cells of each label, not only the n_cells sampled above.
where_a_tot = np.where(labels == 0)[0]
where_b_tot = np.where(labels == 1)[0]
# exp() is applied before averaging; presumably `h` is on the natural-log scale -- TODO confirm.
h_a_gt = h_test[where_a_tot].exp()
h_b_gt = h_test[where_b_tot].exp()

In [None]:
# Ground-truth LFC defined as the log2 ratio of the per-population means
# (mean over cells first, log2 afterwards).
lfc_gt_sum = h_a_gt.mean(0).log2() - h_b_gt.mean(0).log2()
lfc_gt_sum = lfc_gt_sum.numpy()
print(lfc_gt_sum.shape)

In [None]:
# Error bars run from hdis[:, 0] to hdis[:, 1] around the posterior-mean LFC.
# `go` (plotly.graph_objs, bound in the import cell and already used below) is
# used instead of the bare name `graph_objects`, which the import cell never binds.
lfc_mean = lfc.mean(0)
errs_y = go.scatter.ErrorY(
    array=hdis[:, 1] - lfc_mean, arrayminus=lfc_mean - hdis[:, 0]
)

fig = go.Figure()
fig.add_traces(
    [
        go.Scatter(x=lfc_gt_sum, y=lfc_mean, error_y=errs_y, mode="markers"),
        # Dashed identity line: points on it are estimated exactly.
        go.Scatter(
            x=[-3, 3],
            y=[-3, 3],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
            showlegend=False,
        ),
    ]
)
fig.show()

# Fraction of genes whose ground-truth LFC lies inside the interval.
print(((hdis[:, 0] <= lfc_gt_sum) & (lfc_gt_sum <= hdis[:, 1])).mean())

**Median**

Take Median LFC as f

==> Does not work

In [None]:
n_cells = 50

In [None]:
# Re-draws n_cells cells per label for the median-based estimate.
# NOTE: the get_latents call is commented out, so `labels` and `scales` are
# whatever the previous section left in the kernel.
# outputs = trainer.test_set.get_latents(
#     n_samples=500, other=True, device="cpu"
# )
# z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# Sampling is with replacement (np.random.choice default).
where_a = where_a[np.random.choice(len(where_a), size=n_cells)]
where_b = where_b[np.random.choice(len(where_b), size=n_cells)]
scales_a = scales[:, where_a, :]  # .reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :]  # .reshape((-1, dataset.nb_genes)).numpy()

# log2 of every draw, kept 3-D (draws, selected cells, genes).
log_scales_a = scales_a.log2()
log_scales_b = scales_b.log2()

In [None]:
from tqdm import tqdm

# Number of LFC draws generated for every (cell of a, cell of b) pair.
SZ = 1200

# Take the number of posterior draws and genes from the tensors themselves
# instead of hard-coding 500 and 1000, so the cell keeps working when
# n_samples or the gene count changes.
n_draws_a, n_draws_b = log_scales_a.shape[0], log_scales_b.shape[0]
n_genes_lfc = log_scales_a.shape[-1]

# WARNING: allocates n_cells * n_cells * SZ * n_genes_lfc floats.
lfc_estimate = torch.zeros((n_cells, n_cells, SZ, n_genes_lfc))
for i in tqdm(range(n_cells)):
    for j in range(n_cells):
        # Independent draw indices (with replacement) for each side of the pair.
        idx_a = np.random.choice(n_draws_a, size=SZ)
        idx_b = np.random.choice(n_draws_b, size=SZ)
        lfc_estimate[i, j, :] = log_scales_a[idx_a, i, :] - log_scales_b[idx_b, j, :]
#         lfc_estimate[i, j, :] = scales_a[idx_a, i, :] - scales_b[idx_b, j, :]

# One leading row per cell pair: (n_cells * n_cells, SZ, n_genes_lfc).
lfc_estimate = lfc_estimate.reshape((n_cells * n_cells, SZ, n_genes_lfc)).numpy()

In [None]:
# Median over the cell-pair axis (axis 0) -> one row per draw, one column per gene.
# lfc = np.mean(lfc_estimate, axis=0)
lfc = np.median(lfc_estimate, axis=0)
# Per-gene 10th and 90th percentiles, i.e. a central 80% interval.
hdis = np.array([np.percentile(lfc, q=10, axis=0), np.percentile(lfc, q=90, axis=0)]).T

In [None]:
# Alternative to the median: pool every pair and every draw into one sample per gene.
# NOTE: overwrites the `lfc` / `hdis` computed by the previous cell.
lfc = lfc_estimate.reshape((-1, 1000))
# Per-gene quartiles, i.e. a central 50% interval.
hdis = np.array([np.percentile(lfc, q=25, axis=0), np.percentile(lfc, q=75, axis=0)]).T

In [None]:
hdis

In [None]:
# Error bars run from hdis[:, 0] to hdis[:, 1] around the mean LFC.
# `go` (plotly.graph_objs, bound in the import cell and already used below) is
# used instead of the bare name `graph_objects`, which the import cell never binds.
lfc_mean = lfc.mean(0)
errs_y = go.scatter.ErrorY(
    array=hdis[:, 1] - lfc_mean, arrayminus=lfc_mean - hdis[:, 0]
)

fig = go.Figure()
fig.add_traces(
    [
        go.Scatter(x=lfc_gt, y=lfc_mean, error_y=errs_y, mode="markers"),
        # Dashed identity line: points on it are estimated exactly.
        go.Scatter(
            x=[-3, 3],
            y=[-3, 3],
            mode="lines",
            line=dict(color="black", width=4, dash="dash"),
            showlegend=False,
        ),
    ]
)
fig.show()

# Fraction of genes whose ground-truth LFC lies inside the interval.
print(((hdis[:, 0] <= lfc_gt) & (lfc_gt <= hdis[:, 1])).mean())

### MMD

In [None]:
def subsampled_posterior(post, indices):
    """Point ``post``'s data-loader sampler at ``indices`` and return ``post``.

    The posterior is modified in place: the returned object is the very same
    ``post``, so every other reference to it sees the new indices as well.
    """
    sampler = post.data_loader.sampler
    sampler.indices = indices
    return post

In [None]:
# Point both test posteriors back at the full set of test indices
# (subsampled_posterior changes these samplers in place).
trainer_iaf.test_set.data_loader.sampler.indices = TEST_INDICES
trainer_mf.test_set.data_loader.sampler.indices = TEST_INDICES

In [None]:
# Select 75 test cells per label for the MF / IAF comparison.
# `test_post` is the same object as trainer_mf.test_set (subsampled_posterior
# returns its argument); the call matters for its in-place effect on the sampler.
test_post = subsampled_posterior(trainer_mf.test_set, TEST_INDICES)
# Few draws are enough here: only the labels are used.
outputs = trainer_mf.test_set.get_latents(n_samples=10, other=True, device="cpu")
labels = outputs["label"]
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# Positions within the test set, drawn with replacement (np.random.choice default).
idx_a = np.random.choice(where_a, size=75)
idx_b = np.random.choice(where_b, size=75)

In [None]:
# Mean-field model: scale draws for the selected cells of each population.
# post_a and post_b are both aliases of trainer_mf.test_set, because
# subsampled_posterior edits the sampler in place and returns its argument.
post_a = subsampled_posterior(trainer_mf.test_set, TEST_INDICES[idx_a])
scales_a = post_a.get_latents(n_samples=1000, other=True, device="cpu")["scale"]
scales_a = scales_a.reshape((-1, dataset.nb_genes)).numpy()

post_b = subsampled_posterior(trainer_mf.test_set, TEST_INDICES[idx_b])
scales_b = post_b.get_latents(n_samples=1000, other=True, device="cpu")["scale"]
scales_b = scales_b.reshape((-1, dataset.nb_genes)).numpy()

# Restore the full test set. Otherwise the shared posterior stays restricted to
# the idx_b cells and later trainer_mf.test_set.get_latents(...) calls silently
# run on that subset.
subsampled_posterior(trainer_mf.test_set, TEST_INDICES)

# demultiply is imported from scvi.utils; see it for the pairing it performs.
scales_a, scales_b = demultiply(scales_a, scales_b, 3)
lfc_mf = np.log2(scales_a) - np.log2(scales_b)

# IAF model: scale draws for the selected cells of each population.
# post_a and post_b are both aliases of trainer_iaf.test_set, because
# subsampled_posterior edits the sampler in place and returns its argument.
post_a = subsampled_posterior(trainer_iaf.test_set, TEST_INDICES[idx_a])
scales_a = post_a.get_latents(n_samples=1000, other=True, device="cpu")["scale"]
scales_a = scales_a.reshape((-1, dataset.nb_genes)).numpy()

post_b = subsampled_posterior(trainer_iaf.test_set, TEST_INDICES[idx_b])
scales_b = post_b.get_latents(n_samples=1000, other=True, device="cpu")["scale"]
scales_b = scales_b.reshape((-1, dataset.nb_genes)).numpy()

# Restore the full test set. Otherwise the shared posterior stays restricted to
# the idx_b cells and later trainer_iaf.test_set.get_latents(...) calls silently
# run on that subset.
subsampled_posterior(trainer_iaf.test_set, TEST_INDICES)

# demultiply is imported from scvi.utils; see it for the pairing it performs.
scales_a, scales_b = demultiply(scales_a, scales_b, 3)
lfc_iaf = np.log2(scales_a) - np.log2(scales_b)

# lfc_orig_gt = lfc_orig.numpy()
# lfc_orig_gt = lfc_orig_gt[:5000]
n_cells_a = len(idx_a)
n_cells_b = len(idx_b)
# exp() then log2() converts `h` to base-2 logs.
h_a_gt = h[TEST_INDICES][idx_a].exp().log2()
h_b_gt = h[TEST_INDICES][idx_b].exp().log2()
# Ground-truth LFC = average over all (i, j) cell pairs of h_a_gt[i] - h_b_gt[j].
# That average equals mean(h_a_gt) - mean(h_b_gt), so the explicit
# n_cells_a x n_cells_b Python loop and its hard-coded 1000-gene buffer are
# not needed.
lfc_orig_gt = h_a_gt.mean(0) - h_b_gt.mean(0)

In [None]:
lfc_orig_gt.shape

MMD protocol using an RBF kernel

In [None]:
# from sklearn.gaussian_process.kernels import RBF
# from sklearn.metrics.pairwise import linear_kernel
# from tqdm import tqdm

# def compute_mmd(k, X, Y):
#     kxx = k(X, X)
#     kxy = k(X, Y)
#     kyy = k(Y, Y)
#     return (kxx.mean() + kyy.mean() - 2.0*kxy.mean())**0.5

# # k = RBF()
# k = linear_kernel

# mmds_iaf = []
# for gene in tqdm(range(n_genes)):
#     idxa = np.random.permutation(len(lfc_iaf))[:400]
#     idxb = np.random.permutation(len(lfc_orig_gt))[:400]
#     x = lfc_iaf[idxa, [gene]].reshape((-1, 1))
#     y = lfc_orig_gt[idxb, [gene]].reshape((-1, 1))
#     mmds_iaf.append(compute_mmd(k, x, y))

# mmds_mf = []
# for gene in tqdm(range(n_genes)):
#     idxa = np.random.permutation(len(lfc_mf))[:400]
#     idxb = np.random.permutation(len(lfc_orig_gt))[:400]
#     x = lfc_mf[idxa, [gene]].reshape((-1, 1))
#     y = lfc_orig_gt[idxb, [gene]].reshape((-1, 1))
#     mmds_mf.append(compute_mmd(k, x, y))

# plt.hist(mmds_iaf, label="IAF", alpha=0.5, bins=100)
# plt.hist(mmds_mf, label="MF", alpha=0.5, bins=100)
# plt.legend()
# plt.show()

# print(has_lower_mean(mmds_iaf, mmds_mf))

# has_lower_mean(mmds_mf, mmds_iaf)

Look at distributions

In [None]:
# One figure per randomly picked gene (15 picks, with replacement, so a gene can
# repeat): MF and IAF LFC histograms, with a vertical line at the ground-truth LFC.
for i in np.random.choice(n_genes, size=15):
#     plt.hist(lfc_orig[:, i], density=True, alpha=0.5, label="GT")
    plt.axvline(x=lfc_orig_gt[i])
    plt.hist(lfc_mf[:, i], density=True, alpha=0.5, label="MF")
    plt.hist(lfc_iaf[:, i], density=True, alpha=0.5, label="IAF")
    plt.legend()
    plt.show()

In [None]:
CREDIBLE_LEVELS = [5, 10, 15, 20]


def get_coverage(lfc_pred, lfc_ground_truth, levels=None):
    """Calibration score of per-gene central credible intervals.

    For every tail level ``q`` (in percent) the interval of a gene goes from the
    ``q``-th to the ``(100 - q)``-th percentile of that gene's LFC draws, so it
    nominally holds ``100 - 2q`` percent of the posterior mass. The score is the
    mean over levels of ``0.5 * (nominal coverage - observed coverage) ** 2``.

    Fixes over the previous version:
    * percentiles are taken per gene (``axis=0``); before, a single interval was
      computed from all genes pooled together;
    * observed coverage is compared with ``1 - 2q/100`` rather than ``2q/100``.

    Parameters
    ----------
    lfc_pred : array-like or tensor, shape (n_draws, n_genes)
        LFC draws, one column per gene.
    lfc_ground_truth : array-like or tensor, shape (n_genes,)
        Reference LFC of every gene.
    levels : iterable of float, optional
        Tail percentages ``q``; defaults to ``CREDIBLE_LEVELS``.

    Returns
    -------
    pandas.Series
        Single entry ``calibration_score`` (lower is better).
    """
    if levels is None:
        levels = CREDIBLE_LEVELS
    # Accept torch tensors without importing torch here.
    if hasattr(lfc_pred, "detach"):
        lfc_pred = lfc_pred.detach().cpu().numpy()
    if hasattr(lfc_ground_truth, "detach"):
        lfc_ground_truth = lfc_ground_truth.detach().cpu().numpy()
    lfc_pred = np.asarray(lfc_pred)
    lfc_ground_truth = np.asarray(lfc_ground_truth)

    errs = []
    for q in levels:
        hdi_low, hdi_high = np.percentile(lfc_pred, q=[q, 100 - q], axis=0)
        gene_is_covered = (lfc_ground_truth >= hdi_low) & (lfc_ground_truth <= hdi_high)
        mean_cov = gene_is_covered.mean()
        nominal_cov = 1.0 - 2 * q / 100.0
        # Nominal coverage (in percent) next to the observed coverage.
        print(100 - 2 * q, mean_cov)
        errs.append(((nominal_cov - mean_cov) ** 2.0) * 0.5)
    return pd.Series(dict(calibration_score=np.mean(errs)))

In [None]:
get_coverage(lfc_mf, lfc_orig_gt)

In [None]:
get_coverage(lfc_iaf, lfc_orig_gt)

## FDR inconsistency

### Trying to use a local definition for FDR 

In [None]:
# Select `sz` test cells per label; the same idx_a / idx_b are reused by the
# IAF, MF and ground-truth cells below.
sz = 100
# Only the labels are needed here, hence the small n_samples.
outputs = trainer.test_set.get_latents(n_samples=2, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# Drawn with replacement (np.random.choice default).
idx_a = np.random.choice(where_a, size=sz)
idx_b = np.random.choice(where_b, size=sz)

In [None]:
# IAF

In [None]:
# LFC draws from `trainer` (the preceding cell labels this part "IAF").
outputs = trainer.test_set.get_latents(n_samples=500, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]

# Keep the gene axis and flatten every other axis into a single axis of draws.
scales_a = scales[:, idx_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, idx_b, :].reshape((-1, dataset.nb_genes)).numpy()
# demultiply is imported from scvi.utils; see it for the pairing it performs.
scales_a, scales_b = demultiply(scales_a, scales_b, 3)
lfc = np.log2(scales_a) - np.log2(scales_b)

In [None]:
# MF
# Same computation as the cell above, with the mean-field trainer.
# NOTE(review): idx_a / idx_b were derived from trainer.test_set labels; this
# assumes trainer_mf.test_set iterates over the same cells in the same order.
outputs = trainer_mf.test_set.get_latents(n_samples=500, other=True, device="cpu")
z, labels, scales = outputs["z"], outputs["label"], outputs["scale"]

scales_a = scales[:, idx_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, idx_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_a, scales_b = demultiply(scales_a, scales_b, 3)
lfc_mf = np.log2(scales_a) - np.log2(scales_b)

In [None]:
# Per-gene fraction of draws whose absolute LFC reaches 0.5.
probas_mf = (np.abs(lfc_mf) >= 0.5).mean(0)
probas_iaf = (np.abs(lfc) >= 0.5).mean(0)

In [None]:
# GT
h_a = h[TEST_INDICES][idx_a]
h_b = h[TEST_INDICES][idx_b]
# Local ground-truth LFC: average of the pairwise differences between the
# selected cells, which equals the difference of the two per-population means
# (so no sz x sz Python loop or hard-coded 1000-gene buffer is needed).
# The value is expressed as a - b and converted from natural log to log2, to
# match the predicted LFCs (log2(scales_a) - log2(scales_b)) and the
# `.exp().log2()` conversion applied to `h` in the MMD section; the previous
# version computed b - a in natural-log units.
lfcs_gt_loc = ((h_a.mean(0) - h_b.mean(0)) / float(np.log(2.0))).numpy()
# is_sig_gt_loc = np.abs(lfcs_gt_loc) >= 0.5
# The global ground-truth mask is what the FDR curves below actually use.
is_sig_gt_loc = is_significant_de

In [None]:
def get_fdr(probas):
    """Expected FDR of every top-k gene selection, ranked by DE probability.

    Parameters
    ----------
    probas : array-like, shape (n_genes,)
        Per-gene probability of being differentially expressed.

    Returns
    -------
    cumulative_fdr : numpy.ndarray, shape (n_genes,)
        Entry ``k`` is the mean of ``1 - p`` over the ``k + 1`` most probable
        genes.
    sorted_genes : numpy.ndarray, shape (n_genes,)
        Gene indices ordered by decreasing probability.
    """
    probas = np.asarray(probas)
    sorted_genes = np.argsort(-probas)
    sorted_pgs = probas[sorted_genes]
    cumulative_fdr = (1.0 - sorted_pgs).cumsum() / (1.0 + np.arange(len(sorted_pgs)))
    return cumulative_fdr, sorted_genes


def get_fdr_gt(my_sorted_genes, is_sig_gt_loc):
    """True FDR of every top-k gene selection.

    Parameters
    ----------
    my_sorted_genes : array-like of int, shape (n_genes,)
        Gene indices ranked from most to least likely DE, each gene appearing
        once (as returned by ``get_fdr``).
    is_sig_gt_loc : array-like, shape (n_genes,)
        Ground-truth DE indicator per gene; any truthy/falsy encoding works.

    Returns
    -------
    numpy.ndarray, shape (len(my_sorted_genes),)
        Entry ``k`` is the fraction of non-DE genes among the ``k + 1``
        top-ranked genes.

    Notes
    -----
    One cumulative sum replaces the former per-``k`` rebuild of the prediction
    vector (quadratic in the number of genes), and the length now comes from
    ``my_sorted_genes`` rather than from the global ``n_genes``.
    """
    my_sorted_genes = np.asarray(my_sorted_genes)
    # Cast to bool first: `~` on a 0/1 integer array would give -1/-2, not a mask.
    is_false_hit = ~np.asarray(is_sig_gt_loc).astype(bool)
    n_false_hits = np.cumsum(is_false_hit[my_sorted_genes])
    return n_false_hits / (1.0 + np.arange(len(my_sorted_genes)))

In [None]:
# Expected ("PRED") versus true ("GT") FDR along the ranking, IAF probabilities.
cumulative_fdr, sorted_genes = get_fdr(probas_iaf)
fdr_gt = get_fdr_gt(sorted_genes, is_sig_gt_loc)
plt.plot(cumulative_fdr, label="PRED")
plt.plot(fdr_gt, label="GT")
plt.legend()
# Largest selection whose expected FDR stays within Q0 (same target as
# predict_de_genes). If no selection qualifies, report it explicitly: the old
# `d = count - 1` became -1 and silently read the LAST entry of fdr_gt.
n_selected = int((cumulative_fdr <= Q0).sum())
if n_selected > 0:
    d = n_selected - 1
    print(fdr_gt[d])
else:
    print("no selection has expected FDR <= Q0")

In [None]:
# Expected ("PRED") versus true ("GT") FDR along the ranking, MF probabilities.
cumulative_fdr, sorted_genes = get_fdr(probas_mf)
fdr_gt = get_fdr_gt(sorted_genes, is_sig_gt_loc)
plt.plot(cumulative_fdr, label="PRED")
plt.plot(fdr_gt, label="GT")
plt.legend()
# Largest selection whose expected FDR stays within Q0 (same target as
# predict_de_genes). If no selection qualifies, report it explicitly: the old
# `d = count - 1` became -1 and silently read the LAST entry of fdr_gt.
n_selected = int((cumulative_fdr <= Q0).sum())
if n_selected > 0:
    d = n_selected - 1
    print(fdr_gt[d])
else:
    print("no selection has expected FDR <= Q0")

In [None]:
# Same IAF curves as two cells above, re-plotted without printing the FDR at the cut-off.
cumulative_fdr, sorted_genes = get_fdr(probas_iaf)
fdr_gt = get_fdr_gt(sorted_genes, is_sig_gt_loc)
plt.plot(cumulative_fdr, label="PRED")
plt.plot(fdr_gt, label="GT")
plt.legend()

### 100

In [None]:
scales_a.shape

In [None]:
# Option 1
# z, labels, scales = trainer.test_set.get_latents(
#     n_samples=50, other=True, device="cpu"
# )

# labels = labels.squeeze()
# where_a = np.where(labels == 0)[0]
# where_b = np.where(labels == 1)[0]
# where_a = where_a[np.random.choice(len(where_a), size=100)]
# where_b = where_b[np.random.choice(len(where_b), size=100)]
# scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
# lfc = np.log2(scales_ab) - np.log2(scales_bb)

# de_probas = (np.abs(lfc) >= 0.5).mean(0)
# is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)
# alpha = is_pred_de[is_pred_de].min()

# true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# n_positives = is_significant_de.sum()
# true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
# print(true_fdr, true_fnr)

# plt.hist(lfc[:, gene_idx])
# plt.axvline(x=true_lfc, color="black")
# plt.title(de_probas[gene_idx])

In [None]:
probas_thresh

In [None]:
# Every other live get_latents call in this notebook consumes the result as a
# dict keyed by "z", "label" and "scale"; unpacking a dict as a tuple would
# bind the key strings (or fail), so read the three entries explicitly.
# NOTE(review): confirm against the get_latents version in use.
latents = trainer.test_set.get_latents(n_samples=500, other=True, device="cpu")
z, labels, scales = latents["z"], latents["label"], latents["scale"]

In [None]:
# Option 2
# 100 cells per label (drawn with replacement); DE is called when the DE
# probability reaches 0.5. Reads labels / scales from the previous cell and
# is_significant_de, gene_idx, true_lfc from earlier cells.
labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
where_a = where_a[np.random.choice(len(where_a), size=100)]
where_b = where_b[np.random.choice(len(where_b), size=100)]
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
lfc = np.log2(scales_ab) - np.log2(scales_bb)

# Per-gene mean and standard deviation of the indicator |LFC| >= 0.5.
de_probas = (np.abs(lfc) >= 0.5).mean(0)
de_probas_std = (np.abs(lfc) >= 0.5).std(0)


# is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)
is_pred_de = de_probas >= 0.5
# probas_thresh = -np.sort(-de_probas)[215]
# is_pred_de = de_probas >= probas_thresh

# Smallest DE probability among the called genes. The previous expression,
# is_pred_de[is_pred_de].min(), indexed the mask with itself and was always True.
alpha = de_probas[np.asarray(is_pred_de, dtype=bool)].min()

# FDR = non-DE genes among the calls / calls; FNR = missed DE genes / DE genes.
true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas[gene_idx])

Results for the decision rule `de_probas >= 0.5` (each line below is the printed FDR followed by the FNR)

**100 cells**
0.23706896551724138 0.019390581717451522

0.2886178861788618 0.030470914127423823



**5 cells**
0.42448979591836733 0.2188365650969529

0.12435233160621761 0.06371191135734072


Decision making based on credible intervals

In [None]:
# Per-gene central 95% interval of the LFC draws.
low, high = np.percentile(lfc, q=[2.5, 97.5], axis=0)

# Call a gene DE when both interval ends have the same sign and are at least
# 0.5 away from zero, i.e. the whole interval lies outside (-0.5, 0.5).
is_pred_de = (np.abs(low) >= 0.5) & (np.abs(high) >= 0.5) & (low * high >= 0.0)

true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# Computed here (as in the 5-cell version of this cell) instead of relying on
# an `n_positives` left in the kernel by another cell.
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

### 5

In [None]:
# # Option 1
# z, labels, scales = trainer.test_set.get_latents(
#     n_samples=50, other=True, device="cpu"
# )

# labels = labels.squeeze()
# where_a = np.where(labels == 0)[0]
# where_b = np.where(labels == 1)[0]
# where_a = where_a[np.random.choice(len(where_a), size=5)]
# where_b = where_b[np.random.choice(len(where_b), size=5)]
# scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
# lfc = np.log2(scales_ab) - np.log2(scales_bb)
# de_probas_small = (np.abs(lfc) >= 0.5).mean(0)


# is_pred_de = predict_de_genes(de_probas_small, desired_fdr=Q0)
# alpha = is_pred_de[is_pred_de].min()
# true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# n_positives = is_significant_de.sum()
# true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
# print(true_fdr, true_fnr)

# plt.hist(lfc[:, gene_idx])
# plt.axvline(x=true_lfc, color="black")
# plt.title(de_probas_small[gene_idx])

In [None]:
# Option 1
# 5 cells per label (drawn with replacement); DE is called when the DE
# probability reaches 0.5.
# NOTE: the get_latents call is commented out, so `labels` and `scales` are the
# ones left in the kernel by an earlier cell.
# z, labels, scales = trainer.test_set.get_latents(
#     n_samples=1000, other=True, device="cpu"
# )

labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
where_a = where_a[np.random.choice(len(where_a), size=5)]
where_b = where_b[np.random.choice(len(where_b), size=5)]
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
lfc = np.log2(scales_ab) - np.log2(scales_bb)
# Per-gene mean and standard deviation of the indicator |LFC| >= 0.5.
de_probas_small = (np.abs(lfc) >= 0.5).mean(0)
de_probas_small_std = (np.abs(lfc) >= 0.5).std(0)

# is_pred_de_small = predict_de_genes(de_probas_small, desired_fdr=Q0)
is_pred_de_small = de_probas_small >= 0.5
# Smallest DE probability among the called genes. The previous expression
# indexed the mask with itself and was therefore always True.
alpha = de_probas_small[np.asarray(is_pred_de_small, dtype=bool)].min()
true_fdr = ((1.0 - is_significant_de) * is_pred_de_small).sum() / is_pred_de_small.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_small)).sum() / n_positives
print(true_fdr, true_fnr)

plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas_small[gene_idx])

In [None]:
# Per-gene central 95% interval of the LFC draws from the cell above.
low, high = np.percentile(lfc, q=[2.5, 97.5], axis=0)

# Call a gene DE when both interval ends have the same sign and are at least
# 0.5 away from zero, i.e. the whole interval lies outside (-0.5, 0.5).
# NOTE: this overwrites `is_pred_de` from the 100-cell section.
is_pred_de = (np.abs(low) >= 0.5) & (np.abs(high) >= 0.5) & (low * high >= 0.0)

true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

### 1

In [None]:
# Option 1
# One cell per label; genes are called with predict_de_genes at target FDR Q0.
# Every other live get_latents call in this notebook consumes the result as a
# dict keyed by "z", "label" and "scale"; unpacking a dict as a tuple would
# bind the key strings (or fail), so read the three entries explicitly.
# NOTE(review): confirm against the get_latents version in use.
latents = trainer.test_set.get_latents(n_samples=2000, other=True, device="cpu")
z, labels, scales = latents["z"], latents["label"], latents["scale"]

labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
where_a = where_a[np.random.choice(len(where_a), size=1)]
where_b = where_b[np.random.choice(len(where_b), size=1)]
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=10)
lfc = np.log2(scales_ab) - np.log2(scales_bb)
# NOTE: overwrites the 5-cell de_probas_small / de_probas_small_std.
de_probas_small = (np.abs(lfc) >= 0.5).mean(0)
de_probas_small_std = (np.abs(lfc) >= 0.5).std(0)

is_pred_de_small = predict_de_genes(de_probas_small, desired_fdr=Q0)
# is_pred_de_small = de_probas_small >= 0.5
# Smallest DE probability among the called genes. The previous expression
# indexed the prediction mask with itself instead of the probabilities.
alpha = de_probas_small[np.asarray(is_pred_de_small, dtype=bool)].min()
true_fdr = ((1.0 - is_significant_de) * is_pred_de_small).sum() / is_pred_de_small.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_small)).sum() / n_positives
print(true_fdr, true_fnr)

plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas_small[gene_idx])

### Analysis

In [None]:
print(is_pred_de.sum())
print(is_pred_de_small.sum())

In [None]:
# Overlaid histograms of the per-gene DE probabilities.
# NOTE(review): when the notebook is run top to bottom, `de_probas_small` was
# last assigned in the single-cell ("### 1") section, so the "5 cells" label is
# only right if that section was skipped -- confirm before reading the figure.
plt.title("Empirical distribution of predicted probabilities of being DE")

plt.hist(de_probas_small, alpha=0.25, label="5 cells")
plt.hist(de_probas, alpha=0.25, label="100 cells")

plt.legend()

In [None]:
plt.hist(de_probas_std, alpha=0.25)
plt.hist(de_probas_small_std, alpha=0.25)

### Voting

In [None]:
n_votes = 500

In [None]:
from tqdm import tqdm

In [None]:
# Voting: repeat the single-pair DE call n_votes times on random (cell of a,
# cell of b) pairs taken from a fixed pool of 100 cells per label, and store
# every call so they can be aggregated afterwards.
# Every other live get_latents call in this notebook consumes the result as a
# dict keyed by "z", "label" and "scale"; unpacking a dict as a tuple would
# bind the key strings (or fail), so read the three entries explicitly.
# NOTE(review): confirm against the get_latents version in use.
latents = trainer.test_set.get_latents(n_samples=2000, other=True, device="cpu")
z, labels, scales = latents["z"], latents["label"], latents["scale"]

labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# Pool of 100 cells per label, drawn with replacement.
where_a = where_a[np.random.choice(len(where_a), size=100)]
where_b = where_b[np.random.choice(len(where_b), size=100)]
scales_a_all = scales[:, where_a, :].numpy()
scales_b_all = scales[:, where_b, :].numpy()

# One row of 0/1 calls per vote.
all_votes = np.zeros((n_votes, n_genes))
for vote in tqdm(range(n_votes)):
    # Position of one cell inside each pool (where_a / where_b are re-bound here).
    where_a = np.random.choice(100, size=1)
    where_b = np.random.choice(100, size=1)
    scales_a = scales_a_all[:, where_a, :].reshape((-1, n_genes))
    scales_b = scales_b_all[:, where_b, :].reshape((-1, n_genes))

    scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
    lfc = np.log2(scales_ab) - np.log2(scales_bb)

    de_probas = (np.abs(lfc) >= 0.5).mean(0)
    de_probas_std = (np.abs(lfc) >= 0.5).std(0)

    is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)

    all_votes[vote, :] = is_pred_de

#     de_probas_small_std = (np.abs(lfc) >= 0.5).std(0)
#     alpha = is_pred_de[is_pred_de].min()

#     true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
#     n_positives = is_significant_de.sum()
#     true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
#     print(true_fdr, true_fnr)

# plt.hist(lfc[:, gene_idx])
# plt.axvline(x=true_lfc, color="black")
# plt.title(de_probas[gene_idx])

In [None]:
# Majority vote: a gene is called DE when at least half of the votes called it.
is_pred_de_vote = all_votes.mean(0) >= 0.5

# FDR = non-DE genes among the calls / calls; FNR = missed DE genes / DE genes.
true_fdr = ((1.0 - is_significant_de) * is_pred_de_vote).sum() / is_pred_de_vote.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_vote)).sum() / n_positives
print(true_fdr, true_fnr)

In [None]:
plt.hist(all_votes.mean(0))

### Credible intervals

In [None]:
res_iaf.keys()

In [None]:
def fnr_fdr(my_df):
    """Return the true FDR and FNR of the DE calls held in ``my_df.is_pred_de``.

    The calls are compared with the notebook-level ground-truth mask
    ``is_significant_de``. The result is a ``pandas.Series`` with entries
    ``fdr`` (non-DE genes among the calls / number of calls) and ``fnr``
    (DE genes that were not called / number of DE genes).
    """
    calls = my_df.is_pred_de
    n_false_discoveries = ((1.0 - is_significant_de) * calls).sum()
    n_missed = (is_significant_de * (1.0 - calls)).sum()
    return pd.Series(
        {
            "fdr": n_false_discoveries / calls.sum(),
            "fnr": n_missed / is_significant_de.sum(),
        }
    )


# Decision rule on the hdi64 columns of `res_iaf`: a gene is called DE when
# both interval ends have the same sign and are at least 0.5 away from zero.
# FDR / FNR are computed per (training, algorithm, sample_size, experiment)
# group, then summarised as mean and std for every sample size.
(
    res_iaf.assign(
        is_pred_de=lambda x: (x.hdi64_low.abs() >= 0.5)
        & (x.hdi64_high.abs() >= 0.5)
        & (x.hdi64_low * x.hdi64_high >= 0.0)
    )
    .groupby(["training", "algorithm", "sample_size", "experiment"])
    .apply(fnr_fdr)
    .reset_index()
    .groupby(["sample_size"])
    .agg(dict(fdr=["mean", "std"], fnr=["mean", "std"]))
)