In [None]:
import numpy as np
import os
import torch
import pickle
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import plotly as py
import pandas as pd
from chart_studio.plotly import plot, iplot

# from plotly.offline import init_notebook_mode, iplot
from tqdm import tqdm_notebook

from scvi.dataset import PowSimSynthetic, LatentLogPoissonDataset, SignedGamma, GeneExpressionDataset
from scvi.models import VAE, IAVAE
from scvi.inference import UnsupervisedTrainer
from scvi.utils import demultiply, make_dir_if_necessary, predict_de_genes, save_fig, load_pickle, save_pickle, has_lower_mean
from scvi_utils import estimate_de_proba, estimate_lfc_density, estimate_lfc_mean, multi_train_estimates
from R_interop import all_predictions, all_de_predictions


N_EPOCHS = 200
DELTA = 0.5
SIZES = [5, 10, 20, 30, 50, 100]
SIZE = 100
N_SIZES = len(SIZES)
DO_CLOUD = True
Q0 = 5e-2
N_TRAININGS = 5
N_PICKS = 10
n_genes = 1000

np.random.seed(42)
torch.manual_seed(42)

PATH_TO_SCRIPTS = "/home/ubuntu/conquer_comparison/scripts"
DIR_PATH = 'lfc_estimates/lognormal'
DF_PATH = "/home/ubuntu/scVI/scvi/dataset/kolodziejczk_param.csv"
make_dir_if_necessary(DIR_PATH)

# Generate Dataset

In [None]:
import chart_studio.plotly as py

# Credentials are read from the environment so that no secret is stored in the notebook.
# Export PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel.
# NOTE(review): the API key that used to be hard-coded here has been exposed and should be revoked.
py.sign_in(os.environ["PLOTLY_USERNAME"], os.environ["PLOTLY_API_KEY"])

## Constructing mu and sigma

In [None]:
selected = pd.read_csv(DF_PATH).sample(n_genes)
means = selected["means"].values

means[means >= 1000] = 1000
go.Figure([go.Histogram(x=means)]).show()

# Sample a (n_genes, 2) matrix of per-population log fold changes.
lfc_sampler = SignedGamma(dim=2, proba_pos=0.5)
lfcs = lfc_sampler.sample(n_genes).numpy()
# Draw the non-DE genes WITHOUT replacement: with the default (replace=True) the same gene
# can be picked several times, so fewer than 300 distinct genes would end up with a zero LFC.
non_de_genes = np.random.choice(n_genes, size=300, replace=False)
lfcs[non_de_genes, :] = 0.0
go.Figure([go.Histogram(x=lfcs[:, 0])]).show()

log2_mu0 = lfcs[:, 0] + np.log2(means)
log2_mu1 = lfcs[:, 1] + np.log2(means)

loge_mu0 = log2_mu0 / np.log2(np.e)
loge_mu1 = log2_mu1 / np.log2(np.e)

In [None]:
# DEMO
a = (2.0 * np.random.random(size=(100, 1)) - 1).astype(float)
sigma = 2.0*a.dot(a.T) + (1.0 + 0.5*(2.0*np.random.random(100)-1.0)) * np.eye(100)

fig = go.Figure(data=go.Heatmap(z=sigma))
fig.show()

In [None]:
# Covariance of population 0: a rank-one term 2 * a a^T (a uniform in [-1, 1)) plus a
# diagonal matrix whose entries are uniform in [0.25, 0.75), the whole being scaled by 0.1.
# The vector * np.eye(...) product broadcasts to a diagonal matrix.
a = (2.0 * np.random.random(size=(n_genes, 1)) - 1).astype(float)
sigma = 2.0*a.dot(a.T) + 0.5*(1.0 + 0.5*(2.0*np.random.random(n_genes)-1.0)) * np.eye(n_genes)
sigma0 = 0.1*sigma

# Covariance of population 1: same construction with an independent draw of `a` and of the diagonal.
a = (2.0 * np.random.random(size=(n_genes, 1)) - 1).astype(float)
sigma = 2.0*a.dot(a.T) + 0.5*(1.0 + 0.5*(2.0*np.random.random(n_genes)-1.0)) * np.eye(n_genes)
sigma1 = 0.1*sigma

# sigma1 = sigma

# u, s, vh = np.linalg.svd(sigma)
# perturbations = s.min() + (s.max() - s.min()) * np.random.random(len(s))
# sigma1 = u @ (np.diag(perturbations)) @ vh

In [None]:
# Draw 5000 latent log-rate vectors per population from a multivariate normal
# centred on the (natural-log) gene means of that population.
h0 = torch.distributions.MultivariateNormal(
    loc=torch.tensor(loge_mu0), covariance_matrix=torch.tensor(sigma0)
).sample((5000,))
h1 = torch.distributions.MultivariateNormal(
    loc=torch.tensor(loge_mu1), covariance_matrix=torch.tensor(sigma1)
).sample((5000,))

# Population 0 cells come first, population 1 cells second.
h = torch.cat([h0, h1])

# Observed counts are Poisson with rate exp(h).
x_obs = torch.distributions.Poisson(rate=h.exp()).sample()
# is_zi = np.random.random(x_obs.shape) >= 0.9
# Count-dependent dropout: an entry with count x is zeroed with probability exp(-1.4 * x).
is_zi = np.random.random(x_obs.shape) <= np.exp(-1.4 * x_obs.numpy())
x_obs[is_zi] = 0.0
# Labels follow the concatenation order above: first 5000 rows -> 0, last 5000 rows -> 1.
labels = torch.zeros((10000, 1))
labels[5000:] = 1

# Drop cells whose total count is zero, keeping the labels aligned with the remaining cells.
not_null_cell = (x_obs.sum(1) != 0)
x_obs = x_obs[not_null_cell]
labels = labels[not_null_cell]

trace1 = go.Histogram(x=x_obs.mean(0))
fig = go.Figure(data=[trace1])
fig.show()

In [None]:
_ = plt.hist(x_obs[:, 500], bins=100)

In [None]:
dataset_path = os.path.join(DIR_PATH, "dataset.pickle")
if not os.path.exists(dataset_path):
    dataset = GeneExpressionDataset()
    dataset.populate_from_data(X=x_obs.numpy(), labels=labels.numpy())
    dataset.lfc = lfcs
    save_pickle(data=dataset, filename=dataset_path)
else:
    dataset = load_pickle(dataset_path)
    lfcs = dataset.lfc

In [None]:
is_significant_de = np.abs(lfcs[:, 1] - lfcs[:, 0]) >= DELTA
n_genes = dataset.nb_genes
trace1 = go.Histogram(x=lfcs[:, 1] - lfcs[:, 0])
fig = go.Figure(data=[trace1])
# save_fig(fig, filename="powsimR_properties", do_cloud=DO_CLOUD)
# fig.show()
iplot(fig, filename="lognormal_properties")

In [None]:
n_examples = len(dataset)
TEST_INDICES = np.random.permutation(n_examples)[:2000]

x_test, y_test = dataset.X[TEST_INDICES, :], dataset.labels[TEST_INDICES, :].squeeze()
data_path = os.path.join(DIR_PATH, 'data.npy')
labels_path = os.path.join(DIR_PATH, 'labels.npy')

np.save(
    data_path,
    x_test.squeeze().astype(int)
)
np.savetxt(
    labels_path,
    y_test.squeeze()
)

## Train parameters

In [None]:
mdl_params = dict(
    iaf=dict(n_hidden=128, n_layers=1, do_h=True, n_latent=10, t=4, dropout_rate=0.2),
    mf=dict(n_hidden=128, n_layers=1, n_latent=10, dropout_rate=0.2),
    iaf_at=dict(n_hidden=128, n_layers=2, do_h=False, n_latent=12, t=3, dropout_rate=0.18),
    mf_at=dict(n_hidden=128, n_layers=1, n_latent=5, dropout_rate=0.1),
    iaf_k5=dict(n_hidden=128, n_layers=1, do_h=True, n_latent=10, t=4),
    mf_k5=dict(n_hidden=128, n_layers=1, n_latent=10),
)
train_params = dict(
    iaf=dict(ratio_loss=True, test_indices=TEST_INDICES),
    iaf_b=dict(ratio_loss=True, test_indices=TEST_INDICES),
    mf=dict(ratio_loss=True, test_indices=TEST_INDICES),
    iaf_k5=dict(ratio_loss=True, test_indices=TEST_INDICES, k_importance_weighted=5, single_backward=False),
    mf_k5=dict(ratio_loss=True, test_indices=TEST_INDICES, k_importance_weighted=5, single_backward=False)
)
train_fn_params = dict(
    iaf=dict(n_epochs=N_EPOCHS, lr=1e-2),
    iaf_b=dict(n_epochs=N_EPOCHS, lr=1e-2),
    mf=dict(n_epochs=N_EPOCHS, lr=1e-2),
    iaf_k5=dict(n_epochs=N_EPOCHS, lr=1e-2),
    mf_k5=dict(n_epochs=N_EPOCHS, lr=1e-2),
)

# Compute competitors scores

In [None]:
os.listdir(DIR_PATH)

In [None]:
other_predictions = all_predictions(
    filename=os.path.join(DIR_PATH, "other_predictions1.pickle"),
    n_genes=n_genes, 
    n_picks=N_PICKS, 
    sizes=SIZES, 
    data_path=data_path, 
    labels_path=labels_path,
    path_to_scripts=PATH_TO_SCRIPTS
)

other_predictions = all_de_predictions(
    other_predictions, significance_level=Q0, delta=DELTA
)

Check sign of LFC 

In [None]:
other_predictions["edger"]["lfc"].shape

In [None]:
from scvi.utils import plot_identity

lfc_gt = -(lfcs[:, 1] - lfcs[:, 0])
plt.scatter(lfc_gt, other_predictions["edger"]["lfc"][-1, -1, :])
plot_identity()
plt.show()

plt.scatter(lfc_gt, other_predictions["deseq2"]["lfc"][-1, -1, :])
plot_identity()
plt.show()


plt.scatter(lfc_gt, other_predictions["mast"]["lfc"][-1, -1, :])
plot_identity()
plt.show()


In [None]:
# Flip the sign of the edgeR and MAST LFC estimates (the DESeq2 estimates are left untouched).
# NOTE: this cell is not idempotent. Running it a second time flips the signs back, so it
# must be executed exactly once after `other_predictions` has been (re)computed.
other_predictions["edger"]["lfc"] = -other_predictions["edger"]["lfc"]
other_predictions["mast"]["lfc"] = -other_predictions["mast"]["lfc"]

# Experiments

In [None]:
os.listdir(DIR_PATH)

In [None]:
res_mf = multi_train_estimates(
    filename=os.path.join(DIR_PATH, "res_mf_final1_high_lr_epochs.pickle"),
#     filename=os.path.join(DIR_PATH, "res_mf.pickle"),
    mdl_class=VAE,
    dataset=dataset,
    mdl_params=mdl_params["mf"],
    train_params=train_params["mf"],
    train_fn_params=train_fn_params["mf"],
    sizes=SIZES,
    n_trainings=N_TRAININGS,
    n_picks=N_PICKS,
    n_samples=500,
    label_a=0,
    label_b=1
).assign(algorithm="MF")

res_iaf = multi_train_estimates(
    filename=os.path.join(DIR_PATH, "res_iaf_final1_high_lr_epochs.pickle"),
#     filename=os.path.join(DIR_PATH, "res_iaf.pickle"),
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
    sizes=SIZES,
    n_trainings=N_TRAININGS,
    n_picks=N_PICKS,
    n_samples=500,
    label_a=0,
    label_b=1
).assign(algorithm="IAF")

# IAF trained with the importance-weighted objective (k=5).
# The "iaf_k5" parameter sets must be used here: with the plain "iaf" entries this run is
# configured exactly like `res_iaf` and never enables `k_importance_weighted=5`.
# NOTE(review): if `filename` is used as a results cache by multi_train_estimates, an existing
# pickle produced with the wrong parameters has to be deleted for this fix to take effect.
res_iafk5 = multi_train_estimates(
    filename=os.path.join(DIR_PATH, "res_iafk5_final1_high_lr_epochs.pickle"),
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf_k5"],
    train_params=train_params["iaf_k5"],
    train_fn_params=train_fn_params["iaf_k5"],
    sizes=SIZES,
    n_trainings=N_TRAININGS,
    n_picks=N_PICKS,
    n_samples=500,
    label_a=0,
    label_b=1
).assign(algorithm="IAF K5")




# res_mf = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_mf_at.pickle"),
#     mdl_class=VAE,
#     dataset=dataset,
#     mdl_params=mdl_params["mf_at"],
#     train_params=train_params["mf"],
#     train_fn_params=train_fn_params["mf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1
# ).assign(algorithm="MF")

# res_iaf = multi_train_estimates(
#     filename=os.path.join(DIR_PATH, "res_iaf_at.pickle"),
#     mdl_class=IAVAE,
#     dataset=dataset,
#     mdl_params=mdl_params["iaf_at"],
#     train_params=train_params["iaf"],
#     train_fn_params=train_fn_params["iaf"],
#     sizes=SIZES,
#     n_trainings=N_TRAININGS,
#     n_picks=N_PICKS,
#     n_samples=500,
#     label_a=0,
#     label_b=1
# ).assign(algorithm="IAF")

## FDR / Power Control and PR Curves

In [None]:
def train_model(
    mdl_class, dataset, mdl_params: dict, train_params: dict, train_fn_params: dict
):
    """
    Build a model, wrap it in an UnsupervisedTrainer and train it.

    :param mdl_class: Class of algorithm; called as
        ``mdl_class(dataset.nb_genes, n_batch=dataset.n_batches, **mdl_params)``
    :param dataset: Dataset (must expose ``nb_genes`` and ``n_batches``)
    :param mdl_params: keyword arguments forwarded to the model constructor
    :param train_params: keyword arguments forwarded to ``UnsupervisedTrainer``
    :param train_fn_params: keyword arguments forwarded to ``trainer.train``
    :return: tuple ``(model, trainer)``
    """
    model = mdl_class(dataset.nb_genes, n_batch=dataset.n_batches, **mdl_params)
    trainer = UnsupervisedTrainer(model, dataset, **train_params)
    # Show which cells are held out before training starts.
    held_out_indices = trainer.test_set.data_loader.sampler.indices
    print(held_out_indices)
    trainer.train(**train_fn_params)
    print(trainer.train_losses)
    return model, trainer

### FDR and TPR Control

TODO: Compare flows with MF on the same 5 cells, and compare the PE FDR to the true FDR.
Show that the FDR is better estimated with flows, which would be a very nice result.

In the paper, it is OK to use two decision rules. Say that an overconservative PE FDR is OK.
Say that further investigation is left to a future paper.



In [None]:
probas_5 = res_mf.loc[lambda x: (x.experiment == 0) & (x.training == 0) & (x.sample_size==5), "de_proba"].values
probas_100 = res_mf.loc[lambda x: (x.experiment == 0) & (x.training == 0) & (x.sample_size==100), "de_proba"].values

In [None]:
sorted_genes = np.argsort(-probas_5)
sorted_pgs = probas_5[sorted_genes]
cumulative_fdr_5 = (1.0 - sorted_pgs).cumsum() / (1.0 + np.arange(len(sorted_pgs)))

d = (cumulative_fdr_5 <= 5e-2).sum() - 1
print(d, cumulative_fdr_5[d])

fdr_k = []
for k in range(n_genes):
    predictions_5 = np.zeros(n_genes)
    predictions_5[sorted_genes[:(k+1)]] = 1
    fdr = ((~is_significant_de) * predictions_5).sum() / (k+1)
    fdr_k.append(fdr)

In [None]:
sorted_pgs[:0]

In [None]:
# Rank genes from most to least likely DE according to the 100-cell posterior.
# (A leftover `np.random.permutation` used to overwrite this ranking, which made both
# curves below describe a random gene ordering instead of the model's ordering.)
sorted_genes = np.argsort(-probas_100)
sorted_pgs = probas_100[sorted_genes]
# Posterior expected FDR when the k+1 top-ranked genes are called DE.
cumulative_fdr_100 = (1.0 - sorted_pgs).cumsum() / (1.0 + np.arange(len(sorted_pgs)))

d = (cumulative_fdr_100 <= 5e-2).sum() - 1
print(d, cumulative_fdr_100[d])

# True FDR for the same nested selections: running count of non-DE genes among the
# k+1 top-ranked genes, divided by k+1 (vectorised form of the former O(n^2) loop).
n_false_discoveries = (~is_significant_de)[sorted_genes].cumsum()
fdr_k_100 = (n_false_discoveries / (1.0 + np.arange(len(sorted_genes)))).tolist()

In [None]:
plt.plot(cumulative_fdr_5, label="PE FDR")
plt.plot(fdr_k, label="True 5")
plt.legend()
plt.show()

In [None]:
plt.plot(cumulative_fdr_100, label="PE FDR")
plt.plot(fdr_k_100, label="True 100")
plt.legend()
plt.show()

In [None]:
plt.plot(fdr_k, label="True 5")
plt.plot(fdr_k_100, label="True 100")
plt.legend()
plt.show()

In [None]:
plt.plot(cumulative_fdr_5, label="5")
plt.plot(cumulative_fdr_100, label="100")
plt.legend()
plt.show()

In [None]:
# `&` binds tighter than `==`, so each comparison needs its own parentheses; without them
# the expression becomes a chained comparison on Series and raises a ValueError.
res_mf.loc[lambda x: (x.experiment == 0) & (x.training == 0)]

In [None]:
def fdr_fnr(my_df):
    """
    True FDR / FNR of the DE calls for one (experiment, training, sample_size) group.

    Genes are called DE with ``predict_de_genes`` at the target FDR ``Q0`` and compared
    with the ground truth ``is_significant_de`` (ordered by gene index).

    :param my_df: results for one group, with one row per gene and the columns
        ``gene`` and ``de_proba``
    :return: pd.Series with ``fdr``, ``fnr`` and ``alpha`` (smallest ``de_proba`` among the
        genes called DE; NaN when no gene is called)
    """
    my_df = my_df.sort_values("gene")
    assert len(my_df) == n_genes
    de_probas = my_df.de_proba.values
    is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)

    n_predicted = is_pred_de.sum()
    if n_predicted == 0:
        # No discovery at all: there is no threshold to report and no false discovery.
        # Without this guard `.min()` on an empty array raises and the FDR is 0 / 0.
        alpha = np.nan
        true_fdr = 0.0
    else:
        alpha = de_probas[is_pred_de].min()
        true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / n_predicted
    n_positives = is_significant_de.sum()
    true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
    return pd.Series(dict(fdr=true_fdr, fnr=true_fnr, alpha=alpha))


fdr_fnr_mf = (
    res_mf.groupby(["experiment", "training", "sample_size"])
    .apply(fdr_fnr)
    .reset_index()
    .assign(algorithm="MF")
)
fdr_fnr_iaf = (
    res_iaf.groupby(["experiment", "training", "sample_size"])
    .apply(fdr_fnr)
    .reset_index()
    .assign(algorithm="IAF")
)

df = pd.concat([fdr_fnr_mf, fdr_fnr_iaf], ignore_index=True)


fig = px.box(
    df,
    x="sample_size",
    y="fdr",
    color="algorithm",
    title="Control on False Discovery Rate",
)
fig.show()
# iplot(fig, filename="powsimr_fdr_control")

fig = px.box(
    df,
    x="sample_size",
    y="fnr",
    color="algorithm",
    title="Control on False Negative Rate",
)
fig.show()
# iplot(fig, filename="powsimr_power_control")

Alpha comparisons

In [None]:
fdr_fnr_iaf.groupby("sample_size").alpha.mean()

In [None]:
fdr_fnr_mf.groupby("sample_size").alpha.mean()

Other algorithms

In [None]:
##

In [None]:
# ['deseq2', 'edger', 'mast']

def get_fdr_fnr(y_pred, y_true):
    """
    False discovery rate and false negative rate of boolean DE predictions.

    FDR = (# predicted DE that are not truly DE) / (# predicted DE)
    FNR = (# truly DE that are not predicted DE) / (# truly DE)

    :param y_pred: (n_sz, n_picks, n_genes) bool predictions
    :param y_true: (n_genes) bool ground-truth values
    :return: dict with keys ``fnr`` and ``fdr``, each an (n_sz, n_picks) array
    """
    n_sz, n_picks, _ = y_pred.shape
    fnrs = np.zeros((n_sz, n_picks))
    fdrs = np.zeros((n_sz, n_picks))
    for sz in range(n_sz):
        for pick in range(n_picks):
            y_pred_it = y_pred[sz, pick, :]
            # 0 / 0 is expected when nothing is predicted DE; it is handled below.
            with np.errstate(divide="ignore", invalid="ignore"):
                # The two formulas used to be stored under each other's name.
                fdr = ((~y_true) * y_pred_it).sum() / y_pred_it.sum()
                fnr = (y_true * (~y_pred_it)).sum() / y_true.sum()
            fnrs[sz, pick] = fnr
            fdrs[sz, pick] = fdr
    # No prediction at all means no false discovery.
    fdrs[np.isnan(fdrs)] = 0.0
    return dict(fnr=fnrs, fdr=fdrs)

print(other_predictions["mast"]['pval'].shape)
print(other_predictions["deseq2"]['pval'].shape)
print(other_predictions["edger"]['pval'].shape)

is_de_mast = other_predictions["mast"]["is_de"]
is_de_deseq2 = other_predictions["deseq2"]["is_de"]
is_de_edger = other_predictions["edger"]["is_de"]
# is_de_edgerr = other_predictions["edger_robust"]["is_de"]


res_mast = get_fdr_fnr(is_de_mast, y_true=is_significant_de)
res_deseq2 = get_fdr_fnr(is_de_deseq2, y_true=is_significant_de)
res_edger = get_fdr_fnr(is_de_edger, y_true=is_significant_de)
# res_edgerr = get_fdr_fnr(is_de_edgerr, y_true=is_significant_de)

In [None]:
preds_mf = res_mf[(res_mf.experiment == 0) & (res_mf.training == 0) & (res_mf.sample_size == 100)]
preds_iaf = res_iaf[(res_iaf.experiment == 0) & (res_iaf.training == 0) & (res_iaf.sample_size == 100)]

# preds_mf = preds_mf.sort_values("de_proba").set_index("gene")
# preds_iaf = preds_iaf.set_index("gene").reindex(index=preds_mf.index)
# preds_iaf[]

preds = pd.concat([preds_mf, preds_iaf], ignore_index=True)
preds.head()

In [None]:
preds_mf = preds_mf.assign(
    de_proba_iaf=preds_iaf.de_proba,
    gene_mean=dataset.X.mean(0),
    is_de=is_significant_de.astype(float),
)

import plotly.figure_factory as ff

fig = ff.create_distplot(
    [preds_mf["de_proba"], preds_mf["de_proba_iaf"]],
    ["de_proba", "de_proba_iaf"],
    bin_size=5e-2,
)
fig.show()

In [None]:
import plotly.figure_factory as ff

fig = ff.create_distplot(
    [preds_mf["de_proba"], preds_mf["de_proba_iaf"]],
    ["de_proba", "de_proba_iaf"],
    bin_size=5e-2,
)
fig.show()

#### Tables

In [None]:
def algos_comparison(my_df, key1, other_keys, key_values="error"):
    """
    Tell whether algorithm ``key1`` beats every algorithm of ``other_keys``.

    "Beats" is decided by ``has_lower_mean`` on the ``key_values`` column; a ValueError
    raised by that comparison counts as "not better".

    :param my_df: frame with an ``algorithm`` column and a ``key_values`` column
    :param key1: name of the candidate algorithm
    :param other_keys: names of the algorithms it is compared against
    :param key_values: column holding the metric (lower is better)
    :return: True if ``key1`` is better than all of ``other_keys`` (True when the list is empty)
    """
    vals_key1 = my_df.loc[my_df["algorithm"] == key1, key_values].values
    algo1_is_better = True
    for key2 in other_keys:
        vals_other = my_df.loc[my_df["algorithm"] == key2, key_values].values
        try:
            key1_better = has_lower_mean(vals_key1, vals_other)
        except ValueError:
            key1_better = False
        if not key1_better:
            algo1_is_better = False
            break
    # Return the accumulator: the loop variable is undefined when `other_keys` is empty.
    return algo1_is_better


gped = df.groupby("sample_size")
fdr_mf_better = gped.apply(algos_comparison, key1="MF", other_keys=["IAF"], key_values="fdr")
fdr_iaf_better = gped.apply(algos_comparison, key1="IAF", other_keys=["MF"], key_values="fdr")

fnr_mf_better = gped.apply(algos_comparison, key1="MF", other_keys=["IAF"], key_values="fnr")
fnr_iaf_better = gped.apply(algos_comparison, key1="IAF", other_keys=["MF"], key_values="fnr")

In [None]:
# Mean FDR / FNR per (sample_size, algorithm).
# - Columns are selected with a list: tuple-style selection on a groupby is rejected by recent pandas.
# - The metric columns are stored as objects because they receive LaTeX strings below.
# - The *_better flags start as False so that they are real boolean masks.
res_table = (
    df.groupby(["sample_size", "algorithm"])[["fdr", "fnr"]]
    .mean()
    .round(3)
    .reset_index()
    .astype({"fdr": object, "fnr": object})
    .assign(fdr_better=False, fnr_better=False)
)

res_table.loc[res_table["algorithm"] == "MF", "fdr_better"] = fdr_mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "fdr_better"] = fdr_iaf_better.values
res_table.loc[res_table["algorithm"] == "MF", "fnr_better"] = fnr_mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "fnr_better"] = fnr_iaf_better.values

# Bold the best entries (raw strings: "\m" is not a valid escape sequence).
res_table.loc[res_table["fdr_better"], "fdr"] = res_table.loc[
    res_table["fdr_better"], "fdr"
].apply(lambda x: r"\mathbf{{ {} }}".format(x))

res_table.loc[res_table["fnr_better"], "fnr"] = res_table.loc[
    res_table["fnr_better"], "fnr"
].apply(lambda x: r"\mathbf{{ {} }}".format(x))

# Wrap every entry in math mode for the LaTeX export.
res_table.loc[:, "fdr"] = res_table.loc[:, "fdr"].apply(lambda x: "$ {} $".format(x))
res_table.loc[:, "fnr"] = res_table.loc[:, "fnr"].apply(lambda x: "$ {} $".format(x))

In [None]:
res_table.pivot(
    index="algorithm", columns="sample_size", values=["fdr", "fnr"]
).T

In [None]:
res_table.loc[lambda x: x["sample_size"].isin([5, 20, 100])].pivot(
    index="algorithm", columns="sample_size", values=["fdr", "fnr"]
).T

In [None]:
print(
    res_table.loc[lambda x: x["sample_size"].isin([5, 20, 100])]
    .pivot(index="algorithm", columns="sample_size", values=["fdr", "fnr"])
    .T
    .to_latex(escape=False)
)

In [None]:
print(res_table.pivot(index="algorithm", columns="sample_size", values="fdr").loc[
    :, [5, 20, 100]
].to_latex(escape=True))

In [None]:
res_table.pivot(index="algorithm", columns="sample_size", values="fnr").loc[:, [5, 20, 100]]

### Understand why issue

Try to understand why the FDR is not properly estimated with many cells.

In [None]:
preds_mf.sample_size.unique()

In [None]:
import plotly.figure_factory as ff

fig = ff.create_distplot(
    [
        res_mf.loc[lambda x: (x.sample_size==5) & (x.training==0),"de_proba"], 
        res_iaf.loc[lambda x: (x.sample_size==5) & (x.training==0),"de_proba"]
    ],
    ["de_proba", "de_proba_iaf"],
    bin_size=5e-2,
)
fig.show()

In [None]:
import plotly.figure_factory as ff

fig = ff.create_distplot(
    [
        preds_mf.loc[lambda x: x.sample_size==100,"de_proba"], 
        preds_mf.loc[lambda x: x.sample_size==100,"de_proba_iaf"]
    ],
    ["de_proba", "de_proba_iaf"],
    bin_size=5e-2,
)
fig.show()

### Confusion Matrices

In [None]:
# trains_res = all_fdrs.mean(axis=1)
# print(trains_res.mean(), trains_res.std())

In [None]:
# from sklearn.metrics import confusion_matrix

# y_preds_1d = y_preds.reshape((-1, dataset.nb_genes))
# n_exps = len(y_preds_1d)
# confs = np.zeros((n_exps, 2, 2))
# for i in range(n_exps):
#     confs[i, :, :] = confusion_matrix(is_significant_de, y_preds_1d[i, :])

In [None]:
# confusion_matrix(is_significant_de, y_preds_1d[0, :])

# confs_mean = confs.mean(0)
# confs_mean

# fig = ff.create_annotated_heatmap(
#     z=confs_mean, x=["Pred Negative", "Pred Positive"], y=["GT Negative", "GT Positive"]
# )
# fig.update({"layout": dict(title="Confusion Matrix")})

# py.iplot(fig)

### PR Curves

#### PR Curve

In [None]:
selected_training = 2

preds_md = res_mf.loc[
    lambda x: (x.experiment == 0) & (x.training == selected_training) & (x.sample_size == 100)
].sort_values("gene")["de_proba"]

preds_iaf = res_iaf.loc[
    lambda x: (x.experiment == 0) & (x.training == selected_training) & (x.sample_size == 100)
].sort_values("gene")["de_proba"]

preds_iafk5 = res_iafk5.loc[
    lambda x: (x.experiment == 0) & (x.training == selected_training) & (x.sample_size == 100)
].sort_values("gene")["de_proba"]

In [None]:
from sklearn.metrics import precision_recall_curve

preds_deseq2 = 1.0 - other_predictions['deseq2']['pval'][-1, 0, :]
preds_edger = 1.0 - other_predictions['edger']['pval'][-1, 0, :]
preds_mast = 1.0 - other_predictions['mast']['pval'][-1, 0, :]

# preds_deseq2 = 1.0 - other_predictions['deseq2']['pval'][:]
# preds_edger = 1.0 - other_predictions['edger']['pval'][:]
# preds_mast = 1.0 - other_predictions['mast']['pval'][:]

In [None]:
# Fraction of NaN scores for every method plotted in the PR curves below
# (DESeq2 used to be printed twice while IAF K5 was never checked).
print(np.isnan(preds_md).mean())
print(np.isnan(preds_iaf).mean())
print(np.isnan(preds_iafk5).mean())
print(np.isnan(preds_deseq2).mean())
print(np.isnan(preds_edger).mean())
print(np.isnan(preds_mast).mean())

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

def plot_pr(fig, preds, y_true, name):
    """
    Add the precision-recall curve of one method to ``fig``.

    NaN scores are replaced by the smallest finite-or-not-NaN score, i.e. the corresponding
    genes are ranked last. The replacement is done on a copy, so ``preds`` is left untouched.

    :param fig: plotly figure receiving the trace
    :param preds: per-gene scores (higher means more likely DE); may contain NaN
    :param y_true: per-gene boolean ground truth
    :param name: method name used in the legend (the average precision is appended)
    :return: None
    """
    scores = np.array(preds, dtype=float)  # copy: never mutate the caller's data
    is_missing = np.isnan(scores)
    scores[is_missing] = np.min(scores[~is_missing])
    # NaNs must be filled BEFORE scoring: average_precision_score rejects NaN input.
    average_precision = average_precision_score(y_true, scores)
    # Scores are passed positionally because the keyword was renamed across sklearn versions.
    precs, recs, _ = precision_recall_curve(y_true, scores)
    fig.add_trace(
        go.Scatter(
            x=recs,
            y=precs,
            name=name+'@AP: {0:0.2f}'.format(average_precision)
        )
    )
    return
layout = go.Layout(
    title='Precision Recall Curves',
    xaxis=dict(title='Recall'),
    yaxis=dict(title='Precision'),
    width=800,
    height=600,
)
fig = go.Figure(layout=layout)
plot_pr(fig=fig, preds=preds_md, y_true=is_significant_de, name='MF')
plot_pr(fig=fig, preds=preds_iaf, y_true=is_significant_de, name='IAF')
plot_pr(fig=fig, preds=preds_iafk5, y_true=is_significant_de, name='IAFK5')

plot_pr(fig=fig, preds=preds_deseq2, y_true=is_significant_de, name='DESeq2')
plot_pr(fig=fig, preds=preds_edger, y_true=is_significant_de, name='EdgeR')
# plot_pr(fig=fig, preds=preds_edgerr, y_true=is_significant_de, name='EdgeR Robust')
plot_pr(fig=fig, preds=preds_mast, y_true=is_significant_de, name='MAST')

# fig.show()
iplot(fig, filename="lognormal_pr_curves2", sharing="private")

#### MAP

In [None]:
def do_ap(my_df):
    """
    Average precision of the DE probabilities of one group of results.

    Rows are ordered by gene so that ``de_proba`` lines up with ``is_significant_de``.

    :param my_df: frame with ``gene`` and ``de_proba`` columns
    :return: pd.Series with a single entry ``AP``
    """
    by_gene = my_df.sort_values("gene")
    return pd.Series(
        {"AP": average_precision_score(is_significant_de, by_gene.de_proba)}
    )


ap_mf = (
    res_mf.groupby(["experiment", "training", "sample_size"])
    .apply(do_ap)
    .reset_index()
    .assign(algorithm="MF")
)
ap_iaf = (
    res_iaf.groupby(["experiment", "training", "sample_size"])
    .apply(do_ap)
    .reset_index()
    .assign(algorithm="IAF")
)

ap_iafk5 = (
    res_iafk5.groupby(["experiment", "training", "sample_size"])
    .apply(do_ap)
    .reset_index()
    .assign(algorithm="IAF K5")
)

all_ap = pd.concat([ap_mf, ap_iaf, ap_iafk5], ignore_index=True)

In [None]:
px.box(all_ap, x="sample_size", y="AP", color="algorithm")

In [None]:
# all_ap.groupby(["algorithm", "sample_size"]).agg(dict(AP=["mean", "std"]))


## Diagonal Curve

In [None]:
lfc_gt = -(lfcs[:, 1] - lfcs[:, 0])

In [None]:
res_mf.head()

In [None]:
selected_training = 0
subsample_genes = np.sort(np.random.permutation(n_genes)[:150])

lfcs_mf = (
    res_mf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    .assign(
        #         err_minus=lambda x: x.lfc_mean - x.hdi99_low,
        #         err_pos=lambda x: x.hdi99_high - x.lfc_mean,
        err_minus=lambda x: x.lfc_mean - x.hdi64_low,
        err_pos=lambda x: x.hdi64_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
    )
)

lfcs_ia = (
    res_iaf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    .assign(
        #         err_minus=lambda x: x.lfc_mean - x.hdi99_low,
        #         err_pos=lambda x: x.hdi99_high - x.lfc_mean,
        err_minus=lambda x: x.lfc_mean - x.hdi64_low,
        err_pos=lambda x: x.hdi64_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
    )
)

lfcs_iak5 = (
    res_iafk5.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    .assign(
        err_minus=lambda x: x.lfc_mean - x.hdi64_low,
        err_pos=lambda x: x.hdi64_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
    )
)


all_lfcs = pd.concat([lfcs_mf, lfcs_ia, lfcs_iak5], ignore_index=True)

In [None]:
fig = px.scatter(
    all_lfcs,
    x="lfc_gt",
    y="lfc_mean",
    color="algorithm",
    error_y="err_pos",
    error_y_minus="err_minus",
)

fig.add_trace(
    go.Scatter(
        x=[-3, 3],
        y=[-3, 3],
        mode="lines",
        line=dict(color="black", width=4, dash="dash"),
        showlegend=False
    )
)

# fig.show()
iplot(fig, sharing="private", filename="logpoisson_diagonal")

In [None]:
# 5 against 100

lfcs_a = (
    res_iaf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 5)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    .assign(
        err_minus=lambda x: x.lfc_mean - x.hdi64_low,
        err_pos=lambda x: x.hdi64_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
        legend="5"
    )
)

lfcs_b= (
    res_iaf.loc[
        lambda x: (x.experiment == 0)
        & (x.training == selected_training)
        & (x.sample_size == 100)
        & (x.gene.isin(subsample_genes))
    ]
    .sort_values("gene")[
        ["lfc_mean", "hdi99_low", "hdi99_high", "hdi64_low", "hdi64_high", "algorithm"]
    ]
    .assign(
        err_minus=lambda x: x.lfc_mean - x.hdi64_low,
        err_pos=lambda x: x.hdi64_high - x.lfc_mean,
        lfc_gt=lfc_gt[subsample_genes],
        legend="100"
    )
)

all_lfcs = pd.concat([lfcs_a, lfcs_b], ignore_index=True)

fig = px.scatter(
    all_lfcs,
    x="lfc_gt",
    y="lfc_mean",
    color="legend",
    error_y="err_pos",
    error_y_minus="err_minus",
)

fig.add_trace(
    go.Scatter(
        x=[-3, 3],
        y=[-3, 3],
        mode="lines",
        line=dict(color="black", width=4, dash="dash"),
        showlegend=False
    )
)

fig.show()

### Counts

In [None]:
CONFIDENCES = [50, 75, 95, 64, 99]

In [None]:
def frac_inside(my_df):
    """
    Calibration error of the LFC credible intervals for one group of results.

    For every level of ``CONFIDENCES``, computes the fraction of genes whose ground-truth
    LFC (``lfc_gt``) lies inside [hdi{level}_low, hdi{level}_high] and reports the squared
    gap between that fraction and the nominal level.

    :param my_df: frame with a ``gene`` column and ``hdi{level}_low`` / ``hdi{level}_high`` columns
    :return: pd.Series indexed by ``error{level}``
    """
    # lfc_gt is ordered by gene index and compared positionally, so the rows must be
    # ordered by gene as well (as done in get_coverage and pd_l2_err).
    my_df = my_df.sort_values("gene")
    index = []
    values = []
    # Single source of truth for the levels: `scoring` reads the same constant.
    for confidence in CONFIDENCES:
        is_in_hdi = (lfc_gt <= my_df["hdi{}_high".format(confidence)]) & (
            lfc_gt >= my_df["hdi{}_low".format(confidence)]
        )
        values.append((confidence / 100.0 - is_in_hdi.mean()) ** 2)
        index.append("error{}".format(confidence))
    return pd.Series(values, index=index)


errs_mf = (
    res_mf.groupby(by=["experiment", "sample_size", "training"])
    .apply(frac_inside)
    .reset_index()
).assign(algorithm="Mean Field")

errs_iaf = (
    res_iaf.groupby(by=["experiment", "sample_size", "training"])
    .apply(frac_inside)
    .reset_index()
).assign(algorithm="IAF")

display(errs_mf)
display(errs_iaf)

In [None]:
def scoring(my_df):
    """
    Summarise the interval calibration error of Mean Field vs IAF for one sample size.

    The per-run score is the sum of the ``error{level}`` columns over ``CONFIDENCES``.
    The mean score of each algorithm is formatted as a string and wrapped in
    ``\\mathbf{...}`` when ``has_lower_mean`` says it is lower than the other one.

    :param my_df: frame with an ``algorithm`` column ("Mean Field" / "IAF") and the
        ``error{level}`` columns
    :return: pd.Series with the formatted scores under the keys ``IAF`` and ``MF``
    """
    errors = ["error{}".format(err) for err in CONFIDENCES]
    err_scores = my_df.loc[:, errors].sum(1)
    err_scores_mf = err_scores[my_df["algorithm"] == "Mean Field"].values
    err_scores_iaf = err_scores[my_df["algorithm"] == "IAF"].values

    disp_mf = str(err_scores_mf.mean().round(3))
    disp_iaf = str(err_scores_iaf.mean().round(3))
    if has_lower_mean(err_scores_mf, err_scores_iaf):
        disp_mf = r"\mathbf{{ {} }}".format(disp_mf)
    if has_lower_mean(err_scores_iaf, err_scores_mf):
        disp_iaf = r"\mathbf{{ {} }}".format(disp_iaf)

    # Each score goes under its own label (the two used to be swapped).
    return pd.Series(
        dict(
            IAF=disp_iaf,
            MF=disp_mf
        )
    )
    
    
errs = pd.concat([errs_mf, errs_iaf], ignore_index=True)
errs.groupby("sample_size").apply(scoring)


## Study of LFC errors

In [None]:
def compute_l2_err(diff):
    """
    Half of the mean absolute difference along the last axis, ignoring NaNs.

    ``(diff ** 2) ** 0.5`` is the element-wise magnitude of ``diff``, so despite its
    name this function averages ``0.5 * |diff|`` rather than a squared error.

    :param diff: array of differences; the last axis is averaged out
    :return: array (or scalar) of errors with the last axis removed
    """
    half_magnitude = 0.5 * (diff ** 2) ** (0.5)
    return np.nanmean(half_magnitude, axis=-1)

def l2_err_competitor(vals: np.ndarray, other: np.ndarray = None):
    """
    LFC error of a competitor method for every (sample size, pick) pair.

    NaN estimates are treated as 0. The replacement is done on a copy, so the caller's
    array is not modified.

    :param vals: (N_SIZES, N_PICKS, n_genes) array of LFC estimates
    :param other: optional reference values subtracted from ``vals`` (broadcast against it)
    :return: DataFrame with columns ``experiment`` (the pick), ``training`` (always 0),
        ``sample_size`` and ``error``
    """
    vals = np.where(np.isnan(vals), 0.0, vals)
    diff = vals if other is None else vals - other
    res = compute_l2_err(diff)
    assert res.shape == (N_SIZES, N_PICKS)
    data = [
        dict(experiment=pick, training=0, sample_size=size, error=res[size_ix, pick])
        for (size_ix, size) in enumerate(SIZES)
        for pick in range(N_PICKS)
    ]
    return pd.DataFrame(data)

lfcs_errs_deseq2 = l2_err_competitor(other_predictions["deseq2"]["lfc"], other=lfc_gt).assign(algorithm="DESeq2")
lfcs_errs_edger = l2_err_competitor(other_predictions["edger"]["lfc"], other=lfc_gt).assign(algorithm="EdgeR")
lfcs_errs_mast = l2_err_competitor(other_predictions["mast"]["lfc"], other=lfc_gt).assign(algorithm="MAST")

In [None]:
lfcs_errs_mast

In [None]:
def pd_l2_err(my_df):
    """
    LFC error of one group of results against the ground truth ``lfc_gt``.

    Rows are ordered by gene before the subtraction; the error is the NaN-ignoring
    mean of ``0.5 * |lfc_mean - lfc_gt|``.

    :param my_df: frame with ``gene`` and ``lfc_mean`` columns
    :return: pd.Series with a single entry ``error``
    """
    lfc_pred = my_df.sort_values("gene")["lfc_mean"]
    residual = lfc_pred - lfc_gt
    half_magnitude = 0.5 * (residual ** 2) ** (0.5)
    return pd.Series({"error": np.nanmean(half_magnitude)})

lfcs_errs_mf = (
    res_mf
    .groupby(["experiment", "sample_size", "training", "algorithm"])
    .apply(pd_l2_err)
    .reset_index()
)

lfcs_errs_iaf = (
    res_iaf
    .groupby(["experiment", "sample_size", "training", "algorithm"])
    .apply(pd_l2_err)
    .reset_index()
)

In [None]:
all_errs = pd.concat([
    lfcs_errs_mf,
    lfcs_errs_iaf,
    lfcs_errs_deseq2,
    lfcs_errs_edger,
    lfcs_errs_mast,
], ignore_index=True)

px.box(all_errs, x="sample_size", y="error", color="algorithm")

### Tables

In [None]:
def algos_comparison(my_df, key1, other_keys, key_values="error"):
    """
    Tell whether algorithm ``key1`` beats every algorithm of ``other_keys``.

    This definition replaces the earlier ``algos_comparison`` of the notebook, so it keeps
    the same signature (``key_values`` defaults to ``"error"``) and the same handling of
    a ValueError raised by ``has_lower_mean`` (counted as "not better").

    :param my_df: frame with an ``algorithm`` column and a ``key_values`` column
    :param key1: name of the candidate algorithm
    :param other_keys: names of the algorithms it is compared against
    :param key_values: column holding the metric (lower is better)
    :return: True if ``key1`` is better than all of ``other_keys`` (True when the list is empty)
    """
    vals_key1 = my_df.loc[my_df["algorithm"] == key1, key_values].values
    algo1_is_better = True
    for key2 in other_keys:
        vals_other = my_df.loc[my_df["algorithm"] == key2, key_values].values
        try:
            key1_better = has_lower_mean(vals_key1, vals_other)
        except ValueError:
            key1_better = False
        if not key1_better:
            algo1_is_better = False
            break
    # Return the accumulator: the loop variable is undefined when `other_keys` is empty.
    return algo1_is_better

gped = all_errs.groupby("sample_size")
mf_or_iaf_better = (
    gped.apply(algos_comparison, key1="MF", other_keys=["DESeq2", "EdgeR", "MAST"]) &
    gped.apply(algos_comparison, key1="IAF", other_keys=["DESeq2", "EdgeR", "MAST"])
)
mf_better = gped.apply(algos_comparison, key1="MF", other_keys=["IAF", "DESeq2", "EdgeR", "MAST"])
iaf_better = gped.apply(algos_comparison, key1="IAF", other_keys=["MF", "DESeq2", "EdgeR", "MAST"])

In [None]:
# Mean / std of the LFC error per (sample_size, algorithm).
# The aggregation is a plain list followed by a rename: renaming through a dict passed to
# SeriesGroupBy.agg is no longer supported by pandas.
res_table = (
    all_errs.groupby(["sample_size", "algorithm"])
    .error.agg(["mean", "std"])
    .rename(columns={"mean": "err_mean", "std": "err_std"})
    .reset_index()
    .assign(
        displayed=lambda x: x.apply(
#             lambda y: r"{:.3f} \pm {:.3f}".format(y.err_mean, y.err_std), axis=1
            lambda y: "{:.3f}".format(y.err_mean), axis=1

        ),
        is_better=False,
        one_of_best=False,
    )
)
res_table.loc[res_table["algorithm"] == "MF", "is_better"] = mf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "is_better"] = iaf_better.values
res_table.loc[res_table["algorithm"] == "IAF", "one_of_best"] = mf_or_iaf_better.values
res_table.loc[res_table["algorithm"] == "MF", "one_of_best"] = mf_or_iaf_better.values


# Star the entries flagged `one_of_best`, bold the entries flagged `is_better`.
res_table.loc[lambda x: x.one_of_best, "displayed"] = (
    res_table.loc[lambda x: x.one_of_best, "displayed"] + "^*"
)
# Raw string: "\m" is not a valid escape sequence.
res_table.loc[lambda x: x.is_better, "displayed"] = res_table.loc[
    lambda x: x.is_better, "displayed"
].apply(lambda x: r"\mathbf{{ {} }}".format(x))

# Wrap every entry in math mode for the LaTeX export.
res_table.loc[:, "displayed"] = res_table.loc[:, "displayed"].apply(lambda x: "$ {} $".format(x))

In [None]:
res_table.pivot(index="algorithm", columns="sample_size", values="displayed").loc[
    ["DESeq2", "EdgeR", "MAST", "MF", "IAF"],
#     :
    [5, 20, 100]
]

In [None]:
print(
    res_table.pivot(index="algorithm", columns="sample_size", values="displayed")
    .loc[
        ["DESeq2", "EdgeR", "MAST", "MF", "IAF"],
        #     :
        [5, 20, 100],
    ]
    .to_latex(escape=False)
)

In [None]:
mdl_params.keys()

Test to see whether the poorer performance of the models for a large number of cells is linked to mixing factors.

## Coverage

In [None]:
res_mf.info()

In [None]:
print((res_mf.hdi64_high - res_mf.hdi64_low).mean())
print((res_iaf.hdi64_high - res_iaf.hdi64_low).mean())

In [None]:
def get_coverage(my_df, low_key="hdi64_low", high_key="hdi64_high"):
    """Return the fraction of genes whose ground-truth LFC lies inside an interval.

    Args:
        my_df: one row per gene, with a ``gene`` column plus the two bound columns.
        low_key: name of the column holding the lower interval bound.
        high_key: name of the column holding the upper interval bound.

    Returns:
        A one-element ``pd.Series`` indexed by ``"mean_cov"``.

    Reads the notebook globals ``lfc_gt`` (ground-truth LFCs, compared with the rows
    after sorting by ``gene``) and ``n_genes`` (expected number of rows).
    """
    ordered = my_df.sort_values("gene")
    assert len(ordered) == n_genes
    above_low = ordered[low_key] <= lfc_gt
    below_high = ordered[high_key] >= lfc_gt
    return pd.Series({"mean_cov": (above_low & below_high).mean()})
    

# Coverage of the hdi64 interval for every (experiment, training, sample_size, algorithm)
# combination, computed the same way for the MF and the IAF results.
coverage_group_keys = ["experiment", "training", "sample_size", "algorithm"]
coverage_mf, coverage_iaf = (
    results.groupby(coverage_group_keys)
    .apply(get_coverage, low_key="hdi64_low", high_key="hdi64_high")
    .reset_index()
    for results in (res_mf, res_iaf)
)

# Stack both algorithms into one long frame for plotting.
all_coverages = pd.concat([coverage_mf, coverage_iaf], ignore_index=True)

In [None]:
# Coverage per (experiment, training, sample_size, algorithm) for MF and IAF together.
all_coverages

In [None]:
# Box plot of the coverage against sample size, one colour per algorithm.
px.box(all_coverages, x="sample_size", y="mean_cov", color="algorithm")

# DEBUG

## FNR inconsistency

Remarks:

The problem is linked to the fact that when you condition on fewer samples, the posterior LFC is sharper.

I see several possible solutions:
- Voting strategy when you have many samples
- Modification of the decision rule
- Use posterior predicted
- dataset is too easy ==> Add complexity
- base decision making on credible intervals as previously
- 3 ways classification: Upregulated, downregulated, non DE

In [None]:
from scvi_utils import train_model

In [None]:
# Fit the IAF model (IAVAE) with the "iaf" entry of each parameter dict.
# For the mean-field counterpart, pass mdl_class=VAE together with the "mf" entries instead.
mdl, trainer = train_model(
    mdl_class=IAVAE,
    dataset=dataset,
    mdl_params=mdl_params["iaf"],
    train_params=train_params["iaf"],
    train_fn_params=train_fn_params["iaf"],
)

In [None]:
# Gene whose sampled LFC distribution is plotted in the cells below, together with its
# ground-truth LFC (drawn as the vertical line in those histograms).
gene_idx = 200
true_lfc = lfc_gt[gene_idx]
print(true_lfc)

### 100

In [None]:
# NOTE(review): within this section `scales_a` is only assigned in later cells, so this
# inspection presumably relies on a value left over from earlier in the notebook —
# confirm that it survives Restart & Run All.
scales_a.shape

In [None]:
# Option 1
# z, labels, scales = trainer.test_set.get_latents(
#     n_samples=50, other=True, device="cpu"
# )

# labels = labels.squeeze()
# where_a = np.where(labels == 0)[0]
# where_b = np.where(labels == 1)[0]
# where_a = where_a[np.random.choice(len(where_a), size=100)]
# where_b = where_b[np.random.choice(len(where_b), size=100)]
# scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
# lfc = np.log2(scales_ab) - np.log2(scales_bb)

# de_probas = (np.abs(lfc) >= 0.5).mean(0)
# is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)
# alpha = is_pred_de[is_pred_de].min()

# true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# n_positives = is_significant_de.sum()
# true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
# print(true_fdr, true_fnr)

# plt.hist(lfc[:, gene_idx])
# plt.axvline(x=true_lfc, color="black")
# plt.title(de_probas[gene_idx])

In [None]:
# NOTE(review): the only assignment of `probas_thresh` visible in this section is
# commented out (in the "Option 2" cell below); verify that it is defined earlier,
# otherwise this cell raises NameError on a fresh kernel.
probas_thresh

In [None]:
# Query the test set once with n_samples=500 (presumably the number of posterior draws per
# cell — TODO confirm). The returned `labels` and `scales` are reused by the 100-cell and
# 5-cell analyses that follow.
z, labels, scales = trainer.test_set.get_latents(
    n_samples=500, other=True, device="cpu"
)

In [None]:
# Option 2: call DE genes from sampled LFCs using 100 cells per population.
# Reuses the `labels` and `scales` returned by the `get_latents` call of the previous cell.
labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# np.random.choice draws WITH replacement by default, so a cell can be selected twice.
where_a = where_a[np.random.choice(len(where_a), size=100)]
where_b = where_b[np.random.choice(len(where_b), size=100)]
# Flatten everything except the gene axis: one row per (draw, selected cell).
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
lfc = np.log2(scales_ab) - np.log2(scales_bb)

# Per-gene mean and std, over the rows of `lfc`, of the indicator |LFC| >= 0.5.
is_lfc_large = np.abs(lfc) >= 0.5
de_probas = is_lfc_large.mean(0)
de_probas_std = is_lfc_large.std(0)

# Decision rule: a gene is called DE when its DE probability is at least 0.5.
# Alternatives tried:
#   is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)
#   probas_thresh = -np.sort(-de_probas)[215]; is_pred_de = de_probas >= probas_thresh
is_pred_de = de_probas >= 0.5

# Smallest DE probability among the called genes, i.e. the effective threshold.
# The probabilities are indexed (not the boolean mask itself, whose minimum is always
# True), and the no-call case yields NaN instead of raising on an empty array.
alpha = de_probas[is_pred_de].min() if is_pred_de.any() else np.nan

true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

# Sampled LFCs of the inspected gene; black line = ground truth, title = its DE probability.
plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas[gene_idx])

Results (FDR, FNR) for the decision rule `de_probas >= 0.5`

**100 cells**
0.23706896551724138 0.019390581717451522

0.2886178861788618 0.030470914127423823



**5 cells**
0.42448979591836733 0.2188365650969529

0.12435233160621761 0.06371191135734072


Decision making based on credible intervals

In [None]:
# Central 95% interval of the sampled LFC for every gene.
low, high = np.percentile(lfc, q=[2.5, 97.5], axis=0)

# A gene is called DE when both bounds are at least 0.5 in absolute value and share a sign,
# i.e. the whole interval lies outside (-0.5, 0.5) on one side of zero.
is_pred_de = (np.abs(low) >= 0.5) & (np.abs(high) >= 0.5) & (low * high >= 0.0)

true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# Computed here rather than inherited from another cell, so this cell runs on its own.
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
print(true_fdr, true_fnr)

### 5

In [None]:
# # Option 1
# z, labels, scales = trainer.test_set.get_latents(
#     n_samples=50, other=True, device="cpu"
# )

# labels = labels.squeeze()
# where_a = np.where(labels == 0)[0]
# where_b = np.where(labels == 1)[0]
# where_a = where_a[np.random.choice(len(where_a), size=5)]
# where_b = where_b[np.random.choice(len(where_b), size=5)]
# scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
# scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
# lfc = np.log2(scales_ab) - np.log2(scales_bb)
# de_probas_small = (np.abs(lfc) >= 0.5).mean(0)


# is_pred_de = predict_de_genes(de_probas_small, desired_fdr=Q0)
# alpha = is_pred_de[is_pred_de].min()
# true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
# n_positives = is_significant_de.sum()
# true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
# print(true_fdr, true_fnr)

# plt.hist(lfc[:, gene_idx])
# plt.axvline(x=true_lfc, color="black")
# plt.title(de_probas_small[gene_idx])

In [None]:
# Same analysis with 5 cells per population.
# Reuses the `labels` and `scales` of the most recent `get_latents` call; call it again
# (e.g. with n_samples=1000) before this cell to work from fresh draws.
labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# np.random.choice draws WITH replacement by default, so a cell can be selected twice.
where_a = where_a[np.random.choice(len(where_a), size=5)]
where_b = where_b[np.random.choice(len(where_b), size=5)]
# Flatten everything except the gene axis: one row per (draw, selected cell).
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
lfc = np.log2(scales_ab) - np.log2(scales_bb)

# Per-gene mean and std, over the rows of `lfc`, of the indicator |LFC| >= 0.5.
is_lfc_large = np.abs(lfc) >= 0.5
de_probas_small = is_lfc_large.mean(0)
de_probas_small_std = is_lfc_large.std(0)

# Decision rule: DE probability of at least 0.5.
# Alternative tried: is_pred_de_small = predict_de_genes(de_probas_small, desired_fdr=Q0)
is_pred_de_small = de_probas_small >= 0.5

# Smallest DE probability among the called genes, i.e. the effective threshold.
# The probabilities are indexed (not the boolean mask itself, whose minimum is always
# True), and the no-call case yields NaN instead of raising on an empty array.
alpha = de_probas_small[is_pred_de_small].min() if is_pred_de_small.any() else np.nan

true_fdr = ((1.0 - is_significant_de) * is_pred_de_small).sum() / is_pred_de_small.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_small)).sum() / n_positives
print(true_fdr, true_fnr)

# Sampled LFCs of the inspected gene; black line = ground truth, title = its DE probability.
plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas_small[gene_idx])

In [None]:
# Credible-interval decision rule applied to the current `lfc` samples:
# take the central 95% interval of every gene ...
low, high = np.percentile(lfc, q=[2.5, 97.5], axis=0)

# ... and call the gene DE when both bounds share a sign and are at least 0.5 in magnitude.
is_pred_de = (low * high >= 0.0) & (np.abs(low) >= 0.5) & (np.abs(high) >= 0.5)

n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de)).sum() / n_positives
true_fdr = ((1.0 - is_significant_de) * is_pred_de).sum() / is_pred_de.sum()
print(true_fdr, true_fnr)

### 1

In [None]:
# Same analysis with a single cell per population, from a fresh `get_latents` call
# with n_samples=2000.
z, labels, scales = trainer.test_set.get_latents(
    n_samples=2000, other=True, device="cpu"
)

labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
where_a = where_a[np.random.choice(len(where_a), size=1)]
where_b = where_b[np.random.choice(len(where_b), size=1)]
# Flatten everything except the gene axis: one row per (draw, selected cell).
scales_a = scales[:, where_a, :].reshape((-1, dataset.nb_genes)).numpy()
scales_b = scales[:, where_b, :].reshape((-1, dataset.nb_genes)).numpy()
scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=10)
lfc = np.log2(scales_ab) - np.log2(scales_bb)

# Per-gene mean and std, over the rows of `lfc`, of the indicator |LFC| >= 0.5.
is_lfc_large = np.abs(lfc) >= 0.5
de_probas_small = is_lfc_large.mean(0)
de_probas_small_std = is_lfc_large.std(0)

# Decision rule delegated to `predict_de_genes` with the target FDR Q0.
# Alternative tried: is_pred_de_small = de_probas_small >= 0.5
is_pred_de_small = predict_de_genes(de_probas_small, desired_fdr=Q0)

# Smallest DE probability among the called genes, i.e. the effective threshold.
# The probabilities are indexed through a boolean view of the calls (indexing the calls
# by themselves says nothing about the threshold); NaN when no gene is called.
called_mask = np.asarray(is_pred_de_small, dtype=bool)
alpha = de_probas_small[called_mask].min() if called_mask.any() else np.nan

true_fdr = ((1.0 - is_significant_de) * is_pred_de_small).sum() / is_pred_de_small.sum()
n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_small)).sum() / n_positives
print(true_fdr, true_fnr)

# Sampled LFCs of the inspected gene; black line = ground truth, title = its DE probability.
plt.hist(lfc[:, gene_idx])
plt.axvline(x=true_lfc, color="black")
plt.title(de_probas_small[gene_idx])

### Analysis

In [None]:
# Number of genes called DE by each of the two current decision vectors.
# NOTE(review): in top-to-bottom order `is_pred_de` was last set by the credible-interval
# cell of the 5-cell section and `is_pred_de_small` by the 1-cell section — confirm that
# this is the comparison intended here.
print(is_pred_de.sum())
print(is_pred_de_small.sum())

In [None]:
# Overlay the per-gene DE probabilities of the small-sample and the 100-cell analyses.
# NOTE(review): when the notebook is run top to bottom, `de_probas_small` was last assigned
# in the "### 1" section (one cell per population), so the "5 cells" label may be stale.
plt.title("Empirical distribution of predicted probabilities of being DE")

plt.hist(de_probas_small, alpha=0.25, label="5 cells")
plt.hist(de_probas, alpha=0.25, label="100 cells")

plt.legend()

In [None]:
# Overlay the per-gene standard deviations of the |LFC| >= 0.5 indicator for the two analyses.
plt.hist(de_probas_std, alpha=0.25)
plt.hist(de_probas_small_std, alpha=0.25)

### Voting

In [None]:
# Number of votes (one random cell pair each) aggregated by the voting loop below.
n_votes = 500

In [None]:
from tqdm import tqdm

In [None]:
# Voting strategy: build a pool of 100 cells per population, then let `n_votes` random
# single-cell pairs each produce a DE call; the calls are aggregated in the next cell.
z, labels, scales = trainer.test_set.get_latents(
    n_samples=2000, other=True, device="cpu"
)

labels = labels.squeeze()
where_a = np.where(labels == 0)[0]
where_b = np.where(labels == 1)[0]
# np.random.choice draws WITH replacement by default, so the pool may contain duplicates.
where_a = where_a[np.random.choice(len(where_a), size=100)]
where_b = where_b[np.random.choice(len(where_b), size=100)]
scales_a_all = scales[:, where_a, :].numpy()
scales_b_all = scales[:, where_b, :].numpy()

# One row of 0/1 decisions per vote.
all_votes = np.zeros((n_votes, n_genes))
for vote in tqdm(range(n_votes)):
    # Pick one cell of each pool; the pool size is read from the arrays so it cannot
    # drift from the `size=` used when the pools were built.
    where_a = np.random.choice(scales_a_all.shape[1], size=1)
    where_b = np.random.choice(scales_b_all.shape[1], size=1)
    scales_a = scales_a_all[:, where_a, :].reshape((-1, n_genes))
    scales_b = scales_b_all[:, where_b, :].reshape((-1, n_genes))

    scales_ab, scales_bb = demultiply(arr1=scales_a, arr2=scales_b, factor=3)
    lfc = np.log2(scales_ab) - np.log2(scales_bb)

    # Only the mean of the |LFC| >= 0.5 indicator feeds the decision; nothing else is
    # computed inside this loop.
    de_probas = (np.abs(lfc) >= 0.5).mean(0)
    is_pred_de = predict_de_genes(de_probas, desired_fdr=Q0)

    all_votes[vote, :] = is_pred_de

In [None]:
# Majority vote: a gene is called DE when at least half of the votes flagged it.
is_pred_de_vote = all_votes.mean(0) >= 0.5

n_positives = is_significant_de.sum()
true_fnr = (is_significant_de * (1.0 - is_pred_de_vote)).sum() / n_positives
true_fdr = ((1.0 - is_significant_de) * is_pred_de_vote).sum() / is_pred_de_vote.sum()
print(true_fdr, true_fnr)

In [None]:
# Distribution over genes of the fraction of votes that called the gene DE.
plt.hist(all_votes.mean(0))

### Credible intervals

In [None]:
# List the columns available in the IAF results (the hdi64 bounds are used below).
res_iaf.keys()

In [None]:
def fnr_fdr(my_df):
    """Compute the false discovery and false negative rates of one group's DE calls.

    Args:
        my_df: one row per gene, with a ``gene`` column and a boolean ``is_pred_de`` column.

    Returns:
        ``pd.Series`` with entries ``fdr`` (false calls / all calls) and ``fnr``
        (missed DE genes / all DE genes). A rate is NaN when its denominator is zero.

    Reads the notebook global ``is_significant_de`` (ground-truth DE indicator), which is
    compared with the calls by position.
    """
    # Put the rows in gene order first, as `get_coverage` does, so that row i is compared
    # with entry i of the ground truth whatever the row order of the incoming group.
    # Assumes `is_significant_de` is ordered by gene id — TODO confirm.
    ordered = my_df.sort_values("gene")
    # Plain boolean arrays: no index alignment, and both 0/1 and bool inputs are accepted.
    is_called = ordered["is_pred_de"].values.astype(bool)
    is_truly_de = np.asarray(is_significant_de, dtype=bool)

    true_fdr = (is_called & ~is_truly_de).sum() / is_called.sum()
    n_positives = is_truly_de.sum()
    true_fnr = (is_truly_de & ~is_called).sum() / n_positives
    return pd.Series(dict(fdr=true_fdr, fnr=true_fnr))

# Interval-based decision on the IAF results: a gene is called DE when its
# [hdi64_low, hdi64_high] bounds share a sign and are both at least 0.5 in magnitude.
# FDR / FNR are computed per run, then summarised (mean, std) for every sample size.
(
    res_iaf.assign(
        is_pred_de=lambda df: (df.hdi64_low * df.hdi64_high >= 0.0)
        & (df.hdi64_low.abs() >= 0.5)
        & (df.hdi64_high.abs() >= 0.5)
    )
    .groupby(["training", "algorithm", "sample_size", "experiment"])
    .apply(fnr_fdr)
    .reset_index()
    .groupby(["sample_size"])
    .agg({"fdr": ["mean", "std"], "fnr": ["mean", "std"]})
)