This is a temporary notebook for SHAP value analysis and plots.

In [10]:
import os
import sys

# Locate the PhenPred checkout: take the first candidate directory that exists,
# fall back to the last candidate otherwise, and expose it to the import system.
_candidate_dirs = ("/home/scai/PhenPred", "/Users/emanuel/Projects/PhenPred")
proj_dir = next(
    (path for path in _candidate_dirs if os.path.exists(path)),
    _candidate_dirs[-1],
)
sys.path.append(proj_dir)

import json
import PhenPred
import argparse
import pandas as pd
from PhenPred.vae import plot_folder
from PhenPred.vae.Hypers import Hypers
from PhenPred.vae.Train import CLinesTrain
from PhenPred.vae.DatasetDepMap23Q2 import CLinesDatasetDepMap23Q2
from PhenPred.vae.DatasetMOFA import CLinesDatasetMOFA
from PhenPred.vae.DatasetMOVE import CLinesDatasetMOVE
from PhenPred.vae.DatasetJAMIE import CLinesDatasetJAMIE
from PhenPred.vae.DatasetIClusterPlus import CLinesDatasetIClusterPlus
from PhenPred.vae.DatasetMoCluster import CLinesDatasetMoCluster
from PhenPred.vae.DatasetMixOmics import CLinesDatasetMixOmics
from PhenPred.vae.DatasetSCVAEIT import CLinesDatasetSCVAEIT
from PhenPred.Utils import two_vars_correlation
# StandardScaler is part of sklearn.preprocessing; import it from its public module.
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score


# Let pandas display up to 100 rows and 100 columns before truncating a frame.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

In [2]:
from scipy.stats import ttest_ind, ttest_rel, wilcoxon
from scipy.stats import shapiro

In [3]:
import warnings

warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Matplotlib defaults for the figures produced in this notebook.
# NOTE(review): the seaborn calls below also write font and line-width settings, so
# some of the values set here (e.g. font.size, legend.fontsize) may not survive
# them -- confirm that this ordering is intended.
plt.rcParams.update(
    {
        "font.family": "Arial",
        "font.size": 4,
        "axes.linewidth": 0.25,
        "figure.figsize": (2.5, 2.5),
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "figure.dpi": 200,
        "legend.fontsize": 4,
    }
)

# Fine-grained overrides applied on top of seaborn's "paper" context.
paper_context_rc = {
    "axes.linewidth": 0.25,
    "xtick.major.size": 2,
    "xtick.major.width": 0.25,
    "ytick.major.size": 2,
    "ytick.major.width": 0.25,
    "xtick.labelsize": 6,
    "ytick.labelsize": 6,
    "axes.labelsize": 7,
    "legend.fontsize": 6,
    "legend.title_fontsize": 6,
}

sns.set(style="ticks", context="paper", font_scale=1, font="Arial")
sns.set_context("paper", rc=paper_context_rc)

import matplotlib.patches as mpatches
import umap

# Let pandas display up to 100 rows and 100 columns before truncating a frame.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

In [4]:
import shap
import pickle
from tqdm.notebook import tqdm

In [5]:
# Fixed colour (hex code) assigned to each omics layer / data view.
OMIC_PALLETS = dict(
    conditionals="#4c72b0",
    copynumber="#dd8452",
    drugresponse="#55a868",
    metabolomics="#c44e52",
    proteomics="#8172b3",
    crisprcas9="#937860",
    transcriptomics="#da8bc3",
    methylation="#8c8c8c",
)

# Latent comparison

In [13]:
# Run identifier used to locate the clustering-score CSV and to name the saved figure.
TIMESTAMP = "20240830_110319"

In [9]:
# Read the clustering-score table stored under the run identified by TIMESTAMP.
clustering_score_path = f"./reports/vae/latent/{TIMESTAMP}_clustering_score.csv"
clustering_score_df = pd.read_csv(clustering_score_path)

In [12]:
# Grouped bar chart of the loaded clustering scores: one group per metric, one bar
# per model. The output name ends in "2vs7omics" (the original spelling "omcis" was
# a typo), matching the name used by the other clustering bar plot in this notebook.
_, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=600)
sns.barplot(data=clustering_score_df, x="metric", y="score", ax=ax, hue="model")
PhenPred.save_figure(
    f"{plot_folder}/latent/{TIMESTAMP}_clustering_score_barplot_2vs7omics"
)

In [5]:
# Run identifiers of the two MOSA models compared below; each is passed to
# Hypers.read_hyperparameters to restore the corresponding run.
TIMESTAMP_2omics = "20240830_110319"
TIMESTAMP_7omics = "20231023_092657"

In [6]:
# Restore the 7-omics MOSA run from its stored hyperparameters and load its outputs.
# The run is addressed through TIMESTAMP_7omics instead of a repeated literal, so the
# notebook constant stays the single place where the compared run is chosen.
hyperparameters_7omics = Hypers.read_hyperparameters(timestamp=TIMESTAMP_7omics)
clines_db_7omics = CLinesDatasetDepMap23Q2(
    datasets=hyperparameters_7omics["datasets"],
    labels_names=hyperparameters_7omics["labels"],
    standardize=hyperparameters_7omics["standardize"],
    filter_features=hyperparameters_7omics["filter_features"],
    filtered_encoder_only=hyperparameters_7omics["filtered_encoder_only"],
    feature_miss_rate_thres=hyperparameters_7omics["feature_miss_rate_thres"],
)

train_7omics = CLinesTrain(
    clines_db_7omics,
    hyperparameters_7omics,
    verbose=hyperparameters_7omics["verbose"],
    stratify_cv_by=clines_db_7omics.samples_by_tissue(
        "Haematopoietic and Lymphoid"
    ),
)
# Run against the timestamp recorded under the "load_run" hyperparameter.
train_7omics.run(run_timestamp=hyperparameters_7omics["load_run"])

# The default call is unpacked as (imputed, latent); the mode="all" call provides
# the `predicted` reconstructions and its second return value is discarded.
mosa_7omics_imputed, mosa_7omics_latent = train_7omics.load_vae_reconstructions()
mosa_7omics_predicted, _ = train_7omics.load_vae_reconstructions(mode="all")

# ---- Hyperparameters
{
    "activation_function": "<not serializable>",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "copynumber": "data/clines//cnv_summary_20230303_matrix.csv",
        "crisprcas9": "data/clines//depmap23Q2/CRISPRGeneEffect.csv",
        "drugresponse": "data/clines//drugresponse.csv",
        "metabolomics": "data/clines//metabolomics.csv",
        "methylation": "data/clines//methylation.csv",
        "proteomics": "data/clines//proteomics.csv",
        "transcriptomics": "data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_har

In [7]:
# 2-omics MOSA run: read its hyperparameters, then build the matching dataset and trainer.
hyperparameters = Hypers.read_hyperparameters(timestamp=TIMESTAMP_2omics)

# Dataset options are forwarded under the same names they have in the hyperparameters.
dataset_kwargs = {
    option: hyperparameters[option]
    for option in (
        "datasets",
        "feature_miss_rate_thres",
        "standardize",
        "filter_features",
        "filtered_encoder_only",
    )
}
clines_db = CLinesDatasetDepMap23Q2(
    labels_names=hyperparameters["labels"],
    **dataset_kwargs,
)

train = CLinesTrain(
    clines_db,
    hyperparameters,
    stratify_cv_by=clines_db.samples_by_tissue("Haematopoietic and Lymphoid"),
    verbose=hyperparameters["verbose"],
)

# ---- Hyperparameters
{
    "activation_function": "<not serializable>",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "drugresponse": "data/clines//drugresponse.csv",
        "transcriptomics": "data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_hard_gumbel": 0.7936881144482251,
    "gmvae_hidden_size": 935,
    "gmvae_init_temp": 1.0,
    "gmvae_k": 51,
    "gmvae_min_temp": 0.5,
    "gmvae_views_logits": 726,
    "hidden_dims": [
        0.7
    ],
    "labels": [
        "tissue",
        "mutations",
        "fussions",
        "msi",
        "

In [8]:
# Run the 2-omics trainer against its recorded "load_run" timestamp and fetch its outputs.
train.run(run_timestamp=hyperparameters["load_run"])
vae_imputed, vae_latent = train.load_vae_reconstructions()
vae_predicted = train.load_vae_reconstructions(mode="all")[0]

# Baselines for which both the imputed data and the latent space are kept.
(
    (mofa_imputed, mofa_latent),
    (move_diabetes_imputed, move_diabetes_latent),
    (jamie_imputed, jamie_latent),
) = (
    baseline.load_reconstructions(clines_db)
    for baseline in (CLinesDatasetMOFA, CLinesDatasetMOVE, CLinesDatasetJAMIE)
)

# Baselines for which only the latent space (second return value) is kept.
mixOmics_latent, iClusterPlus_latent, moCluster_latent = (
    baseline.load_reconstructions(clines_db)[1]
    for baseline in (
        CLinesDatasetMixOmics,
        CLinesDatasetIClusterPlus,
        CLinesDatasetMoCluster,
    )
)

# ---- Hyperparameters
{
    "activation_function": "prelu",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "drugresponse": "/home/scai/PhenPred/data/clines//drugresponse.csv",
        "transcriptomics": "/home/scai/PhenPred/data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_hard_gumbel": 0.7936881144482251,
    "gmvae_hidden_size": 935,
    "gmvae_init_temp": 1.0,
    "gmvae_k": 51,
    "gmvae_min_temp": 0.5,
    "gmvae_views_logits": 726,
    "hidden_dims": [
        0.7
    ],
    "labels": [
        "tissue",
        "mutations",
        "fussions

In [9]:
# Cell lines that have a latent embedding in both MOSA runs. The intersection is
# sorted so the row order used downstream is the same in every kernel session;
# a bare set has no guaranteed iteration order.
# NOTE(review): sorting assumes the index labels are mutually comparable
# (e.g. all string identifiers) -- confirm.
common_cell_lines = sorted(
    set(vae_latent.index) & set(mosa_7omics_latent.index)
)

In [10]:
# Restrict every model's latent representation to the shared cell lines, in the
# same row order for all of them.
mosa_7omics_latent, vae_latent = (
    latent.loc[common_cell_lines] for latent in (mosa_7omics_latent, vae_latent)
)

# The baseline methods keep their latent matrix under the "factors" key.
for baseline_latent in (
    mofa_latent,
    move_diabetes_latent,
    jamie_latent,
    mixOmics_latent,
    iClusterPlus_latent,
    moCluster_latent,
):
    baseline_latent["factors"] = baseline_latent["factors"].loc[common_cell_lines]

In [11]:
# Tissue label per sample taken from the dataset samplesheet; samples with no
# tissue recorded are grouped under "Other tissue".
samplesheet = clines_db.samplesheet["tissue"].fillna("Other tissue")

In [13]:
# Score how well each model's latent space separates the tissue labels.
# Result: long-format frame with columns model / metric / score, two rows per model.
latent_by_model = [
    ("MOSA (7 omics)", mosa_7omics_latent),
    ("MOSA (2 omics)", vae_latent),
    ("MOFA", mofa_latent["factors"]),
    ("MOVE", move_diabetes_latent["factors"]),
    ("JAMIE", jamie_latent["factors"]),
    ("mixOmics", mixOmics_latent["factors"]),
    ("iClusterPlus", iClusterPlus_latent["factors"]),
    ("moCluster", moCluster_latent["factors"]),
]
score_functions = [
    ("calinski_harabasz", calinski_harabasz_score),
    ("davies_bouldin", davies_bouldin_score),
]

score_records = []
for n, z_joint in latent_by_model:
    # Tissue labels aligned to the rows of this model's latent matrix.
    cluster_labels = samplesheet.loc[z_joint.index]
    # Standardise once per model; both metrics are evaluated on the same scaled matrix.
    z_scaled = StandardScaler().fit_transform(z_joint)
    for metric_name, score_fn in score_functions:
        score_records.append(
            {
                "model": n,
                "metric": metric_name,
                "score": score_fn(z_scaled, cluster_labels),
            }
        )
clustering_score_df = pd.DataFrame(
    score_records, columns=["model", "metric", "score"]
)

In [15]:
# Grouped bar chart of the clustering scores: one group per metric, one bar per model.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 3), dpi=600)
sns.barplot(x="metric", y="score", hue="model", data=clustering_score_df, ax=ax)

barplot_path = (
    f"{plot_folder}/latent/{TIMESTAMP_2omics}_clustering_score_barplot_2vs7omics"
)
PhenPred.save_figure(barplot_path)

# RNA

In [6]:
# Run identifiers of the two MOSA models compared in this section; each is passed
# to Hypers.read_hyperparameters to restore the corresponding run.
TIMESTAMP_2omics = "20240830_110319"
TIMESTAMP_7omics = "20231023_092657"

In [7]:
# Restore the 7-omics MOSA run and keep its reconstructions under mosa_7omics_* names.
# `hyperparameters`, `clines_db` and `train` refer to the 7-omics run within this cell.
hyperparameters = Hypers.read_hyperparameters(timestamp=TIMESTAMP_7omics)

# Dataset options are forwarded under the same names they have in the hyperparameters.
dataset_kwargs = {
    option: hyperparameters[option]
    for option in (
        "datasets",
        "feature_miss_rate_thres",
        "standardize",
        "filter_features",
        "filtered_encoder_only",
    )
}
clines_db = CLinesDatasetDepMap23Q2(
    labels_names=hyperparameters["labels"],
    **dataset_kwargs,
)

train = CLinesTrain(
    clines_db,
    hyperparameters,
    stratify_cv_by=clines_db.samples_by_tissue("Haematopoietic and Lymphoid"),
    verbose=hyperparameters["verbose"],
)
train.run(run_timestamp=hyperparameters["load_run"])

# Default call -> (imputed, latent); the mode="all" call supplies the `predicted` data.
mosa_7omics_imputed, mosa_7omics_latent = train.load_vae_reconstructions()
mosa_7omics_predicted = train.load_vae_reconstructions(mode="all")[0]

# ---- Hyperparameters
{
    "activation_function": "<not serializable>",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "copynumber": "data/clines//cnv_summary_20230303_matrix.csv",
        "crisprcas9": "data/clines//depmap23Q2/CRISPRGeneEffect.csv",
        "drugresponse": "data/clines//drugresponse.csv",
        "metabolomics": "data/clines//metabolomics.csv",
        "methylation": "data/clines//methylation.csv",
        "proteomics": "data/clines//proteomics.csv",
        "transcriptomics": "data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_har

In [8]:
# Restore the 2-omics MOSA run; after this cell `hyperparameters`, `clines_db` and
# `train` refer to the 2-omics model.
hyperparameters = Hypers.read_hyperparameters(timestamp=TIMESTAMP_2omics)

# Dataset options are forwarded under the same names they have in the hyperparameters.
dataset_kwargs = {
    option: hyperparameters[option]
    for option in (
        "datasets",
        "feature_miss_rate_thres",
        "standardize",
        "filter_features",
        "filtered_encoder_only",
    )
}
clines_db = CLinesDatasetDepMap23Q2(
    labels_names=hyperparameters["labels"],
    **dataset_kwargs,
)

train = CLinesTrain(
    clines_db,
    hyperparameters,
    stratify_cv_by=clines_db.samples_by_tissue("Haematopoietic and Lymphoid"),
    verbose=hyperparameters["verbose"],
)

train.run(run_timestamp=hyperparameters["load_run"])

# ---- Hyperparameters
{
    "activation_function": "<not serializable>",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "drugresponse": "data/clines//drugresponse.csv",
        "transcriptomics": "data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_hard_gumbel": 0.7936881144482251,
    "gmvae_hidden_size": 935,
    "gmvae_init_temp": 1.0,
    "gmvae_k": 51,
    "gmvae_min_temp": 0.5,
    "gmvae_views_logits": 726,
    "hidden_dims": [
        0.7
    ],
    "labels": [
        "tissue",
        "mutations",
        "fussions",
        "msi",
        "

DepMap23Q2 | Samples = 1,590 | Transcriptomics = 15,278 (7,193 masked) | Drug response = 810 (0 masked) | Labels = 237


In [11]:
# Outputs of the 2-omics MOSA run held by `train`.
vae_imputed, vae_latent = train.load_vae_reconstructions()
vae_predicted = train.load_vae_reconstructions(mode="all")[0]

# Baselines for which both the imputed data and the latent space are kept.
(
    (mofa_imputed, mofa_latent),
    (move_diabetes_imputed, move_diabetes_latent),
    (jamie_imputed, jamie_latent),
    (scvaeit_imputed, scvaeit_latent),
) = (
    baseline.load_reconstructions(clines_db)
    for baseline in (
        CLinesDatasetMOFA,
        CLinesDatasetMOVE,
        CLinesDatasetJAMIE,
        CLinesDatasetSCVAEIT,
    )
)

# Baselines for which only the latent space (second return value) is kept.
mixOmics_latent, iClusterPlus_latent, moCluster_latent = (
    baseline.load_reconstructions(clines_db)[1]
    for baseline in (
        CLinesDatasetMixOmics,
        CLinesDatasetIClusterPlus,
        CLinesDatasetMoCluster,
    )
)

# ---- Hyperparameters
{
    "activation_function": "prelu",
    "batch_norm": false,
    "batch_size": 256,
    "contrastive_neg_margin": 0.15,
    "contrastive_pos_margin": 0.85,
    "dataname": "depmap23Q2",
    "datasets": {
        "drugresponse": "/home/scai/PhenPred/data/clines//drugresponse.csv",
        "transcriptomics": "/home/scai/PhenPred/data/clines//depmap23Q2/OmicsExpressionGenesExpectedCountProfileVoom.csv"
    },
    "feature_dropout": 0,
    "feature_miss_rate_thres": 0.85,
    "filter_features": [
        "transcriptomics",
        "crisprcas9",
        "methylation"
    ],
    "filtered_encoder_only": true,
    "gmvae_decay_temp": true,
    "gmvae_decay_temp_rate": 0.013862944,
    "gmvae_hard_gumbel": 0.7936881144482251,
    "gmvae_hidden_size": 935,
    "gmvae_init_temp": 1.0,
    "gmvae_k": 51,
    "gmvae_min_temp": 0.5,
    "gmvae_views_logits": 726,
    "hidden_dims": [
        0.7
    ],
    "labels": [
        "tissue",
        "mutations",
        "fussions

In [12]:
# True for samples with at least one non-missing transcriptomics value in the dataset.
samples_mgexp = ~clines_db.dfs["transcriptomics"].isnull().all(axis=1)

# Reference expression matrix read from disk and transposed; it is indexed below as
# [sample, genes], i.e. rows are samples and columns are genes after the transpose.
gexp_gdsc = pd.read_csv("./data/clines/transcriptomics.csv", index_col=0).T

# Reconstructed ("imputed") transcriptomics of every model under comparison.
gexp_mosa = vae_imputed["transcriptomics"]
gexp_mosa_7omics = mosa_7omics_imputed["transcriptomics"]
gexp_move = move_diabetes_imputed["transcriptomics"]
gexp_jamie = jamie_imputed["transcriptomics"]
gexp_mofa = mofa_imputed["transcriptomics"]
gexp_scvaeit = scvaeit_imputed["transcriptomics"]

gexp_dfs = {
    "MOSA_7omics": gexp_mosa_7omics,
    "MOSA_2omics": gexp_mosa,
    "MOFA": gexp_mofa,
    "MOVE": gexp_move,
    "JAMIE": gexp_jamie,
    "scVAEIT": gexp_scvaeit,
}

# Samples and genes shared by the reference matrix and the 2-omics MOSA output.
# Index.intersection (rather than a Python set) gives the same order in every kernel
# session, so the per-model result tables built from `samples` have reproducible,
# mutually aligned rows.
samples = gexp_gdsc.index.intersection(gexp_mosa.index)
genes = gexp_gdsc.columns.intersection(gexp_mosa.columns).tolist()

In [13]:
# For every model, correlate each sample's reconstructed expression profile with its
# reference profile over the shared genes (Pearson), tag the rows with the model name
# and stack the per-model tables into one long frame.
gexp_corr_dfs = pd.concat(
    [
        pd.DataFrame(
            [
                two_vars_correlation(
                    gexp_gdsc.loc[sample_id, genes],
                    model_gexp.loc[sample_id, genes],
                    method="pearson",
                    extra_fields=dict(
                        sample=sample_id,
                        with_gexp=samples_mgexp.loc[sample_id],
                    ),
                )
                for sample_id in samples
            ]
        ).assign(model=model_name)
        for model_name, model_gexp in gexp_dfs.items()
    ]
)

In [14]:
# Box plot of the per-sample correlations, one box per model, restricted to samples
# flagged as having no transcriptomics (with_gexp is False).
fig, ax = plt.subplots(1, 1, figsize=(1.5, 2), dpi=600)

held_out_corr = gexp_corr_dfs[~gexp_corr_dfs["with_gexp"]]

# Artist styling shared by the box elements.
box_style = dict(linewidth=0.5, edgecolor="black")
whisker_style = dict(linewidth=0.5, color="black")
median_style = dict(linestyle="-", linewidth=0.5)
flier_style = dict(
    marker="o",
    markerfacecolor="black",
    markersize=1.0,
    linestyle="none",
    markeredgecolor="none",
    alpha=0.6,
)

sns.boxplot(
    data=held_out_corr,
    x="model",
    y="corr",
    hue="model",
    palette="tab10",
    linewidth=0.3,
    fliersize=1,
    notch=True,
    saturation=1.0,
    showcaps=False,
    boxprops=box_style,
    whiskerprops=whisker_style,
    flierprops=flier_style,
    medianprops=median_style,
    ax=ax,
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
ax.set(
    title="",
    ylabel="Correlation between reconstructed\nand GDSC transcriptomics (Pearson's r)",
    xlabel="Sample without transcriptomics\nduring training",
)

boxplot_path = f"{plot_folder}/{hyperparameters['load_run']}_reconstructed_gexp_correlation_boxplot_2vs7omics"
PhenPred.save_figure(boxplot_path)

In [13]:
# Keep only the rows of samples flagged as having no transcriptomics (with_gexp is False).
gexp_corr_out_sample_dfs = gexp_corr_dfs.loc[~gexp_corr_dfs["with_gexp"]]

In [50]:
# Shapiro-Wilk test on the out-of-sample correlations of the 7-omics MOSA model.
is_mosa_7omics = gexp_corr_out_sample_dfs["model"] == "MOSA_7omics"
shapiro(gexp_corr_out_sample_dfs.loc[is_mosa_7omics, "corr"])

ShapiroResult(statistic=0.9753528740783312, pvalue=0.00012429713994164323)

In [53]:
# MOSA 7-omics vs MOSA 2-omics on the out-of-sample correlations.
# The table is pivoted to one row per sample so that both tests receive values aligned
# on `sample`; the Wilcoxon signed-rank test pairs observations by position, so it
# must not depend on the two models' rows happening to share the same order.
# A sample missing for either model shows up as NaN in the pivot and propagates to
# the test result instead of being paired with the wrong sample.
# NOTE(review): ttest_ind treats the two groups as independent although the same
# samples are scored by both models; ttest_rel may be the intended parametric
# counterpart -- confirm.
paired_corr = gexp_corr_out_sample_dfs.pivot(
    index="sample", columns="model", values="corr"
)
print(ttest_ind(paired_corr["MOSA_7omics"], paired_corr["MOSA_2omics"]))
print(wilcoxon(paired_corr["MOSA_7omics"], paired_corr["MOSA_2omics"]))

TtestResult(statistic=21.43230547868578, pvalue=3.3627101251530766e-74, df=540.0)
WilcoxonResult(statistic=65.0, pvalue=6.947921853872344e-46)


In [54]:
# MOSA 7-omics vs MOFA on the out-of-sample correlations.
# The table is pivoted to one row per sample so that both tests receive values aligned
# on `sample`; the Wilcoxon signed-rank test pairs observations by position, so it
# must not depend on the two models' rows happening to share the same order.
# A sample missing for either model shows up as NaN in the pivot and propagates to
# the test result instead of being paired with the wrong sample.
# NOTE(review): ttest_ind treats the two groups as independent although the same
# samples are scored by both models; ttest_rel may be the intended parametric
# counterpart -- confirm.
paired_corr = gexp_corr_out_sample_dfs.pivot(
    index="sample", columns="model", values="corr"
)
print(ttest_ind(paired_corr["MOSA_7omics"], paired_corr["MOFA"]))
print(wilcoxon(paired_corr["MOSA_7omics"], paired_corr["MOFA"]))

TtestResult(statistic=18.24570154229317, pvalue=2.6836218299727367e-58, df=540.0)
WilcoxonResult(statistic=74.0, pvalue=7.675247614114657e-46)


In [55]:
# MOSA 2-omics vs MOFA on the out-of-sample correlations.
# The table is pivoted to one row per sample so that both tests receive values aligned
# on `sample`; the Wilcoxon signed-rank test pairs observations by position, so it
# must not depend on the two models' rows happening to share the same order.
# A sample missing for either model shows up as NaN in the pivot and propagates to
# the test result instead of being paired with the wrong sample.
# NOTE(review): ttest_ind treats the two groups as independent although the same
# samples are scored by both models; ttest_rel may be the intended parametric
# counterpart -- confirm.
paired_corr = gexp_corr_out_sample_dfs.pivot(
    index="sample", columns="model", values="corr"
)
print(ttest_ind(paired_corr["MOSA_2omics"], paired_corr["MOFA"]))
print(wilcoxon(paired_corr["MOSA_2omics"], paired_corr["MOFA"]))

TtestResult(statistic=-2.282000726313907, pvalue=0.022877465018056837, df=540.0)
WilcoxonResult(statistic=7739.0, pvalue=1.2630246307937823e-16)


In [32]:
# Paired t-test, MOSA 7-omics vs JAMIE, on the out-of-sample correlations.
# ttest_rel pairs observations by position, so the table is first pivoted to one row
# per sample; each sample is then compared with itself regardless of row order.
# A sample missing for either model shows up as NaN in the pivot and propagates to
# the test result instead of being paired with the wrong sample.
paired_corr = gexp_corr_out_sample_dfs.pivot(
    index="sample", columns="model", values="corr"
)
ttest_rel(paired_corr["MOSA_7omics"], paired_corr["JAMIE"])

TtestResult(statistic=29.191437824112302, pvalue=1.6706086209243048e-85, df=270)

# Drug

In [6]:
# Load the stored table of predicted drug-response correlations for run 20240830_110319.
drug_corr_path = "./reports/vae/drugresponse/20240830_110319_predicted_ctd2_corr.csv"
plot_df = pd.read_csv(drug_corr_path)

In [7]:
# Split the table by the MOSA_outofsample flag into out-of-sample and in-sample rows.
sample_status = plot_df["MOSA_outofsample"]
plot_out_df = plot_df.loc[sample_status.eq("Out-of-sample")]
plot_in_df = plot_df.loc[sample_status.eq("In-sample")]

In [8]:
# MOSA vs MOFA on the out-of-sample drug-response correlations.
# NOTE(review): wilcoxon pairs the two series by position; this presumes both methods'
# rows appear in the same order in the table -- confirm.
corr_by_method = {
    method: plot_out_df.loc[plot_out_df["method"] == method, "corr"]
    for method in ("MOSA_corr", "MOFA_corr")
}
print(ttest_ind(corr_by_method["MOSA_corr"], corr_by_method["MOFA_corr"]))
print(wilcoxon(corr_by_method["MOSA_corr"], corr_by_method["MOFA_corr"]))

TtestResult(statistic=3.44106965740751, pvalue=0.0006309014889326985, df=472.0)
WilcoxonResult(statistic=3480.0, pvalue=8.939902145451377e-24)


In [57]:
# MOSA (2-omics) vs MOSA 7-omics on the out-of-sample drug-response correlations.
# NOTE(review): wilcoxon pairs the two series by position; this presumes both methods'
# rows appear in the same order in the table -- confirm.
corr_by_method = {
    method: plot_out_df.loc[plot_out_df["method"] == method, "corr"]
    for method in ("MOSA_corr", "MOSA_7omics_corr")
}
print(ttest_ind(corr_by_method["MOSA_corr"], corr_by_method["MOSA_7omics_corr"]))
print(wilcoxon(corr_by_method["MOSA_corr"], corr_by_method["MOSA_7omics_corr"]))

TtestResult(statistic=-0.10472369634495242, pvalue=0.916639544284535, df=472.0)
WilcoxonResult(statistic=13553.0, pvalue=0.6036728652783327)
