In [None]:
# Notebook setup: render matplotlib figures inline and switch off the
# notebook's periodic autosave (interval of 0).
%matplotlib inline
%autosave 0
# Load the autoreload extension in mode 2 so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import logging
import numpy as np
import pandas as pd
import pkg_resources
import seaborn as sns
import numpy.ma as ma
import itertools as it
import matplotlib.pyplot as plt
from natsort import natsorted
from crispy.GIPlot import GIPlot
from adjustText import adjust_text
from crispy.MOFA import MOFA, MOFAPlot
from sklearn.metrics.ranking import auc
from crispy.Enrichment import Enrichment
from crispy.CrispyPlot import CrispyPlot
from scipy.stats import pearsonr, spearmanr
from sklearn.mixture import GaussianMixture
from statsmodels.stats.multitest import multipletests
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from cancer_proteomics.notebooks import DataImport, two_vars_correlation
from crispy.DataImporter import (
    CORUM,
    BioGRID,
    PPI,
    HuRI,
)

In [None]:
# Resource directories, each resolved through pkg_resources relative to an
# installed package: data root, PPI data, tables, and the plot output folder.
DPATH, PPIPATH, TPATH, RPATH = (
    pkg_resources.resource_filename(package, resource)
    for package, resource in (
        ("data", "/"),
        ("data", "ppi/"),
        ("tables", "/"),
        ("cancer_proteomics", "plots/"),
    )
)

# Logger registered under the project name.
LOG = logging.getLogger("cancer_proteomics")

### Imports

In [None]:
# Read samplesheet
# Per-sample annotation table: later cells pull covariate columns from it
# (e.g. "tissue", "media", "growth_properties", "ploidy").
ss = DataImport.read_samplesheet()

In [None]:
# Read proteomics (Proteins x Cell lines)
# Rows are addressed downstream by symbols such as "CDH1" and "VIM";
# presumably `map_protein=True` is what maps protein IDs to those symbols
# - TODO confirm against DataImport.
prot = DataImport.read_protein_matrix(map_protein=True)

In [None]:
# Read Transcriptomics
# Rows are addressed downstream by gene symbol (e.g. "CDH1", "VIM");
# assumed to be genes x cell lines like the proteomics matrix - TODO confirm.
gexp = DataImport.read_gene_matrix()

In [None]:
# Read CRISPR
# NOTE(review): `crispr` is not referenced by the cells in this part of the
# notebook; presumably it is consumed further down - confirm before removing.
crispr = DataImport.read_crispr_matrix()

In [None]:
# Read Methylation
# Passed to MOFA below as the "methylation" view.
methy = DataImport.read_methylation_matrix()

In [None]:
# Read drug-response and flatten every index entry into a single
# ";"-separated label (entries are presumably multi-level tuples - confirm
# against DataImport.read_drug_response).
drespo = DataImport.read_drug_response()
drespo = drespo.set_index(
    pd.Series([";".join(str(part) for part in key) for key in drespo.index])
)

In [None]:
# Drug maximum concentrations: give the index the same ";"-joined labels as
# `drespo`, then align the rows to the drug-response index (order included).
dmaxc = DataImport.read_drug_max_concentration()
dmaxc.index = [";".join(str(part) for part in key) for key in dmaxc.index]
dmaxc = dmaxc.reindex(index=drespo.index)

### Covariates

In [None]:
# Sample-level covariates assembled side by side (one column per covariate).
covariates = pd.concat(
    [
        # Scores taken directly from the samplesheet.
        ss[
            [
                "CopyNumberAttenuation",
                "GeneExpressionAttenuation",
                "EMT",
                "Proteasome",
                "TranslationInitiation",
                "CopyNumberInstability",
            ]
        ],
        # CDH1 / VIM measured at the protein and transcript level.
        prot.loc[["CDH1", "VIM"]].transpose().add_suffix("_prot"),
        gexp.loc[["CDH1", "VIM"]].transpose().add_suffix("_gexp"),
        # Indicator columns for the categorical annotations; only two tissue
        # indicators are kept.
        pd.get_dummies(ss["media"]),
        pd.get_dummies(ss["growth_properties"]),
        pd.get_dummies(ss["tissue"])[["Haematopoietic and Lymphoid", "Lung"]],
        # Remaining numeric annotations.
        ss[["ploidy", "mutational_burden", "growth", "size"]],
        ss["replicates_correlation"].rename("RepsCorrelation"),
    ],
    axis=1,
)

In [None]:
# ### MOFA
#
# Two-level sample grouping for the proteomics samples: haematopoietic and
# lymphoid lines ("Haem") versus every other tissue ("Other").
groupby = ss.loc[prot.columns, "tissue"].apply(
    lambda tissue: "Other" if tissue != "Haematopoietic and Lymphoid" else "Haem"
)

In [None]:
# MOFA model over four data views, with samples split by `groupby`.
# NOTE(review): `from_file` presumably lets a previously trained model be
# reused instead of refitting - confirm in crispy.MOFA.
mofa = MOFA(
    views={
        "proteomics": prot,
        "transcriptomics": gexp,
        "methylation": methy,
        "drespo": drespo,
    },
    groupby=groupby,
    # Model size and training settings.
    factors_n=20,
    iterations=2000,
    convergence_mode="fast",
    use_overlap=False,
    # Storage and logging.
    from_file=f"{TPATH}/MultiOmics.hdf5",
    verbose=2,
)

In [None]:
# ### Factors integrated with other measurements
#
# Pearson correlation between every MOFA factor and every covariate, computed
# over the samples for which the covariate is not missing. The nested dict is
# turned into a DataFrame with covariates as rows and factors as columns.
n_factors_corr = {}
for f in mofa.factors:
    factor_values = mofa.factors[f]
    # Align the covariates to this factor's samples once per factor: the
    # reindex does not depend on the covariate, so it is hoisted out of the
    # inner loop instead of being recomputed for every (factor, covariate).
    covariates_aligned = covariates.reindex(factor_values.index)

    n_factors_corr[f] = {}
    for c in covariates:
        # Samples of this factor that have a value for covariate `c`.
        fc_samples = list(covariates_aligned[c].dropna().index)
        # Values are read from the original `covariates` frame so the column
        # keeps its original dtype; pearsonr returns (coefficient, p-value)
        # and only the coefficient is stored.
        n_factors_corr[f][c] = pearsonr(factor_values[fc_samples], covariates[c][fc_samples])[0]
n_factors_corr = pd.DataFrame(n_factors_corr)

In [None]:
# Factor clustermap, exported as a PDF and as a 600 dpi PNG.
MOFAPlot.factors_corr_clustermap(mofa)
for out_file, save_kwargs in (
    (f"{RPATH}/MultiOmics_factors_corr_clustermap.pdf", dict(bbox_inches="tight")),
    (f"{RPATH}/MultiOmics_factors_corr_clustermap.png", dict(bbox_inches="tight", dpi=600)),
):
    plt.savefig(out_file, **save_kwargs)
# Release every open figure once both files are written.
plt.close("all")

In [None]:
# Variance explained across data-sets
# Saved under its own file name: this cell previously reused
# "MultiOmics_factors_corr_clustermap", which silently overwrote the factor
# clustermap exported by the preceding cell.
MOFAPlot.variance_explained_heatmap(mofa)
plt.savefig(
    f"{RPATH}/MultiOmics_factors_variance_explained_heatmap.pdf", bbox_inches="tight",
)
plt.savefig(
    f"{RPATH}/MultiOmics_factors_variance_explained_heatmap.png", bbox_inches="tight", dpi=600,
)
# Release every open figure once both files are written.
plt.close("all")

In [None]:
# Covariates correlation heatmap, exported as a PDF and as a 600 dpi PNG.
MOFAPlot.covariates_heatmap(n_factors_corr, mofa, ss["tissue"])
for out_file, save_kwargs in (
    (f"{RPATH}/MultiOmics_factors_covariates_clustermap.pdf", dict(bbox_inches="tight")),
    (f"{RPATH}/MultiOmics_factors_covariates_clustermap.png", dict(bbox_inches="tight", dpi=600)),
):
    plt.savefig(out_file, **save_kwargs)
# Release every open figure once both files are written.
plt.close("all")