In [1]:
from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Data preparation

In [51]:
# Protein matrix (tab-separated). Downstream, the 'Cell_line' column is used as the
# sample identifier and every other column is melted into a protein feature.
protein_sample = pd.read_csv(
    "../data/protein/E0022_P06_Protein_Matrix_ProNorM_no_control_update.txt",
    sep="\t",
)

In [52]:
# Per-drug summary scores, used to decide which drugs enter the model.
drug_score = pd.read_csv("../data/drug/final_drug_scores.tsv", sep="\t")

# Keep the ids of drugs with MultiDrug_correlation > 0.4 and sensitive_count > 20.
drug_list = drug_score.loc[
    (drug_score["MultiDrug_correlation"] > 0.4) & (drug_score["sensitive_count"] > 20),
    "drug_id",
].values

In [53]:
# Reshape the wide protein matrix into the long five-column layout passed to MOFA:
# one row per (sample, feature) pair, tagged with its view and group.
protein_mofa = (
    protein_sample
    .melt(id_vars="Cell_line", var_name="feature", value_name="value")
    .rename(columns={"Cell_line": "sample"})
    .assign(view="protein", group="group_0")
    .loc[:, ["sample", "group", "feature", "view", "value"]]
)

In [54]:
# Drug-response table; pandas infers gzip compression from the ".gz" suffix.
# Columns used downstream: cell_line_name, drug_id, ln_IC50.
ic50 = pd.read_csv(
    "../data/drug/drug_final_processed_growth_corrected.csv.gz",
)

In [55]:
# Build the drug view in the same long layout as the protein view, keeping only
# cell lines that also appear in the protein data.
ic50_mofa = (
    ic50
    .loc[ic50["cell_line_name"].isin(protein_mofa["sample"].unique())]
    .rename(columns={"cell_line_name": "sample", "drug_id": "feature", "ln_IC50": "value"})
    .assign(view="drug", group="group_0")
    .loc[:, ["sample", "group", "feature", "view", "value"]]
)

In [56]:
# Restrict the drug view to the drugs selected in drug_list.
ic50_mofa = ic50_mofa.loc[ic50_mofa["feature"].isin(drug_list)]

In [57]:
# Stack both views into a single long-format table for MOFA.
# The two inputs carry unrelated row labels (melt produces a fresh 0..N-1 index,
# while the drug table keeps the row labels of `ic50` that survived filtering),
# so rebuild the index rather than keep duplicate labels. Shape and column
# contents are unchanged.
combined_mofa = pd.concat([protein_mofa, ic50_mofa], ignore_index=True)

In [58]:
# Sanity check: (rows, columns) of the combined table; the 5 columns are
# sample, group, feature, view and value.
combined_mofa.shape

(3710307, 5)

# Train the MOFA model

In [59]:
# Create the MOFA entry point and configure data scaling: each view is scaled to
# unit variance (as the log line printed by this cell reports); groups are not scaled.
ent = entry_point()
ent.set_data_options(scale_groups=False, scale_views=True)


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Scaling views to unit variance...



In [60]:
# Register the long-format table with MOFA, one likelihood per view.
# Both views are Gaussian, so the order of the list does not matter here.
ent.set_data_df(
    combined_mofa,
    likelihoods=["gaussian", "gaussian"],
)

Loaded group='group_0' view='drug' with N=976 samples and D=435 features...
Loaded group='group_0' view='protein' with N=979 samples and D=3425 features...




In [61]:
# Model priors: start from 10 factors (the training log later reports Factors=9),
# ARD priors on both factors and weights, and a spike-and-slab prior on the weights.
ent.set_model_options(
    factors=10,
    spikeslab_weights=True,
    ard_factors=True,
    ard_weights=True,
)

Model options:
- Automatic Relevance Determination prior on the factors: True
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True 

Likelihoods:
- View 0 (drug): gaussian
- View 1 (protein): gaussian


In [62]:
# Training configuration.
# NOTE(review): the per-parameter notes below are presumed meanings -- confirm
# against the documentation of the installed mofapy2 version.
ent.set_train_options(
    iter=1000,                # presumably the maximum number of iterations
    convergence_mode="fast",
    startELBO=1,              # presumably the first iteration at which the ELBO is computed
    freqELBO=1,               # presumably how often (in iterations) the ELBO is computed
    dropR2=0.001,             # presumably the variance-explained threshold for dropping factors
    gpu_mode=False,
    verbose=False,
    seed=1,                   # fixed seed; the training banner reports "seed 1"
)

In [63]:
# Build the model from the data, model and training options set above.
ent.build()

# Fit the model; the ELBO and the factor count are logged for each iteration.
ent.run()



######################################
## Training the model with seed 1 ##
######################################


ELBO before training: -37899833.43 

Iteration 1: time=0.69, ELBO=-3440647.19, deltaELBO=34459186.237 (90.92173533%), Factors=9
Iteration 2: time=0.69, ELBO=-3086703.79, deltaELBO=353943.398 (0.93389170%), Factors=9
Iteration 3: time=0.70, ELBO=-3056062.70, deltaELBO=30641.089 (0.08084756%), Factors=9
Iteration 4: time=0.70, ELBO=-3039597.87, deltaELBO=16464.830 (0.04344301%), Factors=9
Iteration 5: time=0.69, ELBO=-3023075.17, deltaELBO=16522.703 (0.04359571%), Factors=9
Iteration 6: time=0.69, ELBO=-3007975.76, deltaELBO=15099.410 (0.03984031%), Factors=9
Iteration 7: time=0.70, ELBO=-2996976.28, deltaELBO=10999.478 (0.02902250%), Factors=9
Iteration 8: time=0.68, ELBO=-2990026.67, deltaELBO=6949.615 (0.01833680%), Factors=9
Iteration 9: time=0.63, ELBO=-2985743.73, deltaELBO=4282.939 (0.01130068%), Factors=9
Iteration 10: time=0.64, ELBO=-2983026.52, deltaELBO=2717.

In [64]:
# Write the trained model to an HDF5 file.
ent.save("../data/MOFA/protein_selected_drug_growth_corrected.hdf5")

Saving model in ../data/MOFA/protein_selected_drug_growth_corrected.hdf5...



In [65]:
# Weight expectations for view 1, which is the protein view according to the
# likelihood listing printed by set_model_options. The result is a dict holding
# 'E' and 'EB' arrays. NOTE(review): 'EB' presumably relates to the spike-and-slab
# prior on the weights -- confirm. This dumps the full arrays; consider displaying
# only a shape or a small slice.
ent.model.getExpectations()['W'][1]

{'E': array([[-3.06389580e-01,  5.25482266e-02,  2.33203503e-01, ...,
          1.09986094e-01, -2.71649546e-02, -4.27469905e-02],
        [-3.53686433e-01, -8.70295027e-03,  1.47282077e-01, ...,
          6.45156060e-02,  5.66297400e-04, -8.12869509e-02],
        [-2.90927964e-01, -2.59971912e-04,  9.25504069e-02, ...,
          6.68480944e-02, -2.92659145e-02, -1.13559825e-01],
        ...,
        [-1.89652516e-01,  5.21884208e-06,  1.39756152e-03, ...,
         -2.63526404e-02,  6.44759674e-02, -8.63174987e-02],
        [-1.14409190e-01,  7.23494697e-02,  3.49479564e-02, ...,
         -8.10892368e-03,  1.97184220e-02, -1.05258269e-02],
        [-3.13979705e-01,  2.35639294e-01,  6.30433274e-02, ...,
          5.19892555e-02,  5.88159787e-04,  9.29303260e-02]]),
 'EB': array([[1.        , 1.        , 1.        , ..., 1.        , 0.9891599 ,
         0.9999996 ],
        [1.        , 0.60564061, 1.        , ..., 0.99999997, 0.15281801,
         1.        ],
        [1.        , 0.062

In [64]:
# Weight expectations for view 0, which is the drug view according to the
# likelihood listing printed by set_model_options. The result is a dict holding
# 'E' and 'EB' arrays. NOTE(review): 'EB' presumably relates to the spike-and-slab
# prior on the weights -- confirm. This dumps the full arrays; consider displaying
# only a shape or a small slice.
ent.model.getExpectations()['W'][0]

{'E': array([[-5.89142176e-02, -1.62332995e-03, -4.44567325e-03, ...,
          5.10156557e-05,  1.63016731e-02,  4.46569126e-02],
        [-4.25091455e-03,  4.25256788e-02, -2.05447048e-02, ...,
          5.20456156e-04, -1.87950818e-02,  2.53352736e-02],
        [-6.58992481e-02,  3.42068408e-03, -4.73004881e-02, ...,
         -2.96164425e-03, -1.65514579e-02,  9.40613850e-02],
        ...,
        [ 4.42667765e-02,  9.83416306e-02,  1.06673249e-03, ...,
          5.84138008e-03,  9.83028069e-04, -3.35706973e-02],
        [ 9.85783333e-02,  1.53912883e-01,  5.90176224e-03, ...,
          1.34594656e-02, -4.00593602e-03, -5.45793633e-02],
        [ 8.65896041e-02,  8.73822137e-02,  4.37233490e-03, ...,
          2.72492946e-03, -1.19277253e-02, -6.77066748e-02]]),
 'EB': array([[0.99951741, 0.14511979, 0.33287473, ..., 0.30840132, 0.60621148,
         0.95182174],
        [0.25866503, 0.98750645, 0.68260118, ..., 0.35179966, 0.61487055,
         0.66459437],
        [0.97474816, 0.200