# MOFA+

In this notebook we "train" our MOFA model, in order to find the Latent Factors.

In [2]:
from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np

In [18]:
ent = entry_point()


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        


## Data Loading

In [58]:
GO_BP = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-BP.csv', index_col=0)
GO_CC = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-CC.csv', index_col=0)
GO_MF = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-MF.csv', index_col=0)
HPO = pd.read_csv('data/gene_hpo_matrix_binary_withAncestors_namespace_Phenotypic abnormality.csv', index_col=0)

In [59]:
GO_BP = GO_BP.values.astype(float)
GO_CC = GO_CC.values.astype(float)
GO_MF = GO_MF.values.astype(float)
HPO = HPO.values.astype(float)

In [60]:
print(GO_BP.shape)
print(GO_CC.shape)
print(GO_MF.shape)
print(HPO.shape)

(5183, 9873)
(5183, 1478)
(5183, 3258)
(5183, 10185)


In [61]:
D = [GO_BP.shape[1],GO_CC.shape[1], GO_MF.shape[1], HPO.shape[1]] # Number of features per view
M = len(D)      # Number of views
K = 15           # Number of factors
N = [5183] * 4   # Number of samples per group
G = len(N)      # Number of groups

In [64]:
data = [[GO_BP], [GO_CC], [GO_MF], [HPO]]

In [67]:
print(len(data))          # 4 views  (groups)
for i, view in enumerate(data):
    print(f"View {i} shape: {view.shape}")

4
View 0 shape: (5183, 9873)
View 1 shape: (5183, 1478)
View 2 shape: (5183, 3258)
View 3 shape: (5183, 10185)


## Model Training

In [66]:
ent.set_data_options(scale_groups=False, scale_views=False)
ent.set_data_matrix(
    data=data,
    likelihoods=['bernoulli', 'bernoulli', 'bernoulli', 'bernoulli']
)
ent.set_model_options(factors=15)

View names not provided, using default naming convention:
- view1, view2, ..., viewM

Features names not provided, using default naming convention:
- feature1_view1, featureD_viewM

Groups names not provided, using default naming convention:
- group1, group2, ..., groupG

Samples names not provided, using default naming convention:
- sample1_group1, sample2_group1, sample1_group2, ..., sampleN_groupG

Successfully loaded view='view0' group='group0' with N=5183 samples and D=9873 features...
Successfully loaded view='view1' group='group0' with N=5183 samples and D=1478 features...
Successfully loaded view='view2' group='group0' with N=5183 samples and D=3258 features...
Successfully loaded view='view3' group='group0' with N=5183 samples and D=10185 features...


Model options:
- Automatic Relevance Determination prior on the factors: False
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: Tr

In [68]:
ent.set_train_options(iter=1000, convergence_mode='fast', gpu_mode=False)

In [69]:
ent.build()
ent.run()



######################################
## Training the model with seed 819427 ##
######################################


ELBO before training: -267643536.64 

Iteration 1: time=68.05, ELBO=-138478162.28, deltaELBO=129165374.358 (48.26022551%), Factors=15
Iteration 2: time=72.07, ELBO=-88917844.18, deltaELBO=49560318.107 (18.51728561%), Factors=15
Iteration 3: time=66.83, ELBO=-88789043.54, deltaELBO=128800.639 (0.04812395%), Factors=15


#######################
## Training finished ##
#######################




## Saving the Model

In [71]:
ent.save(outfile='data/mofa.hdf5', save_data=True)

Saving model in data/mofa...
Note: the model to be saved is not trained.
