# MOFA+

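In this notebook we train a MOFA+ model on four gene annotation matrices (GO BP, GO CC, GO MF and HPO) in order to infer the latent factors, and then save the trained model to disk.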

In [1]:
from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np

In [2]:
# Optional GPU check. CuPy is imported inside a guard so that a fresh
# "Restart & Run All" still gets past this cell on a machine without CuPy.
try:
    import cupy as cp
except ImportError as exc:
    # No usable CuPy install: report it and record zero devices instead of aborting.
    cuda_device_count = 0
    print(f"CuPy could not be imported ({exc}); skipping the CUDA check.")
else:
    cuda_device_count = cp.cuda.runtime.getDeviceCount()
    print(cuda_device_count)  # number of CUDA devices reported by CuPy
    print(cp.__version__)     # installed CuPy version

1
13.6.0


In [3]:
# Create the mofapy2 entry point object. Every later `ent.*` call in this
# notebook (data, model and training options, build, run, save) goes through it.
ent = entry_point()


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        


## Data Loading

In [4]:
def _read_matrix(csv_path):
    """Read one annotation matrix, using the first CSV column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


# Three Gene Ontology matrices (BP, CC, MF) and one HPO matrix, going by the
# file names.
GO_BP = _read_matrix('data/gene_go_matrix_propT_rel-is_a-part_of_ont-BP.csv')
GO_CC = _read_matrix('data/gene_go_matrix_propT_rel-is_a-part_of_ont-CC.csv')
GO_MF = _read_matrix('data/gene_go_matrix_propT_rel-is_a-part_of_ont-MF.csv')
HPO = _read_matrix('data/gene_hpo_matrix_binary_withAncestors_namespace_Phenotypic abnormality.csv')

In [5]:
# Print the (rows, columns) shape of each matrix, one per line, in the order
# BP, CC, MF, HPO.
for annotation_matrix in (GO_BP, GO_CC, GO_MF, HPO):
    print(annotation_matrix.shape)

(5183, 9873)
(5183, 1478)
(5183, 3258)
(5183, 10185)


In [6]:
# Nested list handed to mofapy2: one inner list per view, each holding one
# matrix per group. Here that is four views with a single group each.
data = [[view_matrix] for view_matrix in (GO_BP, GO_CC, GO_MF, HPO)]

## Model Training

In [8]:
# All four matrices are passed to MOFA as views of the same samples, but the
# sample names below are taken from GO_BP alone. Fail early if any other
# matrix has a different row index (different labels or a different order),
# because the names would then not describe its rows.
for _view_label, _view_matrix in (('GO_CC', GO_CC), ('GO_MF', GO_MF), ('HPO', HPO)):
    assert GO_BP.index.equals(_view_matrix.index), (
        f"Row index of {_view_label} does not match GO_BP"
    )

# One list of sample names per group; there is a single group.
sample_names = [
    [str(x) for x in GO_BP.index.tolist()],
]

# One list of feature names per view, in the same order as `data`.
feature_names = [
    GO_BP.columns.tolist(),  # view GO_BP
    GO_CC.columns.tolist(),  # view GO_CC
    GO_MF.columns.tolist(),  # view GO_MF
    HPO.columns.tolist(),    # view HPO
]

In [9]:
# Leave both group scaling and view scaling switched off.
ent.set_data_options(scale_groups=False, scale_views=False)

# Register the data: four views, all with a Bernoulli likelihood, one group.
view_labels = ['GO_BP', 'GO_CC', 'GO_MF', 'HPO']
ent.set_data_matrix(
    data=data,
    likelihoods=['bernoulli'] * len(view_labels),  # one likelihood per view
    groups_names=['GO_HPO-View'],                  # name of the single group
    views_names=view_labels,                       # names of the four views
    features_names=feature_names,
    samples_names=sample_names,
)

# Number of latent factors the model starts with.
ent.set_model_options(factors=15)

Successfully loaded view='GO_BP' group='GO_HPO-View' with N=5183 samples and D=9873 features...
Successfully loaded view='GO_CC' group='GO_HPO-View' with N=5183 samples and D=1478 features...
Successfully loaded view='GO_MF' group='GO_HPO-View' with N=5183 samples and D=3258 features...
Successfully loaded view='HPO' group='GO_HPO-View' with N=5183 samples and D=10185 features...


Model options:
- Automatic Relevance Determination prior on the factors: False
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True
Likelihoods:
- View 0 (GO_BP): bernoulli
- View 1 (GO_CC): bernoulli
- View 2 (GO_MF): bernoulli
- View 3 (HPO): bernoulli




In [10]:
# Pin the random seed so that re-running the notebook is repeatable; without
# it mofapy2 picks a new seed on every run. 756540 is the seed reported in the
# recorded training log of this notebook.
TRAIN_SEED = 756540

# At most 50 iterations, 'slow' convergence mode, and GPU training requested.
ent.set_train_options(iter=50, convergence_mode='slow', gpu_mode=True, seed=TRAIN_SEED)


GPU mode is activated



In [11]:
# Build the model from the data and options configured in the cells above,
# then train it. Training logs the ELBO after each iteration.
ent.build()
ent.run()



######################################
## Training the model with seed 756540 ##
######################################


ELBO before training: -267664160.67 

Iteration 1: time=10.51, ELBO=-138487038.58, deltaELBO=129177122.090 (48.26089595%), Factors=15
Iteration 2: time=10.38, ELBO=-88917765.31, deltaELBO=49569273.275 (18.51920450%), Factors=15
Iteration 3: time=10.26, ELBO=-88789034.94, deltaELBO=128730.369 (0.04809399%), Factors=15
Iteration 4: time=10.22, ELBO=-88768999.97, deltaELBO=20034.968 (0.00748511%), Factors=15
Iteration 5: time=10.22, ELBO=-88753355.75, deltaELBO=15644.220 (0.00584472%), Factors=15
Iteration 6: time=10.21, ELBO=-88734692.54, deltaELBO=18663.206 (0.00697262%), Factors=15
Iteration 7: time=10.28, ELBO=-88717019.57, deltaELBO=17672.978 (0.00660267%), Factors=15
Iteration 8: time=10.41, ELBO=-88701906.53, deltaELBO=15113.033 (0.00564627%), Factors=15
Iteration 9: time=10.23, ELBO=-88688758.27, deltaELBO=13148.258 (0.00491222%), Factors=15
Iteration 10: tim

## Saving the Model

In [12]:
# Write the trained model to an HDF5 file under `model/`.
# NOTE(review): `save_data=True` presumably stores the input data in the same
# file - confirm against the mofapy2 documentation.
ent.save(outfile='model/mofa.hdf5', save_data=True)

Saving model in model/mofa.hdf5...
