# MOFA+

In this notebook we "train" our MOFA model, in order to find the Latent Factors.

In [67]:
from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np

In [68]:
# Only run if you have CUDA
import cupy as cp
print(cp.cuda.runtime.getDeviceCount())
print(cp.__version__)

1
13.6.0


In [95]:
ent = entry_point()


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        


## Data Loading

In [96]:
GO_BP = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-BP.csv', index_col=0)
GO_CC = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-CC.csv', index_col=0)
GO_MF = pd.read_csv('data/gene_go_matrix_propT_rel-is_a-part_of_ont-MF.csv', index_col=0)
HPO   = pd.read_csv('data/gene_hpo_matrix_binary_withAncestors_namespace_Phenotypic abnormality.csv', index_col=0)

In [97]:
print(GO_BP.shape)
print(GO_CC.shape)
print(GO_MF.shape)
print(HPO.shape)

(5183, 9873)
(5183, 1478)
(5183, 3258)
(5183, 10185)


In [98]:
data = [[GO_BP], [GO_CC], [GO_MF], [HPO]]

In [99]:
data

[[           GO.0006805  GO.0006400  GO.0006418  GO.0006419  GO.0008033  \
  10                  1           0           0           0           0   
  16                  0           1           1           1           1   
  18                  0           0           0           0           0   
  19                  0           0           0           0           0   
  20                  0           0           0           0           0   
  ...               ...         ...         ...         ...         ...   
  105371045           0           0           0           0           0   
  105804841           0           0           0           0           0   
  109580095           0           0           0           0           0   
  111365204           0           0           0           0           0   
  120766137           0           0           0           0           0   
  
             GO.0021680  GO.0043524  GO.0050885  GO.0051402  GO.0106074  ...  \
  10             

## Model Training

In [100]:
sample_names = [
    [str(x) for x in GO_BP.index.tolist()],  # group 1 (BP)
]

feature_names = [
    GO_BP.columns.tolist(),  # group 1 has 1 view
    GO_CC.columns.tolist(),  # group 2 has 1 view
    GO_MF.columns.tolist(),  # group 3 has 1 view
    HPO.columns.tolist()     # group 4 has 1 view
]

In [101]:
ent.set_data_options(scale_groups=False, scale_views=False)

# ent.sample_names = sample_names
# ent.feature_names = feature_names
ent.set_data_matrix(
    data=data,
    likelihoods=['bernoulli', 'bernoulli', 'bernoulli', 'bernoulli'],
    groups_names=['GO_HPO-View'],  # Add view names
    views_names=['GO_BP', 'GO_CC', 'GO_MF', 'HPO'],  # Add group names
    features_names=feature_names,
    samples_names=sample_names,
)
ent.set_model_options(factors=15)

Successfully loaded view='GO_BP' group='GO_HPO-View' with N=5183 samples and D=9873 features...
Successfully loaded view='GO_CC' group='GO_HPO-View' with N=5183 samples and D=1478 features...
Successfully loaded view='GO_MF' group='GO_HPO-View' with N=5183 samples and D=3258 features...
Successfully loaded view='HPO' group='GO_HPO-View' with N=5183 samples and D=10185 features...


Model options:
- Automatic Relevance Determination prior on the factors: False
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True
Likelihoods:
- View 0 (GO_BP): bernoulli
- View 1 (GO_CC): bernoulli
- View 2 (GO_MF): bernoulli
- View 3 (HPO): bernoulli




In [102]:
ent.set_train_options(iter=2, convergence_mode='slow', gpu_mode=True)


GPU mode is activated



In [103]:
ent.build()
ent.run()



######################################
## Training the model with seed 400217 ##
######################################


ELBO before training: -267687041.80 

Iteration 1: time=12.10, ELBO=-138496376.66, deltaELBO=129190665.142 (48.26183004%), Factors=15


#######################
## Training finished ##
#######################




## Saving the Model

In [106]:
ent.save(outfile='model/mofa.hdf5', save_data=True)

Saving model in model/mofa.hdf5...
