In [31]:
import pandas as pd

# Load the clinical table; the first CSV column is used as the row index.
df = pd.read_csv('Clinical data/clinical_data.csv', index_col=0)

# Split the table into thematic column groups. Each selection raises
# KeyError if any of the listed columns is absent from the CSV.

# Patient demographics
demographics = df.loc[:, ["age", "sex", "race", "participant_country", "bmi"]]

# Tumor characteristics
tumor = df.loc[:, [
    "tumor_site",
    "tumor_size_cm",
    "tumor_necrosis",
    "lymph_vascular_invasion",
    "perineural_invasion",
    "tumor_stage_pathological",
]]

# Lifestyle factors
lifestyle = df.loc[:, ["alcohol_consumption", "tobacco_smoking_history"]]

# Follow-up and outcome information
outcomes = df.loc[:, ["follow_up_days", "vital_status", "cause_of_death"]]

In [32]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler

def preprocess(df_subset):
    """Turn a DataFrame into a purely numeric feature matrix.

    Numeric and boolean columns are standardised with ``StandardScaler``;
    every other column (object, ``category``, pandas string dtypes, ...) is
    one-hot encoded with ``OneHotEncoder``. Selecting categorical columns by
    "not numeric" rather than by ``object`` dtype keeps text stored in a
    non-object dtype away from the scaler, which cannot handle it.

    Parameters
    ----------
    df_subset : pandas.DataFrame
        Columns to transform. Rows are kept in their original order.

    Returns
    -------
    numpy.ndarray or None
        Array with one row per input row: scaled numeric columns first,
        followed by the one-hot columns. If only one kind of column is
        present, that transformer's output is returned directly. ``None``
        is returned when ``df_subset`` has no columns at all.

    Notes
    -----
    The scaler and encoder are fitted on ``df_subset`` itself and then
    discarded, so output column names are not available to the caller.
    """
    # Local import: numpy is not imported at module level before this cell.
    import numpy as np

    # Partition columns by dtype; the two selections are complementary.
    numeric_like = ["number", "bool"]
    num_cols = df_subset.select_dtypes(include=numeric_like).columns
    cat_cols = df_subset.select_dtypes(exclude=numeric_like).columns

    # Collect the transformed pieces, numeric block first.
    blocks = []
    if len(num_cols) > 0:
        blocks.append(StandardScaler().fit_transform(df_subset[num_cols]))
    if len(cat_cols) > 0:
        enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
        blocks.append(enc.fit_transform(df_subset[cat_cols]))

    if not blocks:
        return None
    if len(blocks) == 1:
        return blocks[0]
    return np.hstack(blocks)

# Run every column group through preprocess(); the dict is keyed by group
# name and keeps the order demographics, tumor, lifestyle, outcomes.
views = {
    view_name: preprocess(frame)
    for view_name, frame in (
        ("demographics", demographics),
        ("tumor", tumor),
        ("lifestyle", lifestyle),
        ("outcomes", outcomes),
    )
}

In [33]:

from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np

# Create the mofapy2 entry point object used to configure and train the model.
ent = entry_point()

# Dimensions describing the dataset loaded below. These variables are not
# passed to any mofapy2 call in this cell.
D = [1000, 1000]  # Number of features per view
N = [100, 100]    # Number of samples per group
K = 5             # Number of factors (the model below is set up with factors=10)
M = len(D)        # Number of views
G = len(N)        # Number of groups

# Download the tab-separated example dataset as a DataFrame.
# NOTE(review): this cell trains on the downloaded example data rather than on
# the `views` built from the clinical table - confirm that is intended.
data_url = "http://ftp.ebi.ac.uk/pub/databases/mofa/getting_started/data.txt.gz"
data_dt = pd.read_csv(data_url, sep="\t")

# Register the data with one likelihood per view.
ent.set_data_df(data_dt, likelihoods=["gaussian", "gaussian"])

# Model configuration: 10 initial factors with spike-and-slab and ARD priors
# on the weights enabled.
ent.set_model_options(factors=10, spikeslab_weights=True, ard_weights=True)



        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        


Loaded group='group_0' view='view_0' with N=100 samples and D=1000 features...
Loaded group='group_0' view='view_1' with N=100 samples and D=1000 features...
Loaded group='group_1' view='view_0' with N=100 samples and D=1000 features...
Loaded group='group_1' view='view_1' with N=100 samples and D=1000 features...



Model options:
- Automatic Relevance Determination prior on 

In [34]:
# Training configuration.
# gpu_mode=True is only a request: per the logged output, mofapy2 switches to
# CPU mode when no GPU is found.
# NOTE(review): dropR2 presumably is the variance-explained threshold below
# which factors are dropped during training - confirm against mofapy2 docs.
train_options = {
    "convergence_mode": "fast",
    "dropR2": 0.001,
    "gpu_mode": True,
    "seed": 1,
}
ent.set_train_options(**train_options)

# Build the model from the options set so far, then train it.
ent.build()
ent.run()

# Write the trained model to an HDF5 file.
ent.save("tmp/mofa_model.hdf5")


GPU mode is activated, but GPU not found... switching to CPU mode
For GPU mode, you need:
1 - Make sure that you are running MOFA+ on a machine with an NVIDIA GPU
2 - Install CUPY following instructions on https://docs-cupy.chainer.org/en/stable/install.html



######################################
## Training the model with seed 1 ##
######################################


ELBO before training: -3181781.20 

Iteration 1: time=0.35, ELBO=-567016.11, deltaELBO=2614765.088 (82.17928657%), Factors=9
Iteration 2: time=0.36, ELBO=-478387.33, deltaELBO=88628.777 (2.78550822%), Factors=8
Iteration 3: time=0.33, ELBO=-470541.39, deltaELBO=7845.941 (0.24658959%), Factors=7
Iteration 4: time=0.28, ELBO=-466294.22, deltaELBO=4247.174 (0.13348416%), Factors=6
Iteration 5: time=0.31, ELBO=-463375.32, deltaELBO=2918.899 (0.09173791%), Factors=5
Iteration 6: time=0.25, ELBO=-463041.02, deltaELBO=334.296 (0.01050656%), Factors=5
Iteration 7: time=0.18, ELBO=-462888.35, deltaELBO=152.672 (0.00479833