# In [18]:
from mofapy2.run.entry_point import entry_point
import pandas as pd
import numpy as np
import argparse
from scipy.io import mmread, mmwrite
from pathlib import Path

# Set the number of cores used by MKL-backed numerical routines.
# A plain Python variable named MKL_NUM_THREADS is never seen by the library,
# so the value is also exported through the process environment.
# NOTE(review): thread-count variables are usually read when the numerical
# backend initialises, so this is most reliable when it runs before numpy/scipy
# are first imported -- confirm the limit is honoured in this session.
import os

MKL_NUM_THREADS = 16
os.environ["MKL_NUM_THREADS"] = str(MKL_NUM_THREADS)

################################
## Initialise argument parser ##
################################

#p = argparse.ArgumentParser( description='' )
## p.add_argument( '--input_folder',          type=str,              required=True,          help='Input data file (matrix format)' )
#p.add_argument( '--rna_matrix',          type=str,              required=True,          help='' )
#p.add_argument( '--atac_matrix',          type=str,              required=True,          help='' )
#p.add_argument( '--rna_features',          type=str,              required=True,          help='' )
#p.add_argument( '--atac_features',          type=str,              required=True,          help='' )
#p.add_argument( '--cells',          type=str,              required=True,          help='' )
#p.add_argument( '--outfile',               type=str,              required=True,          help='Output file to store the model (.hdf5)' )
#p.add_argument( '--factors',               type=int,              default=25,             help='Number of factors' )
#p.add_argument( '--seed',                  type=int,              default=42,             help='Random seed' )
#p.add_argument( '--convergence_mode',      type=str,              default="fast",       help='Convergence mode')
#p.add_argument( '--test',               action="store_true",                           help='Do stochastic inference?' )
#args = p.parse_args()

## START TEST ##
# Hard-coded values for the options that the commented-out argument parser
# would otherwise supply.
# /bi/group/reik/ricard/data/gastrulation_multiome_10x
# /Users/argelagr/data/gastrulation_multiome_10x
main = '/rds/project/rds-SDzz0CATGms/users/bt392/09_Eomes_invitro_blood/'

# All input files and the output model share one folder below `main`.
args = {"input_folder": main + "results/rna_atac/mofa/"}
args.update(
    (option, args["input_folder"] + file_name)
    for option, file_name in (
        ("rna_matrix", "rna.mtx"),
        ("atac_matrix", "atac_tfidf.mtx"),
        ("rna_features", "rna_features.txt"),
        ("atac_features", "atac_features.txt"),
        ("cells", "cells.txt"),
        ("outfile", "MOFA_model.hdf5"),
    )
)

# Model and training settings.
args.update(factors=25, seed=42, convergence_mode="fast", test=False)
## END TEST ##

# convert args to dictionary
#args = vars(args) # not sure if this is needed

###############
## Load data ##
###############

# Read the RNA and ATAC MatrixMarket files in turn; each one is converted to a
# dense matrix and then transposed.
rna_mtx, atac_mtx = (
    mmread(args[matrix_key]).todense().T
    for matrix_key in ("rna_matrix", "atac_matrix")
)

# Each names file is read without a header row and its first column is kept as
# a plain Python list.
rna_features, atac_features, cells = (
    pd.read_csv(args[names_key], header=None)[0].tolist()
    for names_key in ("rna_features", "atac_features", "cells")
)

# sample_metadata = pd.read_csv(input_folder/"sample_metadata.txt.gz")
# assert sample_metadata.cell.tolist() == cells

########################
## Create MOFA object ##
########################

# Create the mofapy2 entry point that the rest of the script configures.
ent = entry_point()

# Pair every view name with its matrix and its feature names, then hand the
# pieces to the entry point in view order. Each matrix is wrapped in its own
# single-element list and the cell names are passed as one list.
views = {
    "RNA": (rna_mtx, rna_features),
    "ATAC": (atac_mtx, atac_features),
}
ent.set_data_matrix(
    data=[[matrix] for matrix, _ in views.values()],
    views_names=list(views),
    samples_names=[cells],
    features_names=[names for _, names in views.values()],
)

# Data options: request float32 arrays.
ent.set_data_options(use_float32=True)

# Model options: number of factors from the settings, with both spike-and-slab
# switches turned off.
ent.set_model_options(
    factors=args["factors"],
    spikeslab_factors=False,
    spikeslab_weights=False,
)

# Training options: a test run only passes iter=3; a regular run passes the
# configured convergence mode and seed instead.
train_options = (
    {"iter": 3}
    if args["test"]
    else {"convergence_mode": args["convergence_mode"], "seed": args["seed"]}
)
ent.set_train_options(**train_options)

###############################
## Build and train the model ##
###############################

# Assemble the model from the data and options configured so far.
ent.build()

# Fit the model.
ent.run()

####################
## Save the model ##
####################

# Write the result to the configured output path; save_data=False is passed
# straight through to the entry point's save method.
model_path = args["outfile"]
ent.save(model_path, save_data=False)


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
use_float32 set to True: replacing float64 arrays by float32 arrays to speed up computations...

Groups names not provided, using default naming convention:
- group1, group2, ..., groupG

Successfully loaded view='RNA' group='group0' with N=33177 samples and D=4000 features...
Successfully loaded view='ATAC' group='group0' with N=33177 samples and D=25000 features...


use_float