# Testing document for pyGSLModel

### Testing model download and pruning functions

Standard Human-GEM model download test

In [None]:
# Fetch the standard (unpruned) Human-GEM model and report its size
from pyGSLModel import download_model

model_1 = download_model()

# One print call, one line per count, so the summary is emitted as a single block
print(
    f"Number of Reactions in model : {len(model_1.reactions)}",
    f"Number of Metabolites in model : {len(model_1.metabolites)}",
    f"Number of Genes in model : {len(model_1.genes)}",
    sep="\n",
)

Gene symbol conversion, model pruning, and GSL transport reaction removal test

In [None]:
# Converting gene names, pruning the model and removing GSL transport reactions
from pyGSLModel import convert_genes, prune_model, remove_GSL_transport

# Each step takes the previous result, so model_2 ends up as the fully processed model
model_2 = convert_genes(model_1)
model_2 = prune_model(model_2)
model_2 = remove_GSL_transport(model_2)

# Report the size of the processed model (model_2). Printing model_1 here would
# only repeat the counts of the freshly downloaded model and hide the effect of
# the pruning steps above.
print(f"Number of Reactions in model : {len(model_2.reactions)}")
print(f"Number of Metabolites in model : {len(model_2.metabolites)}")
print(f"Number of Genes in model : {len(model_2.genes)}")

# Look the gene symbol up on the converted model; the lookup only succeeds if
# the gene is stored under its symbol in model_2.
print("Checking gene symbol conversion :")
model_2.genes.get_by_id("B4GALNT1")

Testing download of pre-pruned model

In [1]:
# Fetch the pre-pruned GSL model and report its size
from pyGSLModel import download_GSL_model

model_3 = download_GSL_model()

# One print call, one line per count
print(
    f"Number of Reactions in model : {len(model_3.reactions)}",
    f"Number of Metabolites in model : {len(model_3.metabolites)}",
    f"Number of Genes in model : {len(model_3.genes)}",
    sep="\n",
)

# The pre-pruned model should already expose genes by symbol; UGT8 is the spot check
print("Checking gene symbol conversion :")
model_3.genes.get_by_id("UGT8")

Downloading  and Reading in Model
Model succesfully downloaded and read in.
Number of Reactions in model : 2307
Number of Metabolites in model : 2009
Number of Genes in model : 2887
Checking gene symbol conversion :


0,1
Gene identifier,UGT8
Name,G_UGT8
Memory address,0x14eb94cc250
Functional,True
In 1 reaction(s),MAR00919


### Testing performing simulations and analysing results

Performing a generic FBA simulation

In [None]:
from pyGSLModel import (
    plot_model_results,
    run_metabolic_model,
    tabulate_model_results,
)

# Solve the model with plain FBA using the "D14_Neuron" objective
sol_1 = run_metabolic_model(
    model_3,
    method="FBA",
    objective_choice="D14_Neuron",
)

# Tabulate the solution and preview the first rows
results_1 = tabulate_model_results(model_3, sol_1)
print(results_1.head())

# Build the figure and leave it as the cell's last expression so it renders
fig_1 = plot_model_results(results_1)
fig_1

Performing a knockout simulation

In [None]:
from pyGSLModel import (
    plot_model_results,
    run_metabolic_model,
    tabulate_model_results,
)

# Solve the model with FBA using the "AC" objective and B4GALNT1 passed as the knockout
sol_2 = run_metabolic_model(
    model_3,
    method="FBA",
    objective_choice="AC",
    knockout="B4GALNT1",
)

# Tabulate the knockout solution and preview the first rows
results_2 = tabulate_model_results(model_3, sol_2)
print(results_2.head())

# Build the figure and leave it as the cell's last expression so it renders
fig_2 = plot_model_results(results_2)
fig_2

Performing mFBA simulation

In [None]:
from pyGSLModel import (
    plot_model_results,
    run_metabolic_model,
    tabulate_model_results,
)

# Solve the model with the "mFBA" method using the "D14_Neuron" objective
sol_3 = run_metabolic_model(
    model_3,
    method="mFBA",
    objective_choice="D14_Neuron",
)

# Tabulate the solution and preview the first rows
results_3 = tabulate_model_results(model_3, sol_3)
print(results_3.head())

# Build the figure and leave it as the cell's last expression so it renders
fig_3 = plot_model_results(results_3)
fig_3

Testing network visualisation. This saves a .html file

In [None]:
from pyGSLModel import visualise_flux_network

# Render the mFBA solution (sol_3) as a flux network written to the given .html path
visualise_flux_network(
    model_3,
    sol_3,
    file_path="./flux_network_test.html",
    height="1080px",
    width="100%",
)


### Transcriptomic integration testing

Testing standard iMAT integration

In [4]:
import pandas as pd
from pyGSLModel import iMAT_integrate, tabulate_model_results

# Dummy expression data: four genes, one sample
d = {
    "Gene": ["B4GALNT1", "ST3GAL5", "ST8SIA1", "A4GALT"],
    "Sample_1": [8, 6, 4, 2],
}

# Build the frame and index it by gene in one chained expression
iMAT_df_1 = pd.DataFrame(d).set_index("Gene").copy()

# Integrate the dummy expression data with the model via iMAT
sol_4 = iMAT_integrate(model_3, iMAT_df_1)

# Tabulate the solution; the head is the cell's displayed output
results_4 = tabulate_model_results(model_3, sol_4)
results_4.head()

Unnamed: 0,Reaction ID,Reactants,Products,Key Product,Genes,Flux (mmol/gDW/hr),Relative GSL Flux (%)
0,MAR08147,"MAM01972g (glucosylceramide pool), MAM03107g (...","MAM02039g (H+), MAM02328g (LacCer pool), MAM03...",LacCer pool,B3GNT3,4.0,20.0
14,MAR08184,"MAM01592g (CMP-N-acetylneuraminate), MAM02328g...","MAM01590g (CMP), MAM02015g (GM3), MAM02039g (H+)",GM3,ST3GAL5,3.0,15.0
10,MAR08179,"MAM01592g (CMP-N-acetylneuraminate), MAM02015g...","MAM01590g (CMP), MAM01947g (GD3), MAM02039g (H+)",GD3,ST8SIA1,2.0,10.0
4,MAR08165,"MAM02328g (LacCer pool), MAM03110g (UDP-N-acet...","MAM01905g (GA2), MAM02039g (H+), MAM03106g (UDP)",GA2,B4GALNT1,1.0,5.0
15,MAR08185,"MAM02011g (GM2), MAM03107g (UDP-galactose)","MAM02008g (GM1), MAM02039g (H+), MAM03106g (UDP)",GM1,B3GALT4,1.0,5.0


Testing multi-sample iMAT integration with a dummy dataframe

In [5]:
import pandas as pd
from pyGSLModel import iMAT_multi_integrate

# Dummy expression data: four genes across four samples
d = {
    "Gene": ["B4GALNT1", "ST3GAL5", "ST8SIA1", "A4GALT"],
    "Sample_1": [8, 6, 4, 2],
    "Sample_2": [2, 4, 6, 8],
    "Sample_3": [6, 2, 8, 4],
    "Sample_4": [4, 8, 2, 6],
}

# Build the frame and index it by gene in one chained expression.
# Note: this rebinds iMAT_df_1 from the single-sample cell.
iMAT_df_1 = pd.DataFrame(d).set_index("Gene").copy()

# Run the multi-sample iMAT integration on the custom data
iMAT_res_1 = iMAT_multi_integrate(model_3, iMAT_df_1)

iMAT_res_1

Simulations Performed:1/4
Simulations Performed:2/4
Simulations Performed:3/4
Simulations Performed:4/4


Key Product,LacCer pool,GM3,GD3,GA2,GM1,GD1a,GM2,GD2,GT1c,GQ1c,...,GT1a,globotriaosylceramide,GQ1b,GD1c,GD1alpha,GM1b,GA1,galactosylgloboside,globoside,sialyl-3-paragloboside
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Sample_1,4.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sample_2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
Sample_3,63.768555,62.768555,1.0,0.0,61.768555,61.768555,61.768555,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
Sample_4,1.0,1.0,0.01,0.0,0.99,0.99,0.99,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Testing average TCGA integration with iMAT

In [None]:
from pyGSLModel import TCGA_iMAT_integrate

# Adjust the upper and lower quantiles to target the top and bottom 15% of genes.
# epsilon and threshold are left at their defaults.
# NOTE(review): upper_quantile=0.15 with lower_quantile=0.85 looks like it may be
# swapped relative to the stated intent; confirm against the TCGA_iMAT_integrate
# documentation how these two arguments are interpreted.
iMAT_TCGA_1 = TCGA_iMAT_integrate(
    model_3,
    upper_quantile=0.15,
    lower_quantile=0.85,
)

iMAT_TCGA_1.head()

Testing sample-wise iMAT integration of TCGA data

In [None]:
from pyGSLModel import TCGA_iMAT_sample_integrate

# "Bile duct" is used for the tissue argument because it has fewer samples
# (Brain has something like 2000, so it takes a while). All other arguments
# are left at their defaults.
iMAT_TCGA_sample = TCGA_iMAT_sample_integrate(
    model_3,
    tissue="Bile duct",
)

iMAT_TCGA_sample.head()