# Analysis Workflow #

#### Imports

In [None]:
import subprocess
import os
import glob
import pandas as pd
import numpy as np

from cobra import io
from jproperties import Properties
from mana import results_analysis

#### Java 11 or later must be installed on your machine to run this notebook


### Load properties file

In [None]:
# Read the notebook configuration (key/value pairs) from the local props.properties file.
props = Properties()
try:
    with open('props.properties', 'rb') as properties_stream:
        props.load(properties_stream)
except FileNotFoundError as missing_file:
    # Report the missing file and print a highlighted hint (ANSI escape codes);
    # the exception is not re-raised, so execution continues past this cell.
    print(missing_file)
    print("\033[91m\033[1m "+"You must provide a props.properties file"+" \033[0m\033[91m")

### Load required datasets

In [None]:
# Metadata table: tab-separated file whose first column becomes the row index.
# Its location comes from the "pheno" entry of the properties file.
pheno = pd.read_csv(
    props.get("pheno").data,
    index_col=0,
    sep="\t",
)
# Folder holding the input files, located under the configured working path
input_path = props.get("working_path").data + "input_data/"

### Create analysis directory and subdirectories

In [None]:
# Create <working_path>/analysis and the output sub-folders used by the cells below.
# os.makedirs(..., exist_ok=True) keeps the cell safe to re-run and avoids
# re-listing the parent directory before every single mkdir.
analysis_dir = os.path.join(str(props.get("working_path").data), "analysis")
os.makedirs(analysis_dir, exist_ok=True)
for subdir in ["dist_matrices","reaction_clusters","clusters_annotation_tables","subnetwork_gmls","subnetwork_reactions"]:
    os.makedirs(os.path.join(analysis_dir, subdir), exist_ok=True)

#### Compute the distance matrix for the differentially activated reactions

In [None]:
# Run the Met4j ReactionDistanceMatrix app once per reaction list file found in
# <working_path>/DARS/ and write the result to <working_path>/analysis/dist_matrices/.
working_path = str(props.get("working_path").data)
# The same folder is used both to list the reaction files and to build the "-s"
# argument, so every file handed to Met4j is known to exist.
# NOTE(review): the previous version listed <working_path>/DARS/ but passed
# <working_path>/input_data/DARS/<file> to "-s" — confirm which location is intended.
dars_dir = os.path.join(working_path, "DARS")
dist_matrices_dir = os.path.join(working_path, "analysis", "dist_matrices")

# One JVM is started per file (they run concurrently); the cell then waits for all
# of them so that the next cell only sees completed distance matrices.
running = []
for file in os.listdir(dars_dir):
    # Output name: first three "_"-separated tokens of the input file name.
    out_name = '_'.join(file.split('_')[0:3]) + '_extracellexclude.tsv'
    # Argument list (no shell): paths containing spaces or shell metacharacters are safe.
    cmd = [
        "java", "-cp", "met4j-toolbox-1.2.2.jar",
        "fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis.ReactionDistanceMatrix",
        "-i", os.path.join(input_path, "recon2v2_biomass_corrected.sbml"),
        "-s", os.path.join(dars_dir, file),
        "-o", os.path.join(dist_matrices_dir, out_name),
        "-sc", os.path.join(input_path, "recon22_scs_metex.txt"),
        "-re", os.path.join(input_path, "blocked_inactives_phh_exchange_exportextracell_reactions.txt"),
        "-u",
    ]
    print(" ".join(cmd))
    # call ReactionDistanceMatrix Met4j app
    running.append((file, subprocess.Popen(cmd)))

# Block until every process has finished and report the ones that exited with an error.
failed = [name for name, proc in running if proc.wait() != 0]
if failed:
    print("ReactionDistanceMatrix failed for: " + ", ".join(failed))
    

#### Plot the dendrogram and identify interesting clusters of reactions

In [None]:
# For every computed distance matrix: mask non-finite distances, drop sparsely
# populated rows/columns, then extract the reaction clusters
# (results_analysis.extract_reactions_from_clusters writes them under
# <working_path>/analysis/reaction_clusters/ using the given file prefix).
dist_matrices_dir = str(props.get("working_path").data)+"analysis/dist_matrices/"
clusters_dir = str(props.get("working_path").data)+"analysis/reaction_clusters/"
for mat in os.listdir(dist_matrices_dir):
    # Only the *.tsv files are distance matrices; skip anything else that may sit
    # in the folder (hidden files, notebook checkpoints, ...).
    if not mat.endswith('.tsv'):
        continue
    stem = mat.replace('.tsv','')
    title = stem+" subnetwork reaction similarity"
    # NOTE(review): the files are named .tsv but parsed as comma-separated — confirm
    # the delimiter written by the Met4j app.
    matrix = pd.read_csv(dist_matrices_dir+mat,sep=',',index_col='id')
    # Turn infinite distances into NaN (presumably disconnected reactions)
    matrix = matrix[~matrix.isin([np.nan, np.inf, -np.inf])]
    # thresh is the minimum number of non-NA values a row/column needs in order to
    # be kept: here 70% of the current number of rows.
    # NOTE(review): the row filter also uses the row count, which assumes a square matrix.
    matrix = matrix.dropna(axis=0,thresh=matrix.shape[0]*0.7)
    matrix = matrix.dropna(axis=1,thresh=matrix.shape[0]*0.7)
    results_analysis.extract_reactions_from_clusters(matrix,title=title,write_files=True,file_prefix=clusters_dir+stem+'_cluster',header=True)

In [None]:
# Generate an annotation table (.xlsx) for each reaction cluster file found in
# <working_path>/analysis/reaction_clusters/.
working_path = str(props.get("working_path").data)
clusters_dir = working_path+"analysis/reaction_clusters/"
tables_dir = working_path+"analysis/clusters_annotation_tables/"

# Compartment ids to compartment names, kept for reference (not used by this cell):
# comp = {"c":"Cytoplasm","m":"Mitochondrion","x":"Peroxisome","l":"Lysosome","g":"Golgi appartus","e":"Extracellular space","r":"Endoplasic reticulum","n":"Nucleus","i":"Mitochondrial intermembrane space"}

# load model
recon_model = io.load_json_model(str(props.get("modelFile").data))
recon_model.id = 'recon2.2'
# load hgnc data (every column read as text)
hgnc_data = pd.read_csv(str(props.get("mappingFile").data), sep='\t', dtype='unicode')

for file in os.listdir(clusters_dir):
    # The DARs direction file is looked up from the first "_"-separated token of the
    # cluster file name.
    # NOTE(review): several direction files may share that token; the candidates are
    # sorted so the selection is at least deterministic — confirm the naming scheme.
    direction_pattern = working_path+"DARS_direction/"+file.split("_")[0]+"*.tsv"
    direction_files = sorted(glob.glob(direction_pattern))
    if not direction_files:
        # Fail with an explicit message instead of a bare IndexError on [0].
        raise FileNotFoundError("No DARs direction file matches "+direction_pattern+" (needed for cluster file "+file+")")
    results_analysis.generate_annotation_table(
        clusters_dir+file,
        recon_model,
        hgnc_data,
        DARs_direction=direction_files[0],
        outputFile=(tables_dir+file).replace('.tsv','')+'_table.xlsx',
    )

#### Extract a subnetwork for each interesting cluster of reactions

In [None]:
# Run the Met4j ExtractSubReactionNetwork app once per reaction cluster file and
# write one .gml file per cluster to <working_path>/analysis/subnetwork_gmls/.
#
# Set file_prefix to a condition name (e.g. "ethanol_highvsctrls_24h") to process only
# the cluster files whose name contains it; the empty string selects every file.
# file_prefix = str(input("Enter a condition to analyse(e.g ethanol_highvsctrls_24h))"))
file_prefix=""
working_path = str(props.get("working_path").data)
clusters_dir = os.path.join(working_path, "analysis", "reaction_clusters")
gml_dir = os.path.join(working_path, "analysis", "subnetwork_gmls")

# One JVM is started per cluster file (they run concurrently); the cell then waits for
# all of them so that the next cell only sees completed .gml files.
# TODO cap the number of concurrent JVMs (e.g. with a worker pool).
running = []
for file in os.listdir(clusters_dir):
    # "" is contained in every string, so an empty prefix keeps all files.
    if file_prefix not in file:
        continue
    cluster_file = os.path.join(clusters_dir, file)
    # Argument list (no shell): paths containing spaces or shell metacharacters are safe.
    # The cluster file is passed as both "-s" and "-t".
    cmd = [
        "java", "-cp", "met4j-toolbox-1.2.2.jar",
        "fr.inrae.toulouse.metexplore.met4j_toolbox.networkAnalysis.ExtractSubReactionNetwork",
        "-i", os.path.join(input_path, "recon2v2_biomass_corrected.sbml"),
        "-s", cluster_file,
        "-t", cluster_file,
        "-o", os.path.join(gml_dir, file.replace(".tsv","")+'_undirected_r2_noisecond_extracell.gml'),
        "-sc", os.path.join(input_path, "recon22_scs_metex.txt"),
        "-re", os.path.join(input_path, "blocked_inactives_phh_exchange_exportextracell_reactions.txt"),
        "-st",
        "-u",
    ]
    print(" ".join(cmd))
    # call ExtractSubReactionNetwork Met4j app
    running.append((file, subprocess.Popen(cmd)))

# Block until every process has finished and report the ones that exited with an error.
failed = [name for name, proc in running if proc.wait() != 0]
if failed:
    print("ExtractSubReactionNetwork failed for: " + ", ".join(failed))

#### Get subnetwork reactions

In [None]:
# Write, for every subnetwork file, the node list returned by
# results_analysis.get_node_list to a text file (one entry per line) in
# <working_path>/analysis/subnetwork_reactions/.
# For a temporary visualisation, use the results_analysis.visualize_gml function
# For a complete visualization, use MetExplore Viz(https://metexplore.toulouse.inrae.fr/index.html/)
pattern='.'
gml_dir = str(props.get("working_path").data)+"analysis/subnetwork_gmls/"
reactions_dir = str(props.get("working_path").data)+"analysis/subnetwork_reactions/"
for file in os.listdir(gml_dir):
    # Only handle the files whose name contains `pattern`
    if pattern not in file:
        continue
    rlist = results_analysis.get_node_list(gml_dir+file)
    # Output name: input name without '.tab', with '.gml' turned into '.txt'
    out_name = file.replace('.tab','').replace(".gml",'.txt')
    with open(reactions_dir+out_name,'w') as w_hdler:
        w_hdler.writelines(r+'\n' for r in rlist)