In [None]:
%run cliffcommutil.py

def _highest_probability(gene_ids, gene_probabilities):
    """Return the largest probability among ``gene_ids`` found in ``gene_probabilities``.

    Returns ``None`` when none of the gene ids has a probability.
    """
    known = [gene_probabilities[gene_id] for gene_id in gene_ids if gene_id in gene_probabilities]
    return max(known) if known else None


def _average_reaction_probability(reactions, rxn_probs, gf_penalties, default_probability):
    """Average the probabilities over a phenotype's directional reaction list.

    Each entry of ``reactions`` is a reaction id prefixed with ``">"`` or
    ``"<"``.  A direction that carries a gapfilling penalty contributes
    nothing to the sum; otherwise it contributes its reaction probability, or
    ``default_probability`` when the reaction has none.  The sum is divided by
    the total number of entries (including those that contributed nothing).
    An empty list yields 0 instead of raising ``ZeroDivisionError``.
    """
    if not reactions:
        return 0
    penalty_key_for = {">": "forward", "<": "reverse"}
    total = 0
    for entry in reactions:
        direction, rxnid = entry[0:1], entry[1:]
        penalty_key = penalty_key_for.get(direction)
        if penalty_key is None:
            # Unknown direction marker: counted in the denominator only.
            continue
        if rxnid not in gf_penalties or penalty_key not in gf_penalties[rxnid]:
            total += rxn_probs.get(rxnid, default_probability)
    return total / len(reactions)


def smipps(arguments):
    """Compute reaction probabilities and gapfilled phenotype simulations for MAGs.

    Args:
        arguments: 9-tuple ``(mag_list, mags_to_models, model_ws, metabolites,
            feature_probabilities, problemlist, auxo_media, gmm_base_media,
            aerobicity)``, packed into one object so the function can be
            handed to ``Pool.map``.  ``mags_to_models`` and ``aerobicity`` are
            unpacked but not used here.

    Besides its arguments, the function relies on names defined at notebook
    level: ``util``, ``MSGapfill``, ``model_suffix``, and the accumulators
    ``probability_finished``, ``reaction_probabilities`` and
    ``gf_phenotype_results``, which are mutated in place and persisted with
    ``util.save`` after each processed MAG.

    Returns:
        None.

    Raises:
        KeyError: if a MAG to be processed has no entry in
            ``feature_probabilities``.
    """
    # unpack the arguments
    (mag_list, mags_to_models, model_ws, metabolites, feature_probabilities,
     problemlist, auxo_media, gmm_base_media, aerobicity) = arguments

    # print status
    from multiprocess import current_process
    pid = current_process().name
    print(pid)

    # Objective coefficient for reaction directions without a probability or
    # penalty, and the probability assumed for such reactions when averaging.
    default_coefficient = 0.95
    default_probability = 0.05

    # define the phenotype sets (identical settings except for type and base media)
    phenoset_media = {"uptake": auxo_media, "excretion": auxo_media, "growth": gmm_base_media}
    phenosets = {
        kind: util.create_phenotypeset_from_compounds(
            metabolites,
            base_media=media,
            base_uptake=0,
            base_excretion=1000,
            global_atom_limits={},
            type=kind
        )
        for kind, media in phenoset_media.items()
    }

    # compute the SMIPP for each MAG
    for mag in mag_list:
        print(mag)
        name = mag[1]
        if name not in probability_finished and name not in problemlist:
            mdlutl = util.msrecon.get_model(name+model_suffix, model_ws)
            gene_probs = feature_probabilities[name]
            rxn_probs = reaction_probabilities[name] = {}

            # Seed reaction probabilities from the genes attached to the model's reactions.
            for rxn in mdlutl.model.reactions:
                highest_prob = _highest_probability((gene.id for gene in rxn.genes), gene_probs)
                if highest_prob is not None:
                    rxn.probability = highest_prob
                    rxn_probs[rxn.id] = highest_prob

            # Refine with the genome's reaction -> gene hash; an entry is only
            # overwritten when the new probability is at least as high.
            print(mdlutl.model.genome_ref)
            genome = util.msrecon.get_msgenome_from_ontology(mdlutl.model.genome_ref, native_python_api=True, output_ws=None)
            reaction_hash = genome.annoont.get_reaction_gene_hash(feature_type="gene")
            for rxn in reaction_hash:
                highest_prob = _highest_probability(reaction_hash[rxn], gene_probs)
                if highest_prob is None:
                    continue
                model_rxn_id = rxn+"_c0"
                # Fixed: the original indexed reaction_probabilities with the
                # undefined name `asvname`; the entry for this MAG is `name`.
                if model_rxn_id not in rxn_probs or highest_prob >= rxn_probs[model_rxn_id]:
                    if model_rxn_id in mdlutl.model.reactions:
                        mdlutl.model.reactions.get_by_id(model_rxn_id).probability = highest_prob
                    rxn_probs[model_rxn_id] = highest_prob

            # Result is unused; the call is kept in case it has side effects
            # on the model utility -- TODO confirm and drop if not.
            mdlutl.get_attributes("gf_filter")
            tests = mdlutl.get_atp_tests(core_template=util.msrecon.core_template, atp_media_filename=util.msrecon.module_dir+"/data/atp_medias.tsv", recompute=False)
            msgapfill = MSGapfill(
                mdlutl,
                [util.msrecon.get_template(mdlutl.model.template_ref)],
                [],
                tests,
                blacklist=[],
                default_target="bio1",
                minimum_obj=0.01,
                base_media=None,
                base_media_target_element=None
            )

            # Adding missing transporter for metabolites to gapfilling database
            for cpd in metabolites:
                if "EX_"+cpd+"_e0" not in msgapfill.gfmodelutl.model.reactions:
                    msgapfill.gfmodelutl.add_transport_and_exchange_for_metabolite(cpd, direction="=", prefix="trans", override=False)

            # Build per-direction flux coefficients: 1 - probability where one
            # is known, 1 + gapfilling penalty where one exists, otherwise the
            # default.  The counters tally directions and are only reported.
            coefficients = {}
            gf_penalties = msgapfill.gfpkgmgr.getpkg("GapfillingPkg").gapfilling_penalties
            gfrxn = 0
            probrxn = 0
            otherrxn = 0
            for reaction in msgapfill.gfmodelutl.model.reactions:
                rid = reaction.id
                if rid in rxn_probs:
                    probrxn += 2
                    coefficients[">"+rid] = 1-rxn_probs[rid]
                    coefficients["<"+rid] = 1-rxn_probs[rid]
                    continue
                penalties = gf_penalties[rid] if rid in gf_penalties else {}
                for symbol, penalty_key in ((">", "forward"), ("<", "reverse")):
                    if penalty_key in penalties:
                        gfrxn += 1
                        coefficients[symbol+rid] = 1+penalties[penalty_key]
                    else:
                        otherrxn += 1
                        coefficients[symbol+rid] = default_coefficient
            print(name, "GF:", gfrxn, "Prob:", probrxn, "Other:", otherrxn)

            # Create conditional logic for growth phenotypes if errors are raised here
            msgapfill.prefilter(test_conditions=tests, growth_conditions=[], use_prior_filtering=True, base_filter_only=True)

            gf_phenotype_results[name] = {}
            for phenoid, phenoset in phenosets.items():
                gf_phenotype_results[name][phenoid] = {}
                output = phenoset.simulate_phenotypes(
                    msgapfill.gfmodelutl,
                    multiplier=2,
                    add_missing_exchanges=True,
                    save_fluxes=False,
                    save_reaction_list=True,
                    gapfill_negatives=False,
                    msgapfill=None,
                    test_conditions=None,
                    ignore_experimental_data=True,
                    flux_coefficients=coefficients
                )
                for _, row in output["details"].iterrows():
                    phenotype = row["Phenotype"]
                    record = output["data"][phenotype]
                    if "reactions" in record:
                        record["average_probability"] = _average_reaction_probability(
                            record["reactions"], rxn_probs, gf_penalties, default_probability
                        )
                    gf_phenotype_results[name][phenoid][phenotype] = record

            # Checkpoint after every MAG so an interrupted run can resume.
            probability_finished.append(name)
            util.save("new_gf_phenotype_results_", gf_phenotype_results)
            util.save("reaction_probabilities", reaction_probabilities)
            util.save("probability_finished_", probability_finished)
        # NOTE(review): this unconditional break stops after the first entry of
        # mag_list (even when that entry was skipped).  It looks like a
        # debugging leftover; kept to preserve current behavior -- confirm and
        # remove to process every MAG.
        break
                
                
# define function parameters
model_ws = 188065
model_suffix = ".pyr"
mag_list = util.load("mag_list")
metabolites = util.load("metabolites")
feature_probabilities = util.load("feature_probabilities")
# Accumulators persisted by smipps; the second argument is passed as the
# fallback for util.load (presumably used when nothing was saved yet -- confirm).
reaction_probabilities = util.load("reaction_probabilities",{})
gf_phenotype_results = util.load("new_gf_phenotype_results_",{})
probability_finished = util.load("probability_finished_",[])
problemlist = util.load("problemlist",[])
mag_to_name = util.load("mag_to_name")

# Re-key the feature probabilities by the translated name plus ".pg.G.D",
# and rewrite the second field of every MAG record the same way so the two
# agree (smipps looks probabilities up by that field).
mag_feature_probabilities = {mag_to_name[k]+".pg.G.D":v for k,v in feature_probabilities.items()}
# print(mag_feature_probabilities.keys())
for mag in mag_list:
    mag[1] = mag_to_name[mag[1]]+".pg.G.D"

auxo_media = util.msrecon.get_media(f"{model_ws}/AuxoMedia")
# NOTE(review): "Pyruate" looks like a typo for "Pyruvate", but the string must
# match the media object's ID in the workspace, so it is left unchanged -- confirm.
gmm_base_media = util.msrecon.get_media(f"{model_ws}/PyruateMinimalAerobic")


# Everything smipps needs besides the list of MAGs it should process.
shared_args = (mag_to_name, model_ws, metabolites, mag_feature_probabilities, problemlist, auxo_media, gmm_base_media, "aerobic")

# run the parallelized code
parallelize = False
if parallelize:
    from multiprocess import Pool
    from os import cpu_count
    # cpu_count() may return None when the count cannot be determined.
    n_workers = cpu_count() or 1
    # Fixed: `mag_lists` was never defined.  Split the MAGs round-robin into
    # at most one non-empty chunk per worker.
    mag_items = list(mag_list)
    mag_lists = [chunk for chunk in (mag_items[i::n_workers] for i in range(n_workers)) if chunk]
    args = [(chunk, *shared_args) for chunk in mag_lists]
    # NOTE(review): every worker runs smipps, which saves its results with
    # util.save under the same names -- confirm concurrent saves are safe
    # before enabling this branch.
    with Pool(n_workers) as pool:
        list_of_outputs = pool.map(smipps, args)
else:
    outputs = smipps((mag_list, *shared_args))