### Changing acetate transporters.

In [1]:
import reframed
import pandas as pd
import numpy as np

import seaborn as sns

import os
import json

import copy

from reframed.io.sbml import parse_gpr_rule
import functions.general_functions as general_func

#### Load data

In [2]:
all_mags_paper = general_func.read_allmags_data()
all_mags_paper.head(2)

Unnamed: 0_level_0,Source,Substrate,Completeness (%),Contamination (%),Domain,Phylum,Class,Order,Family,Genus,Species,Genome size (bp),Scaffolds,N50,CDS prediction (DRAM),Column1,new_coverage
MAG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
CH14-bin.1,Compost_Digestate,Xylan,99.37,0.32,Bacteria,Firmicutes,Clostridia,Lachnospirales,Lachnospiraceae,Lacrimispora,Lacrimispora amygdalina_A,5147597.0,48.0,197896.0,4902.0,CH14-bin.1,54.707294
CH14-bin.2,Compost_Digestate,Xylan,99.37,0.63,Bacteria,Firmicutes,Clostridia,Lachnospirales,Lachnospiraceae,Robinsoniella,Robinsoniella peoriensis,5647052.0,70.0,172899.0,4618.0,CH14-bin.2,18.831114


In [3]:
with open("output/relevant_MAGs_99.txt") as text_file:
    relevant_MAGs = text_file.read().split("\n")

relevant_MAGs = [string.replace("\t","") for string in relevant_MAGs]


#### Load models

In [4]:
GEMs_dict = {}

directory = os.fsencode("output/GEMs/GEMs_adapt_media/")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)

    GEMs_dict[filename[:-4]]= reframed.load_cbmodel("output/GEMs/GEMs_adapt_media/"+filename)


CH7-bin.18.xml
CH15-bin.7.xml
CH13-bin.0.xml
CH1-bin.4.xml
CH13-bin.1.xml
CH13-bin.11.xml
CH15-bin.6.xml
CH7-bin.2.xml
CH13-bin.13.xml
CH1-bin.10.xml
CH1-bin.6.xml
CH13-bin.2.xml
CH13-bin.12.xml
CH15-bin.5.xml
CH7-bin.1.xml
CH15-bin.1.xml
CH1-bin.3.xml
CH13-bin.17.xml
CH15-bin.0.xml
CH7-bin.4.xml
CH7-bin.23.xml
CH7-bin.6.xml
CH15-bin.22.xml
CH15-bin.2.xml
CH1-bin.1.xml
CH13-bin.4.xml
CH13-bin.14.xml
CH15-bin.23.xml
CH7-bin.20.xml
CH8-bin.8.xml
CH8-bin.9.xml
CH8-bin.25.xml
CH14-bin.4.xml
CH14-bin.1.xml
CH8-bin.21.xml
CH14-bin.2.xml
CH8-bin.22.xml
CH8-bin.2.xml
CH8-bin.29.xml
CH8-bin.7.xml
CH8-bin.6.xml
CH3-bin.2.xml
CH8-bin.14.xml
CH8-bin.16.xml
CH3-bin.0.xml
CH8-bin.5.xml
CH3-bin.1.xml
CH8-bin.17.xml
CH7-bin.11.xml
CH9-bin.1.xml
CH15-bin.12.xml
CH13-bin.25.xml
CH15-bin.13.xml
CH9-bin.0.xml
CH7-bin.12.xml
CH9-bin.2.xml
CH7-bin.9.xml
CH15-bin.10.xml
CH7-bin.8.xml
CH7-bin.13.xml
CH7-bin.17.xml
CH15-bin.8.xml
CH15-bin.15.xml
CH7-bin.16.xml
CH9-bin.6.xml
CH9-bin.4.xml
CH15-bin.17.xml
CH1-bi

## Find acetate producers/consumers

#### Find which models has GPRs for producing acetate through phosphotransacetylase(R_PTAr) and acetate kinase (R_ACKr)

In [5]:
has_enzymes_for_acetate = {}
for MAG in GEMs_dict.keys():
    
    has_enzyme = []
    if "R_ACKr" in GEMs_dict[MAG].reactions:
        if GEMs_dict[MAG].reactions["R_ACKr"].gpr!=None:
            has_enzyme.append("R_ACKr")
    if "R_PTAr" in GEMs_dict[MAG].reactions:
        
        if GEMs_dict[MAG].reactions["R_PTAr"].gpr!=None:
            has_enzyme.append("R_PTAr")
    
    has_enzymes_for_acetate[MAG]= len(has_enzyme)==2

In [6]:
has_enzymes_for_acetate_MAGs = list(pd.Series(has_enzymes_for_acetate)[pd.Series(has_enzymes_for_acetate)].index)

#### Find hits on the ACt2r protein from the TCDB database

In [7]:
ACt2r_MAGs = []

ACt2r_MAGs_data = []
for filename in os.listdir("transporters/"):
    
    if filename.endswith(".tsv"):

        transport= pd.read_csv("transporters/"+filename,sep="\t",header=None)
        transport.columns = ["query acc.ver", "subject acc.ver", "% identity", "alignment length", "mismatches", "gap opens", "q. start", "q. end", "s. start", "s. end", "evalue", "bit score"]
        transport.reset_index(inplace=True)
        
        if transport.shape[0]>1:
            
            transport.sort_values(by="evalue",ascending=True,inplace=True)
            transport.reset_index(inplace=True)
            
            query = transport.loc[0,"query acc.ver"]
            gene = transport.loc[0,"subject acc.ver"]
            best_evalue = transport.loc[0,"evalue"]
            bit_score = transport.loc[0,"bit score"]
            
            if best_evalue<1e-5 and bit_score>20:
                if "2.A.1.13.1" in query:
                    ACt2r_MAGs.append(filename[:-4])
                    
                    ACt2r_MAGs_data.append((filename[:-4],gene,best_evalue,bit_score,"2.A.1.13.1"))

                elif "2.A.21.7.3" in query: 
                    ACt2r_MAGs.append(filename[:-4])
                    ACt2r_MAGs_data.append((filename[:-4],gene,best_evalue,bit_score,"2.A.21.7.3"))


In [8]:
ACt2r_MAGs_df = pd.DataFrame(ACt2r_MAGs_data,columns=["MAG","gene","evalue","bit_score","TCDB_id"]).sort_values("evalue")
ACt2r_MAGs_df.set_index("MAG",inplace=True)
ACt2r_MAGs_df

Unnamed: 0_level_0,gene,evalue,bit_score,TCDB_id
MAG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CH15-bin.17,gene545,4.9000000000000004e-157,456.0,2.A.21.7.3
CH13-bin.1,gene374,4.9100000000000004e-157,456.0,2.A.21.7.3
CH7-bin.16,gene2185,8.2e-156,452.0,2.A.21.7.3
CH7-bin.4,gene3341,4.48e-153,445.0,2.A.21.7.3
CH8-bin.25,gene3105,2.22e-151,441.0,2.A.21.7.3
CH1-bin.9,gene769,1.28e-150,439.0,2.A.21.7.3
CH15-bin.6,gene1704,1.4299999999999999e-142,420.0,2.A.21.7.3
CH1-bin.1,gene727,3.5899999999999997e-56,191.0,2.A.21.7.3
CH14-bin.2,gene1003,9.490000000000001e-23,99.0,2.A.21.7.3
CH9-bin.1,gene3888,1.19e-22,99.0,2.A.21.7.3


### Add acetate transport

Two different conditions.

1. The MAG has a hit in the TCDB database -> add reversible reaction
2. The MAG has a hit for the enzymes in acetate production -> add only producing reaction

In [9]:
GEMs_dict2 = {}

for MAG,model in GEMs_dict.items():
    model_copy = model.copy()
    
    if "R_Acabc" in model_copy.reactions.keys(): 
        model_copy.remove_reaction("R_Acabc")
      
    if "R_ACt2r" in model_copy.reactions.keys():
        model_copy.remove_reaction("R_ACt2r")
        
    if MAG in ACt2r_MAGs_df.index.values:
        model_copy.add_reaction_from_str("R_ACt2r: M_ac_c + M_h_c --> M_ac_e + M_h_e")
        model_copy.reversible=True
        model_copy.lb=-1000
        
        GPR = parse_gpr_rule(ACt2r_MAGs_df.loc[MAG,"gene"])
        model_copy.set_gpr_association("R_ACt2r",GPR)
        
        
    elif has_enzymes_for_acetate_MAGs:
        model_copy.add_reaction_from_str("R_ACt2r: M_ac_c + M_h_c --> M_ac_e + M_h_e")
        
    GEMs_dict2[MAG]=model_copy

### Look for energy generating cycles

In [10]:
import functions.EGC as EGC

In [11]:
for MAG,model in GEMs_dict2.items():
    print(MAG)
    EGC.EGC_identifier(model,print_results=False)

CH7-bin.18
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-14
[92mThere are NO energy producing cycles in the model[0m
CH15-bin.7
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.0
[92mThere are NO energy producing cycles in the model[0m
CH1-bin.4
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.1
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.11
[92mThere are NO energy producing cycles in the model[0m
CH15-bin.6
[92mThere are NO energy producing cycles in the model[0m
CH7-bin.2
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.13
[92mThere are NO energy producing cycles in the model[0m
CH1-bin.10
[92mThere are NO energy producing cycles in the model[0m
CH1-bin.6
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.2
[92mThere are NO energy producing cycles in the model[0m
CH13-bin.12
[92mThere are NO energy producing cycles in the model[0m

In [12]:
model_uni = reframed.load_cbmodel("/Users/idunmariaburgos/universal_model_extension/output/universe_bacteria.xml")

In [13]:
EGC.EGC_identifier(model_uni,print_results=True)

There are NO energy producing cycles in the model for R_ATPM
There are NO energy producing cycles in the model for R_CTPM
There are NO energy producing cycles in the model for R_GTPM
There are NO energy producing cycles in the model for R_UTPM
There are NO energy producing cycles in the model for R_ITPM
There are NO energy producing cycles in the model for R_nadhM
There are NO energy producing cycles in the model for R_nadphM
There are NO energy producing cycles in the model for R_fadh2M
There are NO energy producing cycles in the model for R_fmnh2M
There are NO energy producing cycles in the model for R_q8h2M
There are NO energy producing cycles in the model for R_mql8M
There are NO energy producing cycles in the model for R_dmmql8M
There are NO energy producing cycles in the model for R_accoaM
There are NO energy producing cycles in the model for R_gluM
There are NO energy producing cycles in the model for R_protonM
There are NO energy producing cycles in the model for R_ferredoxinM


{}

In [14]:
EGC_results = EGC.EGC_identifier(GEMs_dict2["CH9-bin.5"],print_results=True)
set.intersection(*map(set,[[value[0] for value in values] for source,values in EGC_results.items()]))

There are energy producing cycles in the model for R_ATPM
	R_ALAD_L: 1000.0
	R_ATPS4rpp: 1000.0
	R_CYTBO3_4pp: 1000.0
	R_GLYCLTDx: 1000.0
	R_GLYCTO2: 1000.0
	R_ATPM: 1000.0
	R_CAT: 1000.0
	R_PDYXPT_c: 1000.0
	R_PYDXO: 1000.0
There are energy producing cycles in the model for R_CTPM
	R_ALAD_L: 1000.0
	R_ATPS4rpp: 1000.0
	R_CYTBO3_4pp: 1000.0
	R_CYTK2: 1000.0
	R_CYTK2_1: -1000.0
	R_GLYCLTDx: 1000.0
	R_GLYCTO2: 1000.0
	R_CAT: 1000.0
	R_PDYXPT_c: 1000.0
	R_PYDXO: 1000.0
	R_CTPM: 1000.0
There are energy producing cycles in the model for R_GTPM
	R_ADOCBIK: 1000.0
	R_ALAD_L: 1000.0
	R_ATPS4rpp: 1000.0
	R_CYTBO3_4pp: 1000.0
	R_GDOCBIK: -1000.0
	R_GLYCLTDx: 1000.0
	R_GLYCTO2: 1000.0
	R_CAT: 1000.0
	R_PDYXPT_c: 1000.0
	R_PYDXO: 1000.0
	R_GTPM: 1000.0
There are energy producing cycles in the model for R_UTPM
	R_ALAD_L: 1000.0
	R_ATPS4rpp: 1000.0
	R_CYTBO3_4pp: 1000.0
	R_GLYCLTDx: 1000.0
	R_GLYCTO2: 1000.0
	R_NDPK2: 1000.0
	R_CAT: 1000.0
	R_PDYXPT_c: 1000.0
	R_PYDXO: 1000.0
	R_UTPM: 1000.0
There a

{'R_ALAD_L',
 'R_CAT',
 'R_CYTBO3_4pp',
 'R_GLYCLTDx',
 'R_GLYCTO2',
 'R_PDYXPT_c',
 'R_PYDXO'}

In [15]:
model_uni.reactions.R_PYDXO

R_PYDXO: 2.0 M_h2o_c + M_nh4_c + 0.5 M_o2_c + M_pydx_c <-> 2.0 M_h2o2_c + M_pydam_c [-inf, 0.0]

In [16]:
GEMs_dict2["CH9-bin.5"].reactions.R_PYDXO

R_PYDXO: 2.0 M_h2o_c + M_nh4_c + 0.5 M_o2_c + M_pydx_c <-> 2.0 M_h2o2_c + M_pydam_c

In [17]:
model_9_5 = reframed.load_cbmodel("output/GEMs/GEMs_adapt/CH9-bin.5.xml")

In [18]:
GEMs_dict2["CH9-bin.5"].remove_reactions(list(set(GEMs_dict2["CH9-bin.5"].reactions)-set(model_9_5.reactions)))

In [19]:
EGC_results = EGC.EGC_identifier(GEMs_dict2["CH9-bin.5"],print_results=False)

[92mThere are NO energy producing cycles in the model[0m


In [20]:
EGC_results = EGC.EGC_identifier(GEMs_dict2["CH9-bin.6"],print_results=True)
set.intersection(*map(set,[[value[0] for value in values] for source,values in EGC_results.items()]))

There are energy producing cycles in the model for R_ATPM
	R_ATPS4rpp: 1000.0
	R_MG2tex: 2000.0
	R_MGt5: 2000.0
	R_ATPM: 1000.0
	R_MG2t3_2pp: 2000.0
There are energy producing cycles in the model for R_CTPM
	R_ADK1: 1000.0
	R_ATPS4rpp: 2000.0
	R_MG2tex: 4000.0
	R_MGt5: 4000.0
	R_PPA: 1000.0
	R_PPDK: 1000.0
	R_PYK4: 1000.0
	R_MG2t3_2pp: 4000.0
	R_CTPM: 1000.0
There are energy producing cycles in the model for R_GTPM
	R_ADK1: 1000.0
	R_ADK3: -1000.0
	R_ATPS4rpp: 1000.0
	R_MG2tex: 2000.0
	R_MGt5: 2000.0
	R_MG2t3_2pp: 2000.0
	R_GTPM: 1000.0
There are energy producing cycles in the model for R_UTPM
	R_ADK1: 1000.0
	R_ATPS4rpp: 2000.0
	R_MG2tex: 4000.0
	R_MGt5: 4000.0
	R_PPA: 1000.0
	R_PPDK: 1000.0
	R_PYK2: 1000.0
	R_MG2t3_2pp: 4000.0
	R_UTPM: 1000.0
There are energy producing cycles in the model for R_ITPM
	R_ADK1: 1000.0
	R_ADK4: -1000.0
	R_ATPS4rpp: 1000.0
	R_MG2tex: 2000.0
	R_MGt5: 2000.0
	R_MG2t3_2pp: 2000.0
	R_ITPM: 1000.0
There are NO energy producing cycles in the model for R_nadhM
T

{'R_MG2t3_2pp', 'R_MG2tex', 'R_MGt5'}

In [21]:
model_uni.reactions.R_MG2t3_2pp

R_MG2t3_2pp: 2.0 M_h_c + M_mg2_p <-> 2.0 M_h_p + M_mg2_c [-inf, 0.0]

In [22]:
GEMs_dict2["CH9-bin.6"].reactions.R_MG2t3_2pp

R_MG2t3_2pp: 2.0 M_h_c + M_mg2_p <-> 2.0 M_h_p + M_mg2_c

In [23]:
model_9_6 = reframed.load_cbmodel("output/GEMs/GEMs_adapt/CH9-bin.6.xml")

In [24]:
GEMs_dict2["CH9-bin.6"].remove_reactions(list(set(GEMs_dict2["CH9-bin.6"].reactions)-set(model_9_6.reactions)))

In [25]:
EGC.EGC_identifier(GEMs_dict2["CH9-bin.6"],print_results=False)

[92mThere are NO energy producing cycles in the model[0m


{}

In [26]:
GEMs_dict3 = {}
for MAG, model in GEMs_dict2.items():
    
    model_copy = model.copy()
    
    print(model_copy.get_metabolite_reactions("M_o2_e"))
    model_copy.remove_reactions(model_copy.get_metabolite_reactions("M_o2_e"))
    model_copy.remove_metabolite("M_o2_e")
    model_copy.remove_reaction("R_EX_o2_e")
    
    GEMs_dict3[MAG]=model_copy

['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']


  warn(f"No such reaction {r_id}")


['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']
['R_O2tex', 'R_EX_o2_e']


### Save models

In [27]:
for MAG,model in GEMs_dict3.items():
    model.update()
    reframed.save_cbmodel(model,"output/GEMs/GEMs_adapt_media_ACt2r/"+MAG+".xml")