## Caproiciproducens

In [163]:
from Bio import Entrez, SeqIO
import pandas as pd
import os

import reframed
import pandas as pd
import copy

import sys 
sys.path.append("../functions/")
import EGC

### Find ORFs and translate sequences -> fasta file

In [2]:
# NCBI translation table handed to Seq.translate by the ORF finder
# (table 11 is the bacterial, archaeal and plant plastid genetic code).
table = 11
# Minimum length, in amino acids, a stop-free stretch must have to be kept as an ORF.
min_pro_len = 100

# https://biopython.org/DIST/docs/tutorial/Tutorial.html#sec443

def find_orfs_with_trans(seq, trans_table, min_protein_length):
    """Find open reading frames in all six frames of ``seq`` and translate them.

    An ORF here is any stretch between two stop codons (or between a sequence
    edge and a stop codon); no start codon is required.

    Parameters
    ----------
    seq
        Nucleotide sequence supporting ``len()``, slicing,
        ``reverse_complement()`` and ``translate(table)`` (e.g. a Biopython
        ``Seq``).
    trans_table
        Translation table passed unchanged to ``translate``.
    min_protein_length
        Minimum number of amino acids a stretch must have to be reported.

    Returns
    -------
    list of (start, end, strand, protein)
        ``start``/``end`` are offsets into ``seq`` as given (reverse-strand hits
        are mapped back with ``seq_len - ...``), ``strand`` is ``+1`` or ``-1``
        and ``protein`` is the translated stretch without the stop symbol.
        When a stop codon terminates the stretch it is included in the
        coordinates. Both coordinates are clamped to ``[0, len(seq)]``.
    """
    answer = []
    seq_len = len(seq)
    for strand, nuc in [(+1, seq), (-1, seq.reverse_complement())]:
        for frame in range(3):
            trans = nuc[frame:].translate(trans_table)
            trans_len = len(trans)
            aa_start = 0
            while aa_start < trans_len:
                aa_end = trans.find("*", aa_start)
                if aa_end == -1:
                    # No further stop codon: the stretch runs to the end of the frame.
                    aa_end = trans_len
                if aa_end - aa_start >= min_protein_length:
                    if strand == 1:
                        start = frame + aa_start * 3
                        # "+ 3" covers the stop codon; clamp when there is none.
                        end = min(seq_len, frame + aa_end * 3 + 3)
                    else:
                        # May be -3..-1 here when the stretch has no stop codon;
                        # clamped below, after sorting.
                        start = seq_len - frame - aa_end * 3 - 3
                        end = seq_len - frame - aa_start * 3
                    answer.append((start, end, strand, trans[aa_start:aa_end]))
                aa_start = aa_end + 1
    # Sort on the raw coordinates first so the order of the ORFs (and any
    # sequential numbering a caller derives from it) is unaffected by the
    # clamping; max(0, x) is monotone, so the result is still ordered by start.
    answer.sort()
    return [(max(0, start), end, strand, pro) for start, end, strand, pro in answer]

In [3]:
all_recs = []  # NOTE(review): not used in the visible part of the notebook
i = 1  # running gene number, continued across all contigs

# Translate every contig and write each ORF as a sequentially numbered protein
# record ("gene1", "gene2", ...). The context manager closes the output file
# even if parsing or translation raises.
with open("../output/Caproicproducens_CH14_bin_0/CH14-bin.0.faa","w") as ofile:
    for record in SeqIO.parse("../input/CH14-bin.0.fa","fasta"):
        orf_list = find_orfs_with_trans(record.seq, table, min_pro_len)

        for start, end, strand, pro in orf_list:
            ofile.write(">"+"gene"+str(i)+"\n"+str(pro)+"\n")
            i += 1



### Acetate transport


- 3.A.1.120.5 - abc
- 1.A.14.2.2 - acetate uptake
- 2.A.23.1.10 - uptake
- 2.A.6.2.9 - abc
- 2.A.96.1.11 - uptake


- 2.A.1.13.1 - proton linked efflux/influx
- 2.A.1.6.11 - Acetate/haloacid transporter (no mechanism)
- 2.A.102.4.13 - acetate transporter
- 2.A.21.7.3 - Pyruvate/acetate/propionate: H+ symporter
- 2.A.96.1.4 - acetate permease

In [4]:
from io import TextIOWrapper
from Bio import SeqIO 
from urllib.request import urlopen
import re

In [5]:
# NOTE(review): absolute, user-specific path — the notebook only runs on the
# author's machine until this file is moved next to the other inputs.
possible_transporters = pd.read_excel("/Users/idunmariaburgos/Documents/Work/Project/chain elongation/Clostridia BL3 and BL4/possible_transporters_tcdb.xlsx",sheet_name="overview")

# Entries of the "Acetate" column, presumably TC numbers such as "3.A.1.120.5"
# (TODO confirm against the sheet). Empty cells are dropped so that join()
# never sees NaN.
acetate_tc_ids = possible_transporters["Acetate"].dropna().astype(str).str.strip()
acetate_tc_ids = acetate_tc_ids[acetate_tc_ids != ""]
if acetate_tc_ids.empty:
    # An empty alternation would match every record id.
    raise ValueError("No acetate transporter IDs found in column 'Acetate'")

# Escape each id so "." is matched literally, and refuse a trailing digit so
# that e.g. "2.A.96.1.4" does not also select "2.A.96.1.41".
possible_transporters_regex = "(?:" + "|".join(re.escape(tc_id) for tc_id in acetate_tc_ids) + r")(?!\d)"

**Filter data from tcdb**

In [6]:
# Download the TCDB FASTA file and parse all records into memory. The context
# manager closes the HTTP response even if parsing fails part-way.
with TextIOWrapper(urlopen("https://www.tcdb.org/public/tcdb")) as handle:
    recs = list(SeqIO.parse(handle, 'fasta'))

In [7]:
# Write every TCDB record whose id matches one of the candidate acetate
# transporters to a FASTA file. The context manager closes the file even if
# a record fails to serialise.
with open("../output/Caproicproducens_CH14_bin_0/transporters.faa","w") as ofile:
    for rec in recs:
        id_ = str(rec.id)

        if re.search(possible_transporters_regex, id_):
            sequence = str(rec.seq)
            ofile.write(">"+id_+"\n"+sequence+"\n")


#### Check matches

**No good match** 
- Best matches (F:ATP binding): https://www.tcdb.org/search/result.php?acc=q2pgb8

In [8]:
# Column labels for the header-less, tab-separated alignment table.
blast_columns = [
    "query acc.ver",
    "subject acc.ver",
    "% identity",
    "alignment length",
    "mismatches",
    "gap opens",
    "q. start",
    "q. end",
    "s. start",
    "s. end",
    "evalue",
    "bit score",
]
# Columns kept for inspection.
kept_columns = ["query acc.ver", "subject acc.ver", "% identity", "evalue", "bit score"]

transporters_CH14_bin_0 = pd.read_csv(
    "../output/Caproicproducens_CH14_bin_0/CH14-bin.0_transporters.tsv",
    sep="\t",
    header=None,
)
transporters_CH14_bin_0.columns = blast_columns
transporters_CH14_bin_0 = transporters_CH14_bin_0[kept_columns]

In [9]:
# Show the ten alignments with the highest bit score.
transporters_CH14_bin_0.sort_values("bit score",ascending=False).head(10)

Unnamed: 0,query acc.ver,subject acc.ver,% identity,evalue,bit score
66,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene9840,31.239,4.75e-84,273.0
68,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene948,27.273,6.299999999999999e-57,200.0
67,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene9840,35.398,7.07e-33,131.0
69,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene948,34.555,2.5800000000000002e-23,102.0
70,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene2822,26.142,1.1200000000000001e-22,99.8
73,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene11089,23.387,1.23e-21,96.3
78,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene12059,24.44,2.6300000000000003e-17,82.8
76,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene10468,30.244,8.670000000000001e-18,82.4
81,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene45,23.664,1.35e-16,80.9
82,gnl|TC-DB|Q2PGB8|3.A.1.120.5,gene4578,23.379,1.43e-16,80.9


## Create model

carve ./output/Caproicproducens_CH14_bin_0/CH14-bin.0.faa --fbc2 -o ./output/Caproicproducens_CH14_bin_0/CH14-bin.0.xml --verbose --gapfill 'LB[-O2]' --solver gurobi --soft  ./output/Caproicproducens_CH14_bin_0/soft_constraints.tsv     

### Constraints

In [115]:
# Exchange reactions listed in the soft-constraints file given to `carve --soft`,
# each with the value 1.
soft_constraints = dict.fromkeys(
    ["R_EX_hxa_e", "R_EX_but_e", "R_EX_ac_e", "R_EX_h2_e", "R_EX_co2_e"],
    1,
)

In [116]:
# Write the constraints as a two-column, header-less TSV (reaction id, value).
pd.Series(soft_constraints).to_csv("../output/Caproicproducens_CH14_bin_0/soft_constraints.tsv",sep="\t",index_label=None,header=None)

In [148]:
# Same soft constraints as above plus the ethanol exchange reaction, each with value 1.
soft_constraints_etoh = dict.fromkeys(
    ["R_EX_hxa_e", "R_EX_but_e", "R_EX_ac_e", "R_EX_h2_e", "R_EX_etoh_e", "R_EX_co2_e"],
    1,
)

In [149]:
# Write the ethanol variant as a two-column, header-less TSV (reaction id, value).
pd.Series(soft_constraints_etoh).to_csv("../output/Caproicproducens_CH14_bin_0/soft_constraints_etoh.tsv",sep="\t",index_label=None,header=None)

### Difference between models

In [160]:
# Load the three reconstructions compared below. Judging by the file names:
# default soft constraints, soft constraints including ethanol, and no soft
# constraints (the carve runs for the latter two are not shown in this notebook).
model = reframed.load_cbmodel("../output/Caproicproducens_CH14_bin_0/CH14-bin.0.xml")
model_etoh_constr = reframed.load_cbmodel("../output/Caproicproducens_CH14_bin_0/CH14-bin.0_etoh_constraint.xml")
model_no_constr = reframed.load_cbmodel("../output/Caproicproducens_CH14_bin_0/CH14-bin.0_no_constraints.xml")

##### Symmetric difference

In [161]:
# Number of reaction ids found in exactly one of the two models.
len(set(model_no_constr.reactions) ^ set(model.reactions))


50

In [162]:
# Number of reaction ids found in exactly one of the two models.
len(set(model_etoh_constr.reactions) ^ set(model_no_constr.reactions))


283

## Fixing acetate transport

In [142]:
# Load CarveMe's media table (TSV) directly from the project's GitHub repository.
# NOTE(review): media_db is not used in the visible part of this notebook.
media_db = pd.read_csv("https://raw.githubusercontent.com/cdanielmachado/carveme/master/carveme/data/input/media_db.tsv",sep="\t")

In [119]:
# Inspect the model's objective.
model.get_objective()

{'R_Growth': 1.0}

**Model has the wrong acetate transporter**

In [120]:
# List the reactions that involve extracellular acetate (M_ac_e).
model.get_metabolite_reactions("M_ac_e")

['R_EX_ac_e', 'R_Acabc']

In [121]:
# Display the R_ACKr reaction (acetate + ATP <-> acetyl phosphate + ADP).
model.reactions.R_ACKr

R_ACKr: M_ac_c + M_atp_c <-> M_actp_c + M_adp_c

In [122]:
# Display the R_PTAr reaction (acetyl-CoA + phosphate <-> acetyl phosphate + CoA).
model.reactions.R_PTAr

R_PTAr: M_accoa_c + M_pi_c <-> M_actp_c + M_coa_c

**Removing previous and adding new acetate transporter**

In [123]:
# Load the bacterial universe model and display its R_ACt2r reaction, which
# moves acetate together with a proton from the "e" to the "c" compartment.
model_uni = reframed.load_cbmodel("../input/universe_bacteria.xml")
model_uni.reactions.R_ACt2r

R_ACt2r: M_ac_e + M_h_e --> M_ac_c + M_h_c

In [124]:
# Take a shallow copy of the universe reaction so the object added to `model`
# is not the very same object held by `model_uni`.
# NOTE(review): a shallow copy may still share nested attributes (e.g. the
# stoichiometry mapping) with the universe reaction — confirm this is intended.
R_ACt2r = copy.copy(model_uni.reactions.R_ACt2r)

# Replace the existing acetate transporter R_Acabc with R_ACt2r.
model.remove_reaction("R_Acabc")

model.add_reaction(R_ACt2r)

In [125]:

# Let the new transporter run in both directions: flag it as reversible and
# open its lower bound.
model.reactions.R_ACt2r.reversible=True
model.reactions.R_ACt2r.lb=-1000
model.update()

# Display the flag to confirm the change.
model.reactions.R_ACt2r.reversible

True

#### Save temporary model

In [126]:
# Save an intermediate copy of the model with the replaced acetate transporter.
model.update()

reframed.save_cbmodel(model=model,filename="../output/Caproicproducens_CH14_bin_0/CH14-bin.0_acetate.xml")

### RNF -  H+/Na+-translocating ferredoxin:NAD+ oxidoreductase

In [127]:
from reframed.io.sbml import parse_gpr_rule

In [128]:
# Display the R_ATPS4rpp reaction to see its proton stoichiometry.
model.reactions.R_ATPS4rpp

R_ATPS4rpp: M_adp_c + 4.0 M_h_p + M_pi_c <-> M_atp_c + M_h2o_c + 3.0 M_h_c

**Results from BlastKOALA search (14.08.24) show that RNF-related proteins exist in our MAG**

In [129]:
# Read the header-less BlastKOALA table, keep its first three columns under
# readable names and drop rows in which any of the three is missing.
blast_koala_results = (
    pd.read_table("../output/Caproicproducens_CH14_bin_0/user_ko_definition.txt", header=None)
    .loc[:, [0, 1, 2]]
    .rename(columns={0: "gene", 1: "KO", 2: "description"})
    .dropna()
)

# Show only the annotations whose description mentions "rnf".
mentions_rnf = blast_koala_results["description"].str.contains("rnf")
blast_koala_results[mentions_rnf]

Unnamed: 0,gene,KO,description
966,gene3270,K03616,rnfB; H+/Na+-translocating ferredoxin:NAD+ oxi...
967,gene3275,K03617,rnfA; H+/Na+-translocating ferredoxin:NAD+ oxi...
968,gene3277,K03613,rnfE; H+/Na+-translocating ferredoxin:NAD+ oxi...
969,gene3280,K03612,rnfG; H+/Na+-translocating ferredoxin:NAD+ oxi...
970,gene3282,K03614,rnfD; H+/Na+-translocating ferredoxin:NAD+ oxi...
971,gene3286,K03615,rnfC; H+/Na+-translocating ferredoxin:NAD+ oxi...
2692,gene9189,K03614,rnfD; H+/Na+-translocating ferredoxin:NAD+ oxi...
2693,gene9195,K03613,rnfE; H+/Na+-translocating ferredoxin:NAD+ oxi...


**Adding the Rnf complex as a reaction, with a gene-protein-reaction rule built from the rnf genes found above**

In [130]:
# Gene rule for the Rnf complex, using the BlastKOALA hits listed above:
# rnfA (gene3275), rnfB (gene3270), rnfC (gene3286) and rnfG (gene3280) are all
# required; for rnfD (gene3282 / gene9189) and rnfE (gene9195 / gene3277)
# either of the two annotated copies is accepted.
gpr = parse_gpr_rule("G_gene3275 and G_gene3270 and G_gene3286 and (G_gene3282 or G_gene9189) and (G_gene9195 or G_gene3277) and G_gene3280")

In [131]:
# Stoichiometry as modelled here: 1 M_nad_c + 2 M_fdxrd_c + 2 M_h_c are
# consumed; 1 M_nadh_c + 2 M_fdxo_2_2_c + 1 M_h_p are produced.
rnf_stoichiometry = {
    'M_nad_c': -1,
    'M_fdxrd_c': -2,
    'M_h_c': -2,
    'M_nadh_c': 1,
    'M_fdxo_2_2_c': 2,
    'M_h_p': 1,
}

# Irreversible Rnf reaction carrying the gene rule defined for the complex.
R_RnfH2_2 = reframed.CBReaction(
    "R_RnfH2_2",
    name="energy-converting NADH:ferredoxin oxidoreductase transport of H+",
    reversible=False,
    stoichiometry=rnf_stoichiometry,
    gpr_association=gpr,
)

In [132]:
# Baseline check for energy-producing cycles before the Rnf reaction is added.
results = EGC.EGC_identifier(model)

[92mThere are NO energy producing cycles in the model[0m


In [133]:
# Add the Rnf reaction to the model.
model.add_reaction(R_RnfH2_2)
model.update()

In [134]:
# Re-run the energy-producing-cycle check now that the Rnf reaction is present.
results = EGC.EGC_identifier(model)

[91mThere ARE energy producing cycles in the model[0m


In [135]:
# One set of reaction ids per entry of `results` (each entry is iterated as
# (reaction, value) pairs), then the reactions common to all of them.
reaction_sets = []
for cycle_reactions in results.values():
    reaction_sets.append({reaction_id for reaction_id, _ in cycle_reactions})

set.intersection(*reaction_sets)

{'R_FNOR', 'R_RnfH2_2'}

**FNOR and Hnd most likely run only in the forward direction, so both are made irreversible (lower bound set to 0) below**

In [136]:
# Restrict R_FNOR to the forward direction: clear the reversible flag and raise
# the lower bound to 0.
model.reactions.R_FNOR.reversible=False
model.reactions.R_FNOR.lb=0

In [137]:
# Restrict R_Hnd to the forward direction: clear the reversible flag and raise
# the lower bound to 0.
model.reactions.R_Hnd.reversible=False
model.reactions.R_Hnd.lb=0

In [138]:
# Confirm that restricting the two reactions removed the energy-producing cycles.
results = EGC.EGC_identifier(model)

[92mThere are NO energy producing cycles in the model[0m


### Removing oxygen related reactions

In [139]:
model.update()
# Remove every reaction that involves extracellular oxygen (M_o2_e), then the
# metabolite itself. Reactions that only use O2 in other compartments are not
# touched by this call.
model.remove_reactions(model.get_metabolite_reactions("M_o2_e"))
model.remove_metabolite("M_o2_e")
model.update()

### Save models

In [140]:
# Write the curated model (acetate transport, Rnf, irreversibility fixes and
# oxygen removal applied) to the final GEM directory.
model.update()

reframed.save_cbmodel(model=model,filename="../output/GEMs/GEMs_final/CH14-bin.0.xml")

## Production of butyrate

**Here butyrate is preferred**

#### From glucose