In [1]:
import json
import os
import re
import sys
from configparser import ConfigParser

import cobra
import cplex
# Read the notebook settings. The "syspaths" option of the [script] section is
# a ";"-separated list of directories appended to the module search path,
# presumably so the cobrakbase import that follows can be resolved.
config = ConfigParser()
config.read("config.cfg")
paths = config.get("script", "syspaths").split(";")
sys.path.extend(paths)
import cobrakbase


cobrakbase 0.2.8


In [2]:
# One-letter amino-acid codes mapped to full residue names. Insertion order is
# preserved; it is the order in which the per-residue report columns are
# written further down.
abbreviation = dict([
    ("M", "Methionine"),
    ("C", "Cysteine"),
    ("A", "Alanine"),
    ("T", "Threonine"),
    ("V", "Valine"),
    ("F", "Phenylalanine"),
    ("W", "Tryptophan"),
    ("Y", "Tyrosine"),
    ("I", "Isoleucine"),
    ("P", "Proline"),
    ("L", "Leucine"),
    ("D", "Aspartate"),
    ("R", "Arginine"),
    ("H", "Histidine"),
    ("K", "Lysine"),
    ("G", "Glycine"),
    ("S", "Serine"),
    ("Q", "Glutamine"),
    ("E", "Glutamate"),
    ("N", "Asparagine"),
])
# Parse the experimental PTM table into
#   modifications[mass shift][residue] = {"count": <rows seen>, "ops": [<annotations>]}
# Column 0 of each tab-separated row is searched for a "<char>(<sign><number>)"
# token such as "M(+15.99)"; column 1, when present and non-empty, is a
# ";"-separated list of annotations, each recorded once in first-seen order.
# NOTE(review): only the first token found in column 0 is counted per row -
# confirm that rows never carry more than one modification.

# Raw string: the pattern is unchanged, but the non-raw original relied on
# invalid escape sequences such as "\(" and "\d", which newer Python versions
# warn about. Compiled once instead of being searched twice per row.
mod_pattern = re.compile(r'(.)\(([\+-])(\d*\.*\d+)\)')
data = ""
with open('PTMData.txt', 'r') as file:
    data = file.read()
lines = data.split("\n")
modifications = {}
for line in lines:
    array = line.split("\t")
    m = mod_pattern.search(array[0])
    if m is None:
        continue
    # Expand a one-letter amino-acid code to its full name; any other
    # character is kept as captured.
    residue = m[1]
    if residue.upper() in abbreviation:
        residue = abbreviation[residue.upper()]
    mass = float(m[3])
    if m[2] == "-":
        mass = -mass
    entry = modifications.setdefault(mass, {}).setdefault(residue, {"count":0,"ops":[]})
    entry["count"] += 1
    # A matching row without a second column still counts but contributes no
    # annotations (indexing array[1] unconditionally raised IndexError).
    if len(array) > 1 and len(array[1]) > 0:
        for op in array[1].split(";"):
            if op not in entry["ops"]:
                entry["ops"].append(op)

In [3]:
# Load the residue reference table (tab-separated, first row is the header) into
#   residue_data[<column 0> + "_c0"] = {"name", "structure", "formula", "mass", "exact_mass"}
# taken from columns 1-5. The "_c0" suffix makes the keys comparable with the
# metabolite ids of the model loaded below. Values stay the strings read from
# the file.
data = ""
with open('AA_residues.tsv', 'r') as file:
    data = file.read()
lines = data.split("\n")
residue_data = {}
headers = None
for line in lines:
    if headers is None:
        headers = line.split("\t")
        continue
    array = line.split("\t")
    # Skip blank or truncated rows (for example the empty string left behind
    # by a trailing newline); indexing them raised IndexError.
    if len(array) < 6:
        continue
    residue_data[array[0]+"_c0"] = {
        "name":array[1],
        "structure":array[2],
        "formula":array[3],
        "mass":array[4],
        "exact_mass":array[5]
    }

# Predicted modifications; filled in by the reaction loop further down.
predicted_mods = {}

# Fetch the same workspace object twice: once through get_from_ws (used below
# via .metabolites / .reactions) and once as the raw object, which is indexed
# like a dict to reach the per-reaction "dblinks".
kbase_api = cobrakbase.KBaseAPI()
model = kbase_api.get_from_ws("AAResidueDamageProducts",95771)
kbmodel = kbase_api.get_object("AAResidueDamageProducts",95771)

# rxnops[reaction id] = PickAxe operator names whose first five characters are
# "spont", with the "spontaneous." text removed. Every reaction gets an entry,
# possibly an empty list.
rxnops = {}
for rxn in kbmodel["modelreactions"]:
    has_pickaxe = "dblinks" in rxn and "PickAxe" in rxn["dblinks"]
    pickaxe_ops = rxn["dblinks"]["PickAxe"] if has_pickaxe else []
    rxnops[rxn["id"]] = [
        op.replace("spontaneous.","")
        for op in pickaxe_ops
        if op[0:5] == "spont"
    ]
# Overwrite the formula of every model metabolite that appears in the residue
# table, and echo "<id>:<formula>" for each one that was updated.
for metabolite in model.metabolites:
    reference = residue_data.get(metabolite.id)
    if reference is None:
        continue
    metabolite.formula = reference["formula"]
    print(f"{metabolite.id}:{metabolite.formula}")
# Build predicted_mods[mass shift][residue] = {"count", "ops", "others"} from
# every model reaction that has at least one spontaneous operator in rxnops and
# consumes a residue listed in residue_data.
#   mass shift : formula weight of the product closest in weight to the
#                residue, minus the residue's formula weight (signed)
#   residue    : the reactant id without its trailing "Residue_c0"
#   count      : number of reactions giving this (mass shift, residue) pair
#   ops        : distinct operator names, in first-seen order
#   others     : the non-residue reactants of the FIRST such reaction only
# NOTE(review): "others" is not extended by later reactions that land on the
# same (mass shift, residue) pair - confirm this is intended.
residue_suffix = "Residue_c0"
for reaction in model.reactions:
    # Reactions missing from rxnops are treated as having no operators
    # instead of raising KeyError.
    ops = rxnops.get(reaction.id, [])
    if len(ops) == 0:
        continue
    # Read the stoichiometry once; negative coefficients are reactants,
    # positive ones products.
    stoichiometry = reaction.metabolites
    reactant = None
    other_reactant = []
    for metabolite in stoichiometry:
        if stoichiometry[metabolite] < 0:
            if metabolite.id in residue_data:
                reactant = metabolite
            else:
                other_reactant.append(metabolite)
    if reactant is None:
        continue
    reactant_weight = reactant.formula_weight
    closest_metabolite = None
    closest_mass = None
    for metabolite in stoichiometry:
        if stoichiometry[metabolite] > 0 and metabolite != reactant:
            delta = abs(metabolite.formula_weight - reactant_weight)
            if closest_mass is None or delta < closest_mass:
                closest_mass = delta
                closest_metabolite = metabolite
    # A reaction without any product has nothing to compare against; the
    # unguarded original failed here with AttributeError on None.
    if closest_metabolite is None:
        continue
    # Keep the sign so that mass losses and gains stay distinguishable.
    closest_mass = closest_metabolite.formula_weight - reactant_weight
    if closest_mass not in predicted_mods:
        predicted_mods[closest_mass] = {}
    # Drop the trailing 10 characters ("Residue_c0") from the reactant id,
    # e.g. "ArginineResidue_c0" -> "Arginine".
    residue = reactant.id[0:-len(residue_suffix)]
    if residue not in predicted_mods[closest_mass]:
        predicted_mods[closest_mass][residue] = {"count":0,"ops":[],"others":other_reactant}
    predicted_mods[closest_mass][residue]["count"] += 1
    for op in ops:
        if op not in predicted_mods[closest_mass][residue]["ops"]:
            predicted_mods[closest_mass][residue]["ops"].append(op)

# Report: one tab-separated row per experimental mass shift, paired with the
# nearest predicted mass shift when one lies within MAX_MASS_DIFF.
# Columns: experimental shift, predicted shift, predicted - experimental,
# experimental annotations, predicted operators, co-reactant names, then one
# column per residue holding "<experimental count>" or
# "<experimental count>/<predicted count>".

# Largest |predicted - experimental| difference still reported as a match.
MAX_MASS_DIFF = 2

header = "ExpMassdiff\tPredMassDiff\tDiff\tExpOp\tPredOp\tOthers"
abbrev_keys = abbreviation.keys()
for abbrev in abbrev_keys:
    header += "\t"+abbrev
print(header)
# Ids of the co-reactants that appear in any matched prediction.
cpd_hash = {}
for mass in modifications:
    line = ""
    closest = None
    closest_mass = None
    for pmass in predicted_mods:
        if closest is None or abs(pmass-mass) < abs(closest):
            closest = pmass-mass
            closest_mass = pmass
    # closest is still None when there are no predictions at all; calling
    # abs() on it raised TypeError.
    if closest is not None and abs(closest) > MAX_MASS_DIFF:
        closest = None
    # Dicts used as insertion-ordered sets.
    pop_hash = {}
    op_hash = {}
    # Reset for every row and filled across all residues, like the two sets
    # above. It used to be created inside the per-residue branch, so the row
    # showed only the last matching residue, a stale value from an earlier
    # row, or failed with NameError.
    others_hash = {}
    residue_values = ""
    for abbrev in abbrev_keys:
        residue_name = abbreviation[abbrev]
        new_value = "0"
        if residue_name in modifications[mass]:
            observed = modifications[mass][residue_name]
            for op in observed["ops"]:
                op_hash[op] = 1
            new_value = str(observed["count"])
        if closest is not None and residue_name in predicted_mods[closest_mass]:
            predicted = predicted_mods[closest_mass][residue_name]
            for other in predicted["others"]:
                # Co-reactants named "H+" are left out of the report.
                if other.name != "H+":
                    others_hash[other.name] = 1
                    cpd_hash[other.id] = 1
            for op in predicted["ops"]:
                pop_hash[op] = 1
            new_value = new_value+"/"+str(predicted["count"])
        residue_values += "\t"+new_value
    if closest is not None:
        line = str(mass)+"\t"+str(closest_mass)+"\t"+str(closest)+"\t"+";".join(op_hash.keys())+"\t"+";".join(pop_hash.keys())+"\t"+";".join(others_hash.keys())+residue_values
    else:
        # Unmatched rows keep the predicted columns empty.
        line = str(mass)+"\t\t\t"+";".join(op_hash.keys())+"\t\t"+residue_values
    print(line)

PhenylalanineResidue_c0:C11H16N2O
TyrosineResidue_c0:C11H16N2O2
TryptophanResidue_c0:C13H17N3O
AlanineResidue_c0:C5H12N2O
ArginineResidue_c0:C8H19N5O
AsparagineResidue_c0:C6H13N3O2
AspartateResidue_c0:C6H12N2O3
CysteineResidue_c0:C5H12N2OS
GlutamineResidue_c0:C7H15N3O2
GlutamateResidue_c0:C7H14N2O3
GlycineResidue_c0:C4H10N2O
HistidineResidue_c0:C8H14N4O
IsoleucineResidue_c0:C8H18N2O
LeucineResidue_c0:C8H18N2O
LysineResidue_c0:C8H19N3O
MethionineResidue_c0:C7H16N2OS
ProlineResidue_c0:C7H14N2O
SerineResidue_c0:C5H12N2O2
ThreonineResidue_c0:C6H14N2O2
ValineResidue_c0:C7H16N2O
ExpMassdiff	PredMassDiff	Diff	ExpOp	PredOp	Others	M	C	A	T	V	F	W	Y	I	P	L	D	R	H	K	G	S	Q	E	N
0.98	0.9847599999999943	0.004759999999994324	Deamidation (R);Carbamidomethylation;Deamidation (NQ);Dehydration;Oxidation (M);Sodium adduct;Carbamidomethylation (DHKE  X@N-term);Methylation(KR);Oxidation (HW)	ImineDeamination	H2O	0	0	0	0	0	0	0	0	0	0	0	0	10/1	0	0	0	0	1526	0	2783
15.99	14.026580000000024	-1.9634199999999762	Oxidati

In [4]:
# Find the reference reactions that involve any co-reactant collected in
# cpd_hash, then print id, definition and "|"-joined roles for those that
# list roles.

# The reaction database location is machine-specific. Set the REACTIONS_JSON
# environment variable to point elsewhere; without it the original path is used.
reactions_path = os.environ.get("REACTIONS_JSON", '/Users/chenry/code/fba_tools/data/Reactions.json')
reaction_hash = {}
with open(reactions_path) as json_file:
    input_data = json.load(json_file)
for rxn in input_data:
    # "_c0" is appended to each database compound id before the lookup in
    # cpd_hash; any() stops at the first hit.
    if "compound_ids" in rxn and any(cpd+"_c0" in cpd_hash for cpd in rxn["compound_ids"]):
        reaction_hash[rxn["id"]] = rxn
for rxnid in reaction_hash:
    roles = ""
    # NOTE(review): reactions without a "roles" entry are not printed at all,
    # which leaves the empty default above unused - confirm whether the print
    # was meant to run for every reaction.
    if "roles" in reaction_hash[rxnid]:
        roles = "|".join(reaction_hash[rxnid]["roles"])
        print(rxnid+"\t"+reaction_hash[rxnid]["definition"]+"\t"+roles)

rxn01788	(1) NADPH[0] + (1) O2[0] + (1) H+[0] + (1) Limonene[0] <=> (1) H2O[0] + (1) NADP[0] + (1) Perillyl alcohol[0]	fr.31443;6-oxocineole dehydrogenase (EC 1.14.13.51);6-oxocineoledehydrogenase
rxn11537	(1) S-Adenosyl-L-methionine[0] + (1) Juvenile hormone III acid[0] <=> (1) S-Adenosyl-homocysteine[0] + (1) Juvenile hormone III[0]	fr.31326;macrocin O-methyltransferase (EC 2.1.1.101);macrocino-methyltransferase
rxn09157	(1) H2O[0] + (1) phosphatidylethanolamine dioctadec-11-enoyl[0] <=> (1) H+[0] + (1) L-2-Lysophosphatidylethanolamine[0] + (1) octadecenoate[0]	fr.7504;Phospholipase A1 precursor (EC 3.1.1.32, EC 3.1.1.4);phospholipasea1precursor(ec,ec)
rxn04852	(1) NADPH[0] + (1) O2[0] + (1) H+[0] + (1) Arachidonate[0] <=> (1) H2O[0] + (1) NADP[0] + (1) 19(S)-HETE[0]	fr.31985;unspecific monooxygenase (EC 1.14.14.1);unspecificmonooxygenase
rxn01860	(1) H2O[0] + (1) (R)-Prunasin[0] <=> (1) D-Glucose[0] + (1) Mandelonitrile[0]	fr.32733;polymannuronate hydrolase (EC 3.2.1.121);polymannur

rxn00834	(1) H2O[0] + (1) NAD[0] + (1) IMP[0] <=> (1) NADH[0] + (1) H+[0] + (1) XMP[0]	fr.8495;Inosine-5'-monophosphate dehydrogenase (EC 1.1.1.205);inosine-5'-monophosphatedehydrogenase|fr.31915;IMP dehydrogenase (EC 1.1.1.205);impdehydrogenase
rxn05053	(1) H2O[0] + (1) N-Acetylphenylethylamine[0] <=> (1) Acetate[0] + (1) Phenethylamine[0]	fr.31553;N-acetylglucosaminylphosphatidylinositol deacetylase (EC 3.5.1.89);n-acetylglucosaminylphosphatidylinositoldeacetylase
rxn02967	(1) NADPH[0] + (1) O2[0] + (1) H+[0] + (1) LTE4[0] <=> (1) H2O[0] + (1) NADP[0] + (1) 20-OH-LTE4[0]	fr.30576;methyltetrahydroprotoberberine 14-monooxygenase (EC 1.14.13.37);methyltetrahydroprotoberberine14-monooxygenase
rxn00188	(1) CO2[0] + (1) NH3[0] + (1) L-Glutamate[0] <=> (1) H2O[0] + (2) H+[0] + (1) N-Carbamyl-L-glutamate[0]	fr.34243;adenosylcobinamide hydrolase (EC 3.5.1.90);adenosylcobinamidehydrolase
rxn02241	(1) O2[0] + (1) 2-Oxoglutarate[0] + (1) Aromadendrin[0] <=> (1) H2O[0] + (1) CO2[0] + (1) Succinat