# GSM of C. necator: model properties

### load in modules

In [1]:
import pandas as pd
import cobra
from cobra.flux_analysis import pfba
import json
import matplotlib.pyplot as plt
from cobra import Model, Reaction, Metabolite
import numpy as np

### Load in the C. necator H16 GSM

In [2]:
# Read the SBML file of the C. necator H16 model into `m`, the model object
# that the cells below query.
m = cobra.io.read_sbml_model('../Model/iCN1361.xml')



Academic license - for non-commercial use only - expires 2022-03-09
Using license file /Users/mbznp/gurobi.lic


### Model Properties

In this paper we present a new genome-scale metabolic model reconstruction of *Cupriavidus necator* H16, iCN1361, which consists of 1292 reactions, including 96 transport reactions, and 1265 metabolites, covering 20.63% of the open reading frames.

|  Feature            | Number  | 
|------------------|---------------|
| Genome size     | 7,416,678 |   
| Chromosome 1  | 4,052,032 |  
| Chromosome 2    | 2,912,490 |   
| Megaplasmid PHG  | 452,156 |  
| No. of open reading frames (ORFs) | 6,626 |
| Chromosome 1 | 3,651 |   
| Chromosome 2 | 2,555 |  
| Megaplasmid PHG | 420 |
| No. of reactions in the model | 1292 |
| No. of biochemical reactions | 1190 |
| No. of transport reactions | 96 |
| No. of metabolites | 1265 |
| No. of genes assigned in the model | 1361 |
| ORF coverage (%) | 20.63% |

In [3]:
# --- Reaction tallies -------------------------------------------------------
# A reaction is tallied as internal when its ID lacks 'EX_', its name mentions
# neither 'transport' nor 'drain', and its ID is not explicitly excluded.
exclude = ['R_Biomass', 'R_ATPM']
intracellular_reactions = sum(
    1
    for rxn in m.reactions
    if 'EX_' not in rxn.id
    and 'transport' not in rxn.name
    and 'drain' not in rxn.name
    and rxn.id not in exclude
)
# Only reactions whose ID contains 'EX_' are added to the transporter tally.
transporters = sum(1 for rxn in m.reactions if 'EX_' in rxn.id)

# --- Gene tallies -----------------------------------------------------------
# Each gene is assigned to the first replicon tag (checked in this order)
# that occurs in its ID; genes matching no tag are not assigned anywhere.
replicon_tally = {'H16_A': 0, 'H16_B': 0, 'PHG': 0}
for gene in m.genes:
    for tag in replicon_tally:
        if tag in gene.id:
            replicon_tally[tag] += 1
            break
chrom1 = replicon_tally['H16_A']
chrom2 = replicon_tally['H16_B']
plasmid = replicon_tally['PHG']

# --- Report -----------------------------------------------------------------
# NOTE(review): the gene total is reduced by one before printing — presumably
# to discount a single placeholder gene entry; confirm against the model.
summary = [
    ('Number of reactions: ', len(m.reactions)),
    ('Number of internal reactions: ', intracellular_reactions),
    ('Number of transporters: ', transporters),
    ('Number of metabolites: ', len(m.metabolites)),
    ('Number of genes in the model: ', len(m.genes) - 1),
    ('Number of genes in the model from chromosome 1: ', chrom1),
    ('Number of genes in the model from chromosome 2: ', chrom2),
    ('Number of genes in the model from megaplasmid: ', plasmid),
]
for label, value in summary:
    print(label, value)

Number of reactions:  1292
Number of internal reactions:  1190
Number of transporters:  96
Number of metabolites:  1265
Number of genes in the model:  1361
Number of genes in the model from chromosome 1:  822
Number of genes in the model from chromosome 2:  494
Number of genes in the model from megaplasmid:  45


### Functional reactions and biocyc pathways

Calculate the number of dead (blocked) and functional reactions using the built-in `find_blocked_reactions` function in cobra. The exchange reactions are opened to allow uptake of any nutrient source. The dead and functional pathways, and the reactions corresponding to each pathway, can be found in 'Supplementary/SupplementaryFile1_GSMinfo'.

In [4]:
# IDs of the reactions reported as blocked by cobra when every exchange
# reaction is opened.
dead = cobra.flux_analysis.find_blocked_reactions(m, open_exchanges=True)
print('Number of blocked reactions: ', len(dead))

# Every other reaction in the model is treated as functional. Membership is
# tested against a set so the list of blocked IDs is not re-scanned for each
# reaction; `functional` keeps the model's reaction order.
dead_lookup = set(dead)
functional = [rxn.id for rxn in m.reactions if rxn.id not in dead_lookup]
print('Number of functional reactions: ', len(functional))

Number of blocked reactions:  442
Number of functional reactions:  850


##### load in biocyc-pathways data

In [6]:
# Pathways from the supplementary files: 'biocyc_pathways' holds every BioCyc
# pathway that was included in the BioCyc flat files, keyed by pathway ID.
with open('Data/biocyc_pathways.json', 'r') as pathways_file:
    pathways = json.load(pathways_file)

# BioCyc pathway ID -> common pathway name (the name is read from index 0 of
# each value).
with open('Data/pathway_names.json', 'r') as names_file:
    pathway_names = json.load(names_file)

# Reverse lookup: common pathway name -> BioCyc pathway ID. If two pathways
# share a name, the one appearing later in the file wins.
pathway_names2 = {
    names[0]: pathway_id for pathway_id, names in pathway_names.items()
}

##### Map the dead reaction IDs to the biocyc IDs

In [6]:
# Two-way lookup between blocked reactions and their BioCyc annotation.
# Reactions whose 'biocyc' annotation is absent or an empty string are skipped.
dead_biocyc = {}   # BioCyc ID -> model reaction ID
dead_biocyc2 = {}  # model reaction ID -> BioCyc ID
for rxn_id in dead:
    annotation = m.reactions.get_by_id(rxn_id).annotation
    if 'biocyc' in annotation and annotation['biocyc'] != '':
        biocyc_id = annotation['biocyc']
        dead_biocyc[biocyc_id] = rxn_id
        dead_biocyc2[rxn_id] = biocyc_id

In [7]:
# BioCyc ID -> model reaction ID for every functional reaction that carries a
# non-empty 'biocyc' annotation.
functional_biocyc = {}
for rxn_id in functional:
    annotation = m.reactions.get_by_id(rxn_id).annotation
    if 'biocyc' in annotation and annotation['biocyc'] != '':
        functional_biocyc[annotation['biocyc']] = rxn_id

In [8]:
# How many of the dead reactions are linked to degradation pathways?
# Collects the BioCyc IDs of dead reactions found in any pathway whose name
# contains 'degradation'; an ID is listed once per such pathway it occurs in.
dead_reacs_deg = [
    biocyc_id
    for pathway_id, names in pathway_names.items()
    if 'degradation' in names[0]
    for biocyc_id in pathways[pathway_id]
    if biocyc_id in dead_biocyc
]



In [9]:
# Sort the dead reactions into three buckets according to the name of each
# BioCyc pathway that contains them: 'biosynthesis', 'degradation' or other.
# A reaction is appended once for every pathway it belongs to, so the printed
# totals count reaction/pathway pairs.
dead_deg = []
dead_bio = []
dead_other = []
for rxn_id in dead:
    # Reactions without a BioCyc ID cannot be matched to any pathway.
    if rxn_id not in dead_biocyc2:
        continue
    biocyc_id = dead_biocyc2[rxn_id]
    for pathway_id, members in pathways.items():
        if biocyc_id not in members:
            continue
        label = pathway_names[pathway_id][0]
        if 'biosynthesis' in label:
            bucket = dead_bio
        elif 'degradation' in label:
            bucket = dead_deg
        else:
            bucket = dead_other
        bucket.append(rxn_id)

for bucket in (dead_deg, dead_bio, dead_other):
    print(len(bucket))
print(len(dead_deg) + len(dead_bio) + len(dead_other))

130
96
41
267


##### Get the blocked and functional pathways

In [11]:
# Split the BioCyc pathways into blocked and functional ones.
# A pathway is blocked when at least one of its reactions maps to a dead
# reaction that is present in the model; otherwise it is functional.
# Each list stores the full entry from `pathway_names` (the name is at index 0).
#
# This single pass replaces two back-to-back loops: the results of the first
# were discarded and recomputed by the second, which also rebuilt a list of
# the `dead_biocyc` keys for every reaction it tested.
blocked_pathways = []
functional_pathways = []
for pathway_id, names in pathway_names.items():
    has_dead_reaction = any(
        biocyc_id in dead_biocyc and dead_biocyc[biocyc_id] in m.reactions
        for biocyc_id in pathways[pathway_id]
    )
    if has_dead_reaction:
        blocked_pathways.append(names)
    else:
        functional_pathways.append(names)




##### Total number of blocked pathways

In [12]:
# Pathway name -> BioCyc IDs of the dead reactions in that pathway, for every
# blocked pathway. The pathway ID is recovered from the name via the reverse
# lookup `pathway_names2`.
blocked_pathways_reacs = {}
for names in blocked_pathways:
    pathway_name = names[0]
    members = pathways[pathway_names2[pathway_name]]
    blocked_pathways_reacs[pathway_name] = [
        biocyc_id for biocyc_id in members if biocyc_id in dead_biocyc
    ]
print('Number of blocked pathways: ', len(blocked_pathways_reacs))

Number of blocked pathways:  168


##### Total number of functional pathways

In [13]:
# Pathway name -> BioCyc IDs of the functional reactions in that pathway.
# Pathways with no functional reaction mapped to the model are left out, so
# only pathways with at least one such reaction are counted.
functional_pathways_reacs = {}
for names in functional_pathways:
    pathway_name = names[0]
    working = [
        biocyc_id
        for biocyc_id in pathways[pathway_names2[pathway_name]]
        if biocyc_id in functional_biocyc
    ]
    if working:
        functional_pathways_reacs[pathway_name] = working

print('Number of functional pathways: ', len(functional_pathways_reacs))

Number of functional pathways:  239


#### Are any dead reactions in the model due to promiscuous enzymes? Check whether each dead reaction shares an EC number with at least one functional reaction

In [14]:

# EC number -> IDs of every reaction annotated with it (in model order).
# An 'ec-code' annotation may hold several numbers separated by ' // '.
# Built in a single pass over the reactions rather than re-scanning the whole
# model once per EC number.
ec_links = {}
for rxn in m.reactions:
    if 'ec-code' not in rxn.annotation:
        continue
    for ec in rxn.annotation['ec-code'].split(' // '):
        ec_links.setdefault(ec, []).append(rxn.id)

# Unique EC numbers seen in the model (ordered by first appearance).
ec_numbers = list(ec_links)

In [15]:
# Dead reactions that share an EC number with at least one functional
# reaction, i.e. candidates for being blocked side-activities of an enzyme
# that is otherwise active in the model.
#
# A reaction is appended once for every qualifying EC number it carries.
# NOTE(review): a dead reaction with several qualifying EC numbers is therefore
# counted more than once in the printed total — confirm this is intended.
count = 0  # not used in this cell; left defined as before
functional_lookup = set(functional)  # constant-time membership tests
dead_prom = []
for rxn_id in dead:
    for ec, members in ec_links.items():
        # Skip 'nan' EC entries, EC numbers this reaction does not carry, and
        # EC numbers carried by this reaction alone.
        if str(ec) == 'nan' or rxn_id not in members or len(members) < 2:
            continue
        if any(member in functional_lookup for member in members):
            dead_prom.append(rxn_id)

print(len(dead_prom))

130
