# Using GECKO for metabolic engineering applications

Benjamín J. Sánchez, 2020-09-10

In [1]:
# requirements
import numpy as np
import pandas as pd

import os
import sys
import wget

from cobra import Reaction
from cobra.flux_analysis import pfba
from cobra.io import load_json_model

from cameo.strain_design.deterministic.flux_variability_based import FSEOF



## 1. Loading models

Load the metabolic and enzyme constrained models:

In [2]:
# Metabolic model:
wget.download("https://github.com/BenjaSanchez/notebooks/blob/master/caffeine-fix-yarrowia/iYali-model.json?raw=true", "model.json", bar = False)
model = load_json_model("model.json")
os.remove("model.json")

# Enzyme-constrained model:
wget.download("https://github.com/BenjaSanchez/notebooks/blob/master/caffeine-fix-yarrowia/iYali-ecModel.json?raw=true", "ec_model.json", bar = False)
ec_model = load_json_model("ec_model.json")
os.remove("ec_model.json")

Using license file C:\Users\bejsab\gurobi.lic
Academic license - for non-commercial use only


The model has two differences with a standard COBRA model. First, the reactions contain another metabolite: the enyzme itself.

In [3]:
model.reactions.NDP7

0,1
Reaction identifier,NDP7
Name,nucleoside diphosphatase
Memory address,0x016d117c1148
Stoichiometry,h2o_c + udp_c --> h_c + pi_c + ump_c  H2O + UDP --> H+ + phosphate + UMP
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


In [4]:
ec_model.reactions.NDP7No1

0,1
Reaction identifier,NDP7No1
Name,nucleoside diphosphatase (No1)
Memory address,0x016d12fd5648
Stoichiometry,h2o_c + 3.4336e-06 prot_Q6CBD9 + udp_c --> h_c + pi_c + ump_c  H2O + 3.4336e-06 prot_Q6CBD9 + UDP --> H+ + phosphate + UMP
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


In [5]:
ec_model.metabolites.prot_Q6CBD9

0,1
Metabolite identifier,prot_Q6CBD9
Name,prot_Q6CBD9
Memory address,0x016d122e5788
Formula,
Compartment,c
In 12 reaction(s),"NDP3No1, NTP5No1, prot_Q6CBD9_exchange, CDPPHNo1, NDP7No1, y300074No1, IDPANo1, NTP3No1, y300075No1, ITPANo1, NDP1No1, NDP8exNo1"


**NB:** All protein ids follow the form `prot_UNIPROT`.

The second difference is the existence of _protein exchange reactions_. These protein exchanges follow the naming `prot_UNIPROT_exchange`.

In [6]:
ec_model.reactions.prot_Q6CBD9_exchange

0,1
Reaction identifier,prot_Q6CBD9_exchange
Name,prot_Q6CBD9_exchange
Memory address,0x016d1381c808
Stoichiometry,--> prot_Q6CBD9  --> prot_Q6CBD9
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


By putting an upper bound on these exchanges, we can integrate proteomics data into the model and treat it as an usual COBRA model without further changes.

**NB:** Without any additional constraints, both models predict more or less the same using simple FBA simulations:

In [7]:
model.optimize()
model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,h_e,8.244823,h2o_e,13.00193,xBIOMASS,0.105692
1,o2_e,1.84639,co2_e,1.979554,,
2,glc__D_e,1.0,,,,
3,nh3_e,0.945233,,,,


In [8]:
ec_model.optimize()
ec_model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,h_e,8.786245,,,xBIOMASS,0.103721
1,o2_e,1.926063,,,,
2,glc__D_e,1.0,,,,
3,nh3_e,0.927612,,,,


## 2. Metabolic engineering applications of models

We will optimize for the production of:

In [9]:
model.metabolites.pmtcoa_c

0,1
Metabolite identifier,pmtcoa_c
Name,palmitoyl-CoA
Memory address,0x016d10b79b08
Formula,C37H62N7O17P3S
Compartment,c
In 5 reaction(s),"y002221, y003577, y001995, y003517, y002140"


For this we add to both models a reaction that "burns" the palmitoyl from the palmitoyl-CoA:

In [10]:
def add_pmt_rxn(model):
    reaction = Reaction('test')
    reaction.name = 'test rxn'
    reaction.lower_bound = 0
    reaction.upper_bound = 1000
    reaction.add_metabolites({
        model.metabolites.pmtcoa_c: -1.0,
        model.metabolites.coa_c: +1})
    model.add_reactions([reaction])
    print(reaction)

add_pmt_rxn(model)
add_pmt_rxn(ec_model)

test: pmtcoa_c --> coa_c
test: pmtcoa_c --> coa_c


Now we can perform any typical analysis, e.g. FSEOF:

In [11]:
biomass_rxn_id = 'xBIOMASS'
test_rxn_id = 'test'

def sorted_fseof(model, biomass_rxn_id, test_rxn_id):
    # Revert the model to its original state:
    model.reactions.get_by_id(biomass_rxn_id).lower_bound = 0
    model.reactions.get_by_id(test_rxn_id).lower_bound = 0
    model.objective = biomass_rxn_id
    # Run analysis
    fseof = FSEOF(model)
    fseof_result = fseof.run(target=model.reactions.get_by_id(test_rxn_id))
    fseof_df = fseof_result.data_frame
    # For each row, create a linear model with the test exchange as prediction, and store the slope of said model:
    fseof_df["slope"] = np.nan
    fseof_df["r2"] = np.nan
    for index, row in fseof_df.iterrows():
        if sum(row) == 0:
            fseof_df.loc[index,"slope"] = 0
        else:
            x = row.iloc[:-2]
            y = fseof_df.loc[test_rxn_id].iloc[:-2]
            A = np.vstack([x, np.ones(len(x))]).T
            m, c = np.linalg.lstsq(A, y, rcond=None)[0]
            resid = np.linalg.lstsq(A, y, rcond=None)[1]
            r2 = 1 - resid / (y.size * y.var())
            fseof_df.loc[index,"slope"] = m
            try:
                fseof_df.loc[index,"r2"] = r2
            except:
                fseof_df.loc[index,"r2"] = 0
    # Sort the dataframe by slope and print only rows with R2 > 0.5:
    fseof_df = fseof_df.sort_values(by=["slope"], ascending=False)
    print(fseof_df.loc[fseof_df.index != "test"].loc[fseof_df["r2"] > 0.5].iloc[:20, :])
    return fseof_df

# Run the FSEOF analysis for both models:
fseof_df = sorted_fseof(model, biomass_rxn_id, test_rxn_id)
ec_fseof_df = sorted_fseof(ec_model, biomass_rxn_id, test_rxn_id)

                 1         2         3         4         5         6  \
y002198  -0.000013 -0.000012 -0.000011 -0.000010 -0.000008 -0.000007   
y003512  -0.000013 -0.000012 -0.000011 -0.000010 -0.000008 -0.000007   
y003515  -0.000033 -0.000030 -0.000026 -0.000023 -0.000020 -0.000017   
y002196  -0.000117 -0.000105 -0.000094 -0.000083 -0.000072 -0.000060   
y003510  -0.000117 -0.000105 -0.000094 -0.000083 -0.000072 -0.000060   
y002199  -0.000206 -0.000186 -0.000166 -0.000146 -0.000126 -0.000106   
y003513  -0.000206 -0.000186 -0.000166 -0.000146 -0.000126 -0.000106   
y003543  -0.000304 -0.000274 -0.000245 -0.000215 -0.000186 -0.000157   
y103554  -0.001205 -0.001089 -0.000972 -0.000855 -0.000738 -0.000622   
y300018  -0.002231 -0.002015 -0.001799 -0.001583 -0.001367 -0.001151   
SHSL1    -0.004529 -0.004090 -0.003652 -0.003213 -0.002774 -0.002336   
GLYC3Ptm -0.004577 -0.004134 -0.003690 -0.003247 -0.002804 -0.002361   
DHAPtm   -0.004658 -0.004207 -0.003756 -0.003305 -0.002854 -0.00

## 3. Integrating proteomics in an ecModel

We will now load some data.

**NB:** Data should come in mmol/gDW units.

In [12]:
proteomics = pd.read_csv("fake-data.csv", index_col=0, squeeze=True)
proteomics.items

<bound method Series.items of ids
Q6C1I4    7.728810e-06
Q6C4R6    2.452620e-06
Q6C1I7    6.206630e-06
Q6CB22    6.751220e-07
Q9B6E8    6.036220e-06
              ...     
Q6CCV6    5.227950e-06
Q6C5I0    9.825330e-06
Q6C793    8.445740e-07
Q6CEQ0    8.268360e-06
Q6C3M8    6.827770e-06
Name: data, Length: 3550, dtype: float64>

We now add the data to the ecModel:

In [13]:
# Add simulations to python's search path & import simulation functions:
sys.path.append('../../simulations/src')
from simulations.modeling.driven import limit_proteins, flexibilize_proteomics

# Limit model with abundances:
limit_proteins(ec_model, proteomics)

In [14]:
# Visualize results:
ec_model.optimize()

Unnamed: 0,fluxes,reduced_costs
xAMINOACID,1.169163e-03,2.308050e-15
xBIOMASS,1.169163e-03,1.422473e-16
xLIPID,1.169163e-06,7.382983e-15
xMAINTENANCE,7.000000e-01,-1.796347e-02
xPOOL_AC_EM,1.321756e-04,1.978669e-17
...,...,...
prot_Q9UVF4_exchange,4.025696e-11,0.000000e+00
prot_Q9Y753_exchange,4.247756e-09,0.000000e+00
prot_W0TYM5_exchange,0.000000e+00,0.000000e+00
prot_W0TYP2_exchange,0.000000e+00,0.000000e+00


Note that the model grows very slow! Probably due to some values in the data (it is fake after all...)

## 4. Flexibilization

Experimental proteomics measurements can be too restrictive, due to instrument error and/or non-accurate kinetic data in the model. Thus, a flexibilization of the proteomics data is usually required to work with enzyme constrained models. Let's do that by requiring at least a growth rate of 0.1 1/h:

In [15]:
# Convert proteomics to list opf dictionaries (required for flexibilizing):
ec_model.reactions.EX_glc__D_e_REV.upper_bound = +10
proteomics_list = []
for key, value in proteomics.items():
    protein = {"identifier":key, "measurement":value, "uncertainty":0}
    proteomics_list.append(protein)

#Flexibilize proteomics:
D = {"measurement":0.1, "uncertainty":0.01}
new_growth_rate, proteomics_filtered, warnings = flexibilize_proteomics(ec_model, biomass_rxn_id, D, proteomics_list, [])
print(f"new growth rate: {new_growth_rate} h-1")
print(f"filtered proteins: {len(proteomics) - len(proteomics_filtered)} proteins")

new growth rate: {'measurement': 0.1, 'uncertainty': 0.01} h-1
filtered proteins: 34 proteins


In [16]:
ec_model.optimize()

Unnamed: 0,fluxes,reduced_costs
xAMINOACID,1.104979e-01,0.0
xBIOMASS,1.104979e-01,0.0
xLIPID,1.104979e-04,0.0
xMAINTENANCE,7.000000e-01,0.0
xPOOL_AC_EM,0.000000e+00,0.0
...,...,...
prot_Q9UVF4_exchange,3.804696e-09,0.0
prot_Q9Y753_exchange,4.014566e-07,0.0
prot_W0TYM5_exchange,0.000000e+00,0.0
prot_W0TYP2_exchange,0.000000e+00,0.0


Now that we have a functional model, we can repeat the previous FSEOF analysis

In [17]:
# Run the FSEOF analysis, filtering out any reaction that is not a protein exchange pseudo-rxn:
ec_fseof_df = sorted_fseof(ec_model, biomass_rxn_id, test_rxn_id)

                    1         2         3         4         5         6  \
y002156No1   0.000263  0.000263  0.000263  0.000263  0.000327  0.000327   
y002163No1   0.000263  0.000263  0.000263  0.000263  0.000327  0.000327   
y002170No1   0.000263  0.000263  0.000263  0.000263  0.000327  0.000327   
y002177No1   0.000263  0.000263  0.000263  0.000263  0.000327  0.000327   
y003517      0.002717  0.002717  0.002717  0.002717  0.002781  0.002781   
y001115      0.988217  0.988217  0.988217  0.988217  0.988217  0.988217   
ADCL         0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
4ABZt        0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
ADCSNo1      0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
y000337No1   0.012676  0.012676  0.012676  0.012676  0.012676  0.012676   
y003537_REV  0.012676  0.012676  0.012676  0.012676  0.012676  0.012676   
y103296No1   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
y103308No1   0.000000  0.

## 5. Enzyme Usage

Finally, we can  look at enzyme usage values between biomass production and our test production. For this, we should first perform simulations with the ecModel now that it has proteomics data integrated:

In [18]:
def simulate_ec_model(model, rxn_id):
    # First optimization: maximize input
    model.objective = rxn_id
    solution = model.optimize()
    # Second optimization: minimize glucose
    model.reactions.get_by_id(rxn_id).lower_bound = solution.fluxes[rxn_id]
    glc_rxn = model.reactions.get_by_id("EX_glc__D_e_REV")
    model.objective = {glc_rxn: -1}
    solution = pfba(model)
    return solution

def print_fluxes(model, solution, bio_rxn_id):
    print("growth: " + str(solution.fluxes[bio_rxn_id]))
    for rxn in model.reactions:
        if (len(rxn.metabolites) == 1 or rxn.id == "test") and solution.fluxes[rxn.id] != 0 and "_exchange" not in rxn.id:
            print(rxn.id + ": " + str(solution.fluxes[rxn.id]))

Condition 1: 100% of carbon going towards biomass.

In [19]:
ec_sol_biomass = simulate_ec_model(ec_model, biomass_rxn_id)
print_fluxes(ec_model, ec_sol_biomass, biomass_rxn_id)

growth: 0.11049787668900697
EX_ac_e: 1.3287528069937633
EX_co2_e: 4.619810724663165
EX_etoh_e: 2.578427041090677
EX_pyr_e: 8.012841153903857
EX_h2o_e: 22.27264031025135
y002111: 0.11049787668900697
y001654_REV: 0.988216544575795
EX_glc__D_e_REV: 6.778929962532047
EX_inost_e_REV: 0.0003977923560804251
EX_o2_e_REV: 3.0236171693685288
EX_pi_e_REV: 0.052573895247725794
EX_so4_e_REV: 0.02154708595435636


Condition 2: 20% going towards the desired metabolite, and the rest to biomass.

In [20]:
ec_model.reactions.get_by_id(biomass_rxn_id).lower_bound = 0.8 * ec_sol_biomass.fluxes[biomass_rxn_id]
ec_sol_test = simulate_ec_model(ec_model, test_rxn_id)
print_fluxes(ec_model, ec_sol_test, biomass_rxn_id)

growth: 0.08839830135120558
EX_ac_e: 1.5318432331897434
EX_co2_e: 5.814448295053125
EX_etoh_e: 2.3125180805491965
EX_h_e: 1.8230694641367489
EX_pyr_e: 7.904501399665252
EX_h2o_e: 21.454681484264515
y002111: 0.08839830135120558
y001654_REV: 0.7905732356606306
EX_glc__D_e_REV: 7.1189439509420245
EX_inost_e_REV: 0.0003182338848643409
EX_o2_e_REV: 3.2719556468458615
EX_pi_e_REV: 0.04205911619818166
EX_so4_e_REV: 0.017237668763485085
test: 0.13351749187542397


Now let's build a dataframe with all enzyme usages under both conditions. We are looking for this in the `prot_XXXXXX_exchange` rxns, and they are all in units of `mmol/gDW`.

In [21]:
usage_df = pd.DataFrame()
for reaction in ec_model.reactions:
    if reaction.id.startswith("prot_") and reaction.id.endswith("_exchange"):
        new_line = pd.DataFrame(index = [reaction.id[5:11]],
                                data = {"gene":reaction.gene_reaction_rule,
                                        "biomass_usage":[ec_sol_biomass.fluxes[reaction.id]],
                                        "test_usage":[ec_sol_test.fluxes[reaction.id]]})
        usage_df = usage_df.append(new_line)

print(usage_df)

                 gene  biomass_usage    test_usage
B5FVA8  YALI0_B01771g   0.000000e+00  0.000000e+00
B5FVG3  YALI0_E32164g   1.948190e-07  1.948190e-07
B5FVG6  YALI0_E32736g   0.000000e+00  0.000000e+00
F2Z5Z7  YALI0_B09361g   0.000000e+00  0.000000e+00
F2Z650  YALI0_E01056g   1.764838e-07  4.421994e-08
...               ...            ...           ...
Q99148  YALI0_F21010g   2.145964e-06  1.716771e-06
Q9UVF4  YALI0_B02948g   3.804696e-09  3.043757e-09
Q9Y753  YALI0_A10076g   4.014566e-07  3.211653e-07
W0TYM5  YALI0_E11355g   0.000000e+00  0.000000e+00
W0TYP2  YALI0_B11154g   0.000000e+00  0.000000e+00

[647 rows x 3 columns]


Let's make sure all values are positive:

In [22]:
usage_df = usage_df.sort_values(by=['biomass_usage'])
print(usage_df.head(n=5))
usage_df = usage_df.sort_values(by=['test_usage'])
print(usage_df.head(n=5))

                 gene  biomass_usage  test_usage
B5FVA8  YALI0_B01771g            0.0         0.0
Q6CA32  YALI0_D06325g            0.0         0.0
Q6CA28  YALI0_D06413g            0.0         0.0
Q6CA24  YALI0_D06501g            0.0         0.0
Q6CA02  YALI0_D06974g            0.0         0.0
                 gene  biomass_usage  test_usage
B5FVA8  YALI0_B01771g            0.0         0.0
Q6C3X3  YALI0_E31515g            0.0         0.0
Q6C3X5  YALI0_E31471g            0.0         0.0
Q6C3Z7  YALI0_E31009g            0.0         0.0
Q6C3Z9  YALI0_E30965g            0.0         0.0


There are a lot of rows with zero usage under both conditions, so let's filter them out:

In [23]:
usage_df = usage_df.query("biomass_usage > 0 or test_usage > 0")
print(usage_df)

                 gene  biomass_usage    test_usage
Q6CG81  YALI0_A21417g   2.376173e-07  0.000000e+00
Q6C1K2  YALI0_F15587g   9.462795e-08  0.000000e+00
Q6C2M6  YALI0_F06578g   1.203819e-12  9.630553e-13
Q6C3U4  YALI0_E32065g   3.966396e-11  3.173117e-11
Q6C2Y8  YALI0_F04015g   4.430784e-11  3.544627e-11
...               ...            ...           ...
Q6C2D9  YALI0_F08701g   6.021968e-05  4.817575e-05
Q6C0G7  YALI0_F24893g   6.468899e-05  5.175119e-05
Q6C968  YALI0_D13596g   2.514036e-04  1.330059e-04
Q6C7T0  YALI0_D25630g   3.110409e-04  3.424800e-04
Q6C4K5  YALI0_E26004g   3.991415e-05  7.245275e-03

[243 rows x 3 columns]


Now let's compute usage changes. We will look at both absolute changes (the difference between both conditions) and relative changes (the fold change or ratio between them).

In [24]:
usage_df["abs_changes"] = usage_df["test_usage"] - usage_df["biomass_usage"]
usage_df["rel_changes"] = usage_df["test_usage"] / usage_df["biomass_usage"]
print(usage_df)

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6CG81  YALI0_A21417g   2.376173e-07  0.000000e+00 -2.376173e-07     0.000000
Q6C1K2  YALI0_F15587g   9.462795e-08  0.000000e+00 -9.462795e-08     0.000000
Q6C2M6  YALI0_F06578g   1.203819e-12  9.630553e-13 -2.407638e-13     0.800000
Q6C3U4  YALI0_E32065g   3.966396e-11  3.173117e-11 -7.932792e-12     0.800000
Q6C2Y8  YALI0_F04015g   4.430784e-11  3.544627e-11 -8.861568e-12     0.800000
...               ...            ...           ...           ...          ...
Q6C2D9  YALI0_F08701g   6.021968e-05  4.817575e-05 -1.204394e-05     0.800000
Q6C0G7  YALI0_F24893g   6.468899e-05  5.175119e-05 -1.293780e-05     0.800000
Q6C968  YALI0_D13596g   2.514036e-04  1.330059e-04 -1.183977e-04     0.529053
Q6C7T0  YALI0_D25630g   3.110409e-04  3.424800e-04  3.143910e-05     1.101077
Q6C4K5  YALI0_E26004g   3.991415e-05  7.245275e-03  7.205361e-03   181.521471

[243 rows x 5 columns]


We can now sort and take a look at the top 20 of enzymes that:

* Increased their absolute usage the most:

In [25]:
usage_df = usage_df.sort_values(by=['abs_changes'])
print(usage_df.tail(n=20).iloc[::-1])

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6C4K5  YALI0_E26004g   3.991415e-05  7.245275e-03  7.205361e-03   181.521471
Q6C7T0  YALI0_D25630g   3.110409e-04  3.424800e-04  3.143910e-05     1.101077
Q6CFH4  YALI0_B06941g   8.674153e-06  1.861228e-05  9.938131e-06     2.145718
Q6CD79  YALI0_C03025g   0.000000e+00  9.407220e-06  9.407220e-06          inf
Q6C7J6  YALI0_E00264g   0.000000e+00  6.245420e-06  6.245420e-06          inf
Q6C549  YALI0_E21021g   0.000000e+00  5.437600e-06  5.437600e-06          inf
Q6CG57  YALI0_B00704g   2.056860e-06  6.272450e-06  4.215590e-06     3.049527
Q6C5X9  YALI0_E14190g   5.120566e-06  8.101054e-06  2.980488e-06     1.582062
P34229  YALI0_B15059g   2.657320e-07  2.808700e-06  2.542968e-06    10.569670
Q6C793  YALI0_E02684g   3.080941e-06  4.870967e-06  1.790026e-06     1.581000
Q6CA04  YALI0_D06930g   1.317868e-06  2.584170e-06  1.266302e-06     1.960871
Q6C1T4  YALI0_F13541g   7.514864e-07  1.715460e-06  9.639733e-07

* Decreased their absolute usage the most:

In [26]:
print(usage_df.head(n=20))

                 gene  biomass_usage  test_usage  abs_changes  rel_changes
Q6C968  YALI0_D13596g       0.000251    0.000133    -0.000118     0.529053
Q6C0G7  YALI0_F24893g       0.000065    0.000052    -0.000013     0.800000
Q6C2D9  YALI0_F08701g       0.000060    0.000048    -0.000012     0.800000
Q6CDD3  YALI0_C01411g       0.000019    0.000010    -0.000009     0.512300
Q6CGM4  YALI0_A18062g       0.000044    0.000035    -0.000009     0.800000
Q6C8C2  YALI0_D20878g       0.000044    0.000035    -0.000009     0.800000
Q6C627  YALI0_E13057g       0.000032    0.000026    -0.000006     0.800000
Q6C0G0  YALI0_F25047g       0.000030    0.000024    -0.000006     0.800000
Q6CER7  YALI0_B13552g       0.000030    0.000024    -0.000006     0.800000
Q6C5V2  YALI0_E14949g       0.000045    0.000039    -0.000006     0.871501
Q6CAF8  YALI0_D03135g       0.000029    0.000023    -0.000006     0.800000
Q6CAF2  YALI0_D03333g       0.000007    0.000003    -0.000004     0.460344
Q6C7B8  YALI0_E02090g    

* Increased their relative usage the most:

In [27]:
usage_df = usage_df.sort_values(by=['rel_changes'])
print(usage_df.tail(n=20).iloc[::-1])

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6CD79  YALI0_C03025g   0.000000e+00  9.407220e-06  9.407220e-06          inf
Q6C7J6  YALI0_E00264g   0.000000e+00  6.245420e-06  6.245420e-06          inf
Q6C549  YALI0_E21021g   0.000000e+00  5.437600e-06  5.437600e-06          inf
Q6C682  YALI0_E11671g   0.000000e+00  2.074256e-08  2.074256e-08          inf
Q6C4Y7  YALI0_E22649g   0.000000e+00  6.225440e-08  6.225440e-08          inf
Q6C7Y1  YALI0_D24431g   0.000000e+00  3.637872e-07  3.637872e-07          inf
Q6C3H5  YALI0_E34793g   0.000000e+00  3.637872e-07  3.637872e-07          inf
Q6CEH4  YALI0_B15598g   0.000000e+00  9.594201e-07  9.594201e-07          inf
Q6C4K5  YALI0_E26004g   3.991415e-05  7.245275e-03  7.205361e-03   181.521471
Q6CC91  YALI0_C11407g   7.381489e-11  7.802066e-10  7.063917e-10    10.569772
Q6C416  YALI0_E30591g   7.381489e-11  7.802066e-10  7.063917e-10    10.569772
P34229  YALI0_B15059g   2.657320e-07  2.808700e-06  2.542968e-06

* Decreased their relative usage the most:

In [28]:
print(usage_df.head(n=20))

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6CG81  YALI0_A21417g   2.376173e-07  0.000000e+00 -2.376173e-07     0.000000
Q6C1K2  YALI0_F15587g   9.462795e-08  0.000000e+00 -9.462795e-08     0.000000
F2Z650  YALI0_E01056g   1.764838e-07  4.421994e-08 -1.322639e-07     0.250561
Q6C2B5  YALI0_F09229g   3.822770e-07  1.262833e-07 -2.559936e-07     0.330345
Q6BZU8  YALI0_F30745g   3.186979e-06  1.370912e-06 -1.816067e-06     0.430160
Q6CAF2  YALI0_D03333g   6.934260e-06  3.192146e-06 -3.742114e-06     0.460344
Q6C1T8  YALI0_F13453g   5.927303e-07  2.738382e-07 -3.188921e-07     0.461995
Q6C0N3  YALI0_F23221g   5.927303e-09  2.738382e-09 -3.188921e-09     0.461995
Q6CDD3  YALI0_C01411g   1.890021e-05  9.682570e-06 -9.217642e-06     0.512300
Q6C968  YALI0_D13596g   2.514036e-04  1.330059e-04 -1.183977e-04     0.529053
Q6CAP2  YALI0_D01089g   1.336084e-06  7.068593e-07 -6.292246e-07     0.529053
Q6C9F1  YALI0_D11704g   1.329024e-07  7.031245e-08 -6.258999e-08