# Using GECKO for metabolic engineering applications

Benjamín J. Sánchez, 2020-09-10

In [1]:
# requirements
import numpy as np
import pandas as pd

import os
import sys
import wget

from cobra import Reaction
from cobra.flux_analysis import pfba
from cobra.io import load_json_model

from cameo.strain_design.deterministic.flux_variability_based import FSEOF



## 1. Loading models

Load the metabolic and enzyme constrained models:

In [2]:
# Metabolic model:
wget.download("https://github.com/BenjaSanchez/notebooks/blob/master/caffeine-fix-yarrowia/iYali-model.json?raw=true", "model.json", bar = False)
model = load_json_model("model.json")
os.remove("model.json")

# Enzyme-constrained model:
wget.download("https://github.com/BenjaSanchez/notebooks/blob/master/caffeine-fix-yarrowia/iYali-ecModel.json?raw=true", "ec_model.json", bar = False)
ec_model = load_json_model("ec_model.json")
os.remove("ec_model.json")

Using license file C:\Users\bejsab\gurobi.lic
Academic license - for non-commercial use only


The model has two differences with a standard COBRA model. First, the reactions contain another metabolite: the enyzme itself.

In [3]:
model.reactions.NDP7

0,1
Reaction identifier,NDP7
Name,nucleoside diphosphatase
Memory address,0x016d117c1148
Stoichiometry,h2o_c + udp_c --> h_c + pi_c + ump_c  H2O + UDP --> H+ + phosphate + UMP
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


In [4]:
ec_model.reactions.NDP7No1

0,1
Reaction identifier,NDP7No1
Name,nucleoside diphosphatase (No1)
Memory address,0x016d12fd5648
Stoichiometry,h2o_c + 3.4336e-06 prot_Q6CBD9 + udp_c --> h_c + pi_c + ump_c  H2O + 3.4336e-06 prot_Q6CBD9 + UDP --> H+ + phosphate + UMP
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


In [5]:
ec_model.metabolites.prot_Q6CBD9

0,1
Metabolite identifier,prot_Q6CBD9
Name,prot_Q6CBD9
Memory address,0x016d122e5788
Formula,
Compartment,c
In 12 reaction(s),"NDP3No1, NTP5No1, prot_Q6CBD9_exchange, CDPPHNo1, NDP7No1, y300074No1, IDPANo1, NTP3No1, y300075No1, ITPANo1, NDP1No1, NDP8exNo1"


**NB:** All protein ids follow the form `prot_UNIPROT`.

The second difference is the existence of _protein exchange reactions_. These protein exchanges follow the naming `prot_UNIPROT_exchange`.

In [6]:
ec_model.reactions.prot_Q6CBD9_exchange

0,1
Reaction identifier,prot_Q6CBD9_exchange
Name,prot_Q6CBD9_exchange
Memory address,0x016d1381c808
Stoichiometry,--> prot_Q6CBD9  --> prot_Q6CBD9
GPR,YALI0_C19712g
Lower bound,0.0
Upper bound,1000.0


By putting an upper bound on these exchanges, we can integrate proteomics data into the model and treat it as an usual COBRA model without further changes.

**NB:** Without any additional constraints, both models predict more or less the same using simple FBA simulations:

In [7]:
model.optimize()
model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,h_e,8.244823,h2o_e,13.00193,xBIOMASS,0.105692
1,o2_e,1.84639,co2_e,1.979554,,
2,glc__D_e,1.0,,,,
3,nh3_e,0.945233,,,,


In [8]:
ec_model.optimize()
ec_model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,h_e,8.786245,,,xBIOMASS,0.103721
1,o2_e,1.926063,,,,
2,glc__D_e,1.0,,,,
3,nh3_e,0.927612,,,,


## 2. Metabolic engineering applications of models

We will optimize for the production of:

In [9]:
model.metabolites.pmtcoa_c

0,1
Metabolite identifier,pmtcoa_c
Name,palmitoyl-CoA
Memory address,0x016d10b79b08
Formula,C37H62N7O17P3S
Compartment,c
In 5 reaction(s),"y002221, y003577, y001995, y003517, y002140"


For this we add to both models a reaction that "burns" the palmitoyl from the palmitoyl-CoA:

In [10]:
def add_pmt_rxn(model):
    reaction = Reaction('test')
    reaction.name = 'test rxn'
    reaction.lower_bound = 0
    reaction.upper_bound = 1000
    reaction.add_metabolites({
        model.metabolites.pmtcoa_c: -1.0,
        model.metabolites.coa_c: +1})
    model.add_reactions([reaction])
    print(reaction)

add_pmt_rxn(model)
add_pmt_rxn(ec_model)

test: pmtcoa_c --> coa_c
test: pmtcoa_c --> coa_c


Now we can perform any typical analysis, e.g. FSEOF:

In [11]:
biomass_rxn_id = 'xBIOMASS'
test_rxn_id = 'test'

def sorted_fseof(model, biomass_rxn_id, test_rxn_id):
    # Revert the model to its original state:
    model.reactions.get_by_id(biomass_rxn_id).lower_bound = 0
    model.reactions.get_by_id(test_rxn_id).lower_bound = 0
    model.objective = biomass_rxn_id
    # Run analysis
    fseof = FSEOF(model)
    fseof_result = fseof.run(target=model.reactions.get_by_id(test_rxn_id))
    fseof_df = fseof_result.data_frame
    # For each row, create a linear model with the test exchange as prediction, and store the slope of said model:
    fseof_df["slope"] = np.nan
    fseof_df["r2"] = np.nan
    for index, row in fseof_df.iterrows():
        if sum(row) == 0:
            fseof_df.loc[index,"slope"] = 0
        else:
            x = row.iloc[:-2]
            y = fseof_df.loc[test_rxn_id].iloc[:-2]
            A = np.vstack([x, np.ones(len(x))]).T
            m, c = np.linalg.lstsq(A, y, rcond=None)[0]
            resid = np.linalg.lstsq(A, y, rcond=None)[1]
            r2 = 1 - resid / (y.size * y.var())
            fseof_df.loc[index,"slope"] = m
            try:
                fseof_df.loc[index,"r2"] = r2
            except:
                fseof_df.loc[index,"r2"] = 0
    # Sort the dataframe by slope and print only rows with R2 > 0.5:
    fseof_df = fseof_df.sort_values(by=["slope"], ascending=False)
    print(fseof_df.loc[fseof_df.index != "test"].loc[fseof_df["r2"] > 0.5].iloc[:20, :])
    return fseof_df

# Run the FSEOF analysis for both models:
fseof_df = sorted_fseof(model, biomass_rxn_id, test_rxn_id)
ec_fseof_df = sorted_fseof(ec_model, biomass_rxn_id, test_rxn_id)

                 1         2         3         4         5         6  \
y002198  -0.000013 -0.000012 -0.000011 -0.000010 -0.000008 -0.000007   
y003512  -0.000013 -0.000012 -0.000011 -0.000010 -0.000008 -0.000007   
y003515  -0.000033 -0.000030 -0.000026 -0.000023 -0.000020 -0.000017   
y002196  -0.000117 -0.000105 -0.000094 -0.000083 -0.000072 -0.000060   
y003510  -0.000117 -0.000105 -0.000094 -0.000083 -0.000072 -0.000060   
y002199  -0.000206 -0.000186 -0.000166 -0.000146 -0.000126 -0.000106   
y003513  -0.000206 -0.000186 -0.000166 -0.000146 -0.000126 -0.000106   
y003543  -0.000304 -0.000274 -0.000245 -0.000215 -0.000186 -0.000157   
y103554  -0.001205 -0.001089 -0.000972 -0.000855 -0.000738 -0.000622   
y300018  -0.002231 -0.002015 -0.001799 -0.001583 -0.001367 -0.001151   
SHSL1    -0.004529 -0.004090 -0.003652 -0.003213 -0.002774 -0.002336   
GLYC3Ptm -0.004577 -0.004134 -0.003690 -0.003247 -0.002804 -0.002361   
DHAPtm   -0.004658 -0.004207 -0.003756 -0.003305 -0.002854 -0.00

## 3. Integrating proteomics in an ecModel

We will now load some data.

**NB:** Data should come in mmol/gDW units.

In [12]:
proteomics = pd.read_csv("fake-data.csv", index_col=0, squeeze=True)
proteomics.items

<bound method Series.items of ids
Q6C1I4    7.728810e-06
Q6C4R6    2.452620e-06
Q6C1I7    6.206630e-06
Q6CB22    6.751220e-07
Q9B6E8    6.036220e-06
              ...     
Q6CCV6    5.227950e-06
Q6C5I0    9.825330e-06
Q6C793    8.445740e-07
Q6CEQ0    8.268360e-06
Q6C3M8    6.827770e-06
Name: data, Length: 3550, dtype: float64>

We now add the data to the ecModel:

In [13]:
# Add simulations to python's search path & import simulation functions:
sys.path.append('../../simulations/src')
from simulations.modeling.driven import limit_proteins, flexibilize_proteomics

# Limit model with abundances:
limit_proteins(ec_model, proteomics)

In [14]:
# Visualize results:
ec_model.optimize()

Unnamed: 0,fluxes,reduced_costs
xAMINOACID,1.169163e-03,2.308050e-15
xBIOMASS,1.169163e-03,1.422473e-16
xLIPID,1.169163e-06,7.382983e-15
xMAINTENANCE,7.000000e-01,-1.796347e-02
xPOOL_AC_EM,1.321756e-04,1.978669e-17
...,...,...
prot_Q9UVF4_exchange,4.025696e-11,0.000000e+00
prot_Q9Y753_exchange,4.247756e-09,0.000000e+00
prot_W0TYM5_exchange,0.000000e+00,0.000000e+00
prot_W0TYP2_exchange,0.000000e+00,0.000000e+00


Note that the model grows very slow! Probably due to some values in the data (it is fake after all...)

## 4. Flexibilization

Experimental proteomics measurements can be too restrictive, due to instrument error and/or non-accurate kinetic data in the model. Thus, a flexibilization of the proteomics data is usually required to work with enzyme constrained models. Let's do that by requiring at least a growth rate of 0.1 1/h:

In [15]:
# Convert proteomics to list opf dictionaries (required for flexibilizing):
proteomics_list = []
for key, value in proteomics.items():
    protein = {"identifier":key, "measurement":value, "uncertainty":0}
    proteomics_list.append(protein)

#Flexibilize proteomics:
D = {"measurement":0.1, "uncertainty":0.01}
new_growth_rate, proteomics_filtered, warnings = flexibilize_proteomics(ec_model, biomass_rxn_id, D, proteomics_list, [])
print(f"new growth rate: {new_growth_rate} h-1")
print(f"filtered proteins: {len(proteomics) - len(proteomics_filtered)} proteins")

new growth rate: {'measurement': 0.1, 'uncertainty': 0.01} h-1
filtered proteins: 46 proteins


In [16]:
ec_model.optimize()

Unnamed: 0,fluxes,reduced_costs
xAMINOACID,9.851700e-02,-3.667205e-15
xBIOMASS,9.851700e-02,2.525757e-15
xLIPID,9.851700e-05,6.217249e-15
xMAINTENANCE,7.000000e-01,-6.684597e-03
xPOOL_AC_EM,0.000000e+00,-9.063930e-17
...,...,...
prot_Q9UVF4_exchange,5.908207e-11,0.000000e+00
prot_Q9Y753_exchange,3.579282e-07,0.000000e+00
prot_W0TYM5_exchange,0.000000e+00,0.000000e+00
prot_W0TYP2_exchange,0.000000e+00,-1.027562e+00


Now that we have a functional model, we can repeat the previous FSEOF analysis

In [17]:
# Run the FSEOF analysis, filtering out any reaction that is not a protein exchange pseudo-rxn:
ec_fseof_df = sorted_fseof(ec_model, biomass_rxn_id, test_rxn_id)

                      1         2         3         4         5         6  \
COAtim         0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
HMGCOAtm_REV   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
y000560No1     0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
ACACT1mNo1     0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
LEUTAm_REVNo1  0.000000  0.000000  0.000000  0.000000  0.000000  0.007929   
y300030        0.000000  0.000000  0.000000  0.000000  0.000000  0.007929   
3C4MOPtm       0.000000  0.000000  0.000000  0.000000  0.000000  0.007929   
OMCDCmNo1      0.000000  0.000000  0.000000  0.000000  0.000000  0.007929   
ME2mNo1        0.000000  0.000000  0.000000  0.000000  0.006942  0.014341   
HSDxiNo1       0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
ICDHymNo1      0.009079  0.003956  0.000000  0.000000  0.000000  0.000000   
arm_MTHFD      0.000000  0.000000  0.014279  0.000000  0.000000  0.040708   

## 5. Enzyme Usage

Finally, we can  look at enzyme usage values between biomass production and our test production. For this, we should first perform simulations with the ecModel now that it has proteomics data integrated:

In [18]:
def simulate_ec_model(model, rxn_id):
    # First optimization: maximize input
    model.objective = rxn_id
    solution = model.optimize()
    # Second optimization: minimize glucose
    model.reactions.get_by_id(rxn_id).lower_bound = solution.fluxes[rxn_id]
    glc_rxn = model.reactions.get_by_id("EX_glc__D_e_REV")
    model.objective = {glc_rxn: -1}
    solution = pfba(model)
    return solution

def print_fluxes(model, solution, bio_rxn_id):
    print("growth: " + str(solution.fluxes[bio_rxn_id]))
    for rxn in model.reactions:
        if (len(rxn.metabolites) == 1 or rxn.id == "test") and solution.fluxes[rxn.id] != 0 and "_exchange" not in rxn.id:
            print(rxn.id + ": " + str(solution.fluxes[rxn.id]))

Condition 1: 100% of carbon going towards biomass.

In [19]:
ec_sol_biomass = simulate_ec_model(ec_model, biomass_rxn_id)
print_fluxes(ec_model, ec_sol_biomass, biomass_rxn_id)

growth: 0.09851700232834214
EX_co2_e: 2.254600561855864
EX_h2o_e: 13.22411702635702
y002111: 0.09851700232834214
y001654_REV: 0.8810678950590556
EX_glc__D_e_REV: 1.0
EX_h_e_REV: 8.861037207464555
EX_inost_e_REV: 0.00035466120838203167
EX_o2_e_REV: 2.130476511070629
EX_pi_e_REV: 0.04687350305479499
EX_so4_e_REV: 0.019210815454026718


Condition 2: 20% going towards the desired metabolite, and the rest to biomass.

In [20]:
ec_model.reactions.get_by_id(biomass_rxn_id).lower_bound = 0.8 * ec_sol_biomass.fluxes[biomass_rxn_id]
ec_sol_test = simulate_ec_model(ec_model, test_rxn_id)
print_fluxes(ec_model, ec_sol_test, biomass_rxn_id)

growth: 0.07881360186267372
EX_co2_e: 2.240301346058031
EX_h2o_e: 11.20362571162318
y002111: 0.07881360186267372
y001654_REV: 0.7048543160472669
EX_glc__D_e_REV: 1.0
EX_h_e_REV: 7.427626345008472
EX_inost_e_REV: 0.0002837289667056254
EX_o2_e_REV: 1.8070237476807125
EX_pi_e_REV: 0.03749880244384594
EX_so4_e_REV: 0.015368652363221377
test: 0.047711193964163304


Now let's build a dataframe with all enzyme usages under both conditions. We are looking for this in the `prot_XXXXXX_exchange` rxns, and they are all in units of `mmol/gDW`.

In [21]:
usage_df = pd.DataFrame()
for reaction in ec_model.reactions:
    if reaction.id.startswith("prot_") and reaction.id.endswith("_exchange"):
        new_line = pd.DataFrame(index = [reaction.id[5:11]],
                                data = {"gene":reaction.gene_reaction_rule,
                                        "biomass_usage":[ec_sol_biomass.fluxes[reaction.id]],
                                        "test_usage":[ec_sol_test.fluxes[reaction.id]]})
        usage_df = usage_df.append(new_line)

print(usage_df)

                 gene  biomass_usage    test_usage
B5FVA8  YALI0_B01771g   0.000000e+00  0.000000e+00
B5FVG3  YALI0_E32164g   1.460908e-04  1.268095e-04
B5FVG6  YALI0_E32736g   0.000000e+00  0.000000e+00
F2Z5Z7  YALI0_B09361g   0.000000e+00  0.000000e+00
F2Z650  YALI0_E01056g   4.732307e-09  0.000000e+00
...               ...            ...           ...
Q99148  YALI0_F21010g   1.913285e-06  1.530628e-06
Q9UVF4  YALI0_B02948g   5.908207e-11  2.713734e-09
Q9Y753  YALI0_A10076g   3.579282e-07  2.863425e-07
W0TYM5  YALI0_E11355g   0.000000e+00  0.000000e+00
W0TYP2  YALI0_B11154g   0.000000e+00  0.000000e+00

[647 rows x 3 columns]


Let's make sure all values are positive:

In [22]:
usage_df = usage_df.sort_values(by=['biomass_usage'])
print(usage_df.head(n=5))
usage_df = usage_df.sort_values(by=['test_usage'])
print(usage_df.head(n=5))

                 gene  biomass_usage  test_usage
B5FVA8  YALI0_B01771g            0.0         0.0
Q6CA24  YALI0_D06501g            0.0         0.0
Q6CA04  YALI0_D06930g            0.0         0.0
Q6CA02  YALI0_D06974g            0.0         0.0
Q6C9Z8  YALI0_D07062g            0.0         0.0
                 gene  biomass_usage  test_usage
B5FVA8  YALI0_B01771g            0.0         0.0
Q6C0Q9  YALI0_F22517g            0.0         0.0
Q6C0Q5  YALI0_F22605g            0.0         0.0
Q6C2G0  YALI0_F08195g            0.0         0.0
Q6C2F9  YALI0_F08217g            0.0         0.0


There are a lot of rows with zero usage under both conditions, so let's filter them out:

In [23]:
usage_df = usage_df.query("biomass_usage > 0 or test_usage > 0")
print(usage_df)

                 gene  biomass_usage  test_usage
Q6CA97  YALI0_D04741g   1.907871e-07    0.000000
Q6C4S9  YALI0_E24013g   1.907871e-07    0.000000
Q6CG81  YALI0_A21417g   6.945781e-08    0.000000
F2Z650  YALI0_E01056g   4.732307e-09    0.000000
Q6C8R8  YALI0_D17534g   8.306858e-11    0.000000
...               ...            ...         ...
Q6C326  YALI0_F03179g   1.460908e-04    0.000127
Q6C2V6  YALI0_F04774g   1.460908e-04    0.000127
Q6CFH9  YALI0_B06831g   1.460908e-04    0.000127
Q6C877  YALI0_D22022g   1.460908e-04    0.000127
Q6C4K5  YALI0_E26004g   2.564441e-02    0.030103

[238 rows x 3 columns]


Now let's compute usage changes. We will look at both absolute changes (the difference between both conditions) and relative changes (the fold change or ratio between them).

In [24]:
usage_df["abs_changes"] = usage_df["test_usage"] - usage_df["biomass_usage"]
usage_df["rel_changes"] = usage_df["test_usage"] / usage_df["biomass_usage"]
print(usage_df)

                 gene  biomass_usage  test_usage   abs_changes  rel_changes
Q6CA97  YALI0_D04741g   1.907871e-07    0.000000 -1.907871e-07     0.000000
Q6C4S9  YALI0_E24013g   1.907871e-07    0.000000 -1.907871e-07     0.000000
Q6CG81  YALI0_A21417g   6.945781e-08    0.000000 -6.945781e-08     0.000000
F2Z650  YALI0_E01056g   4.732307e-09    0.000000 -4.732307e-09     0.000000
Q6C8R8  YALI0_D17534g   8.306858e-11    0.000000 -8.306858e-11     0.000000
...               ...            ...         ...           ...          ...
Q6C326  YALI0_F03179g   1.460908e-04    0.000127 -1.928124e-05     0.868019
Q6C2V6  YALI0_F04774g   1.460908e-04    0.000127 -1.928124e-05     0.868019
Q6CFH9  YALI0_B06831g   1.460908e-04    0.000127 -1.928124e-05     0.868019
Q6C877  YALI0_D22022g   1.460908e-04    0.000127 -1.928124e-05     0.868019
Q6C4K5  YALI0_E26004g   2.564441e-02    0.030103  4.458712e-03     1.173867

[238 rows x 5 columns]


We can now sort and take a look at the top 20 of enzymes that:

* Increased their absolute usage the most:

In [25]:
usage_df = usage_df.sort_values(by=['abs_changes'])
print(usage_df.tail(n=20).iloc[::-1])

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6C4K5  YALI0_E26004g   2.564441e-02  3.010312e-02  4.458712e-03     1.173867
Q6CFH4  YALI0_B06941g   4.716381e-05  5.211255e-05  4.948746e-06     1.104927
Q6C2W9  YALI0_F04444g   4.288555e-06  5.324043e-06  1.035489e-06     1.241454
P34229  YALI0_B15059g   2.369197e-07  1.117232e-06  8.803125e-07     4.715658
Q6CEH4  YALI0_B15598g   3.240830e-06  3.774704e-06  5.338739e-07     1.164734
Q6CG57  YALI0_B00704g   3.615185e-06  3.997173e-06  3.819873e-07     1.105662
Q6C6T4  YALI0_E06479g   1.460020e-06  1.758435e-06  2.984155e-07     1.204391
Q6C9L5  YALI0_D10131g   7.293276e-07  9.233388e-07  1.940111e-07     1.266014
Q6C0A1  YALI0_F26521g   0.000000e+00  1.461931e-07  1.461931e-07          inf
Q6BZZ9  YALI0_F29337g   2.799737e-06  2.882254e-06  8.251687e-08     1.029473
Q6C1T4  YALI0_F13541g   8.094064e-07  8.881473e-07  7.874093e-08     1.097282
P30614  YALI0_F09185g   1.518052e-06  1.565965e-06  4.791287e-08

* Decreased their absolute usage the most:

In [26]:
print(usage_df.head(n=20))

                 gene  biomass_usage  test_usage  abs_changes  rel_changes
Q6C968  YALI0_D13596g       0.000173    0.000119    -0.000055     0.684986
Q6C5V2  YALI0_E14949g       0.000075    0.000048    -0.000027     0.639825
Q6C877  YALI0_D22022g       0.000146    0.000127    -0.000019     0.868019
Q6CFH9  YALI0_B06831g       0.000146    0.000127    -0.000019     0.868019
Q6C2V6  YALI0_F04774g       0.000146    0.000127    -0.000019     0.868019
Q6C326  YALI0_F03179g       0.000146    0.000127    -0.000019     0.868019
Q6C338  YALI0_F02893g       0.000146    0.000127    -0.000019     0.868019
Q6C105  YALI0_F20306g       0.000146    0.000127    -0.000019     0.868019
Q6CFT7  YALI0_B03982g       0.000146    0.000127    -0.000019     0.868019
Q6C9E6  YALI0_D11814g       0.000146    0.000127    -0.000019     0.868019
B5FVG3  YALI0_E32164g       0.000146    0.000127    -0.000019     0.868019
Q6C9B1  YALI0_D12584g       0.000146    0.000127    -0.000019     0.868019
Q6C8S0  YALI0_D17490g    

* Increased their relative usage the most:

In [27]:
usage_df = usage_df.sort_values(by=['rel_changes'])
print(usage_df.tail(n=20).iloc[::-1])

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
Q6C0A1  YALI0_F26521g   0.000000e+00  1.461931e-07  1.461931e-07          inf
Q6C138  YALI0_F19514g   0.000000e+00  3.081273e-08  3.081273e-08          inf
Q9UVF4  YALI0_B02948g   5.908207e-11  2.713734e-09  2.654652e-09    45.931591
Q6CC91  YALI0_C11407g   6.581142e-11  3.103468e-10  2.445354e-10     4.715699
Q6C416  YALI0_E30591g   6.581142e-11  3.103468e-10  2.445354e-10     4.715699
P34229  YALI0_B15059g   2.369197e-07  1.117232e-06  8.803125e-07     4.715658
Q6C2Q5  YALI0_F05962g   1.407104e-08  1.791223e-08  3.841190e-09     1.272986
Q6C9L5  YALI0_D10131g   7.293276e-07  9.233388e-07  1.940111e-07     1.266014
Q6C2W9  YALI0_F04444g   4.288555e-06  5.324043e-06  1.035489e-06     1.241454
Q6C6T4  YALI0_E06479g   1.460020e-06  1.758435e-06  2.984155e-07     1.204391
Q6CC71  YALI0_C11880g   1.094272e-07  1.317932e-07  2.236598e-08     1.204391
Q6C4K5  YALI0_E26004g   2.564441e-02  3.010312e-02  4.458712e-03

* Decreased their relative usage the most:

In [28]:
print(usage_df.head(n=20))

                 gene  biomass_usage    test_usage   abs_changes  rel_changes
F2Z650  YALI0_E01056g   4.732307e-09  0.000000e+00 -4.732307e-09     0.000000
Q6CG81  YALI0_A21417g   6.945781e-08  0.000000e+00 -6.945781e-08     0.000000
Q6C8R8  YALI0_D17534g   8.306858e-11  0.000000e+00 -8.306858e-11     0.000000
Q6CA97  YALI0_D04741g   1.907871e-07  0.000000e+00 -1.907871e-07     0.000000
Q6C4S9  YALI0_E24013g   1.907871e-07  0.000000e+00 -1.907871e-07     0.000000
Q6C0W4  YALI0_F21197g   8.036518e-07  1.226920e-07 -6.809598e-07     0.152668
Q6CF46  YALI0_B10340g   8.041738e-07  1.231097e-07 -6.810642e-07     0.153088
Q6C2I1  YALI0_F07711g   2.894179e-08  1.166083e-08 -1.728097e-08     0.402906
Q6C3M8  YALI0_E33517g   1.734581e-07  7.171424e-08 -1.017439e-07     0.413438
Q6C5L8  YALI0_E16929g   1.734581e-07  7.171424e-08 -1.017439e-07     0.413438
Q6CA33  YALI0_D06303g   3.165788e-06  1.628464e-06 -1.537324e-06     0.514395
Q6C340  YALI0_F02849g   1.065339e-07  5.623237e-08 -5.030157e-08