In [1]:
import pandas as pd
import re
import os
import numpy as np

In [2]:
from models import rt

exchanges = set(i.id for i in rt.exchanges)

In [3]:
biolog_map = pd.read_csv('plate_to_bigg.csv', index_col=False)
biolog_map

Unnamed: 0,plate,ex_rxn,compound_name,exchange
0,PM1-A10,EX_tre(e),D-Trehalose,EX_tre_e
1,PM1-A11,EX_man(e),D-Mannose,EX_man_e
2,PM1-A12,EX_galt(e),Dulcitol,EX_galt_e
3,PM1-A2,EX_arab_L(e),L-Arabinose,EX_arab__L_e
4,PM1-A3,EX_acgam(e),N-Acetyl-DGlucosamine,EX_acgam_e
...,...,...,...,...
153,PM4-D6,EX_pser_L(e),O-Phospho-L-Serine,EX_pser__L_e
154,PM4-D7,EX_thrp(e),O-Phospho-L-Threonine,EX_thrp_e
155,PM4-D9,EX_3ump(e),Uridine-3’-monophosphate,EX_3ump_e
156,PM4-E1,EX_tyrp(e),O-Phospho-D-Tyrosine,EX_tyrp_e


In [4]:
fname = 'data_biolog_rhodo/Table_2_Multi-Omics Driven Metabolic Network Reconstruction and Analysis of Lignocellulosic Carbon Utilization in Rhodosporidium toruloides.XLSX'

bl_data = pd.read_excel(fname, sheet_name='Biolog')
bl_data['growth'] = bl_data['Average > 0.1 and all replicates greater than negative control']

In [5]:
bl_data['plate'] = bl_data.PlateType + '-' + bl_data.Well
bl_data.growth = bl_data.growth.astype(bool)
bl_data.head()

Unnamed: 0,PlateType,Experiment,Well,Compound,Average ABS (590nm - 750nm),All replicates > negtive control,Average > 0.1,Average > 0.1 and all replicates greater than negative control,pVal (T-test),growth,plate
0,PM1,Carbon,A1,Negative Control,0.0864,False,False,False,0.5,False,PM1-A1
1,PM1,Carbon,A2,L-Arabinose,0.344767,True,True,True,8.2e-05,True,PM1-A2
2,PM1,Carbon,A3,N-Acetyl-DGlucosamine,0.068867,False,False,False,0.03061,False,PM1-A3
3,PM1,Carbon,A4,D-Saccharic Acid,0.0712,False,False,False,0.015131,False,PM1-A4
4,PM1,Carbon,A5,Succinic Acid,0.1112,True,True,True,0.081596,True,PM1-A5


In [6]:
carbon_plate = bl_data.loc[bl_data.PlateType.isin(['PM1', 'PM2'])].copy()
nitrogren_plate = bl_data.loc[bl_data.PlateType.isin(['PM3B'])].copy()
nitrogren_plate.plate = nitrogren_plate.plate.str.replace('B', '')
                           

In [7]:
nitrogren_plate[['plate', 'Compound', 'growth']]

Unnamed: 0,plate,Compound,growth
192,PM3-A1,Negative Control,False
193,PM3-A2,Ammonia,True
194,PM3-A3,Nitrite,True
195,PM3-A4,Nitrate,True
196,PM3-A5,Urea,True
...,...,...,...
283,PM3-H8,Gly-Asn,True
284,PM3-H9,Gly-Gln,True
285,PM3-H10,Gly-Glu,True
286,PM3-H11,Gly-Met,True


In [8]:
def merge_with_plate(plate):
    merged = pd.concat(
        [plate.set_index('plate'), biolog_map.set_index('plate')],
        axis=1,
        ignore_index=False
    )
    merged.dropna(subset=['growth', 'ex_rxn'], inplace=True)

    merged = merged.reset_index()
    merged#[['exchange', 'growth']]
    merged = subset_to_in_model(merged)[['exchange', 'growth']]
    merged['uptake'] = 10
    return merged

def subset_to_in_model(plate, model_ex=exchanges):
    return plate.loc[plate.exchange.isin(model_ex)]

carbon = merge_with_plate(carbon_plate)
nit = merge_with_plate(nitrogren_plate)
carbon.shape

(70, 3)

In [9]:
nit.shape

(37, 3)

In [10]:
carbon.to_csv('pm1_pm2_biolog.csv', index=False)
nit.to_csv('pm3b_biolog.csv', index=False)

## Gene ess. parsing

In [10]:
genes = set(i.id for i in rt.genes)
genes

{'RT_13286',
 'RT_13167',
 'RT_11266',
 'RT_9629',
 'RT_10811',
 'RT_12639',
 'RT_8376',
 'RT_14229',
 'RT_12966',
 'RT_13824',
 'RT_9210',
 'RT_16280',
 'RT_10212',
 'RT_16046',
 'RT_11876',
 'RT_15504',
 'RT_15994',
 'RT_10848',
 'RT_10428',
 'RT_12545',
 'RT_8972',
 'RT_15218',
 'RT_14541',
 'RT_10583',
 'RT_14880',
 'RT_10452',
 'RT_9838',
 'RT_14126',
 'RT_9938',
 'RT_16799',
 'RT_9176',
 'RT_12269',
 'RT_NAD3',
 'RT_8845',
 'RT_16460',
 'RT_11178',
 'RT_15156',
 'RT_14023',
 'RT_11682',
 'RT_9244',
 'RT_15545',
 'RT_15874',
 'RT_11290',
 'RT_14849',
 'RT_11343',
 'RT_16228',
 'RT_15248',
 'RT_15358',
 'RT_15240',
 'RT_12704',
 'RT_NAD2',
 'RT_12421',
 'RT_13161',
 'RT_11114',
 'RT_15179',
 'RT_14418',
 'RT_11160',
 'RT_9664',
 'RT_9857',
 'RT_9728',
 'RT_11328',
 'RT_11469',
 'RT_10374',
 'RT_11257',
 'RT_16208',
 'RT_15336',
 'RT_9037',
 'RT_16284',
 'RT_9856',
 'RT_12221',
 'RT_11075',
 'RT_10475',
 'RT_13562',
 'RT_10961',
 'RT_10206',
 'RT_10677',
 'RT_14258',
 'RT_11618',
 '

In [20]:
ge_data = pd.read_excel(fname, sheet_name='RB-TDNA Seq')
ge_data.RTO4_ID = ge_data.RTO4_ID.astype(str)
ge_data['genename'] = 'RT_'+ge_data.RTO4_ID.astype(str)
ge_data[['genename', 'M9_Glucose']]

Unnamed: 0,genename,M9_Glucose
0,RT_10000,
1,RT_10001,0.470687
2,RT_10002,0.283788
3,RT_10003,-0.030815
4,RT_10004,-0.071440
...,...,...
8339,RT_9995,-0.808815
8340,RT_9996,
8341,RT_9997,0.128856
8342,RT_9998,0.360599


In [23]:
genes_in_data = set(ge_data.genename.values)
genes_in_data

{'RT_16373',
 'RT_12873',
 'RT_15077',
 'RT_12208',
 'RT_11637',
 'RT_13222',
 'RT_9707',
 'RT_15983',
 'RT_16413',
 'RT_14229',
 'RT_12966',
 'RT_11238',
 'RT_10328',
 'RT_12250',
 'RT_14126',
 'RT_8685',
 'RT_9788',
 'RT_11765',
 'RT_11671',
 'RT_14227',
 'RT_15732',
 'RT_14160',
 'RT_13977',
 'RT_10433',
 'RT_16003',
 'RT_11385',
 'RT_13161',
 'RT_11114',
 'RT_14418',
 'RT_15412',
 'RT_12809',
 'RT_16063',
 'RT_13916',
 'RT_9037',
 'RT_16284',
 'RT_9039',
 'RT_15062',
 'RT_12028',
 'RT_11015',
 'RT_12903',
 'RT_10356',
 'RT_14082',
 'RT_12236',
 'RT_13404',
 'RT_10268',
 'RT_11837',
 'RT_13919',
 'RT_8639',
 'RT_8375',
 'RT_13359',
 'RT_10999',
 'RT_8994',
 'RT_8812',
 'RT_12523',
 'RT_15865',
 'RT_9989',
 'RT_8785',
 'RT_12600',
 'RT_13797',
 'RT_10874',
 'RT_10675',
 'RT_14373',
 'RT_15708',
 'RT_8729',
 'RT_12008',
 'RT_16698',
 'RT_14920',
 'RT_8725',
 'RT_14994',
 'RT_15553',
 'RT_12609',
 'RT_16595',
 'RT_8788',
 'RT_9847',
 'RT_9503',
 'RT_12176',
 'RT_16248',
 'RT_16833',
 '

In [31]:
print(f'genes in model {len(genes)}')
print(f'genes in data {len(genes_in_data)}')
print(f'model genes not in data {len(genes.difference(genes_in_data))}')
print(f'data genes not in model {len(genes_in_data.difference(genes))}')

genes in model 1142
genes in data 8344
model genes not in data 31
data genes not in model 7233


In [32]:
ge_in_our_data = ge_data.loc[ge_data.genename.isin(genes)].copy()
ge_in_our_data

Unnamed: 0,RTO4_ID,M9_Glucose,YNB Glucose,YNB Glucose plus Arginine,YNB Glucose plus Methionine,YNB Glucose plus Dropout Complete,Fitness During Lipid Mobilization,YNB Oleic Acid,YNB Cellobiose,YNB_CSM_KPO4 Glucose,...,YNB_PO4_L-arabitol,YNB_PO4_xylitol,YNB_PO4_D-ribulose,YNB_PO4_D-xylulose,YNB_PO4_CSM_Galactose,YNB_PO4_CSM_Lactate,YNB_PO4_CSM_Valine,YNB_PO4_CSM_Leucine,YNB_PO4_CSM_Phenylalanine,genename
0,10000,,,,,,,,,,...,,,,,,,,,,RT_10000
7,10007,,,,,,,,,,...,,,,,,,,,,RT_10007
10,10010,-0.130360,-0.558565,-0.332740,-0.260252,-0.383313,-0.030872,-0.611898,-0.097438,-0.035705,...,-0.732452,-0.836823,-0.490688,-0.534801,-0.035402,-0.427705,-0.143231,-0.240726,-0.376989,RT_10010
12,10012,0.120928,-0.056685,-0.143451,-0.205516,-0.240177,-0.282986,-0.039371,0.021825,-0.185444,...,-0.185901,-0.347117,-0.043478,0.049640,-0.330369,-0.571357,-0.390520,-4.921371,-0.586259,RT_10012
16,10017,-0.117902,0.001326,0.216430,0.169223,-0.072431,0.382748,0.455964,0.161420,0.012681,...,0.073421,0.076093,-0.266397,0.038548,0.024525,0.112220,-0.159979,-0.229817,-0.086100,RT_10017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8306,9962,-0.102349,0.101140,0.044222,0.206130,0.142080,0.189212,-0.012657,-0.097314,0.004740,...,0.039896,0.240334,0.321752,0.089398,-0.254636,-0.723065,-0.622660,-0.047665,0.811095,RT_9962
8323,9979,,,,,,,,,,...,,,,,,,,,,RT_9979
8325,9981,-0.182969,-1.210120,-1.083946,-0.985173,-0.844407,-0.952909,-0.652234,0.200798,0.016013,...,-0.450650,-0.102575,-0.311876,-0.298921,-0.471137,-0.915556,-0.221405,-0.589374,-0.123179,RT_9981
8334,9990,-0.272603,-0.184758,0.337242,0.525320,-0.020653,0.544806,-0.110552,-0.359997,0.187456,...,-4.105096,-4.463727,-0.667114,-4.072184,-0.077310,0.459774,0.053585,0.211604,-0.229242,RT_9990


In [33]:
ge_in_our_data.dropna(subset=['M9_Glucose'], inplace=True)
ge_in_our_data

Unnamed: 0,RTO4_ID,M9_Glucose,YNB Glucose,YNB Glucose plus Arginine,YNB Glucose plus Methionine,YNB Glucose plus Dropout Complete,Fitness During Lipid Mobilization,YNB Oleic Acid,YNB Cellobiose,YNB_CSM_KPO4 Glucose,...,YNB_PO4_L-arabitol,YNB_PO4_xylitol,YNB_PO4_D-ribulose,YNB_PO4_D-xylulose,YNB_PO4_CSM_Galactose,YNB_PO4_CSM_Lactate,YNB_PO4_CSM_Valine,YNB_PO4_CSM_Leucine,YNB_PO4_CSM_Phenylalanine,genename
10,10010,-0.130360,-0.558565,-0.332740,-0.260252,-0.383313,-0.030872,-0.611898,-0.097438,-0.035705,...,-0.732452,-0.836823,-0.490688,-0.534801,-0.035402,-0.427705,-0.143231,-0.240726,-0.376989,RT_10010
12,10012,0.120928,-0.056685,-0.143451,-0.205516,-0.240177,-0.282986,-0.039371,0.021825,-0.185444,...,-0.185901,-0.347117,-0.043478,0.049640,-0.330369,-0.571357,-0.390520,-4.921371,-0.586259,RT_10012
16,10017,-0.117902,0.001326,0.216430,0.169223,-0.072431,0.382748,0.455964,0.161420,0.012681,...,0.073421,0.076093,-0.266397,0.038548,0.024525,0.112220,-0.159979,-0.229817,-0.086100,RT_10017
28,10029,-0.020871,0.052095,0.224890,0.241546,-0.067308,0.622053,-0.412340,0.096512,-0.167850,...,-0.759319,-0.288770,-0.648312,0.551737,-0.069712,-0.414340,0.039014,-0.424238,-1.523899,RT_10029
37,10038,-0.365987,-0.198609,-0.156681,0.504791,-0.298004,-0.330527,-0.754754,-0.425817,-0.064758,...,0.157148,0.081089,-0.082244,-0.037806,-0.397060,-0.226573,-0.212943,-0.605702,-0.036510,RT_10038
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8303,9959,-0.496589,-0.049252,-0.280461,-0.192801,-0.186293,0.094831,0.158215,-0.052001,-0.190215,...,-0.449143,-0.391230,-0.526536,-0.692617,-0.658693,-0.382288,-0.727372,-0.582871,0.019887,RT_9959
8306,9962,-0.102349,0.101140,0.044222,0.206130,0.142080,0.189212,-0.012657,-0.097314,0.004740,...,0.039896,0.240334,0.321752,0.089398,-0.254636,-0.723065,-0.622660,-0.047665,0.811095,RT_9962
8325,9981,-0.182969,-1.210120,-1.083946,-0.985173,-0.844407,-0.952909,-0.652234,0.200798,0.016013,...,-0.450650,-0.102575,-0.311876,-0.298921,-0.471137,-0.915556,-0.221405,-0.589374,-0.123179,RT_9981
8334,9990,-0.272603,-0.184758,0.337242,0.525320,-0.020653,0.544806,-0.110552,-0.359997,0.187456,...,-4.105096,-4.463727,-0.667114,-4.072184,-0.077310,0.459774,0.053585,0.211604,-0.229242,RT_9990


In [35]:
ge_in_our_data.loc[ge_in_our_data.M9_Glucose>.5, 'essential'] = True
ge_in_our_data.loc[ge_in_our_data.M9_Glucose<.5, 'essential'] = False
ge_in_our_data['comment'] = ''

In [36]:
ge_for_export = ge_in_our_data[['genename', 'essential','comment']].copy()
ge_for_export

Unnamed: 0,genename,essential,comment
10,RT_10010,False,
12,RT_10012,False,
16,RT_10017,False,
28,RT_10029,False,
37,RT_10038,False,
...,...,...,...
8303,RT_9959,False,
8306,RT_9962,False,
8325,RT_9981,False,
8334,RT_9990,False,


In [37]:
import csv
ge_for_export.to_csv('ge_m9_glucose.csv', index=False, quoting=csv.QUOTE_ALL)