# Notebook to inspect the reprocessed 4/30/21

### Imports

In [1]:
import cobra
import pandas as pd

### Load model

In [2]:
model = cobra.io.read_sbml_model("../GSMs/Ropacus_annotated_curated_with_phenol_custom_biomass.xml")
model_genes = [g.id for g in model.genes]
print(f'There are {len(model_genes)} genes in the model (WP)')
model_genes[:5]

There are 1576 genes in the model (WP)


['WP_187300246_1',
 'WP_025432775_1',
 'WP_005248578_1',
 'WP_025433613_1',
 'WP_005249637_1']

### Load Yoneda reprocessed transcriptomic data

In [44]:
df = pd.read_table('../winston_data/yoneda/yoneda_reprocess_CPM_melted.txt', delim_whitespace=True)
df.head()

Unnamed: 0,Strain,variable,Condition,Replicate,value,Units
1,3A,WP_000104864.1,1g/L_glucose,1,0.0,CPM
2,3A,WP_000104864.1,1g/L_glucose,2,0.0,CPM
3,3A,WP_000104864.1,1g/L_glucose,3,0.0,CPM
4,3B,WP_000104864.1,0.75g/L_phenol,1,0.0,CPM
5,3B,WP_000104864.1,0.75g/L_phenol,2,0.0,CPM


In [51]:
transcriptomic_genes = set([gene.replace('.', '_') for gene in df.variable])
print(f'There are {len(transcriptomic_genes)} genes in the transcriptomic data set')
list(transcriptomic_genes)[:5]

There are 8007 genes in the transcriptomic data set


['WP_025432362_1',
 'WP_025432132_1',
 'WP_005246623_1',
 'WP_005250821_1',
 'WP_037182412_1']

### Find number of  genes in model AND transcriptomic data set

In [52]:
overlapping_genes = [g for g in model_genes if g in transcriptomic_genes]
print(f'There are {len(overlapping_genes)} genes in both data sets')

There are 1575 genes in both data sets


### Check the one gene that is not in both data sets

In [41]:
for g in model_genes:
    if g not in transcriptomic_genes:
        print(g)

spontaneous


### Load Henson Data

In [43]:
df = pd.read_table('../winston_data/henson/henson_reprocess_CPM_melted.txt', delim_whitespace=True)
df.head()

Unnamed: 0,Strain,Units,Media,Time.point,variable,Replicate,value
1,WT,CPM,Mixture,t=1,WP_000104864.1,1,0.0
2,WT,CPM,Mixture,t=1,WP_000104864.1,2,0.0
3,WT,CPM,Mixture,t=1,WP_000104864.1,3,0.0
4,WT,CPM,Mixture,t=2,WP_000104864.1,1,0.0
5,WT,CPM,Mixture,t=2,WP_000104864.1,2,0.0


### Get transcriptomics genes

In [54]:
transcriptomic_genes = set([gene.replace('.', '_') for gene in df.variable])
print(f'There are {len(transcriptomic_genes)} genes in the transcriptomic data set')
list(transcriptomic_genes)[:5]

There are 8007 genes in the transcriptomic data set


['WP_025432362_1',
 'WP_025432132_1',
 'WP_005246623_1',
 'WP_005250821_1',
 'WP_037182412_1']

### Find overlapping genes

In [55]:
overlapping_genes = [g for g in model_genes if g in transcriptomic_genes]
print(f'There are {len(overlapping_genes)} genes in both data sets')

There are 1575 genes in both data sets
