# Import Packages

In [1]:
from pymodulon.core import IcaData
from pymodulon.plotting import *
from pymodulon.io import load_json_model, save_to_json

In [2]:
pd.set_option('display.max_rows', 300)

# Load IcaData object

In [3]:
ica_data = load_json_model('../data/precise2/precise2.json.gz')

In [4]:
imod_table = ica_data.imodulon_table

In [5]:
imod_table.head()

Unnamed: 0,regulator,pvalue,qvalue,recall,precision,f1score,TP,regulon_size,imodulon_size,n_regs,single_gene,functional_category,system_category,enrichment_category,exp_var,trn_enrich_params,note
e14 Removal,,,,,,,,,13,,,Prophage,ALE Effects,Genomic,0.348726,,
SG_yzfA,,,,,,,,,3,,1.0,Unknown,Single Gene,Technical,0.053012,,
SG_ytiD,,,,,,,,,1,,1.0,Unknown,Single Gene,Technical,0.047134,,
Membrane-1,,,,,,,,,81,,,Membrane Homeostasis,Stress Responses,Biological,0.971942,,Somewhat broad annotation
CysB-1,CysB,0.0,1.98e-07,0.411765,0.225806,0.291667,7.0,17.0,31,1.0,,Amino Acid Metabolism,Metabolism,Regulatory,0.964488,,


In [6]:
reg_counts = []
for iM in imod_table.index:
    reg_count = len([name for name in ica_data.view_imodulon(iM).gene_name if name.lower() in ica_data.trn.regulator.str.lower().to_list()])
    reg_counts.append(reg_count)
    

imod_table_reg = imod_table.copy()
imod_table_reg["Number of regulator genes"] = reg_counts
imod_table_reg = imod_table_reg[imod_table_reg.enrichment_category == 'Regulatory']

imod_table_reg

Unnamed: 0,regulator,pvalue,qvalue,recall,precision,f1score,TP,regulon_size,imodulon_size,n_regs,single_gene,functional_category,system_category,enrichment_category,exp_var,trn_enrich_params,note,Number of regulator genes
CysB-1,CysB,0.0,1.98e-07,0.411765,0.225806,0.291667,7.0,17.0,31,1.0,,Amino Acid Metabolism,Metabolism,Regulatory,0.964488,,,2
CueR/CusR/HprR,CueR/CusR/HprR,6.09e-22,4.64e-16,0.888889,0.727273,0.8,8.0,9.0,11,3.0,,Metal Homeostasis,Two-Component Systems,Regulatory,0.795047,,,2
YieP,YieP,2.7200000000000003e-17,0.0,1.0,0.6,0.75,6.0,6.0,10,1.0,,Membrane Homeostasis,Stress Responses,Regulatory,0.098174,,,2
PhoB-1,PhoB,1.1900000000000001e-17,0.0,0.191489,0.9,0.315789,9.0,47.0,10,1.0,,Phosphate Metabolism,Two-Component Systems,Regulatory,0.103377,,,2
Curli,BasR+Cra+Crp+CsgD+IHF+MlrA+OmpR+ppGpp+RcdA+Cpx...,0.0,,1.0,1.0,1.0,4.0,4.0,4,16.0,,Extracellular Structures,Stress Responses,Regulatory,0.440961,used compute_regulon_enrichment,,1
Fnr-1,Fnr,7.5e-24,2.84e-19,0.207921,0.488372,0.291667,21.0,101.0,43,1.0,,Anaerobiosis,Energy Production,Regulatory,1.230918,,,0
TdcAR/CadC,TdcA/TdcR/CadC,1.93e-25,1.47e-19,1.0,0.75,0.857143,9.0,9.0,12,3.0,,Amino Acid Metabolism,Energy Production,Regulatory,0.510933,max_regs=3,,1
Fimbriae,Lrp+H-NS,1.71e-19,0.0,0.583333,1.0,0.736842,7.0,12.0,7,2.0,,Extracellular Structures,Stress Responses,Regulatory,0.370398,,,0
UTP,PurR,1.4e-08,7e-06,0.15625,0.416667,0.227273,5.0,32.0,12,1.0,,Nucleotide Metabolism,Metabolism,Regulatory,1.878394,"max_regs=1, evidence=[0, 1, 2]",,1
Fnr+NarL,Fnr+NarL,3.24e-18,0.0,0.185185,0.833333,0.30303,10.0,54.0,12,2.0,,Nitrogen Metabolism,Energy Production,Regulatory,0.251103,,,0


In [7]:
tcs_mean = imod_table_reg[imod_table_reg.system_category == 'Two-Component Systems']['Number of regulator genes'].mean()
tcs_median = imod_table_reg[imod_table_reg.system_category == 'Two-Component Systems']['Number of regulator genes'].median()

print('TCS mean number of regulator genes in iM:', tcs_mean)
print('TCS median number of regulator genes in iM:', tcs_median)

TCS mean number of regulator genes in iM: 1.0526315789473684
TCS median number of regulator genes in iM: 1.0


In [8]:
other_mean = imod_table_reg[imod_table_reg.system_category != 'Two-Component Systems']['Number of regulator genes'].mean()
other_median = imod_table_reg[imod_table_reg.system_category != 'Two-Component Systems']['Number of regulator genes'].median()

print('Other regulator mean number of regulator genes in iM:', other_mean)
print('Other regulator median number of regulator genes in iM:', other_median)

Other regulator mean number of regulator genes in iM: 1.092783505154639
Other regulator median number of regulator genes in iM: 1.0


In [9]:
deg_TF = pd.read_csv('data/TF-KO_DEG_regulon.csv', index_col=0)
deg_TF.head()

Unnamed: 0,TF_gene,lowercase,TF,regulon_count,control_name,KO_name,gene,category,deg_count
0,fur,fur,Fur,132,fur_wt_dpd,fur_delfur_dpd,fur,Other Regulators,42.0
1,fur,fur,Fur,132,fur_wt_fe,fur_delfur_fe2,fur,Other Regulators,160.0
2,gadX,gadx,GadX,34,acid_wt_ph5,acid_delgadx_ph5,gadX,Other Regulators,3.0
3,gadW,gadw,GadW,15,acid_wt_ph5,acid_delgadw_ph5,gadW,Other Regulators,28.0
4,gadE,gade,GadE,32,acid_wt_ph5,acid_delgade_ph5,gadE,Other Regulators,10.0


In [10]:
reg_list = imod_table.regulator.dropna().to_list()

new_list =[]
for i in reg_list:
    new_list+=i.split("+")
    new_list+=i.split("/")
    
new_list = list(set(new_list))

In [11]:
regs = [reg for reg in new_list if reg.lower() in deg_TF.lowercase.to_list()]

In [12]:
reg_counts = []
for reg in regs:
#     reg_count = len([name for name in ica_data.view_imodulon(iM).gene_name if name.lower() in ica_data.trn.regulator.str.lower().to_list()])
    regulon_genes = list(set(ica_data.trn[ica_data.trn.regulator == reg].gene_name.str.lower().to_list()))
    reg_count = len([name for name in regulon_genes if name.lower() in ica_data.trn.regulator.str.lower().to_list()])
    reg_counts.append(reg_count)
    

In [13]:
df = pd.DataFrame([regs, reg_counts]).T.merge(deg_TF[['TF','category']], how= 'left', left_on=0, right_on='TF').drop_duplicates()
df

Unnamed: 0,0,1,TF,category
0,GadW,3,GadW,Other Regulators
1,Crp,61,Crp,Global Regulators
3,Fur,9,Fur,Other Regulators
5,CpxR,7,CpxR,TCS Response Regulators
7,BaeR,1,BaeR,TCS Response Regulators
9,BtsR,2,BtsR,TCS Response Regulators
12,Nac,43,Nac,Other Regulators
15,GadX,4,GadX,Other Regulators
16,GadE,4,GadE,Other Regulators
17,KdpE,0,KdpE,TCS Response Regulators


In [14]:
tcs_mean = df[df.category == 'TCS Response Regulators'][1].mean()
tcs_median = df[df.category == 'TCS Response Regulators'][1].median()

print('TCS mean number of regulator genes in regulon:', tcs_mean)
print('TCS median number of regulator genes in regulon:', tcs_median)

TCS mean number of regulator genes in regulon: 2.75
TCS median number of regulator genes in regulon: 1.0


In [15]:
other_mean = df[df.category == 'Other Regulators'][1].mean()
other_median = df[df.category == 'Other Regulators'][1].median()

print('Other regulator mean number of regulator genes in regulon:', other_mean)
print('Other regulator median number of regulator genes in regulon:', other_median)

Other regulator mean number of regulator genes in regulon: 11.0
Other regulator median number of regulator genes in regulon: 4.0
