In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
os.chdir(os.path.expanduser('~/wcEcoli/out/saved_PDRs/'))
import plotly.graph_objects as go
import glob
import plotly.express as px


# Compare CLClimNE differences to CLNE

plot the rates that were assigned to Clim rates in the CLClimNE model and compare them to the rate those proteins were assigned to in the CLNE model

In [121]:
# import the Clim rates from the CLClimNE model
current_sequence = "CLClimNE"
CLIM_pth = os.path.expanduser('~/wcEcoli/out/saved_PDRs/' + current_sequence + '/')
df_Clim = pd.read_csv(CLIM_pth + 'Clim_rate_constants.csv')
new_name = current_sequence + ' Clim rate constant (s^-1)'
df_Clim.rename(columns={'Rate Constant': new_name}, inplace=True)

# import the rates from the other models, CLMLNE files:
CLMLNE_full = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLMLNE_files/CLMLNE_full.xlsx')
CLMLNE_ML = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLMLNE_files/CLMLNE_ML_rates_only.xlsx')
CLMLNE_CL = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLMLNE_files/CLMLNE_CL_rates_only.xlsx')
CLMLNE_NE = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLMLNE_files/CLMLNE_NE_rates_only.xlsx')

# CLNE files: 
df_CLNE = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLNE_files/CLNE_full.xlsx')
CLNE_CL = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLNE_files/CLNE_CL_rates_only.xlsx')
CLNE_NE = pd.read_excel('~/wcEcoli/models/ecoli/analysis/mia_local_notebooks/C_limited_PDR_analyses/CLNE_files/CLNE_NE_rates_only.xlsx')

In [30]:
# first, merge the two dataframes on the protein names
df = pd.merge(df_Clim, df_CLNE, on='Protein ID', how='inner')

# convert the rates to half life units like this: log(2) / rate / 60 seconds/mins = rate in seconds 
df['Clim half life (min)'] = df[new_name] * 60 * (1/np.log(2)) 
df['Clim half life (min)'] = 1/ df['Clim half life (min)'] # units of mins 

df['NE half life (min)'] = df['Macklin rate constant (s^-1)'] * 60 * (1/np.log(2))
df['NE half life (min)'] = 1/ df['NE half life (min)'] # units of mins
df

Unnamed: 0,Protein ID,CLClimNE Clim rate constant (s^-1),Macklin rate constant (s^-1),Clim half life (min),NE half life (min)
0,1-PFK-MONOMER,0.000016,0.000019,720.0,598.572695
1,2-ISOPROPYLMALATESYN-MONOMER,0.000052,0.000019,222.8,598.572695
2,2-OCTAPRENYL-METHOXY-BENZOQ-METH-MONOMER,0.000041,0.000019,280.5,598.572695
3,2-OCTAPRENYLPHENOL-HYDROX-MONOMER,0.000016,0.000019,720.0,598.572695
4,3-CH3-2-OXOBUTANOATE-OH-CH3-XFER-MONOMER,0.000016,0.000019,720.0,598.572695
...,...,...,...,...,...
1907,YRBF-MONOMER,0.000016,0.000019,720.0,598.572695
1908,YTFQ-MONOMER,0.000016,0.000019,718.3,598.572695
1909,YTFR-MONOMER,0.000018,0.000019,640.3,598.572695
1910,ZNUA-MONOMER,0.000035,0.000019,332.7,598.572695


In [31]:
# make a new data frame with all proteins that have a half life of greater than 10 mins in the NE half life (min) column 
df_10hrs = df[df['NE half life (min)'] > 10]
print(np.shape(df_10hrs))

# make a plotly scatter plot of the protein ID on the x axis and the half life on the y axis. Arrange the proteins by their half life in the Clim model. Do not list the protein names on the x axis
df_10hrs = df_10hrs.sort_values(by='Clim half life (min)')
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_10hrs['Protein ID'], y=df_10hrs['Clim half life (min)'], mode='markers', name='Clim half life (min)', yaxis="y1"))

# add a second y axis
fig.add_trace(go.Scatter(x=df_10hrs['Protein ID'], y=(df_10hrs['Clim half life (min)']/60), mode='markers', name='Clim half life (min)', yaxis="y2", marker=dict(color="lightseagreen")))
fig.update_layout(yaxis2=dict(title='Clim half life (hrs)', overlaying='y', side='right'))    

# Plot Specs
fig.update_traces(marker_size=3)
fig.update_xaxes(visible=False)
fig.update_layout(autosize=False, width=800, height=700, showlegend=False)
fig.update_layout(title='The C-limited (Clim) half life for the 1906 proteins in the ' + current_sequence + ' PDR combo<br> that were assinged to a half life of 10 hours in the 2020 model (CLNE)', xaxis_title='Protein ID', yaxis_title='Clim Half Life (min)')

# todo: note that the first limit is probably the doubling time of 6 hours (3600 mins) 

(1906, 5)


In [13]:
# make a plot of the rates that oritaly had a half life of 2 mins in the CLNE model
df_2mins = df[df['NE half life (min)'] < 10]
df_2mins = df_2mins.sort_values(by='Clim half life (min)')

# make a plotly scatter plot of the protein ID on the x axis and the half life on the y axis. 
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_2mins['Protein ID'], y=df_2mins['Clim half life (min)'], mode='markers', name='Clim half life (min)', yaxis="y1"))

# add a second y axis
fig.add_trace(go.Scatter(x=df_2mins['Protein ID'], y=(df_2mins['Clim half life (min)']/60), mode='markers', name='Clim half life (min)', yaxis="y2", marker=dict(color="lightseagreen")))
fig.update_layout(yaxis2=dict(title='Clim half life (hrs)', overlaying='y', side='right'))    

# plot specs
fig.update_traces(marker_size=10)
fig.update_layout(autosize=False, width=600, height=600, showlegend=False)
fig.update_layout(title='The C-limited (Clim) half life for the 6 proteins that were assinged <br> to a half life of 2 mins in the 2020 whole cell model (CLNE)', xaxis_title='Protein ID', yaxis_title='NH3 Half Life (min)')

## Compare CLMLNE differences to the C-lim rates

Plot of the CLMLNE half lives (representing the current model's half life values) against those assgined to Clim values in the CLClimNE model.

In [14]:
# create a dataframe with the CLMLNE rates tacked onto the end of the dataframe
big_df = pd.merge(df, CLMLNE_full, on='Protein ID', how='inner')
big_df = big_df.rename(columns={'Rate Constant': 'CLMLNE rate constant (s^-1)'})
big_df = big_df[['Protein ID','Clim half life (min)', 'CLMLNE rate constant (s^-1)']]

# convert the rates to half life units
big_df['CLMLNE half life (min)'] = big_df['CLMLNE rate constant (s^-1)'] * 60 * (1/np.log(2))
big_df['CLMLNE half life (min)'] = 1 / big_df['CLMLNE half life (min)'] # units of mins

# plot CLMLNE half life vs Clim half life
fig = go.Figure()
fig.add_trace(go.Scatter(x=big_df['Clim half life (min)'], y=big_df['CLMLNE half life (min)'], mode='markers', name='CLMLNE half life (min)', hovertext=big_df['Protein ID'], marker_size=3))

# Plot Specs
fig.update_layout(title='The C-limited (Clim) half life plotted against its half life used in<br> the current model (CLMLNE)', yaxis_title='CLMLNE Half Life (min)', xaxis_title='Clim Half Life (min)')

In [15]:
# same plot as above but in hours! 
fig = go.Figure()
fig.add_trace(go.Scatter(x=(big_df['Clim half life (min)'] / 60), y=(big_df['CLMLNE half life (min)']/60), mode='markers', name='CLMLNE half life (min)', hovertext=big_df['Protein ID'], marker_size=3))

# Plot Specs
fig.update_layout(title='The C-limited (Clim) half life plotted against its half life used in<br> the current model (CLMLNE)', yaxis_title='CLMLNE Half Life (hr)', xaxis_title='Clim Half Life (hr)')

same graphs as above but with the sources differentiated by color

In [16]:
# split the colors up by the source of the half life in the CLMLNE model: 

# ML: machine learning half lives: 
big_df_ML = pd.merge(CLMLNE_ML, big_df, on='Protein ID', how='inner')
big_df_ML = big_df_ML.rename(columns={'Rate Constant': 'CLMLNE_ML rate constant (s^-1)'})
# convert to the right units
big_df_ML['CLMLNE_ML half life (min)'] = big_df_ML['CLMLNE_ML rate constant (s^-1)'] * 60 * (1/np.log(2))
big_df_ML['CLMLNE_ML half life (min)'] = 1/ big_df_ML['CLMLNE_ML half life (min)'] # units of mins
# drop the protiens in the ML_df from the big_df
big_df1 = big_df[~big_df['Protein ID'].isin(big_df_ML['Protein ID'])]

# CL: experimentally measured half lives (CL): # note there will not be any here, because the Clim rates are the backbone of the big_df, and since CL proteins are normally assigned to their CL rate no matter what, they will not be found in the Clim rates. # todo: try assigning CL rates to Clim rates! 
CL_values = ["CARBPSYN-SMALL", "CDPDIGLYSYN-MONOMER", "DCUR-MONOMER", "DETHIOBIOTIN-SYN-MONOMER", "EG10743-MONOMER", "EG10863-MONOMER", "GLUTCYSLIG-MONOMER"]
CL_df = big_df1[big_df1['Protein ID'].isin(CL_values)]

# NE: N-end rule half lives (these are all that should be left in the big_df):
big_df_10hrs = big_df1[big_df1['CLMLNE half life (min)'] > 598]  # 9.97621 hrs (the 10 hour conversion is approximate)
big_df_2mins = big_df1[big_df1['CLMLNE half life (min)'] < 3] # 2 mins ish (the actual # is 1.999999)

# plot CLMLNE half life vs Clim half life
fig = go.Figure()
fig.add_trace(go.Scatter(x=big_df_ML['Clim half life (min)'], y=big_df_ML['CLMLNE_ML half life (min)'], mode='markers', name='ML', hovertext=big_df_ML['Protein ID'], marker_size=3), )
fig.add_trace(go.Scatter(x=big_df_2mins['Clim half life (min)'], y=big_df_2mins['CLMLNE half life (min)'], mode='markers', name='NE (2 mins)', hovertext=big_df_2mins['Protein ID'], marker_size=5, ))
fig.add_trace(go.Scatter(x=big_df_10hrs['Clim half life (min)'], y=big_df_10hrs['CLMLNE half life (min)'], mode='markers', name='NE (10 hrs)', hovertext=big_df_10hrs['Protein ID'], marker_size=3))

# Plot Specs
fig.update_layout(title='The C-limited (Clim) half life plotted against its half life used in<br> the current model (CLMLNE)', yaxis_title='CLMLNE Half Life (min)', xaxis_title='Clim Half Life (min)')
fig.update_layout(legend_title_text='PDR Source')


In [17]:
# plot CLMLNE half life vs Clim half life in hours
fig = go.Figure()
fig.add_trace(go.Scatter(x=(big_df_ML['Clim half life (min)']/60), y=(big_df_ML['CLMLNE_ML half life (min)']/60), mode='markers', name='ML', hovertext=big_df_ML['Protein ID'], marker_size=3))
fig.add_trace(go.Scatter(x=(big_df_2mins['Clim half life (min)']/60), y=(big_df_2mins['CLMLNE half life (min)']/60), mode='markers', name='NE (2 mins)', hovertext=big_df_2mins['Protein ID'], marker_size=5))
fig.add_trace(go.Scatter(x=(big_df_10hrs['Clim half life (min)']/60), y=(big_df_10hrs['CLMLNE half life (min)']/60), mode='markers', name='NE (10 hrs)', hovertext=big_df_10hrs['Protein ID'], marker_size=3))

# Plot Specs
fig.update_layout(title='The C-limited (Clim) half life plotted against its half life used in<br> the current model (CLMLNE)', yaxis_title='CLMLNE Half Life (hrs)', xaxis_title='Clim Half Life (hrs)')
fig.update_layout(legend_title_text='PDR Source')

# Plot against full CLNE and CLMLNE models

In [122]:
# convert each data source in the saved PDR data file folders 
def convert_data(df, PDR_source):
    name_s = PDR_source + ' rate constant (s^1)'
    name_m = PDR_source + ' half life (min)'
    df.rename(columns={'Rate Constant': name_s}, inplace=True)
    df[name_m] = df[name_s] * 60 * (1/np.log(2))
    df[name_m] = 1 / df[name_m] # units of mins
    return df

CLMLNE_ML_df = convert_data(CLMLNE_ML, "CLMLNE ML")
CLMLNE_CL_df = convert_data(CLMLNE_CL, "CLMLNE CL")
CLMLNE_NE_df = convert_data(CLMLNE_NE, "CLMLNE NE")
CLNE_CL_df = convert_data(CLNE_CL, "CLNE CL")
CLNE_NE_df = convert_data(CLNE_NE, "CLNE NE")

In [88]:
# construct a dataframe of all the rates in one file: 

# Read all CSV files
csv_files = glob.glob(CLIM_pth + "*.csv")
dataframes = [pd.read_csv(file) for file in csv_files]  # Read each CSV file into a DataFrame
# get the name of each source file
names = [(os.path.basename(file))[:-19] for file in csv_files]
# add the source as a column for each dataframe: 
for i in range(len(dataframes)): 
    df = dataframes[i]
    df["PDR Source"] = names[i]
   
# concatenate all the CSV files:  
concatenated_df = pd.concat(dataframes, ignore_index=True)  # Concatenate  row-wise

# convert the rates to half life units like this: log(2) / rate / 60 seconds/mins = rate in seconds
c_new_name = current_sequence + " rate constant (s^-1)"
c_new_name1 = current_sequence + ' half life (min)'
concatenated_df.rename(columns={'Rate Constant': c_new_name}, inplace=True)
concatenated_df[c_new_name1] = concatenated_df[c_new_name] * 60 * (1/np.log(2)) 
concatenated_df[c_new_name1] = 1 / concatenated_df[c_new_name1] # units of mins 

concatenated_df

Unnamed: 0,Protein ID,CLClimNE rate constant (s^-1),PDR Source,CLClimNE half life (min)
0,CARBPSYN-SMALL,0.000014,CL,846.0
1,CDPDIGLYSYN-MONOMER,0.000019,CL,600.0
2,DCUR-MONOMER,0.000107,CL,108.0
3,DETHIOBIOTIN-SYN-MONOMER,0.000036,CL,324.0
4,EG10743-MONOMER,0.000012,CL,972.0
...,...,...,...,...
4304,YRBF-MONOMER,0.000016,Clim,720.0
4305,YTFQ-MONOMER,0.000016,Clim,718.3
4306,YTFR-MONOMER,0.000018,Clim,640.3
4307,ZNUA-MONOMER,0.000035,Clim,332.7


In [102]:
# make a plotly of the rates separated but ordered 

# sort the rates first:
concatenated_df = concatenated_df.sort_values(by=c_new_name1, ascending=True)
fig = px.scatter(concatenated_df, x="Protein ID", y=c_new_name1, color="PDR Source")
fig.update_traces(marker=dict(size=3, ), )

# plot specs:
fig.update_xaxes(visible=False)
fig.update_layout(title='The half life values for proteins in the ' + current_sequence + ' PDR combo<br>', xaxis_title='Protein ID', yaxis_title='Half Life (min)')

# wont open in pycharm, so save as a html:
out_pth = "~/wcEcoli/out/random_plotlys/" + current_sequence + "_partitioned.html"
out_pth = os.path.expanduser(out_pth)  
output_dir = os.path.dirname(out_pth)
os.makedirs(output_dir, exist_ok=True)  
fig.write_html(out_pth, auto_open=True)

In [101]:
# plot in order! (plotly does not naturally plot in order while merging over the different subsets globally)
ordered_df = concatenated_df.sort_values(by=c_new_name1, ascending=True)
ordered_df

fig = px.scatter(ordered_df, x="Protein ID", y=c_new_name1, )
fig.update_traces(marker_size=.5, opacity=.3)

# create a dataframe of just the CL rates: 
ordered_CL_df = ordered_df[ordered_df['PDR Source'] == "CL"]
ordered_ML_df = ordered_df[ordered_df['PDR Source'] == "ML"] # should be zero
ordered_Clim_df = ordered_df[ordered_df['PDR Source'] == "Clim"]
ordered_NE_df = ordered_df[ordered_df['PDR Source'] == "NE"]

fig.add_trace(go.Scatter(x=(ordered_ML_df['Protein ID']), y=(ordered_ML_df[c_new_name1]), mode='markers', name='ML', hovertext=ordered_ML_df['Protein ID'], marker_size=5, marker=dict(color="lightsalmon")))
fig.add_trace(go.Scatter(x=(ordered_Clim_df['Protein ID']), y=(ordered_Clim_df[c_new_name1]), mode='markers', name='Clim', hovertext=ordered_Clim_df['Protein ID'], marker_size=5, marker=dict(color="darkorange")))
fig.add_trace(go.Scatter(x=(ordered_NE_df['Protein ID']), y=(ordered_NE_df[c_new_name1]), mode='markers', name='NE', hovertext=ordered_NE_df['Protein ID'], marker_size=5, marker=dict(color="lightseagreen")))
fig.add_trace(go.Scatter(x=(ordered_CL_df['Protein ID']), y=(ordered_CL_df[c_new_name1]), mode='markers', name='CL', hovertext=ordered_CL_df['Protein ID'], marker_size=5, marker=dict(color="deeppink"))) # putting this last so that I can see it stacked on top of the others

# plot specs 
fig.update_xaxes(visible=False)
fig.update_layout(legend_title_text='PDR Source')
fig.update_layout(title='The half life values for proteins in the ' + current_sequence + ' PDR combo<br>', xaxis_title='Protein ID', yaxis_title='Half Life (min)')

# wont open in pycharm, so save as a html:
out_pth = "~/wcEcoli/out/random_plotlys/" + current_sequence + "_ordered.html"
out_pth = os.path.expanduser(out_pth) 
output_dir = os.path.dirname(out_pth)
os.makedirs(output_dir, exist_ok=True)  # Create the directory if it does not exist
fig.write_html(out_pth, auto_open=True) # displays the figure on a default web brouser 

In [120]:
# plot against the CLMLNE model: 

# plot a small ordered value line under the rest of the data
fig = px.scatter(ordered_df, x="Protein ID", y=c_new_name1, )
fig.update_traces(marker_size=.5, opacity=.3)

# create a dataframe of just the CL rates: 
ordered_CL_df = ordered_df[ordered_df['PDR Source'] == "CL"]
ordered_ML_df = ordered_df[ordered_df['PDR Source'] == "ML"] # should be zero
ordered_Clim_df = ordered_df[ordered_df['PDR Source'] == "Clim"]
ordered_NE_df = ordered_df[ordered_df['PDR Source'] == "NE"]

fig.add_trace(go.Scatter(x=(ordered_ML_df['Protein ID']), y=(ordered_ML_df[c_new_name1]), mode='markers', name=current_sequence+': ML', hovertext=ordered_ML_df['Protein ID'], marker_size=5, marker=dict(color="lightsalmon")))
fig.add_trace(go.Scatter(x=(ordered_Clim_df['Protein ID']), y=(ordered_Clim_df[c_new_name1]), mode='markers', name=current_sequence+': Clim', hovertext=ordered_Clim_df['Protein ID'], marker_size=5, marker=dict(color="orange")))
fig.add_trace(go.Scatter(x=(ordered_NE_df['Protein ID']), y=(ordered_NE_df[c_new_name1]), mode='markers', name=current_sequence+': NE', hovertext=ordered_NE_df['Protein ID'], marker_size=5, marker=dict(color="lightseagreen")))
fig.add_trace(go.Scatter(x=(ordered_CL_df['Protein ID']), y=(ordered_CL_df[c_new_name1]), mode='markers', name=current_sequence+': CL', hovertext=ordered_CL_df['Protein ID'], marker_size=5, marker=dict(color="deeppink"))) # putting this last so that I can see it stacked on top of the others

# plot CLMLNE with the data as well: 

fig.add_trace(go.Scatter(x=(CLMLNE_CL_df['Protein ID']), y=(CLMLNE_CL_df['CLMLNE CL half life (min)']), mode='markers', marker_symbol='diamond', name='CLMLNE: CL', hovertext=CLMLNE_CL_df['Protein ID'], marker_size=3, marker=dict(color="cyan")))
fig.add_trace(go.Scatter(x=(CLMLNE_ML_df['Protein ID']), y=(CLMLNE_ML_df['CLMLNE ML half life (min)']), mode='markers', marker_symbol='diamond', name='CLMLNE: ML', hovertext=CLMLNE_ML_df['Protein ID'], marker_size=3, marker=dict(color="slategray")))
fig.add_trace(go.Scatter(x=(CLMLNE_NE_df['Protein ID']), y=(CLMLNE_NE_df['CLMLNE NE half life (min)']), mode='markers', marker_symbol='diamond', name='CLMLNE: NE', hovertext=CLMLNE_NE_df['Protein ID'], marker_size=2, marker=dict(color="yellow")))

# plot specs 
fig.update_xaxes(visible=False)
fig.update_layout(legend_title_text='PDR Source')
fig.update_layout(title='The half life values for proteins in the ' + current_sequence + ' PDR combo<br> compared to their CLMLNE half life assignment', xaxis_title='Protein ID', yaxis_title='Half Life (min)')

# wont open in pycharm, so save as a html:
out_pth = "~/wcEcoli/out/random_plotlys/" + current_sequence + "_ordered_with_CLMLNE.html"
out_pth = os.path.expanduser(out_pth) 
output_dir = os.path.dirname(out_pth)
os.makedirs(output_dir, exist_ok=True)  # Create the directory if it does not exist
fig.write_html(out_pth, auto_open=True) # displays the figure on a default web brouser 


In [123]:
# plot against the CLMLNE model: 

# plot a small ordered value line under the rest of the data
fig = px.scatter(ordered_df, x="Protein ID", y=c_new_name1, )
fig.update_traces(marker_size=.5, opacity=.3)

# create a dataframe of just the CL rates: 
ordered_CL_df = ordered_df[ordered_df['PDR Source'] == "CL"]
ordered_ML_df = ordered_df[ordered_df['PDR Source'] == "ML"] # should be zero
ordered_Clim_df = ordered_df[ordered_df['PDR Source'] == "Clim"]
ordered_NE_df = ordered_df[ordered_df['PDR Source'] == "NE"]

fig.add_trace(go.Scatter(x=(ordered_ML_df['Protein ID']), y=(ordered_ML_df[c_new_name1]), mode='markers', name=current_sequence+': ML', hovertext=ordered_ML_df['Protein ID'], marker_size=5, marker=dict(color="lightsalmon")))
fig.add_trace(go.Scatter(x=(ordered_Clim_df['Protein ID']), y=(ordered_Clim_df[c_new_name1]), mode='markers', name=current_sequence+': Clim', hovertext=ordered_Clim_df['Protein ID'], marker_size=5, marker=dict(color="orange")))
fig.add_trace(go.Scatter(x=(ordered_NE_df['Protein ID']), y=(ordered_NE_df[c_new_name1]), mode='markers', name=current_sequence+': NE', hovertext=ordered_NE_df['Protein ID'], marker_size=5, marker=dict(color="lightseagreen")))
fig.add_trace(go.Scatter(x=(ordered_CL_df['Protein ID']), y=(ordered_CL_df[c_new_name1]), mode='markers', name=current_sequence+': CL', hovertext=ordered_CL_df['Protein ID'], marker_size=5, marker=dict(color="deeppink"))) # putting this last so that I can see it stacked on top of the others

# plot CLNE with the data as well: 
fig.add_trace(go.Scatter(x=(CLNE_CL_df['Protein ID']), y=(CLNE_CL_df['CLNE CL half life (min)']), mode='markers', marker_symbol='diamond', name='CLNE: CL', hovertext=CLNE_CL_df['Protein ID'], marker_size=3, marker=dict(color="cyan")))
fig.add_trace(go.Scatter(x=(CLNE_NE_df['Protein ID']), y=(CLNE_NE_df['CLNE NE half life (min)']), mode='markers', marker_symbol='diamond', name='CLNE: NE', hovertext=CLNE_NE_df['Protein ID'], marker_size=2, marker=dict(color="yellow")))

# plot specs 
fig.update_xaxes(visible=False)
fig.update_layout(legend_title_text='PDR Source')
fig.update_layout(title='The half life values for proteins in the ' + current_sequence + ' PDR combo<br> compared to their CLNE half life assignment', xaxis_title='Protein ID', yaxis_title='Half Life (min)')

# wont open in pycharm, so save as a html:
out_pth = "~/wcEcoli/out/random_plotlys/" + current_sequence + "_ordered_with_CLMLNE.html"
out_pth = os.path.expanduser(out_pth) 
output_dir = os.path.dirname(out_pth)
os.makedirs(output_dir, exist_ok=True)  # Create the directory if it does not exist
fig.write_html(out_pth, auto_open=True) # displays the figure on a default web brouser 
# todo: double check that there is only 1 rate that is 2 mins with Nora

Unnamed: 0,Protein ID,CLNE NE rate constant (s^1),CLNE NE half life (min)
0,1-ACYLGLYCEROL-3-P-ACYLTRANSFER-MONOMER,0.000019,598.572695
1,1-PFK-MONOMER,0.000019,598.572695
2,2-DEHYDROPANTOATE-REDUCT-MONOMER,0.000019,598.572695
3,2-ISOPROPYLMALATESYN-MONOMER,0.000019,598.572695
4,2-OCTAPRENYL-METHOXY-BENZOQ-METH-MONOMER,0.000019,598.572695
...,...,...,...
4297,YTFR-MONOMER,0.000019,598.572695
4298,YTFT-MONOMER,0.000019,598.572695
4299,ZNUA-MONOMER,0.000019,598.572695
4300,ZNUB-MONOMER,0.000019,598.572695


In [125]:
CLNE_NE_df[CLNE_NE_df['CLNE NE half life (min)'] < 3] # 2 mins ish (the actual # is 1.999999)


Unnamed: 0,Protein ID,CLNE NE rate constant (s^1),CLNE NE half life (min)
513,EG10506-MONOMER,0.005776,1.999999
606,EG10765-MONOMER,0.005776,1.999999
2748,G7426-MONOMER,0.005776,1.999999
3286,ISOCIT-LYASE-MONOMER,0.005776,1.999999
3780,OROPRIBTRANS-MONOMER,0.005776,1.999999
3880,PGPPHOSPHAB-MONOMER,0.005776,1.999999
3919,PPENTOMUT-MONOMER,0.005776,1.999999
