# solubility project

This is a "bioinformatics project" built from scratch, based on the work of Delaney.

Load data
Here, we're loading the Delaney data set (reference: https://pubs.acs.org/doi/10.1021/ci034243x).

In [None]:
-- Pull every column of the solubility table.
-- NOTE(review): the table name is spelled SOLUBILTY here; confirm it matches the actual table.
SELECT
    *
FROM BIOINFORMATIC_PROJECT.PUBLIC.SOLUBILTY

In [None]:
# Convert `sql_data` (presumably the result of the SQL cell) to a pandas
# DataFrame and bind it to `df`, the name the later cells read from.
df = sql_data.to_pandas()
# Keep showing the converted table as this cell's output.
df

In [None]:
# Show the DataFrame as this cell's output.
df

In [None]:
# Label each molecule by molecular weight: 'small' below 300, 'large' otherwise.
# The labels are derived from the MOLWT column itself, so they stay aligned with
# df's index whatever that index is (a freshly built Series would only line up
# with a default 0..n-1 index).
df['MOLWT_CLASS'] = (df['MOLWT'] < 300).map({True: 'small', False: 'large'})
# Show the table with the new column as this cell's output.
df
     

Data Aggregation
Here, we're aggregating the data by grouping it into molecular-weight classes and averaging each column:

small if <300

large if >= 300

In [None]:
# Average every column within each MOLWT_CLASS group; reset_index() turns the
# group label back into a regular column.
df_class = (
    df.groupby('MOLWT_CLASS')
    .mean()
    .reset_index()
)
# Show the aggregated table as this cell's output.
df_class

In [None]:
import streamlit as st

# Bar chart of the per-class mean molecular weight, colored by class.
st.subheader('Molecular Weight (MOLWT)')
st.bar_chart(
    df_class,
    x='MOLWT_CLASS',
    y='MOLWT',
    color='MOLWT_CLASS',
    stack=True,
)

In [None]:
# Mean MOLWT in the row labelled 0 of the aggregated table (the first row after
# reset_index()).
df_class.loc[0, 'MOLWT']

# Building a Solubility Dashboard

In [None]:
import streamlit as st

st.title('☘️ Solubility Dashboard')

# Data Filtering
# The slider sets the molecular-weight cutoff (100-500, default 300): rows with
# MOLWT below it are labelled 'small', the rest 'large'.
mol_size = st.slider('Select a value', 100, 500, 300)
# Derive the labels from the MOLWT column itself so they stay aligned with df's
# index whatever that index is.
df['MOLWT_CLASS'] = (df['MOLWT'] < mol_size).map({True: 'small', False: 'large'})
df_class = df.groupby('MOLWT_CLASS').mean().reset_index()

st.divider()

# Calculate Metrics
# Look the classes up by label rather than by row position, so each number is
# shown under the right heading even when the cutoff leaves one class empty.
_class_means = df_class.set_index('MOLWT_CLASS')


def _class_mean(column, label):
    """Return the mean of ``column`` for class ``label``, rounded to 2 decimals.

    Returns None when no row falls into that class; st.metric accepts None as
    its value.
    """
    if label not in _class_means.index:
        return None
    return round(_class_means.loc[label, column], 2)


molwt_large = _class_mean('MOLWT', 'large')
molwt_small = _class_mean('MOLWT', 'small')
numrotatablebonds_large = _class_mean('NUMROTATABLEBONDS', 'large')
numrotatablebonds_small = _class_mean('NUMROTATABLEBONDS', 'small')
mollogp_large = _class_mean('MOLLOGP', 'large')
mollogp_small = _class_mean('MOLLOGP', 'small')
aromaticproportion_large = _class_mean('AROMATICPROPORTION', 'large')
aromaticproportion_small = _class_mean('AROMATICPROPORTION', 'small')

# Data metrics and visualizations
# Two side-by-side columns, each holding two descriptor panels
# (subheader, large/small metrics, bar chart).
col = st.columns(2)
with col[0]:
    st.subheader('Molecular Weight')
    st.metric('Large', molwt_large)
    st.metric('Small', molwt_small)
    st.bar_chart(df_class, x='MOLWT_CLASS', y='MOLWT', color='MOLWT_CLASS')

    st.subheader('Number of Rotatable Bonds')
    st.metric('Large', numrotatablebonds_large)
    st.metric('Small', numrotatablebonds_small)
    st.bar_chart(df_class, x='MOLWT_CLASS', y='NUMROTATABLEBONDS', color='MOLWT_CLASS')
with col[1]:
    st.subheader('Molecular LogP')
    st.metric('Large', mollogp_large)
    st.metric('Small', mollogp_small)
    st.bar_chart(df_class, x='MOLWT_CLASS', y='MOLLOGP', color='MOLWT_CLASS')

    st.subheader('Aromatic Proportion')
    # Use the aromatic-proportion means so the metrics match the chart below.
    st.metric('Large', aromaticproportion_large)
    st.metric('Small', aromaticproportion_small)
    st.bar_chart(df_class, x='MOLWT_CLASS', y='AROMATICPROPORTION', color='MOLWT_CLASS')

# Raw and aggregated tables, collapsed by default behind expanders.
with st.expander('Show Original DataFrame'):
    st.dataframe(df)
with st.expander('Show Aggregated DataFrame'):
    st.dataframe(df_class)