In [6]:
!conda activate drugdiscovery

/bin/bash: line 1: conda: command not found


In [8]:
!pip install mols2grid rdkit

Collecting mols2grid
  Downloading mols2grid-2.0.0-py3-none-any.whl.metadata (16 kB)
Collecting rdkit
  Downloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets<8,>=7->mols2grid)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading mols2grid-2.0.0-py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.0/107.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.3/34.3 MB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit, jedi, mols2grid
Successfully installed jedi-0.19.2 mols2grid-2.0.0 rdkit-2024.9.5

In [10]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.42.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.0-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[

In [31]:
import mols2grid
import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt, MolLogP, NumHDonors, NumHAcceptors

st.title("Filter FDA Approved Drugs by Lipinski's Rule-of-Five with Streamlit")

st.markdown("""
- App modified by [Chanin Nantasenamat](http://medium.dataprofessor.org) (aka [Data Professor](http://youtube.com/dataprofessor))
- Original app by [Justin Chavez](https://blog.reverielabs.com/building-web-applications-from-python-scripts-with-streamlit/)
""")

@st.cache(allow_output_mutation=True)
def download_dataset():
    """Loads once then cached for subsequent runs"""
    df = pd.read_csv(
        "https://www.cureffi.org/wp-content/uploads/2013/10/drugs.txt", sep="\t"
    ).dropna()
    return df

# Calculate descriptors
def calc_mw(smiles_string):
    """Given a smiles string (ex. C1CCCCC1), calculate and return the molecular weight"""
    mol = Chem.MolFromSmiles(smiles_string)
    return ExactMolWt(mol)

def calc_logp(smiles_string):
    """Given a smiles string (ex. C1CCCCC1), calculate and return the LogP"""
    mol = Chem.MolFromSmiles(smiles_string)
    return MolLogP(mol)

def calc_NumHDonors(smiles_string):
    """Given a smiles string (ex. C1CCCCC1), calculate and return the NumHDonors"""
    mol = Chem.MolFromSmiles(smiles_string)
    return NumHDonors(mol)

def calc_NumHAcceptors(smiles_string):
    """Given a smiles string (ex. C1CCCCC1), calculate and return the NumHAcceptors"""
    mol = Chem.MolFromSmiles(smiles_string)
    return NumHAcceptors(mol)


# Copy the dataset so any changes are not applied to the original cached version
df = download_dataset().copy()
df["MW"] = df.apply(lambda x: calc_mw(x["smiles"]), axis=1)
df["LogP"] = df.apply(lambda x: calc_logp(x["smiles"]), axis=1)
df["NumHDonors"] = df.apply(lambda x: calc_NumHDonors(x["smiles"]), axis=1)
df["NumHAcceptors"] = df.apply(lambda x: calc_NumHAcceptors(x["smiles"]), axis=1)

# Sidebar panel
st.sidebar.header('Set parameters')
st.sidebar.write('*Note: Display compounds having values less than the following thresholds*')
weight_cutoff = st.sidebar.slider(
    label="Molecular weight",
    min_value=0,
    max_value=1000,
    value=500,
    step=10,
)
logp_cutoff = st.sidebar.slider(
    label="LogP",
    min_value=-10,
    max_value=10,
    value=5,
    step=1,
)
NumHDonors_cutoff = st.sidebar.slider(
    label="NumHDonors",
    min_value=0,
    max_value=15,
    value=5,
    step=1,
)
NumHAcceptors_cutoff = st.sidebar.slider(
    label="NumHAcceptors",
    min_value=0,
    max_value=20,
    value=10,
    step=1,
)

df_result = df[df["MW"] < weight_cutoff]
df_result2 = df_result[df_result["LogP"] < logp_cutoff]
df_result3 = df_result2[df_result2["NumHDonors"] < NumHDonors_cutoff]
df_result4 = df_result3[df_result3["NumHAcceptors"] < NumHAcceptors_cutoff]

st.write(df_result4.shape)
st.write(df_result4)

# Rename columns to match expected names for mols2grid display
df_result4 = df_result4.rename(columns={'smiles': 'SMILES', 'generic_name': 'Name'})

# Create HTML representation of the filtered results
raw_html = mols2grid.display(df_result4,
                             subset=["img", "Name", "MW", "LogP", "NumHDonors", "NumHAcceptors"],
                             mapping={"SMILES": "SMILES", "Name": "Name"})._repr_html_()

# Display the filtered molecules in Streamlit
components.html(raw_html, width=900, height=1100, scrolling=False)

2025-02-09 09:38:41.609 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new
caching commands, `st.cache_data` or `st.cache_resource`. More information
[in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching
logic used by `st.cache_data` and `st.cache_resource`. This might lead to some problems
or unexpected behavior in certain edge cases.



MolGridWidget()



DeltaGenerator()

In [33]:
!pip install streamlit pandas rdkit mols2grid



In [37]:
import streamlit as st
import pandas as pd
import mols2grid
from rdkit import Chem
from rdkit.Chem import PandasTools

# Use the new caching function for data
@st.cache_data
def download_dataset():
    """Loads the dataset and caches it for subsequent runs."""
    df = pd.read_csv(
        "https://www.cureffi.org/wp-content/uploads/2013/10/drugs.txt", sep="\t"
    ).dropna()

    # Print the columns to check the correct column name for SMILES
    st.write("Dataset Columns:", df.columns)  # Show the dataset's columns
    return df

# Load and process the dataset
def process_data():
    df = download_dataset()
    # Display first few rows to inspect data
    st.write("Dataset Preview (First 5 Rows):", df.head())  # Show first few rows

    # Check the column names to find the correct one for SMILES
    if 'SMILES' not in df.columns:
        st.write("Available columns in the dataset: ", df.columns)
        st.error("'SMILES' column not found in dataset. Please check the column name.")
        return None

    # Process the molecules and create a PandasTools dataframe
    mols = df['SMILES'].apply(Chem.MolFromSmiles)
    PandasTools.AddMoleculeColumnToFrame(df, 'SMILES', 'Molecule')
    return df

# Main Streamlit app structure
def main():
    st.title("Molecule Viewer and Data Explorer")

    # Load the data
    df = process_data()

    if df is not None:
        # Display the data in the app
        st.header("Dataset Preview")
        st.dataframe(df)

        # Show a sample molecule grid using mols2grid
        st.header("Molecule Grid Viewer")
        mol_grid = mols2grid.display(df, size=(300, 300))
        st.write(mol_grid)

# Run the app
if __name__ == "__main__":
    main()

2025-02-09 09:56:31.789 No runtime found, using MemoryCacheStorageManager
2025-02-09 09:56:31.803 No runtime found, using MemoryCacheStorageManager


In [40]:
!streamlit run molecule_viewer.py

Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: molecule_viewer.py


In [43]:
Local URL:  http://localhost:8501
Network URL:  http://<your-ip>:8501

SyntaxError: invalid syntax (<ipython-input-43-19370e81b3d0>, line 1)

In [32]:
!conda create -n drugdiscovery python=3.7.9

/bin/bash: line 1: conda: command not found


In [16]:
!conda --version

/bin/bash: line 1: conda: command not found


In [17]:
!wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
!chmod +x Miniconda3-latest-Linux-x86_64.sh
!bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local
!rm -rf Miniconda3-latest-Linux-x86_64.sh

--2025-02-09 08:38:22--  https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.32.241, 104.16.191.158, 2606:4700::6810:bf9e, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.32.241|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 154615620 (147M) [application/octet-stream]
Saving to: ‘Miniconda3-latest-Linux-x86_64.sh’


2025-02-09 08:38:23 (174 MB/s) - ‘Miniconda3-latest-Linux-x86_64.sh’ saved [154615620/154615620]

ERROR: File or directory already exists: '/usr/local'
If you want to update an existing installation, use the -u option.


Create conda environment.
Firstly, we will create a conda environment called drugdiscovery

In [18]:
!conda create -n drugdiscovery python=3.7.9

/bin/bash: line 1: conda: command not found


In [19]:
!conda update conda

/bin/bash: line 1: conda: command not found


In [20]:
!conda env list

/bin/bash: line 1: conda: command not found


Download requirements.txt file

In [22]:
!wget https://raw.githubusercontent.com/dataprofessor/drugdiscovery/main/requirements.txt

--2025-02-09 08:38:40--  https://raw.githubusercontent.com/dataprofessor/drugdiscovery/main/requirements.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-02-09 08:38:40 ERROR 404: Not Found.



Pip install libraries.

In [23]:
!pip install -r requirements.txt

[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m

Launch the app

In [24]:
!streamlit run app.py

Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py


In [25]:
!pip install streamlit

import streamlit as st



In [26]:
!pip install mols2grid



In [27]:
import mols2grid
import pandas as pd
import streamlit as st
import streamlit.components.v1 as components
from rdkit import Chem
from rdkit.Chem.Descriptors import ExactMolWt

st.title("Filter FDA Approved Drugs by Molecular Weight with Streamlit")


@st.cache(allow_output_mutation=True)
def download_dataset():
    """Loads once then cached for subsequent runs"""
    df = pd.read_csv(
        "https://www.cureffi.org/wp-content/uploads/2013/10/drugs.txt", sep="\t"
    ).dropna()
    return df


def calc_mw(smiles_string):
    """Given a smiles string (ex. C1CCCCC1), calculate and return the molecular weight"""
    mol = Chem.MolFromSmiles(smiles_string)
    return ExactMolWt(mol)


# Copy the dataset so any changes are not applied to the original cached version
df = download_dataset().copy()
df["mol_weight"] = df.apply(lambda x: calc_mw(x["smiles"]), axis=1)

weight_cutoff = st.slider(
    label="Show compounds that weigh below:",
    min_value=0,
    max_value=500,
    value=150,
    step=10,
)


df_result = df[df["mol_weight"] < weight_cutoff]
st.write(df_result)


raw_html = mols2grid.display(df_result, mapping={"smiles": "SMILES"})._repr_html_()
components.html(raw_html, width=900, height=900, scrolling=True)

2025-02-09 08:39:39.175 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new
caching commands, `st.cache_data` or `st.cache_resource`. More information
[in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching
logic used by `st.cache_data` and `st.cache_resource`. This might lead to some problems
or unexpected behavior in certain edge cases.



MolGridWidget()

KeyError: "['SMILES'] not in index"

In [44]:
@st.cache(allow_output_mutation=True)
def download_dataset():
    """Loads once then cached for subsequent runs"""
    df = pd.read_csv(
        "https://www.cureffi.org/wp-content/uploads/2013/10/drugs.txt", sep="\t"
    ).dropna()
    return df

2025-02-09 10:01:56.150 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new
caching commands, `st.cache_data` or `st.cache_resource`. More information
[in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching
logic used by `st.cache_data` and `st.cache_resource`. This might lead to some problems
or unexpected behavior in certain edge cases.

