In [3]:
!pip install streamlit
!pip install rdkit


Collecting streamlit
  Downloading streamlit-1.45.0-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 44.3/44.3 kB 1.9 MB/s eta 0:00:00
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.0-py3-none-any.whl (9.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.9/9.9 MB 36.2 MB/s eta 0:00:00
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 60.8 MB/s eta 0:00:00
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.1/79.1 kB 8.6 MB/s eta 0:00:00
Inst

In [4]:
import streamlit as st
from rdkit import Chem
from rdkit.Chem import Draw
import joblib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os

In [8]:
# load rf model
# The default location matches the original hard-coded path; set the
# RF_MODEL_PATH environment variable to point at a different file.
RF_MODEL_PATH = os.environ.get("RF_MODEL_PATH", "/content/sample_data/rf.pkl")


@st.cache_resource
def load_model(path):
    """Load the serialized model stored at ``path`` and return it.

    Streamlit re-executes the script on every widget interaction; wrapping
    the load in ``st.cache_resource`` keeps one deserialized model per path
    instead of re-reading the file on each rerun.

    SECURITY: ``joblib.load`` unpickles the file, which can execute arbitrary
    code. Only load model files from a trusted source.
    """
    return joblib.load(path)


rf = load_model(RF_MODEL_PATH)

In [10]:
# Dummy feature extractor – replace with your actual fingerprinting method
def featurize(smiles, radius=2, n_bits=1024):
    """Turn a SMILES string into a Morgan fingerprint bit vector.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule.
    radius : int, optional
        Morgan fingerprint radius. Defaults to 2, the value that was
        previously hard-coded.
    n_bits : int, optional
        Length of the bit vector. Defaults to 1024, the value that was
        previously hard-coded.

    Returns
    -------
    RDKit bit vector, or None
        ``None`` when ``smiles`` is not a non-blank string or when RDKit
        cannot parse it.
    """
    # Reject non-strings and blank input before touching RDKit: a non-string
    # would raise inside the parser, and blank input is presumably parsed as
    # an empty molecule rather than None (NOTE(review): confirm) — neither is
    # a meaningful query.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    from rdkit.Chem import AllChem

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None; test for that
    # explicitly instead of relying on the truthiness of the Mol object.
    if mol is None:
        return None

    # NOTE(review): newer RDKit releases reportedly steer users towards
    # rdFingerprintGenerator for Morgan fingerprints. If you migrate, do it
    # together with the training pipeline so the features stay identical to
    # those the model was fitted on.
    return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)

# Dummy applicability domain plot
def plot_ad(test_fp, train_fps, label="Query"):
    """Render a 2-D PCA scatter of training vs. query fingerprints in Streamlit.

    The training and query fingerprints are stacked, projected onto two
    principal components fitted on the stacked data, and drawn as two
    scatter series on one matplotlib figure that is handed to ``st.pyplot``.

    Parameters
    ----------
    test_fp : array-like
        One query fingerprint (1-D) or several (2-D, one per row).
    train_fps : array-like
        Training fingerprints, one per row, with the same number of columns
        as ``test_fp``.
    label : str, optional
        Legend label for the query point(s).

    Returns
    -------
    None
        The plot is emitted as a side effect. If the inputs are empty or
        their widths differ, a Streamlit warning is shown instead.
    """
    from sklearn.decomposition import PCA

    query = np.atleast_2d(np.asarray(test_fp))
    train = np.atleast_2d(np.asarray(train_fps))

    # Guard against inputs np.vstack / PCA cannot handle, instead of crashing
    # the app with a traceback.
    if (
        query.shape[0] == 0
        or train.shape[0] == 0
        or train.shape[1] == 0
        or train.shape[1] != query.shape[1]
    ):
        st.warning(
            "Training fingerprints are empty or do not match the query "
            "fingerprint length; skipping AD plot."
        )
        return

    # The query rows are stacked last, so the final n_query rows of the
    # projection belong to the query.
    n_query = query.shape[0]
    all_data = np.vstack([train, query])
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(all_data)

    fig, ax = plt.subplots()
    try:
        ax.scatter(reduced[:-n_query, 0], reduced[:-n_query, 1], alpha=0.4, label="Training")
        ax.scatter(reduced[-n_query:, 0], reduced[-n_query:, 1], c='red', label=label)
        ax.set_title("Applicability Domain (PCA)")
        ax.set_xlabel("PC1")
        ax.set_ylabel("PC2")
        ax.legend()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until closed; without
        # this, figures pile up across Streamlit reruns.
        plt.close(fig)

# Streamlit UI
# Flow: read a SMILES string, draw the molecule, featurize it, run the loaded
# model, show the predicted class with its highest class probability, and
# (when training fingerprints are available on disk) draw the AD plot.
st.title("SMILES Activity Predictor")

# Strip surrounding whitespace so pasted input with stray spaces/newlines is
# cleaned up and whitespace-only input is treated as "nothing entered".
smiles_input = st.text_input("Enter a SMILES string:").strip()

if smiles_input:
    mol = Chem.MolFromSmiles(smiles_input)
    # MolFromSmiles returns None when the string cannot be parsed.
    if mol is not None:
        st.image(Draw.MolToImage(mol), caption="Molecule Structure")

        fp = featurize(smiles_input)
        if fp is not None:
            # The model is called with a 2-D array: one row for this molecule.
            fp_array = np.array(fp).reshape(1, -1)
            model = rf
            pred_prob = model.predict_proba(fp_array)[0]
            pred_class = model.predict(fp_array)[0]

            st.markdown(f"**Prediction:** `{pred_class}`")
            # The reported confidence is the largest class probability.
            st.markdown(f"**Confidence Score:** `{max(pred_prob):.2f}`")

            # Load training set fingerprints for AD plot (replace with your actual training data)
            if os.path.exists("train_fps.npy"):
                train_fps = np.load("train_fps.npy")
                plot_ad(fp_array, train_fps)
            else:
                st.warning("Training fingerprints not found for AD plot.")
        else:
            st.error("Failed to featurize SMILES.")
    else:
        st.error("Invalid SMILES string.")


