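# In [10]:  (Jupyter cell marker left over from the notebook export; commented out so the file parses as Python)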
import pickle

import pandas as pd
import streamlit as st
from rdkit import Chem
from rdkit.Chem import Crippen, Descriptors
from rdkit.Chem import rdMolDescriptors
from sklearn.preprocessing import StandardScaler

# Class
class Featurizer:
    """Turn SMILES strings into standardised RDKit descriptor features.

    The constructor computes six descriptors for the given SMILES and fits a
    ``StandardScaler`` on them; ``featurize`` computes the same descriptors
    for new SMILES and scales them with that fitted scaler.

    The only instance state is ``self.scaler``. Keep it that way: an instance
    restored with ``pickle.load`` does not go through ``__init__``, so
    ``featurize`` must not rely on any attribute other than ``scaler``.
    """

    # Descriptor columns in the exact order they are handed to the scaler.
    # The order must not change, otherwise a previously fitted scaler would
    # be applied to the wrong columns.
    _DESCRIPTOR_COLUMNS = [
        'mol_wt',
        'logp',
        'num_heavy_atoms',
        'num_HBD',
        'num_HBA',
        'aromatic_rings',
    ]

    def __init__(self, smiles):
        """Fit the scaler on descriptors computed from ``smiles``.

        Args:
            smiles: Collection of SMILES strings (anything ``pd.DataFrame``
                accepts as a single ``'SMILES'`` column).

        Raises:
            ValueError: If any SMILES string cannot be parsed by RDKit.
        """
        descriptors = self._compute_descriptors(smiles)

        self.scaler = StandardScaler()
        self.scaler.fit(descriptors)

    def featurize(self, smiles):
        """Return scaled descriptors for ``smiles`` as a DataFrame.

        Args:
            smiles: Collection of SMILES strings, one molecule per entry.

        Returns:
            A ``pd.DataFrame`` with one row per input SMILES (default integer
            index) and the six scaled descriptor columns.

        Raises:
            ValueError: If any SMILES string cannot be parsed by RDKit.
        """
        descriptors = self._compute_descriptors(smiles)
        scaled_descriptors = self.scaler.transform(descriptors)

        return pd.DataFrame(scaled_descriptors, columns=self._DESCRIPTOR_COLUMNS)

    @classmethod
    def _compute_descriptors(cls, smiles):
        """Compute the unscaled descriptor table shared by fit and transform."""
        df = pd.DataFrame(smiles, columns=['SMILES'])
        df['mol'] = df['SMILES'].apply(Chem.MolFromSmiles)

        # MolFromSmiles yields None for strings it cannot parse; fail early
        # with the offending input instead of an opaque error from the first
        # descriptor call.
        unparsed = df.loc[df['mol'].isna(), 'SMILES'].tolist()
        if unparsed:
            raise ValueError(f"Could not parse SMILES: {unparsed!r}")

        mols = df['mol']
        df['mol_wt'] = mols.apply(rdMolDescriptors.CalcExactMolWt)                # Exact molecular weight
        df['logp'] = mols.apply(Crippen.MolLogP)                                  # LogP (lipophilicity)
        df['num_heavy_atoms'] = mols.apply(rdMolDescriptors.CalcNumHeavyAtoms)    # Number of heavy atoms
        df['num_HBD'] = mols.apply(rdMolDescriptors.CalcNumHBD)                   # Number of hydrogen bond donors
        df['num_HBA'] = mols.apply(rdMolDescriptors.CalcNumHBA)                   # Number of hydrogen bond acceptors
        df['aromatic_rings'] = mols.apply(rdMolDescriptors.CalcNumAromaticRings)  # Number of aromatic rings

        return df[cls._DESCRIPTOR_COLUMNS]

# Load the fitted featurizer and the trained model from disk.
# NOTE: pickle can execute arbitrary code while loading, so these files must
# come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


featurizer = _load_pickle("featurizer.pkl")
model = _load_pickle("model.pkl")

# Streamlit App: read SMILES from a text area, featurize them and show the
# model's predicted solubility for each one in a table.
st.title("RozpuszczalnoscAI+")

smiles_input = st.text_area("Enter SMILES strings (one per line):")

if st.button("Predict Solubility"):
    # Trim each line and drop blank ones so stray newlines or surrounding
    # whitespace in the text area are not passed on as molecules.
    smiles_list = [line.strip() for line in smiles_input.splitlines() if line.strip()]

    if not smiles_list:
        st.error("No SMILES")
    else:
        # MolFromSmiles returns None for input it cannot parse; report those
        # entries to the user instead of letting featurization crash the app.
        invalid = [smi for smi in smiles_list if Chem.MolFromSmiles(smi) is None]

        if invalid:
            st.error("Invalid SMILES: " + ", ".join(invalid))
        else:
            X = featurizer.featurize(smiles_list)
            # The unpickled estimator is bound to `model`; the previous
            # `svr` name was never defined in this file.
            y_pred = model.predict(X)
            results_df = pd.DataFrame({
                "SMILES": smiles_list,
                "Predicted Solubility": y_pred,
            })

            st.table(results_df)



