In [1]:
! pip install rdkit

Defaulting to user installation because normal site-packages is not writeable
Collecting rdkit
  Downloading rdkit-2023.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.4/34.4 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.4


In [2]:
from rdkit import Chem
from rdkit.Chem import Descriptors, Draw
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
from IPython.display import display, Image

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [3]:
from ipywidgets import interact, widgets

In [4]:
# Function definitions (unchanged)
def generate_synthetic_data():
    base_smiles = ["CCCl", "CCBr", "CCC", "CCO"]
    functional_groups = ["Cl", "Br", "I", "O", "C=O"]
    
    synthetic_molecules = []
    for base in base_smiles:
        base_mol = Chem.MolFromSmiles(base)
        if not base_mol:
            continue
        for fg in functional_groups:
            synthetic_smiles = base + "." + fg
            synthetic_mol = Chem.MolFromSmiles(synthetic_smiles)
            if synthetic_mol:
                synthetic_molecules.append(synthetic_smiles)
    
    synthetic_molecules = list(set(synthetic_molecules))
    
    data = []
    for smi in synthetic_molecules:
        mol = Chem.MolFromSmiles(smi)
        if mol:
            mw = Descriptors.MolWt(mol)
            logp = Descriptors.MolLogP(mol)
            sn1_reaction = np.random.choice([0, 1])
            data.append({"SMILES": smi, "Molecular Weight": mw, "logP": logp, "SN1": sn1_reaction})
    
    return pd.DataFrame(data)

In [5]:
def train_model(df):
    X = df[['Molecular Weight', 'logP']]
    y = df['SN1']
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X, y)
    return clf

def calculate_descriptors(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        mw = Descriptors.MolWt(mol)
        logp = Descriptors.MolLogP(mol)
        return mw, logp
    else:
        return None, None

In [7]:
# Visualization function
def visualize_molecule(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        img = Draw.MolToImage(mol)
        display(img)
    else:
        print("Invalid SMILES string. Please enter a valid SMILEs")

In [8]:
# Interactive SMILES input and visualization
def main(smiles="CCCl"):
    mw, logp = calculate_descriptors(smiles)
    if mw and logp:
        print(f"Molecular Weight: {mw}, logP: {logp}")
        visualize_molecule(smiles)
        
        # Predict the reaction outcome if a model has been trained
        if 'model' in globals():
            prediction = model.predict(np.array([[mw, logp]]))
            outcome = "likely" if prediction[0] == 1 else "unlikely"
            print(f"The molecule is {outcome} to undergo an SN1 reaction.")
        else:
            print("Model has not been trained yet.")
    else:
        print("Invalid SMILES string. Please enter a valid SMILES.")

# Generate synthetic data and train the model
df_synthetic = generate_synthetic_data()
model = train_model(df_synthetic)

# Create an interactive widget for SMILES input
interact(main, smiles=widgets.Text(value="CCCl", description="SMILES:"))

interactive(children=(Text(value='CCCl', description='SMILES:'), Output()), _dom_classes=('widget-interact',))

<function __main__.main(smiles='CCCl')>