In [26]:
import os

import joblib  # << use joblib instead of pickle
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit import DataStructs  # << missing in your version
from rdkit.Chem import AllChem
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Restore the fitted preprocessing scaler and the trained classifier from disk.
# Both paths are relative to the kernel's working directory.
# NOTE(review): joblib.load unpickles the file contents, so these artifacts
# should only ever come from a trusted source.
SCALER_PATH = 'scaler.pkl'
MODEL_PATH = 'mlp_classifier_model.pkl'

scaler = joblib.load(SCALER_PATH)
model = joblib.load(MODEL_PATH)

# Function to compute Morgan fingerprint from SMILES
def smiles_to_fingerprint(smiles, radius=2, n_bits=2048):
    """Convert a SMILES string into a Morgan fingerprint bit array.

    Args:
        smiles: SMILES string of the molecule to encode.
        radius: Morgan fingerprint radius handed to RDKit.
        n_bits: Length of the fingerprint bit vector.

    Returns:
        A 1-D NumPy integer array of length ``n_bits`` holding the
        fingerprint bits.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``, or if the parsed
            molecule contains no atoms (for example an empty string).
    """
    mol = Chem.MolFromSmiles(smiles)
    # RDKit turns an empty SMILES into a molecule with zero atoms rather than
    # None. Without the atom-count check that input would silently produce an
    # all-zero fingerprint and a meaningless prediction downstream.
    if mol is None or mol.GetNumAtoms() == 0:
        raise ValueError("Invalid SMILES string")
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
    # Allocate the destination at the fingerprint length up front instead of
    # relying on the conversion call to grow a one-element placeholder.
    arr = np.zeros((n_bits,), dtype=int)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr

# Load the reaction templates
# The location can be overridden through the REACTION_TEMPLATES_PATH
# environment variable so the notebook also runs on machines that do not have
# this exact directory layout. When the variable is unset or empty, the
# original hard-coded path is used unchanged.
file_path = (
    os.environ.get("REACTION_TEMPLATES_PATH")
    or "/Users/giuliogarotti/Documents/GitHub/Projet_chem/uspto50/uspto50/reaction_templates_50k_test.csv"
)
# The file is read as tab-separated even though it carries a .csv extension.
templates_df = pd.read_csv(file_path, sep='\t')

# Predict function
def predict_reaction(smiles):
    """Predict the retro-reaction template for a product SMILES.

    The SMILES is fingerprinted with ``smiles_to_fingerprint``, scaled with
    the module-level ``scaler`` and classified by the module-level ``model``.
    The predicted label is then looked up in the ``TemplateHash`` column of
    ``templates_df``.

    Args:
        smiles: SMILES string of the product molecule.

    Returns:
        The ``RetroTemplate`` value of the first row in ``templates_df`` whose
        ``TemplateHash`` equals the predicted label.

    Raises:
        ValueError: If no row of ``templates_df`` carries the predicted label.
            Errors raised by ``smiles_to_fingerprint`` propagate unchanged.
    """
    # The scaler and model are fed a single sample, hence the (1, n) reshape.
    features = smiles_to_fingerprint(smiles).reshape(1, -1)

    # Scale, classify, and unwrap the only prediction of the one-sample batch.
    predicted_id = model.predict(scaler.transform(features))[0]

    # Keep only the template rows whose hash matches the predicted label.
    matches = templates_df.loc[templates_df['TemplateHash'] == predicted_id]
    if matches.empty:
        raise ValueError(f"Predicted ID {predicted_id} not found in templates.")

    return matches.iloc[0]['RetroTemplate']


# Interactive entry point: prompt for a product SMILES, run the prediction and
# print the resulting template.
if __name__ == "__main__":
    smiles_input = input("Enter a SMILES string of the product: ")
    try:
        template = predict_reaction(smiles_input)
        print(f"Predicted Reaction Template: {template}")
    except Exception as e:
        # Any failure during prediction or printing is reported as a one-line
        # message instead of a traceback.
        print(f"Error: {e}")



Predicted Reaction Template: [O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]1:[c:5]:[c:6]:[c:7]:[c:8]:[c:9]:1>>C-[O;H0;D2;+0:1]-[C:2](=[O;D1;H0:3])-[c:4]1:[c:5]:[c:6]:[c:7]:[c:8]:[c:9]:1




In [29]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import rdChemReactions

# Reaction SMARTS of the template to visualise. The string matches the
# "Predicted Reaction Template" printed by the prediction cell above, so it
# must be updated by hand whenever a different prediction should be drawn.
reaction_smarts = "[O;D1;H0:3]=[C:2](-[OH;D1;+0:1])-[c:4]1:[c:5]:[c:6]:[c:7]:[c:8]:[c:9]:1>>C-[O;H0;D2;+0:1]-[C:2](=[O;D1;H0:3])-[c:4]1:[c:5]:[c:6]:[c:7]:[c:8]:[c:9]:1"

# Parse the SMARTS into an RDKit reaction object.
reaction = rdChemReactions.ReactionFromSmarts(reaction_smarts)

# Render the reaction; subImgSize is passed as (300, 300).
img = Draw.ReactionToImage(reaction, subImgSize=(300, 300))

# Display the rendered image.
# NOTE(review): if `img` is a PIL image, .show() opens it in an external
# viewer rather than inline in the notebook - confirm this is intended.
img.show()



In [None]:
# Example product SMILES (benzyl alcohol) kept as a string literal. The bare
# text parsed as a chained assignment and raised NameError when the cell ran.
example_product_smiles = "OCC1=CC=CC=C1"
