In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from SAscore.sascorer import calculateScore
from RAscore.RAscore_NN import RAScorerNN
import os

In [None]:
# Set up the neural-network RA scorer used by compute_ra_score.
# The weights path is relative, so it resolves against the current
# working directory of the kernel.
model_path = os.path.join(
    "RAscore",
    "model",
    "DNN_chembl_fcfp_counts",
    "model.h5",
)
nn_scorer = RAScorerNN(model_path=model_path)

def compute_sa_score(smiles):
    """
    Compute the synthetic accessibility (SA) score for one molecule.

    Parameters
    ----------
    smiles : str
        SMILES string describing the molecule. Missing values (None,
        float NaN as produced by pandas for empty CSV cells), other
        non-string objects and blank strings are tolerated.

    Returns
    -------
    The value returned by ``calculateScore`` for the parsed molecule, or
    None if ``smiles`` is missing, not a string, blank, or cannot be
    parsed by RDKit.
    """
    # Reject non-string and blank input before it reaches RDKit: a NaN cell
    # would make MolFromSmiles raise and abort a whole DataFrame.apply run.
    # NOTE(review): a blank SMILES appears to parse to an atom-less molecule
    # rather than None, which calculateScore is not expected to handle.
    if not isinstance(smiles, str) or not smiles.strip():
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None.
        return None
    return calculateScore(mol)

def compute_ra_score(smiles):
    """
    Score one molecule with the ML-based retrosynthetic accessibility model.

    The prediction is delegated to the module-level ``nn_scorer``. Any
    exception raised during prediction is reported on stdout and converted
    into a None result, so a single bad input does not stop a batch run.

    Parameters
    ----------
    smiles : str
        SMILES string passed unchanged to ``nn_scorer.predict``.

    Returns
    -------
    Whatever ``nn_scorer.predict`` returns, or None if it raised.
    """
    try:
        prediction = nn_scorer.predict(smiles)
    except Exception as e:
        # Best-effort by design: log the offending input and carry on.
        print(f"RA prediction failed for: {smiles} | Error: {e}")
        prediction = None
    return prediction

# Input table; the scoring cells below read molecules from its 'SMILES' column.
input_path = "1K_SMILES.csv"
df = pd.read_csv(input_path)

# Fail fast with an actionable message instead of a bare KeyError('SMILES')
# surfacing later in a different cell.
if 'SMILES' not in df.columns:
    raise KeyError(
        f"Expected a 'SMILES' column in {input_path!r}; "
        f"found columns: {list(df.columns)}"
    )


In [5]:
# Score every molecule's synthetic accessibility, one SMILES at a time;
# rows for which compute_sa_score returns None end up as missing values.
df['SA_score'] = df['SMILES'].map(compute_sa_score)

In [None]:
# Score every molecule's retrosynthetic accessibility, one SMILES at a time;
# rows for which compute_ra_score returns None end up as missing values.
df['RA_score'] = df['SMILES'].map(compute_ra_score)

In [None]:
# Optional: persist the table, including the new score columns, as CSV.
# index=False keeps the DataFrame's row index out of the file.
df.to_csv(path_or_buf="1K_SMILES_SA_RA.csv", index=False)