In [None]:
# If RDKit is not installed, run the command below to install it (otherwise comment it out)
!pip install rdkit-pypi

In [None]:
# Import Necessary Libraries
import rdkit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import rdDistGeom
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit.ML.Cluster import Butina
print(rdkit.__version__)

"""
We will use the newer version of the Distance Geometry (DG) module, called ETKDG.
The conformer generator ETKDG is a stochastic search method that utilizes distance 
geometry together with knowledge derived from experimental crystal structures.
It has been shown to generate good conformers for acyclic, flexible molecules.
This work builds on ETKDG to improve conformer generation of molecules containing
small or large aliphatic (i.e., non-aromatic) rings.
Reference:
Improving Conformer Generation for Small Rings and Macrocycles Based on Distance Geometry and Experimental Torsional-Angle Preferences
https://pubs.acs.org/doi/10.1021/acs.jcim.0c00025

"""

def generate_conformers(molecule, num_conformers=300, random_seed=0xd06f00d, num_threads=10):
    """
    Embed several 3D conformers on a molecule using ETKDGv3 parameters.

    Args:
    - molecule (rdkit.Chem.rdchem.Mol): Molecule handed to the embedder.
    - num_conformers (int): How many conformers to request.
    - random_seed (int): Value assigned to the embedding parameters' ``randomSeed``.
    - num_threads (int): Value assigned to the embedding parameters' ``numThreads``.

    Returns:
    - The conformer IDs exactly as returned by
      ``rdDistGeom.EmbedMultipleConfs`` (an int sequence).
    """
    # Start from the ETKDGv3 defaults and override only seed and threading.
    embed_params = rdDistGeom.ETKDGv3()
    embed_params.randomSeed = random_seed
    embed_params.numThreads = num_threads

    return rdDistGeom.EmbedMultipleConfs(molecule, num_conformers, embed_params)

def align_and_cluster_conformers(molecule, cids, core_smiles, threshold=1.5):
    """
    Cluster a molecule's conformers by pairwise RMSD and align them on a core.

    The pairwise distances are computed with ``rdMolAlign.GetBestRMS`` between
    the conformers named in ``cids``, the conformers are then aligned on the
    atoms matching ``core_smiles``, and finally the distances are clustered
    with the Butina algorithm.

    Args:
    - molecule (rdkit.Chem.rdchem.Mol): RDKit molecule object with conformers.
    - cids (sequence of int): Conformer IDs to compare and cluster.
    - core_smiles (str): SMILES string for the core substructure used for alignment.
    - threshold (float): RMSD threshold for clustering.

    Returns:
    - The clusters as returned by ``Butina.ClusterData``. Cluster members are
      positions into ``cids`` (0 .. len(cids) - 1), not conformer IDs; map them
      back with ``cids[index]`` when the IDs are not simply 0..n-1.

    Raises:
    - ValueError: If ``core_smiles`` cannot be parsed into a molecule.
    """
    # Validate the core up front so a bad SMILES fails with a clear message
    # before the O(n^2) RMSD computation, instead of an opaque error later.
    core = Chem.MolFromSmiles(core_smiles)
    if core is None:
        raise ValueError("Could not parse core SMILES: {!r}".format(core_smiles))

    conf_ids = list(cids)
    num_confs = len(conf_ids)

    # Lower-triangle distance list in the order (1,0), (2,0), (2,1), ...,
    # which is the layout passed to ClusterData with isDistData=True.
    # Use the actual conformer IDs rather than the loop positions so that
    # non-contiguous or filtered ID lists are compared correctly.
    dists = [
        rdMolAlign.GetBestRMS(molecule, molecule, conf_ids[i], conf_ids[j])
        for i in range(num_confs)
        for j in range(i)
    ]

    # Align after the RMSD pass so the final coordinates are the core-aligned
    # ones. NOTE(review): if the core does not match, the match is empty and
    # AlignMolConformers presumably falls back to all atoms - confirm.
    core_match = molecule.GetSubstructMatch(core)
    rdMolAlign.AlignMolConformers(molecule, atomIds=core_match)

    return Butina.ClusterData(dists, num_confs, threshold, isDistData=True, reordering=True)

if __name__ == '__main__':
    # Example run: generate, align and cluster conformers for
    # "7-Methoxycoumarin-4-acetic Acid N-Succinimidyl Ester".
    m_noH = Chem.MolFromSmiles('COC1=CC2=C(C=C1)C(=CC(=O)O2)CC(=O)ON3C(=O)CCC3=O')
    # Add explicit hydrogens before the conformers are embedded.
    m_H = Chem.AddHs(m_noH)
    
    # Generate conformers on the hydrogen-containing molecule (default settings)
    cids = generate_conformers(m_H)
    
    # Remove hydrogens for alignment and clustering.
    # NOTE(review): the conformer IDs in `cids` come from m_H but are used with
    # m_3d below; this assumes RemoveHs keeps the conformers and their IDs - confirm.
    m_3d = Chem.RemoveHs(m_H)
    
    # Align on the core substructure below and cluster with the default threshold
    core_smiles = 'C1=CC2=C(C=C1)C(=CC(=O)O2)C'
    clusters = align_and_cluster_conformers(m_3d, cids, core_smiles)
    
    # Visualization and further analysis can be added as needed
