# nvMolKit Conformer Generation and MMFF Optimization Tutorial
This notebook demonstrates the complete workflow for:
1. Loading molecules from SDF files
2. Generating conformers with GPU-accelerated ETKDG
3. Optimizing the conformers with the MMFF force field

In [None]:
import os
import time
from rdkit import Chem
from rdkit.Chem.rdDistGeom import ETKDGv3
from nvmolkit.embedMolecules import EmbedMolecules as nvMolKitEmbed
from nvmolkit.types import HardwareOptions
from nvmolkit.mmffOptimization import MMFFOptimizeMoleculesConfs as nvMolKitMMFFOptimize

In [None]:
# Tutorial settings, gathered in one cell so they are easy to adjust.
RANDOM_SEED: int = 42  # seed assigned to the ETKDG parameters
CONFORMERS_PER_MOLECULE: int = 5  # conformers requested for each molecule
MAX_MOLECULES: int = 50  # cap applied while reading the SDF file
SDF_FILE: str = "../benchmarks/data/MPCONF196.sdf"  # input structure file

In [None]:
# Build the ETKDGv3 parameter object that is passed to the embedding call.
params = ETKDGv3()
params.useRandomCoords = True  # this setting is required for nvMolKit
params.randomSeed = RANDOM_SEED  # fixed seed from the configuration cell

## Step 1: Load molecules from SDF file

In [None]:
# Read up to MAX_MOLECULES valid molecules from the SDF file into `molecules`.
# Raises FileNotFoundError when SDF_FILE does not exist.
if not os.path.exists(SDF_FILE):
    raise FileNotFoundError(f"SDF file not found: {SDF_FILE}")

supplier = Chem.SDMolSupplier(SDF_FILE, removeHs=False, sanitize=True)
molecules = []
skipped_records = 0

for mol in supplier:
    # Cap on the number of accepted molecules, not on the record index, so
    # records that fail to parse do not use up part of the MAX_MOLECULES
    # budget and the loop stops as soon as the cap is reached.
    if len(molecules) >= MAX_MOLECULES:
        break
    if mol is None:
        # The supplier yields None for a record it could not turn into a molecule.
        skipped_records += 1
        continue

    # Clear any existing conformers for clean embedding tests
    mol.RemoveAllConformers()
    molecules.append(mol)

# Surface parse failures instead of dropping them silently.
if skipped_records:
    print(f"Skipped {skipped_records} unreadable records in {SDF_FILE}")
print(f"Successfully loaded {len(molecules)} molecules from {SDF_FILE}")

## Step 2: Generate conformers using GPU-accelerated ETKDG

In [None]:
# Hardware settings passed to the GPU embedding call.
hardware_opts = HardwareOptions(
    preprocessingThreads=2,
    batchSize=25,
    batchesPerGpu=2,
)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()

# The return value is not used; the resulting conformers are counted from the
# molecule objects right after the call.
nvMolKitEmbed(
    molecules=molecules,
    params=params,
    confsPerMolecule=CONFORMERS_PER_MOLECULE,
    maxIterations=-1,  # Automatic iteration calculation
    hardwareOptions=hardware_opts,
)

embedding_time = time.perf_counter() - start_time
total_conformers = sum(mol.GetNumConformers() for mol in molecules)
# Guard the division so a zero-length interval cannot raise ZeroDivisionError.
embedding_rate = (
    total_conformers / embedding_time if embedding_time > 0 else float("inf")
)

print(f"Conformer generation completed in {embedding_time:.2f} seconds")
print(f"Generated {total_conformers} total conformers")
print(f"Rate: {embedding_rate:.1f} conformers/second")


## Step 3: Optimize conformers using MMFF force field

In [None]:
# Hardware settings passed to the MMFF optimization call.
mmff_hardware_opts = HardwareOptions(
    preprocessingThreads=4,
    batchSize=0,  # Process all conformers together
)

# Count the conformers up front; the total is the numerator of the rate below.
total_conformers = sum(mol.GetNumConformers() for mol in molecules)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()

# `energies` holds whatever the optimizer returns; it is not used further in
# this cell.
energies = nvMolKitMMFFOptimize(
    molecules=molecules,
    maxIters=200,
    nonBondedThreshold=100.0,
    hardwareOptions=mmff_hardware_opts,
)

optimization_time = time.perf_counter() - start_time
# Guard the division so a zero-length interval cannot raise ZeroDivisionError.
optimization_rate = (
    total_conformers / optimization_time if optimization_time > 0 else float("inf")
)

print(f"MMFF optimization completed in {optimization_time:.2f} seconds")
print(f"Rate: {optimization_rate:.1f} conformers/second")