## Outline

This notebook details how to set up the MM/ML simulations.

## Fitting the LJ terms

In [None]:
import argparse
import os
import sys
from pathlib import Path

# Set environment variables BEFORE importing JAX (or any package that may
# import JAX itself, such as mmml): the XLA/CUDA runtime reads these when it
# starts up, so values assigned after that point can be silently ignored.
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = ".99"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import ase
import jax
import jax.numpy as jnp
import numpy as np

import mmml

# Check JAX configuration: list the devices JAX can see and the active backend
devices = jax.local_devices()
print(devices)
print(jax.default_backend())
print(jax.devices())


# Setup: Mock CLI Arguments (following run_sim.py structure)

This cell creates a mock args object that mimics the CLI arguments from `run_sim.py`.
This allows the notebook to follow the same structure as the script.

In [None]:
# Import required modules (following run_sim.py structure)
from mmml.cli.base import (
    load_model_parameters,
    resolve_checkpoint_paths,
    setup_ase_imports,
    setup_mmml_imports,
)
from mmml.pycharmmInterface import import_pycharmm
import pycharmm
from mmml.pycharmmInterface.mmml_calculator import setup_calculator, CutoffParameters
from mmml.physnetjax.physnetjax.data.data import prepare_datasets
from mmml.physnetjax.physnetjax.data.batches import prepare_batches_jit
from mmml.pycharmmInterface.setupBox import setup_box_generic
from mmml.pycharmmInterface.import_pycharmm import reset_block, coor
from mmml.pycharmmInterface.pycharmmCommands import CLEAR_CHARMM

# Setup ASE imports
# NOTE: the second assignment rebinds CutoffParameters and setup_calculator,
# which were already imported from mmml.pycharmmInterface.mmml_calculator in
# this cell; from here on those names refer to whatever setup_mmml_imports()
# returns (presumably the same objects — TODO confirm).
Atoms = setup_ase_imports()
CutoffParameters, ev2kcalmol, setup_calculator, get_ase_calc = setup_mmml_imports()

# Additional imports for simulation
import ase.io as ase_io
from ase.md.velocitydistribution import MaxwellBoltzmannDistribution, Stationary, ZeroRotation
from ase.md.verlet import VelocityVerlet
import ase.optimize as ase_opt

/scicore/home/meuwly/boitti0000/mmml/mmml/data/top_all36_cgenff.rtf
/scicore/home/meuwly/boitti0000/mmml/mmml/data/par_all36_cgenff.prm
CHARMM_HOME /scicore/home/meuwly/boitti0000/mmml/setup/charmm
CHARMM_LIB_DIR /scicore/home/meuwly/boitti0000/mmml/setup/charmm
  
 CHARMM>     BLOCK
 Block structure initialized with   3 blocks.
 All atoms have been assigned to block 1.
 All interaction coefficients have been set to unity.
  Setting number of block exclusions nblock_excldPairs=0
  
  BLOCK>            CALL 1 SELE ALL END
 SELRPN>      0 atoms have been selected out of      0
 The selected atoms have been reassigned to block   1
  
  BLOCK>              COEFF 1 1 1.0
  
  BLOCK>            END
 Matrix of Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of BOND Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of ANGLE Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.0

In [None]:
# ========================================================================
# MOCK CLI ARGUMENTS (spoofing run_sim.py CLI)
# ========================================================================
# Create a mock args object that mimics the CLI arguments from run_sim.py
# This allows the notebook to follow the same structure as the script

class MockArgs:
    """Mock CLI arguments following run_sim.py structure.

    Instances carry the same attribute names the notebook reads from ``args``.
    All attributes receive the defaults listed in ``__init__``; any of them can
    be replaced at construction time with a keyword argument, e.g.
    ``MockArgs(n_monomers=3, temperature=300.0)``.

    Args:
        **overrides: Attribute name/value pairs replacing the defaults.
            ``n_atoms_monomer`` and ``atoms_per_monomer`` are aliases: when
            only one of them is overridden the other is set to the same value.

    Raises:
        TypeError: If an override names an attribute that does not exist
            (guards against silently ignored typos).
    """
    def __init__(self, **overrides):
        # Paths
        self.pdbfile = None  # Will be created from valid_data if needed
        # Picks up a notebook-level RESTART variable when one is defined
        self.checkpoint = Path(RESTART) if 'RESTART' in globals() else None

        # System parameters
        self.n_monomers = 2
        self.n_atoms_monomer = 10
        self.atoms_per_monomer = 10  # Alias for compatibility

        # Calculator parameters (forwarded to setup_calculator / CutoffParameters)
        self.ml_cutoff = 2.0      # distance where the ML potential is cut off
        self.mm_switch_on = 4.0   # distance where the MM potential starts switching on
        self.mm_cutoff = 1.0      # "final cutoff for MM potential" per CutoffParameters
        self.include_mm = True
        self.skip_ml_dimers = False
        self.debug = False

        # MD simulation parameters
        self.temperature = 210.0
        self.timestep = 0.1
        self.nsteps_jaxmd = 100_000
        self.nsteps_ase = 10000
        self.ensemble = "nvt"
        self.heating_interval = 500
        self.write_interval = 100
        self.energy_catch = 0.5

        # Output
        self.output_prefix = "md_simulation"
        self.cell = None  # No PBC by default

        # Validation
        self.validate = False

        # Reject unknown names so a misspelled option fails loudly
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError(f"Unknown MockArgs option(s): {', '.join(unknown)}")

        # Keep the two atoms-per-monomer aliases consistent
        if "n_atoms_monomer" in overrides and "atoms_per_monomer" not in overrides:
            overrides = {**overrides, "atoms_per_monomer": overrides["n_atoms_monomer"]}
        elif "atoms_per_monomer" in overrides and "n_atoms_monomer" not in overrides:
            overrides = {**overrides, "n_atoms_monomer": overrides["atoms_per_monomer"]}

        for name, value in overrides.items():
            setattr(self, name, value)

# Instantiate the mock CLI namespace used throughout the notebook
args = MockArgs()

# Notebook-level constants, when already defined, take precedence over the defaults
if 'ATOMS_PER_MONOMER' in globals():
    args.n_atoms_monomer = args.atoms_per_monomer = ATOMS_PER_MONOMER
if 'N_MONOMERS' in globals():
    args.n_monomers = N_MONOMERS

# Report the settings that matter most for the calculator setup
print("Mock args created:")
print(
    *(
        f"  {field}: {getattr(args, field)}"
        for field in ("n_monomers", "n_atoms_monomer", "ml_cutoff", "mm_switch_on", "mm_cutoff")
    ),
    sep="\n",
)

[31mInit signature:[39m
CutoffParameters(
    ml_cutoff: [33m'float'[39m = [32m2.0[39m,
    mm_switch_on: [33m'float'[39m = [32m5.0[39m,
    mm_cutoff: [33m'float'[39m = [32m1.0[39m,
)
[31mDocstring:[39m      Parameters for ML and MM cutoffs and switching functions
[31mInit docstring:[39m
Args:
    ml_cutoff: Distance where ML potential is cut off
    mm_switch_on: Distance where MM potential starts switching on
    mm_cutoff: Final cutoff for MM potential
[31mFile:[39m           ~/mmml/mmml/pycharmmInterface/mmml_calculator.py
[31mType:[39m           type
[31mSubclasses:[39m     

In [None]:
# Notebook-level system size constants, taken from the (mock) CLI arguments
ATOMS_PER_MONOMER, N_MONOMERS = args.n_atoms_monomer, args.n_monomers

In [None]:
# Load Data and Prepare Batches (following run_sim.py structure)

This cell loads the validation data and prepares batches that will be used to initialize simulations.
Note: The residue numbers in the PDB/PSF may need to be adjusted based on the actual system.

[31mSignature:[39m
setup_calculator(
    ATOMS_PER_MONOMER,
    N_MONOMERS: [33m'int'[39m = [32m2[39m,
    ml_cutoff_distance: [33m'float'[39m = [32m2.0[39m,
    mm_switch_on: [33m'float'[39m = [32m5.0[39m,
    mm_cutoff: [33m'float'[39m = [32m1.0[39m,
    doML: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    doMM: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    doML_dimer: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    debug: [33m'bool'[39m = [38;5;28;01mFalse[39;00m,
    ep_scale=[38;5;28;01mNone[39;00m,
    sig_scale=[38;5;28;01mNone[39;00m,
    model_restart_path=[38;5;28;01mNone[39;00m,
    MAX_ATOMS_PER_SYSTEM=[32m100[39m,
    ml_energy_conversion_factor: [33m'float'[39m = [32m23.060548012069496[39m,
    ml_force_conversion_factor: [33m'float'[39m = [32m23.060548012069496[39m,
    cell=[38;5;28;01mFalse[39;00m,
    verbose: [33m'bool'[39m = [38;5;28;01mFalse[39;00m,
)
[31mDocstring:[39m <no docstring>
[31mFile:[39m      ~/mmm

In [None]:
# ========================================================================
# LOAD DATA AND PREPARE BATCHES (following run_sim.py structure)
# ========================================================================

# Initialize random key for data loading (an existing data_key is reused)
if 'data_key' not in globals():
    data_key = jax.random.PRNGKey(42)

# Locate the dataset. SCICORE, when defined, is the root directory the data
# lives under; Path() makes the join work whether it is a str or a Path.
if 'SCICORE' in globals():
    data_file = Path(SCICORE) / "mmml/mmml/data/fixed-acetone-only_MP2_21000.npz"
else:
    # Fallback: adjust path as needed
    data_file = Path("/scicore/home/meuwly/boitti0000/mmml/mmml/data/fixed-acetone-only_MP2_21000.npz")

print(f"Loading data from: {data_file}")

# Fail early with an actionable message instead of deep inside the loader
if not data_file.is_file():
    raise FileNotFoundError(
        f"Data file not found: {data_file}. Define SCICORE or adjust the fallback path."
    )

# Total atoms per configuration (all monomers together)
n_atoms_total = ATOMS_PER_MONOMER * N_MONOMERS

# Prepare datasets
train_data, valid_data = prepare_datasets(
    data_key,
    10500,  # num_train
    10500,  # num_valid
    [data_file],
    natoms=n_atoms_total,
)

# Prepare batches (batch size 1); validation batches are used to initialize simulations.
# NOTE(review): the same PRNG key is reused for dataset preparation and both
# batch preparations; JAX guidance is to split keys — confirm whether
# independent shuffles are wanted before changing this.
valid_batches = prepare_batches_jit(data_key, valid_data, 1, num_atoms=n_atoms_total)
train_batches = prepare_batches_jit(data_key, train_data, 1, num_atoms=n_atoms_total)

print(f"Loaded {len(valid_data['R'])} validation samples")
print(f"Prepared {len(valid_batches)} validation batches")
print(f"Each batch contains {len(valid_batches[0]['R'])} atoms")

ERROR:asyncio:Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "/scicore/home/meuwly/boitti0000/.conda/envs/mmml-full/lib/python3.12/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
RuntimeError: cannot enter context: <_contextvars.Context object at 0x14b09fda8340> is already entered
ERROR:asyncio:Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "/scicore/home/meuwly/boitti0000/.conda/envs/mmml-full/lib/python3.12/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
RuntimeError: cannot enter context: <_contextvars.Context object at 0x14b09fda8340> is already entered
ERROR:asyncio:Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "/scicore/home/meuwly/boitti0000/.conda/envs/mmml-full/lib/python3.12/asyncio/events.py", line 88, in _run
  

[setup_calculator] Cutoff inputs -> ml_cutoff_distance=2.0000, mm_switch_on=4.0000, mm_cutoff=1.0000
[setup_calculator] CutoffParameters stored -> ml_cutoff=2.0000, mm_switch_on=4.0000, mm_cutoff=1.0000
0 1
unique_res_ids [0, 1]
len(dimer_perms) 1


ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-868' coro=<_async_in_context.<locals>.run_in_context() done, defined at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/utils.py:57> wait_for=<Task pending name='Task-890' coro=<Kernel.shell_main() running at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py:590> cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.<locals>._log_error() at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/zmq/eventloop/zmqstream.py:563]>
  def __init__(self, callback, args, loop, context=None):
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-890' coro=<Kernel.shell_main() running at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py:590> cb=[Task.__wakeup()]>
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-891' coro=<_async_in

dict_keys(['opt_state', 'params', 'step'])


In [None]:
# Additional utility imports (if needed)
from ase.visualize.plot import plot_atoms

In [None]:
# Additional PyCHARMM helper modules (setupBox is used below to initialize the PSF)
from mmml.pycharmmInterface import setupRes, setupBox

CHARMM_HOME:  /scicore/home/meuwly/boitti0000/mmml/setup/charmm
CHARMM_LIB_DIR:  /scicore/home/meuwly/boitti0000/mmml/setup/charmm
/scicore/home/meuwly/boitti0000/mmml/setup/charmm
/scicore/home/meuwly/boitti0000/mmml/setup/charmm
['O', 'H', 'H']
['O', 'H', 'H']
[8, 1, 1]
['C', 'H', 'H', 'O', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'H']
['C', 'H', 'H', 'O', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'C', 'H', 'H', 'H']
[6, 1, 1, 8, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 6, 1, 1, 1]


In [None]:
# This cell is now handled in cell 15 (Setup PyCHARMM Box)
# Keeping this cell empty for now - can be removed if not needed

  
 CHARMM>     DELETE ATOM SELE ALL END
  
  
 CHARMM>     DELETE PSF SELE ALL END
  
  
 CHARMM>     BLOCK
  
  BLOCK>            CALL 1 SELE ALL END
 SELRPN>      0 atoms have been selected out of      0
 The selected atoms have been reassigned to block   1
  
  BLOCK>              COEFF 1 1 1.0
  
  BLOCK>            END
 Matrix of Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of BOND Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of ANGLE Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of DIHE Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of CROSS Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of ELEC Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of VDW Interac

In [None]:
# ========================================================================
# LOAD MODEL AND SETUP CALCULATOR (following run_sim.py structure)
# ========================================================================

# Work out where the checkpoint lives: prefer args.checkpoint, then RESTART
if args.checkpoint is not None:
    base_ckpt_dir, epoch_dir = resolve_checkpoint_paths(args.checkpoint)
    print(f"Checkpoint base dir: {base_ckpt_dir}")
    print(f"Checkpoint epoch dir: {epoch_dir}")
elif 'RESTART' in globals():
    # No explicit checkpoint: use RESTART for both the base and epoch directory
    base_ckpt_dir = epoch_dir = Path(RESTART)
else:
    raise ValueError("Checkpoint path must be provided via args.checkpoint or RESTART variable")

# Restore the trained model for the full system size
natoms = ATOMS_PER_MONOMER * N_MONOMERS
params, model = load_model_parameters(epoch_dir, natoms)
model.natoms = natoms
print(f"Model loaded: {model}")

# Options for the calculator factory (following run_sim.py)
_calculator_options = dict(
    ATOMS_PER_MONOMER=args.n_atoms_monomer,
    N_MONOMERS=args.n_monomers,
    ml_cutoff_distance=args.ml_cutoff,
    mm_switch_on=args.mm_switch_on,
    mm_cutoff=args.mm_cutoff,
    doML=True,
    doMM=args.include_mm,
    doML_dimer=not args.skip_ml_dimers,
    debug=args.debug,
    model_restart_path=base_ckpt_dir,
    MAX_ATOMS_PER_SYSTEM=natoms,
    ml_energy_conversion_factor=1,
    ml_force_conversion_factor=1,
    cell=args.cell,
)
calculator_factory = setup_calculator(**_calculator_options)

# Cutoff settings handed to every calculator created by the factory
CUTOFF_PARAMS = CutoffParameters(
    ml_cutoff=args.ml_cutoff, mm_switch_on=args.mm_switch_on, mm_cutoff=args.mm_cutoff
)
print(f"Cutoff parameters: {CUTOFF_PARAMS}")

dataR (21000, 20, 3)
dataE [-81.79712432 -81.48244884 -81.38548297 -81.44645775 -81.74704898
 -81.67295344 -81.32876002 -81.82201676 -81.8124061  -81.80508929]
dataE [-81.79712432 -81.48244884 -81.38548297 -81.44645775 -81.74704898
 -81.67295344 -81.32876002 -81.82201676 -81.8124061  -81.80508929]
D (21000, 3)
Q 1 (21000,) 21000
Q (21000,)


ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-833' coro=<_async_in_context.<locals>.run_in_context() running at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/utils.py:60> wait_for=<Task pending name='Task-835' coro=<Kernel.shell_main() running at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py:590> cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.<locals>._log_error() at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/zmq/eventloop/zmqstream.py:563]>
  outs = fun(*args)
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-835' coro=<Kernel.shell_main() running at /scicore/home/meuwly/boitti0000/.local/lib/python3.12/site-packages/ipykernel/kernelbase.py:590> cb=[Task.__wakeup()]>
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending name='Task-858' coro=<Kernel.shell_main() running at /scicore/home/meuwly/b

In [None]:
# Initialize Simulations from valid_data Batches

This section initializes simulations using positions and atomic numbers from `valid_data` batches.
Each batch can be used to create an ASE Atoms object and run a simulation.

  
 CHARMM>     DELETE ATOM SELE ALL END
  
  
 CHARMM>     DELETE PSF SELE ALL END
  
  
 CHARMM>     read rtf card -
 CHARMM>     name /scicore/home/meuwly/boitti0000/mmml/mmml/data/top_all36_cgenff.rtf
 VOPEN> Attempting to open::/scicore/home/meuwly/boitti0000/mmml/mmml/data/top_all36_cgenff.rtf::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> *  --------------------------------------------------------------------------  *
 TITLE> *          CGENFF: TOPOLOGY FOR THE CHARMM GENERAL FORCE FIELD V. 4.6          *
 TITLE> *                    FOR SMALL MOLECULE DRUG DESIGN                            *
 TITLE> *  --------------------------------------------------------------------------  *
 TITLE> *

 DRUDES PARTICLES WILL BE GENERATED AUTOMATICALLY FOR ALL ATOMS WITH NON-ZERO ALPHA
 Thole-type dipole screening, Slater-Delta shape {S(u) = 1 - (1+u/2)*exp(-u)}, default radius =  1.300000
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     r

In [None]:
# ========================================================================
# INITIALIZE SIMULATIONS FROM VALID_DATA BATCHES
# ========================================================================
# Following run_sim.py structure, we'll initialize simulations using valid_data batches

def initialize_simulation_from_batch(batch_idx=0):
    """
    Initialize a simulation from a valid_data batch.

    Reads positions ("R") and atomic numbers ("Z") from
    ``valid_batches[batch_idx]``, builds an ASE Atoms object, attaches a hybrid
    calculator created by ``calculator_factory`` and evaluates the initial
    energy and forces once.

    Args:
        batch_idx: Index of the batch to use (default: 0)

    Returns:
        atoms: ASE Atoms object initialized from the batch
        hybrid_calc: Hybrid calculator for the system

    Raises:
        ValueError: If R is neither 2-D nor 3-D, or if the batch holds fewer
            atoms than ATOMS_PER_MONOMER * N_MONOMERS.
    """
    # Get positions and atomic numbers from batch
    batch = valid_batches[batch_idx]
    R = batch["R"]
    Z = batch["Z"]

    # Extract the first configuration when a leading batch axis is present
    if R.ndim == 3:
        # Batch shape: (batch_size, n_atoms, 3)
        R = R[0]
        # Only strip the batch axis from Z if it has one; a 1-D Z is already
        # per-atom and indexing it would collapse it to a scalar.
        if Z.ndim == 2:
            Z = Z[0]
    elif R.ndim != 2:
        raise ValueError(f"Unexpected R shape: {R.shape}")

    # Ensure we have the right number of atoms
    n_atoms_expected = ATOMS_PER_MONOMER * N_MONOMERS
    if len(R) < n_atoms_expected or len(Z) < n_atoms_expected:
        # Slicing cannot repair a batch that is too short, so stop here
        raise ValueError(
            f"Batch {batch_idx} has {len(R)} positions and {len(Z)} atomic numbers, "
            f"expected at least {n_atoms_expected}"
        )
    if len(R) != n_atoms_expected or len(Z) != n_atoms_expected:
        # Extra entries: keep only the leading n_atoms_expected
        print(f"Warning: Expected {n_atoms_expected} atoms, got {len(R)}")
        R = R[:n_atoms_expected]
        Z = Z[:n_atoms_expected]

    print(f"Initializing simulation from batch {batch_idx}")
    print(f"  Positions shape: {R.shape}")
    print(f"  Atomic numbers shape: {Z.shape}")
    print(f"  Number of atoms: {len(R)}")

    # Create ASE Atoms object; explicit keywords and NumPy arrays avoid relying
    # on ASE interpreting the first positional argument as atomic numbers.
    atoms = ase.Atoms(numbers=np.asarray(Z), positions=np.asarray(R))

    # Create hybrid calculator (following run_sim.py); the factory's second
    # return value is not needed here.
    hybrid_calc, _ = calculator_factory(
        atomic_numbers=Z,
        atomic_positions=R,
        n_monomers=args.n_monomers,
        cutoff_params=CUTOFF_PARAMS,
        doML=True,
        doMM=args.include_mm,
        doML_dimer=not args.skip_ml_dimers,
        backprop=True,
        debug=args.debug,
        energy_conversion_factor=1,
        force_conversion_factor=1,
    )

    atoms.calc = hybrid_calc

    # Evaluate once so a broken setup surfaces immediately
    hybrid_energy = float(atoms.get_potential_energy())
    hybrid_forces = np.asarray(atoms.get_forces())
    print(f"Initial energy: {hybrid_energy:.6f} eV")
    print(f"Initial forces shape: {hybrid_forces.shape}")
    print(f"Max force: {np.abs(hybrid_forces).max():.6f} eV/Å")

    return atoms, hybrid_calc

# Initialize first simulation from batch 0; the returned Atoms object and its
# hybrid calculator are kept as the notebook-level names `atoms` and `hybrid_calc`.
atoms, hybrid_calc = initialize_simulation_from_batch(batch_idx=0)

In [None]:
# Setup PyCHARMM Box (if needed)

Note: Residue numbers may need to be adjusted based on your system.
The setupBox.initialize_psf function may need different residue names/numbers.


 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    CDIElec  FSHIft   VATOm    VFSWIt  
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 14.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 12.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  1.000
 NBXMOD =      5
 There are        0 atom  pairs and        0 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 <MAKINB> with mode   5 found   1200 exclusions and    600 interactions(1-4)
 <MAKGRP> found      0 group exclusions.
ENER ENR:  Eval#     ENERgy      Delta-E         GRMS
 ----------       ---------    ---------    ---------    ---------    ---------
ENER>        0     -0.00000      0.00000      0.00000
 ----------       ---------    ---------    ---------    ---------    ---------


In [None]:
# ========================================================================
# SETUP PyCHARMM BOX (following run_sim.py structure)
# ========================================================================
# Note: Residue numbers and names may need to be adjusted based on your system

# Clear CHARMM state, then call reset_block() before building a new PSF
CLEAR_CHARMM()
reset_block()

# Setup box (adjust residue name "ACO" and numbers as needed)
# The residue name should match your system (e.g., "ACO" for acetone)
# N_MONOMERS and the literal 30 are passed positionally.
# NOTE(review): their exact meaning is defined by setupBox.initialize_psf,
# which is not shown in this notebook — confirm before changing them.
# Failures are reported as a warning instead of being raised, so the notebook
# keeps running even when the PSF could not be built.
try:
    setupBox.initialize_psf("ACO", N_MONOMERS, 30, None)
    print("PSF initialized successfully")
except Exception as e:
    print(f"Warning: Could not initialize PSF: {e}")
    print("You may need to adjust residue names/numbers or create a PDB file first")

# View PyCHARMM state
mmml.pycharmmInterface.import_pycharmm.view_pycharmm_state()

In [None]:
# Initialize Multiple Simulations from valid_data Batches

This cell demonstrates how to initialize multiple simulations from different batches.
Each simulation can be run independently.

In [None]:
# ========================================================================
# INITIALIZE MULTIPLE SIMULATIONS FROM VALID_DATA BATCHES
# ========================================================================
# Following run_sim.py structure, we can initialize multiple simulations

def initialize_multiple_simulations(n_simulations=5):
    """
    Build up to ``n_simulations`` simulations, one per valid_data batch.

    Batches are taken in order starting from index 0. A batch whose
    initialization raises is reported with a warning and skipped, so the
    returned list may be shorter than requested.

    Args:
        n_simulations: Number of simulations to initialize (default: 5);
            capped at the number of available batches.

    Returns:
        List of (atoms, hybrid_calc) tuples
    """
    n_to_try = min(n_simulations, len(valid_batches))
    initialized = []

    for batch_no in range(n_to_try):
        try:
            sim_atoms, sim_calc = initialize_simulation_from_batch(batch_idx=batch_no)
        except Exception as err:
            # Best effort: report the failure and move on to the next batch
            print(f"Warning: Failed to initialize simulation from batch {batch_no}: {err}")
        else:
            initialized.append((sim_atoms, sim_calc))
            print(f"Successfully initialized simulation {batch_no+1}/{n_simulations}")

    return initialized

# Initialize multiple simulations
# Adjust n_simulations as needed. The function caps the count at
# len(valid_batches) and skips batches that fail, so fewer simulations than
# requested may come back; the print below reports the actual number.
simulations = initialize_multiple_simulations(n_simulations=5)
print(f"\nInitialized {len(simulations)} simulations from valid_data batches")

In [None]:
# Example: Run a Simple Energy Calculation

This demonstrates how to use the initialized simulations.

In [None]:
# ========================================================================
# EXAMPLE: RUN ENERGY CALCULATIONS
# ========================================================================

# Evaluate energy and forces for the first initialized simulation, if any
if not simulations:
    print("No simulations initialized. Check batch data and system parameters.")
else:
    atoms_example, calc_example = simulations[0]
    energy, forces = atoms_example.get_potential_energy(), atoms_example.get_forces()
    print(f"Example simulation energy: {energy:.6f} eV")
    print(f"Example simulation forces shape: {forces.shape}")
    print(f"Max force magnitude: {np.abs(forces).max():.6f} eV/Å")

  
 CHARMM>     BLOCK
  
  BLOCK>            CALL 1 SELE ALL END
 SELRPN>    500 atoms have been selected out of    500
 The selected atoms have been reassigned to block   1
  
  BLOCK>              COEFF 1 1 1.0
  
  BLOCK>            END
 Matrix of Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of BOND Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of ANGLE Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of DIHE Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of CROSS Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of ELEC Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 Matrix of VDW Interaction Coefficients
 
    1.00000
    1.00000   1.00000
    1.00000   1.00000   1.00000
 

Array gpu:0 -2.325e+04

In [None]:
# Next Steps: Running MD Simulations

To run MD simulations following `run_sim.py`, you can:
1. Use the `minimize_structure` function from run_sim.py
2. Use the `run_ase_md` function for ASE-based MD
3. Use JAX-MD for more advanced simulations

See `run_sim.py` for complete MD simulation setup.

[31mSignature:[39m
calculator_factory(
    atomic_numbers: [33m'Array'[39m,
    atomic_positions: [33m'Array'[39m,
    n_monomers: [33m'int'[39m,
    cutoff_params: [33m'CutoffParameters'[39m = [38;5;28;01mNone[39;00m,
    doML: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    doMM: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    doML_dimer: [33m'bool'[39m = [38;5;28;01mTrue[39;00m,
    backprop: [33m'bool'[39m = [38;5;28;01mFalse[39;00m,
    debug: [33m'bool'[39m = [38;5;28;01mFalse[39;00m,
    energy_conversion_factor: [33m'float'[39m = [32m1.0[39m,
    force_conversion_factor: [33m'float'[39m = [32m1.0[39m,
    verbose: [33m'bool'[39m = [38;5;28;01mNone[39;00m,
) -> [33m'Tuple[AseDimerCalculator, Callable]'[39m
[31mDocstring:[39m
Factory function to create calculator instances

Args:
    verbose: If True, store full ModelOutput breakdown in results.
             If None, defaults to debug value.
[31mFile:[39m      ~/mmml/mmml/pycharmmInte

In [None]:
# ========================================================================
# HELPER FUNCTIONS (from run_sim.py)
# ========================================================================
# These functions can be copied from run_sim.py for running MD simulations

def minimize_structure(atoms, run_index=0, nsteps=60, fmax=0.0006, charmm=False):
    """Minimize structure using BFGS optimizer (from run_sim.py).

    Optionally pre-minimizes with CHARMM's ABNR minimizer, then runs ASE BFGS
    with the calculator attached to ``atoms``, writes the optimization to
    ``bfgs_{run_index}_{args.output_prefix}_minimized.traj`` and pushes the
    final positions back to PyCHARMM.

    Args:
        atoms: ASE Atoms object with a calculator attached; modified in place.
        run_index: Integer used in the trajectory file name.
        nsteps: Maximum number of BFGS steps.
        fmax: Force convergence threshold passed to BFGS.
        charmm: If True, run a CHARMM ABNR minimization first and copy the
            resulting PyCHARMM coordinates onto ``atoms``.

    Returns:
        The same ``atoms`` object, after minimization.
    """
    # Local import: pandas is only needed for the PyCHARMM coordinate sync
    import pandas as pd

    if charmm:
        pycharmm.minimize.run_abnr(nstep=1000, tolenr=1e-6, tolgrd=1e-6)
        pycharmm.lingo.charmm_script("ENER")
        pycharmm.energy.show()
        atoms.set_positions(coor.get_positions())

    traj = ase_io.Trajectory(f'bfgs_{run_index}_{args.output_prefix}_minimized.traj', 'w')
    try:
        print("Minimizing structure with hybrid calculator")
        print(f"Running BFGS for {nsteps} steps")
        print(f"Running BFGS with fmax: {fmax}")
        _ = ase_opt.BFGS(atoms, trajectory=traj).run(fmax=fmax, steps=nsteps)
        # Sync with PyCHARMM: coor.set_positions is given a DataFrame with x/y/z columns
        xyz = pd.DataFrame(atoms.get_positions(), columns=["x", "y", "z"])
        coor.set_positions(xyz)
        # Append the final geometry as the last frame
        traj.write(atoms)
    finally:
        # Always release the file handle, even if the optimizer or sync fails
        traj.close()
    return atoms

# Example: Minimize the first simulation
if len(simulations) > 0:
    first_atoms, first_calc = simulations[0]
    # ASE's Atoms.copy() does not carry the calculator over, so re-attach it;
    # otherwise BFGS would have nothing to evaluate energies/forces with.
    atoms_to_minimize = first_atoms.copy()
    atoms_to_minimize.calc = first_calc
    # The minimization itself is left commented out, so say so instead of
    # claiming that it is running.
    print("Prepared a copy of the first simulation; uncomment the next line to run the minimization")
    # Uncomment to run minimization:
    # atoms_minimized = minimize_structure(atoms_to_minimize, run_index=0, nsteps=100, fmax=0.0006)


# Notes on Residue Numbers

When setting up PyCHARMM simulations:
- Residue names (e.g., "ACO" for acetone) must match your system
- Residue numbers in `setupBox.initialize_psf()` may need adjustment
- The number of residues should match `N_MONOMERS`
- If using a PDB file, ensure residue numbers are correct

To adjust:
1. Check your system's residue names in the topology file
2. Update `setupBox.initialize_psf()` call with correct residue name
3. Ensure residue numbers match the number of monomers


In [None]:
# ========================================================================
# SUMMARY
# ========================================================================
# Collect the report lines first, then emit them in one go
_rule = "=" * 60
print(
    _rule,
    "Simulation Setup Complete",
    _rule,
    f"Number of simulations initialized: {len(simulations)}",
    f"Number of atoms per simulation: {ATOMS_PER_MONOMER * N_MONOMERS}",
    f"Number of monomers: {N_MONOMERS}",
    f"Atoms per monomer: {ATOMS_PER_MONOMER}",
    f"ML cutoff: {args.ml_cutoff} Å",
    f"MM switch on: {args.mm_switch_on} Å",
    f"MM cutoff: {args.mm_cutoff} Å",
    f"Valid data batches available: {len(valid_batches)}",
    _rule,
    "\nTo run MD simulations, use the helper functions or refer to run_sim.py",
    "Note: Residue numbers may need adjustment based on your system",
    sep="\n",
)


In [None]:
# Show the current CHARMM energy terms. Use the fully qualified module:
# the bare name `energy` is rebound earlier in this notebook to a potential
# energy value, which has no .show() on a fresh Restart & Run All.
pycharmm.energy.show()


 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    CDIElec  FSHIft   VATOm    VFSWIt  
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 14.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 12.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  1.000
 NBXMOD =      5
 There are        0 atom  pairs and        0 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 <MAKINB> with mode   5 found   1200 exclusions and    600 interactions(1-4)
 <MAKGRP> found      0 group exclusions.
ENER ENR:  Eval#     ENERgy      Delta-E         GRMS
 ----------       ---------    ---------    ---------    ---------    ---------
ENER>        0     -0.00000      0.00000      0.00000
 ----------       ---------    ---------    ---------    ---------    ---------


In [None]:
# Pull positions (R) and atomic numbers (Z) of the first validation batch and display them
R, Z = (valid_batches[0][key] for key in ("R", "Z"))
R, Z