In [1]:
import warnings
from crimm import fetch_rcsb
from crimm.StructEntities.OrganizedModel import OrganizedModel
from crimm.Modeller.Solvator import Solvator

from crimm.Fetchers import fetch_rcsb
from crimm.Modeller import TopologyGenerator
from crimm.Modeller.CoordManipulator import CoordManipulator
from crimm.Modeller.LoopBuilder import ChainLoopBuilder
from crimm.Adaptors.PropKaAdaptors import PropKaProtonator
from crimm.Utils.StructureUtils import get_coords

# #NEW: PSF/CRD native I/O - read and write CHARMM files without pyCHARMM
from crimm.IO import read_psf, write_psf, write_crd, CRDParser

import pycharmm
from pycharmm.settings import set_verbosity as pcm_set_verbosity
from pycharmm import write as pcm_write
from pycharmm import NonBondedScript

from crimm.Adaptors.pyCHARMMAdaptors import (
    load_model,  # NEW: Unified loading function (recommended)
    load_chain, load_topology, load_water, load_ions, load_ligands, load_solvent_toppar,
    create_water_hs_from_charmm, fetch_coords_from_charmm, patch_disu_from_model,
    sd_minimize, get_charmm_coord_dict
)

import pycharmm.minimize as minimize
import pycharmm.energy as energy
from pycharmm import coor, crystal, image, cons_harm, cons_fix, generate



# Structure Preparation with crimm and pyCHARMM

This tutorial demonstrates how to prepare a molecular structure for CHARMM simulations using `crimm` and `pyCHARMM`. 

## Workflow Overview

The tutorial covers two main pathways:

### Pathway 1: Start from PDB/mmCIF (Default)
1. **Fetch structure** from RCSB PDB
2. **Organize** into `OrganizedModel` (protein, ligand, solvent, ions)
3. **Build missing loops** using AlphaFold templates
4. **Generate topology** with CHARMM force field parameters
5. **Apply protonation** patches based on pH
6. **Load into pyCHARMM** for minimization and solvation
7. **Write output** as PSF/CRD files

### Pathway 2: Start from Existing PSF/CRD (#NEW)
If you already have PSF/CRD files from a previous session, you can load them directly using crimm's native readers.

## Requirements
- `crimm` with topology generation support
- `pyCHARMM` for CHARMM integration
- (Optional) CGenFF executable for ligand parameterization

In [2]:
# cgenff excutable path is used later in topology generation
CGENFF_PATH = "/export/app/cgenff/silcsbio.2024.1/cgenff/cgenff"
PDBID = '4pti' #'5iev'#'1bg8' #'3q4k' #'4pti' #'2HZI' 

## Fetch from RCSB

The fetch_rcsb has be updated that it takes argument `organize`. When it is `True`, the structure will be organized into chain types, and an `OrganizedModel` will be returned instead of the unorganized structure entity.

In [3]:
structure = fetch_rcsb(
    PDBID,
    include_solvent=True, # We want to incude crystallographic water
    use_bio_assembly=True,
    organize=False,
    first_model_only=False
)

## Alternative: Load from Existing PSF/CRD Files (#NEW)

If you already have PSF and CRD files from a previous simulation or another source, you can skip the 
fetch/build steps and load directly using crimm's native readers. This is useful when:
- Resuming work from a previous session
- Loading structures prepared with standalone CHARMM
- Working with pre-parameterized systems

**Note**: The `read_psf` function returns a `PSFData` container with atoms, bonds, angles, etc.
The `CRDParser` returns coordinate data that can be applied to your structure.

In [4]:
# #NEW: Example of loading from existing PSF/CRD files
# Uncomment and modify paths to use this alternative workflow

# PSF_FILE = 'your_system.psf'
# CRD_FILE = 'your_system.crd'

# # Read PSF file - returns PSFData container with topology information
# psf_data = read_psf(PSF_FILE)
# print(f"Loaded {len(psf_data.atoms)} atoms from PSF")
# print(f"Bonds: {len(psf_data.bonds)}, Angles: {len(psf_data.angles)}, Dihedrals: {len(psf_data.dihedrals)}")

# # Read CRD file - returns coordinate information
# crd_parser = CRDParser(CRD_FILE)
# coords = crd_parser.parse()
# print(f"Loaded coordinates for {len(coords)} atoms")

In [5]:
## the OrganinzedModel is improved with more feature and APIs
## and has become the main object that deals with modeling and interfacing pyCHARMM
## There will be another notebook showcasing more about OrganizedModel

model = OrganizedModel(structure)
model

NGLWidget()

<OrganizedModel model=4PTI Polypeptide(L)=1 Solvent=1 >
	│
	├───<Polypeptide(L) id=A Residues=58>
	├──────Description: TRYPSIN INHIBITOR
	│
	├───<Solvent id=B Molecules=60>
	├──────Residue ID(s): HOH
	├──────Description: water


In [6]:
## Place the model center to (0, 0, 0) and place the principle axis along x-axis
coord_man = CoordManipulator()
coord_man.load_entity(model)
coord_man.orient_coords()

In [7]:
# build missing loops if exist
for chain in model.protein:
    if not chain.is_continuous():
        # chain can be built in place now by specifying `inplace = True`
        looper = ChainLoopBuilder(chain, inplace = True)
        # looper.build_from_homology(max_num_match=10, identity_score_cutoff=0.95)
        # missing terminals will also be built if `include_terminal = True`
        looper.build_from_alphafold(include_terminal = False)

In [8]:
chain.is_continuous()

True

## Generate Topology

Topology generation is simplified by using organized model. If `cgenff_path` is specified, ligands are also generated. Missing hydrogens from water molecules will be added.

In [9]:
TopologyGenerator?

[0;31mInit signature:[0m [0mTopologyGenerator[0m[0;34m([0m[0mcgenff_excutable_path[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mcgenff_output_path[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Class for generating topology elements from the topology definition
and parameters.
The topology definition the parameters are loaded from the CHARMM 36 RTF and 
PRM files.
If the cgenff_excutable_path is provided, the topology definition and 
parameters for the heterogen residues are generated by cgenff, and ligand mol2
file and cgenff rtf file will be saved if cgenff_output_path is specified.
[0;31mInit docstring:[0m Initialize the topology generator.
[0;31mFile:[0m           ~/crimm/crimm/Modeller/TopoLoader.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

In [10]:
topo = TopologyGenerator(
    cgenff_excutable_path=CGENFF_PATH,
    cgenff_output_path='./cgenff/'
)
topo.generate_model(
    model,
    prot_first_patch='ACE',
    prot_last_patch='CT3',
    coerce=True
)



### Printing out the TOPPAR and their Versions Being Used and Loaded

In [11]:
for rtf_type, topo_loader in topo.res_def_dict.items():
    print(rtf_type, 'toppar version:', topo_loader.rtf_version)

cgenff toppar version: 36.1
protein toppar version: 36.2
water_ions toppar version: 31.1


In [12]:
TopologyGenerator?

[0;31mInit signature:[0m [0mTopologyGenerator[0m[0;34m([0m[0mcgenff_excutable_path[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mcgenff_output_path[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Class for generating topology elements from the topology definition
and parameters.
The topology definition the parameters are loaded from the CHARMM 36 RTF and 
PRM files.
If the cgenff_excutable_path is provided, the topology definition and 
parameters for the heterogen residues are generated by cgenff, and ligand mol2
file and cgenff rtf file will be saved if cgenff_output_path is specified.
[0;31mInit docstring:[0m Initialize the topology generator.
[0;31mFile:[0m           ~/crimm/crimm/Modeller/TopoLoader.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     

Modified residue creates breaks in chain after coersion

## Protonation
For the purpose of illustration, we make it really acidic so it protonates

In [13]:
protonator = PropKaProtonator(topo, pH = 4)
protonator.load_model(model)
# if there is any pathching applied in crimm, CHARMM PATCH command will be automatically run 
# when protein chains are loaded into CHARMM
protonator.apply_patches()

Unexpected number (14) of atoms in residue ARG   1 A   in conformation 1A
Unexpected number (7) of atoms in residue ALA  58 A   in conformation 1A


Protonation patches applied on chain A:
{7: 'GLUP', 49: 'GLUP'}


## Load Model into pyCHARMM

### Option 1: Unified Loading with `load_model()` (Recommended)

**New in crimm 2026.1**: The `load_model()` function provides a simplified one-call approach to load your entire model into pyCHARMM. This uses the native PSF/CRD format internally, which is more reliable and preserves all topology information including disulfide bonds.

```python
load_model(model)  # Loads topology params + entire model via PSF/CRD
```

This single call:
- Loads all topology parameters (RTF/PRM files) automatically
- Loads protein chains, ligands, water, and ions via PSF/CRD format
- Preserves disulfide bonds without needing separate `patch_disu_from_model()` call
- Handles lone pairs for CGENFF ligands automatically

In [14]:
# NEW: Load entire model in one call (recommended approach)
# This loads topology params AND structure via PSF/CRD format
# when separate_crystal_segid is True, water and ions from crystal structure
# will be assigned segid of "CRTW" and "CRTI". (False by default)
load_model(model, separate_crystal_segids=True)

  
 CHARMM>     read rtf card -
 CHARMM>     name /tmp/tmp9pjic7yb
 VOPEN> Attempting to open::/tmp/tmp9pjic7yb::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> * PROTEIN RTF LOADED FROM CRIMM
 TITLE> 36  2
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     read param card -
 CHARMM>     name /tmp/tmpt7ohcead -
 CHARMM>     flex
 VOPEN> Attempting to open::/tmp/tmpt7ohcead::

          PARAMETER FILE BEING READ FROM UNIT 91
 TITLE> * PROTEIN PRM LOADED FROM CRIMM
 TITLE> *>>>> CHARMM36 ALL-HYDROGEN PARAMETER FILE FOR PROTEINS <<<<<<<<<<
 TITLE> *>>>>> INCLUDES PHI, PSI CROSS TERM MAP (CMAP) CORRECTION <<<<<<<<
 TITLE> *>>>>>>>>>>>>>>>>>>>>>>>>>> JAN. 2016 <<<<<<<<<<<<<<<<<<<<<<<<<<<<
 TITLE> * ALL COMMENTS TO THE CHARMM WEB SITE: WWW.CHARMM.ORG
 TITLE> *             PARAMETER SET DISCUSSION FORUM
 TITLE> *
 PARMIO> NONBOND, HBOND lists and IMAGE atoms cleared.
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM> 

### Option 2: Sequential Loading (Legacy/Advanced)

**Note**: The following sequential loading approach is preserved for backwards compatibility and for users who need fine-grained control over the loading process. In previous versions, this was the only way to load structures into pyCHARMM.

If you used `load_model()` above, **skip this section** and proceed to "Minimize the Protein Chain First".

The sequential approach requires:
1. Loading topology parameters separately
2. Loading each component type individually
3. Manually patching disulfide bonds

In [15]:
# LEGACY APPROACH: Load topology separately
# Skip this if you used load_model() above

## All the topology definition and parameter generated for the model is 
## organized in model.topology_loader. load_topology() takes care of 
## loading sequence and only loads what is need for the model
load_topology(model.topology_loader)

  
 CHARMM>     read rtf card -
 CHARMM>     name /tmp/tmpg0gx6h4_
 VOPEN> Attempting to open::/tmp/tmpg0gx6h4_::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> * PROTEIN RTF LOADED FROM CRIMM
 TITLE> 36  2
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     read param card -
 CHARMM>     name /tmp/tmpi3xbw25o -
 CHARMM>     flex
 VOPEN> Attempting to open::/tmp/tmpi3xbw25o::

          PARAMETER FILE BEING READ FROM UNIT 91
 TITLE> * PROTEIN PRM LOADED FROM CRIMM
 TITLE> *>>>> CHARMM36 ALL-HYDROGEN PARAMETER FILE FOR PROTEINS <<<<<<<<<<
 TITLE> *>>>>> INCLUDES PHI, PSI CROSS TERM MAP (CMAP) CORRECTION <<<<<<<<
 TITLE> *>>>>>>>>>>>>>>>>>>>>>>>>>> JAN. 2016 <<<<<<<<<<<<<<<<<<<<<<<<<<<<
 TITLE> * ALL COMMENTS TO THE CHARMM WEB SITE: WWW.CHARMM.ORG
 TITLE> *             PARAMETER SET DISCUSSION FORUM
 TITLE> *
 PARMIO> NONBOND, HBOND lists and IMAGE atoms cleared.
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM> 

In [None]:
# LEGACY APPROACH: Load protein chains sequentially
# Skip this if you used load_model() above

for chain in model.protein:
    load_chain(chain)
# In legacy mode, we need to patch disulfide bonds in CHARMM manually
# (load_model() handles this automatically via PSF)
patch_disu_from_model(model)

In [None]:
# LEGACY APPROACH: Load ligands separately  
# Skip this if you used load_model() above

# model.ligand+model.phos_ligand+model.co_solvent is the concatenated list of entities
load_ligands(model.ligand+model.phos_ligand+model.co_solvent)

## Minimize the Protein and Crystal Water First

In [15]:
# Specify nonbonded python object called my_nbonds - this just sets it up
# equivalant CHARMM scripting command: 
# nbonds cutnb 18 ctonnb 13 ctofnb 17 cdie eps 1 atom vatom fswitch vfswitch
non_bonded_script = NonBondedScript(
    cutnb=18.0, ctonnb=13.0, ctofnb=17.0,
    eps=1.0,
    cdie=True,
    atom=True, vatom=True,
    fswitch=True, vfswitch=True
)

# select the heavy atoms for harmonic restraints
harmonic_restraint_atoms = ~pycharmm.SelectAtoms(hydrogens=True)
status = pycharmm.cons_harm.setup_absolute(
    selection=harmonic_restraint_atoms,
    force_const=80
)

pycharmm.minimize.run_sd(nstep=500, tolenr=1e-3, tolgrd=1e-3)
pycharmm.cons_harm.turn_off()

 CSTRAN: Harmonic Restraints
          ABSOlute type as set number  1.  Number of selected atoms:    518
          Reference coordinates set to main coordinates.
          Mass weighting will NOT be used for new restraints.
          The force constant of      80.00000 will be used.
          An exponent of  2 will be used.
          The XYZ scale factors are:       1.00000       1.00000       1.00000
          A total of    518 atoms are restrained.

 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    CDIElec  FSHIft   VATOm    VFSWIt  
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 14.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 12.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  1.000
 NBXMOD =      5
 There are        0 atom  pairs and        0 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 <MAKINB> with mode   5 found   2740 exclusions and   2368 interactions(1-4)
 <MAKGRP> found   1003 group exclusio

1

## Sync Coord with pyCHARMM
We need to update the coords of crimm protein after minimization

In [16]:
## This is the new API for crimm sync coordinates with CHARMM
## The old sync_coord only works in a limited number of situations thus is DEPRECATED
fetch_coords_from_charmm(model)

In [17]:
model

NGLWidget()

<OrganizedModel model=4PTI Polypeptide(L)=1 Solvent=1 >
	│
	├───<Polypeptide(L) id=A Residues=58 segids=PROA>
	├──────Description: TRYPSIN INHIBITOR
	│
	├───<Solvent id=B Molecules=60 segids=CRWT>
	├──────Residue ID(s): HOH
	├──────Description: water


## Solvation

In [18]:
solvator = Solvator(model)
# we want to keep the crystallograpic water using remove_existing_water=False
added_water = solvator.solvate(
    cutoff=5.0, solvcut=2.1, remove_existing_water=False, orient_coords=False
)
# Add 150 mM KCl using the new add_ions() method
# This uses SLTCAP/SPLIT methods for accurate ion concentration
ion_chain = solvator.add_ions(
    concentration=0.15,  # 150 mM
    cation='POT',        # K+
    anion='CLA',         # Cl-
    method='auto'        # auto-selects best method based on system charge
)


Ion calculation using SPLIT method:
  N₀/|Q| = 1.4 ≥ 1: SPLIT acceptable (~7% error)
  System charge: +8 e
  Water molecules: 4,170
  Target concentration: 150 mM
  N₀ (neutral pairs): 11.3
  N₀/|Q| ratio: 1.41

  Adding ions:
    POT: 7
    CLA: 15

  Final system charge: +0 e


In [19]:
model

NGLWidget()

<OrganizedModel model=4PTI Polypeptide(L)=1 Solvent=2 Ion=1 >
	│
	├───<Polypeptide(L) id=A Residues=58 segids=PROA>
	├──────Description: TRYPSIN INHIBITOR
	│
	├───<Solvent id=B Molecules=60 segids=CRWT>
	├──────Residue ID(s): HOH
	├──────Description: water
	│
	├───<Solvent id=WA Molecules=4088 segids=SOLV>
	├──────Description: water
	│
	├───<Ion id=IA Ions=22 segids=IONS>
	├──────Description: ions (CLA, POT) at 150 mM


## Doc Strings for Solvator

The `add_ions()` method supports three ionization methods:
- **SPLIT**: Best when system has moderate charge
- **SLTCAP**: More accurate for highly charged systems  
- **Add-Neutralize**: Simple approach (may overestimate concentration)

Use `method='auto'` to let crimm select the best method based on your system.

In [31]:
Solvator?

[0;31mInit signature:[0m [0mSolvator[0m[0;34m([0m[0mentity[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Solvates a Structure, Model, or Chain level entity with water molecules.
The solvated entity will be returned as a Model level entity. The solvated
entity will be centered in a cubic box with side length equal to the
maximum dimension of the entity plus the cutoff distance. (i.e., Coordinates 
will be oriented using CoordManipulator.orient_coords() before solvation.)
The solvcut distance is the distance from the solute at which water
molecules will be removed. The solvcut distance is used to remove water 
molecules that are too close to the solute. 
If altloc atoms exist in the entity, the first altloc atoms will be used to
determine water molecules location during solvation.

Parameters
----------
entity : Structure, Model, or Chain level entity
    The entity to solvate. If a Structure level entity is provided, the
    fi

In [21]:
Solvator.solvate?

[0;31mSignature:[0m
[0mSolvator[0m[0;34m.[0m[0msolvate[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mself[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcutoff[0m[0;34m=[0m[0;36m9.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msolvcut[0m[0;34m=[0m[0;36m2.1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mremove_existing_water[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mremove_existing_ions[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0morient_coords[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbox_type[0m[0;34m=[0m[0;34m'cube'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0morient_method[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbox_dims[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0mlist[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Solvate the entity with a water box.

The solvated entity will be center

In [22]:
Solvator.add_ions?

[0;31mSignature:[0m
[0mSolvator[0m[0;34m.[0m[0madd_ions[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mself[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mconcentration[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0;36m0.15[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmethod[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcation[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'SOD'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0manion[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'CLA'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmin_dist_solute[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0;36m5.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmin_dist_ion[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0;36m5.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mskip_undefined[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mremove_generated_ions[0m[0;34m:[0m [0mbool[0m [0;34m=

## Model after Solvation

After solvation, the model includes added water and ions. With 150 mM KCl, both K+ (POT) and Cl- (CLA) 
ions are added to achieve the target salt concentration while neutralizing the system charge.
The water box may be split into multiple chains due to the PDB residue number limit of **9999**.

## Load Solvated Entities into CHARMM

After solvation, we need to load the newly added water and ions into pyCHARMM. This step is required regardless of whether you used `load_model()` or the legacy sequential approach earlier, because the `Solvator` adds new entities to the model after the initial loading.

We use `append=True` to add these new entities to the existing CHARMM PSF structure.

In [20]:
# Load ions and water into pyCHARMM (required even if you used load_model earlier,
# because solvation adds new entities after the initial load)
# We only want to load the water box and the ions generated by Solvator
# Notes: 
# 1) These functions only take a list of chains as arguments
# 2) You need to use `load_solvent_toppar()` before executing these 
# loading functions if the previous loaded model did not contain solvent

load_ions([ion_chain], append=True)
load_water(added_water, append=True)

  
 CHARMM>     read psf card -
 CHARMM>     name /tmp/tmpiv6whykw.psf -
 CHARMM>     append
 VOPEN> Attempting to open::/tmp/tmpiv6whykw.psf::
 MAINIO> Protein structure file being appended from unit  91.
 psf_read_formatted: Reading PSF in the expanded format.
 TITLE>  * Generated by crimm
 TITLE>  * Chains: A(protein,58res)
 TITLE>  * Solvation: 4088 waters, 22 ions (CLA,POT), 150 mM salt, 60 crystal waters kept
 TITLE>  * Crystal: Cubic box, 51.26 A
 TITLE>  * Created: 2026-01-22 21:42:06 by truman
 TITLE>  *
 PSFSUM> PSF modified: NONBOND lists and IMAGE atoms cleared.
 PSFSUM> Summary of the structure file counters :
         Number of segments      =        3   Number of residues   =      140
         Number of atoms         =     1105   Number of groups     =      358
         Number of bonds         =     1037   Number of angles     =     1703
         Number of dihedrals     =     2412   Number of impropers  =      156
         Number of cross-terms   =       58   Number of a

['WT00']

crimm now builds hydrogens for crystallographic water automatically during solvation,
but if needed, we can also use CHARMM to rebuild them by `create_water_hs_from_charmm(model)`

## Set up PBC and Minimize Water

In [21]:
# organize segids and ion types for image and cons_fix
non_solvent_segids = set()
all_ion_types = set()
for chain in model:
    if chain.chain_type == 'Solvent':
        continue
    elif chain.chain_type == 'Ion':
        for res in chain:
            all_ion_types.add(res.resname)
    else:
        for res in chain:
            non_solvent_segids.add(res.segid)

In [22]:
# anything but solvent or ions in the model
non_solvent_segids

{'PROA'}

In [23]:
# all types of ions loaded in pyCHARMM by crimm
all_ion_types

{'CLA', 'POT'}

In [24]:
# CHARMM scripting: crystal define cubic @boxsize @boxsize @boxsize 90 90 90
crystal.define_cubic(solvator.box_dim)
# CHARMM scripting: crystal build cutoff @boxhalf noper 0
crystal.build(solvator.box_dim/2)

 Crystal Parameters : Crystal Type = CUBI
           A     =   51.26390 B    =   51.26390 C     =   51.26390
           Alpha =   90.00000 Beta =   90.00000 Gamma =   90.00000
 XBUILD> Building all transformations with a minimum atom-atom
         contact distance of less than   25.63 Angstroms.

 Range of Grid Search for Transformation     1 :
 Lattice Vector A    -2 TO     2
 Lattice Vector B    -2 TO     2
 Lattice Vector C    -2 TO     2


 The number of transformations generated =    26


 Number  Symop   A   B   C   Distance

      1      1  -1  -1  -1     4.2204
      2      1  -1   0  -1     2.2176
      3      1  -1   1  -1     4.2311
      4      1   0  -1  -1     2.8313
      5      1   0   0  -1     1.7507
      6      1   0   1  -1     2.1243
      7      1  -1  -1   0     3.2410
      8      1  -1   0   0     1.6315
      9      1  -1   1   0     2.6203
     10      1   0  -1   0     1.1103
     11      1   0   1   0     1.1103
     12      1  -1  -1   1     4.2324
     1

1

In [25]:
# Turn on image centering - bysegment for protein, by residue for solvent and ions
# CHARMM scripting: image byseg xcen 0 ycen 0 zcen 0 select segid SEGID end
for segid in non_solvent_segids:
    image.setup_segment(0.0, 0.0, 0.0, segid)
# CHARMM scripting: image byres xcen 0 ycen 0 zcen 0 select resname tip3 end
image.setup_residue(0.0, 0.0, 0.0, 'TIP3')
# CHARMM scripting: image byres xcen 0 ycen 0 zcen 0 select resname ion_type end
for ion_type in all_ion_types:
    image.setup_residue(0.0, 0.0, 0.0, ion_type)

 select>    903 atoms have been selected out of   13369
 IMAGE CENTERING ON FOR SOME ATOMS
 select>  12444 atoms have been selected out of   13369
 IMAGE CENTERING ON FOR SOME ATOMS
 select>      7 atoms have been selected out of   13369
 IMAGE CENTERING ON FOR SOME ATOMS
 select>     15 atoms have been selected out of   13369
 IMAGE CENTERING ON FOR SOME ATOMS


In [26]:
# Now specify nonbonded cutoffs for solvated box
cutnb = min(solvator.box_dim/2, 12)
cutim = cutnb
ctofnb = cutnb - 1.0
ctonnb = cutnb - 3.0

# Another nbonds example
# CHARMM scripting: nbonds cutnb @cutnb cutim @cutim ctofnb @ctofnb ctonnb @ctonnb -
#        inbfrq -1 imgfrq -1
non_bonded_script = pycharmm.NonBondedScript(
    cutnb=cutnb, cutim=cutim, ctonnb=ctonnb, ctofnb=ctofnb,
    eps=1.0,
    cdie=True,
    atom=True, vatom=True,
    fswitch=True, vfswitch=True,
    inbfrq=-1, imgfrq=-1
)

In [27]:
# We want to fix the protein and ligands and minimize the solvent to "fit"
# Select everything but solvent and ions
cons_fix_atoms = pycharmm.SelectAtoms()
for segid in non_solvent_segids:
    cons_fix_atoms |= pycharmm.SelectAtoms(seg_id=segid)

# Minimize the solvent positions with periodic boundary conditions using steepest descents
ener_dict = sd_minimize(200, non_bonded_script, cons_fix_selection=cons_fix_atoms)

  
 CHARMM>     nbonds cutnb 12 -
 CHARMM>     cutim 12 -
 CHARMM>     ctonnb 9.0 -
 CHARMM>     ctofnb 11.0 -
 CHARMM>     eps 1.0 -
 CHARMM>     cdie -
 CHARMM>     atom -
 CHARMM>     vatom -
 CHARMM>     fswitch -
 CHARMM>     vfswitch -
 CHARMM>     inbfrq -1 -
 CHARMM>     imgfrq -1

 SELECTED IMAGES ATOMS BEING CENTERED ABOUT  0.000000  0.000000  0.000000
 RESIDUE  156 OPERATED ON BY TRANSFORMATION P1Z0Z0R1
 RESIDUE  203 OPERATED ON BY TRANSFORMATION Z0Z0P1R1
 RESIDUE  303 OPERATED ON BY TRANSFORMATION P1Z0Z0R1
 RESIDUE  502 OPERATED ON BY TRANSFORMATION Z0Z0N1R1
 RESIDUE  601 OPERATED ON BY TRANSFORMATION P1Z0Z0R1
 RESIDUE  653 OPERATED ON BY TRANSFORMATION Z0Z0P1R1
 RESIDUE  756 OPERATED ON BY TRANSFORMATION P1Z0Z0R1
 RESIDUE  945 OPERATED ON BY TRANSFORMATION Z0Z0N1R1
 RESIDUE 1058 OPERATED ON BY TRANSFORMATION P1Z0Z0R1
 RESIDUE 1106 OPERATED ON BY TRANSFORMATION Z0Z0P1R1
 RESIDUE 1156 OPERATED ON BY TRANSFORMATION Z0N1Z0R1
 RESIDUE 1201 OPERATED ON BY TRANSFORMATION P1Z0Z0R1




 Image nonbond list generation found:
  1159695 ATOM PAIRS WERE FOUND FOR ATOM LIST
        0 ATOM PAIRS WERE FOUND FOR ATOM SELF LIST
   110539 GROUP PAIRS REQUIRED ATOM SEARCHES

 PRNHBD: CUToff Hydrogen Bond  distance =    0.5000   Angle =   90.0000
         CuT switching ON HB dist. =     3.5000  OFf HB dist. =    4.0000
         CuT switching ON Hb Angle =    50.0000  OFf Hb Angle =   70.0000
         ACCEptor antecedents included
         All hydrogen bonds for each hydrogen will be found
         Hydrogen bonds between excluded atoms will be kept

 HBFIND-exclusions:******* due to distance cutoff,       0 due to angle cutoff
                         0 primary donor to image acceptor hbonds found
 HBFIND-exclusions:******* due to distance cutoff,       0 due to angle cutoff
                         0 image donor to primary acceptor hbonds found
                         0 unique image hbonds found
 HBEDIT-deletions:       0 due to duplications,          0 due to best-option,
    

In [28]:
fetch_coords_from_charmm(model)
model

NGLWidget()

<OrganizedModel model=4PTI Polypeptide(L)=1 Solvent=2 Ion=1 >
	│
	├───<Polypeptide(L) id=A Residues=58 segids=PROA>
	├──────Description: TRYPSIN INHIBITOR
	│
	├───<Solvent id=B Molecules=60 segids=CRWT>
	├──────Residue ID(s): HOH
	├──────Description: water
	│
	├───<Solvent id=WA Molecules=4088 segids=SOLV>
	├──────Description: water
	│
	├───<Ion id=IA Ions=22 segids=IONS>
	├──────Description: ions (CLA, POT) at 150 mM


In [29]:
pcm_write.coor_card(f'{PDBID}.crd')
pcm_write.psf_card(f'{PDBID}.psf')

  
 CHARMM>     write name 4pti.crd -
 CHARMM>     coor card
 VOPEN> Attempting to open::4pti.crd::
 RDTITL>  
 RDTITL> No title read.
 VCLOSE: Closing unit   91 with status "KEEP"
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     write name 4pti.psf -
 CHARMM>     psf card
 VOPEN> Attempting to open::4pti.psf::
 RDTITL>  
 RDTITL> No title read.
 VCLOSE: Closing unit   91 with status "KEEP"
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  


## Alternative: Write PSF/CRD with Native crimm Writers (#NEW)

In addition to pyCHARMM's `write.psf_card()` and `write.coor_card()`, crimm provides native Python 
writers that don't require pyCHARMM. This is useful when:
- You need to write files without an active pyCHARMM session
- Working in environments where pyCHARMM isn't available
- Need programmatic control over the output format

**Important**: The native writers extract topology and coordinate information directly from the 
`OrganizedModel` and its associated `TopologyLoader`, ensuring consistency with the structure 
you've been working with in crimm.

In [30]:
# #NEW: Write PSF and CRD files using crimm's native writers
# These work directly with the OrganizedModel without requiring pyCHARMM

# Write PSF file - extracts topology from model.topology_loader
write_psf(model, f'{PDBID}_crimm.psf')
print(f"Written {PDBID}_crimm.psf")

# Write CRD file - extracts coordinates from model
write_crd(model, f'{PDBID}_crimm.crd')
print(f"Written {PDBID}_crimm.crd")

Written 4pti_crimm.psf
Written 4pti_crimm.crd


## Verify Output Files (#NEW)

You can verify that the native crimm writers produce equivalent output to pyCHARMM by comparing the files. 
The PSF files should contain identical atom counts, bonds, angles, and other topology information.
Minor differences in formatting or floating-point precision are expected but shouldn't affect simulations.

In [31]:
# #NEW: Quick verification - compare atom counts between pyCHARMM and crimm outputs
psf_charmm = read_psf(f'{PDBID}.psf')
psf_crimm = read_psf(f'{PDBID}_crimm.psf')

print("=== PSF Comparison ===")
print(f"pyCHARMM PSF: {len(psf_charmm.atoms)} atoms, {len(psf_charmm.bonds)} bonds")
print(f"crimm PSF:    {len(psf_crimm.atoms)} atoms, {len(psf_crimm.bonds)} bonds")
print(f"Match: {len(psf_charmm.atoms) == len(psf_crimm.atoms) and len(psf_charmm.bonds) == len(psf_crimm.bonds)}")

=== PSF Comparison ===
pyCHARMM PSF: 13369 atoms, 9213 bonds
crimm PSF:    13369 atoms, 9213 bonds
Match: True
