In [1]:
from aiida import load_profile
load_profile()

Profile<uuid='854b62c634cd4cd6ade4c630f3c0af0e' name='gromacs'>

In [2]:
# Polymer/Monomer inputs
monomer_pdbfilename = '14tPBMonomer.pdb'        # PDB filename of the monomer
monomer_count_per_polymer = 4                   # Number of monomers present in one polymer

polymer_count = 600                             # Number of polymers will be simulated

# MD simulation inputs
thermo_T = 300.15                               # Simulation Temperature (K)
thermo_P = 1.0                                  # Simulation Pressure (bar)

In [3]:
import pathlib
import os
import numpy as np

from aiida.engine import run, submit
from aiida.engine import calcfunction, WorkChain
from aiida.orm import Bool, Int, Float, Str, Dict, List, SinglefileData, FolderData
from aiida_shell import launch_shell_job

In [4]:
monomer_pdbfilename = Str(monomer_pdbfilename)
monomer_count_per_polymer = Int(monomer_count_per_polymer)

polymer_count = Int(polymer_count)

thermo_T = Float(thermo_T)
thermo_P = Float(thermo_P)

In [5]:
monomer = SinglefileData(os.getcwd() + '/' + monomer_pdbfilename.value)

In [6]:
from polymerize import polymerize

polymer_data = run(polymerize, monomer = monomer, monomer_count = monomer_count_per_polymer)

Polymerization starts ->
4
ATOM      1 CA   BD1     1       8.280  51.200  30.550                       C
ATOM      2 HA1  BD1     1       8.790  50.330  30.140                       H
ATOM      3 HA2  BD1     1       8.830  52.120  30.380                       H
ATOM      4 HA3  BD1     1       8.380  51.100  31.630                       H
ATOM      5 CB   BD1     1       6.890  51.370  30.050                       C
ATOM      6 HB   BD1     1       6.230  50.510  30.130                       H
ATOM      7 CG   BD1     1       6.460  52.520  29.510                       C
ATOM      8 HG   BD1     1       7.270  53.230  29.440                       H
ATOM      9 CW   BD1     1       5.030  52.720  28.830                       C
ATOM     10 HW1  BD1     1       4.580  53.650  29.180                       H
ATOM     11 HW2  BD1     1       4.320  51.910  29.030                       H
ATOM     13 CA   BD2     2       4.885  52.865  27.263                       C
ATOM     14 HA1  BD2     

In [7]:
@calcfunction
def calc_simulation_box_length(polymer_molecular_weight: Float, polymer_count: Int) -> Float:
    molecular_weight = polymer_molecular_weight.value * polymer_count.value
    
    # Default density (0.4 g/cm3)
    density = 0.4
    avogadro_number = 6.022e23  # molecules/mol
    cm_to_nm = 1e7  # cm to nm conversion factor
    
    # Step 1: Mass of one molecule
    mass_per_molecule = molecular_weight / avogadro_number  # g
    
    # Step 2: Volume of cubic box
    volume = mass_per_molecule / density  # cm³
    
    # Step 3: Side length of the cubic box
    box_length = volume**(1/3)  # cm
    box_length = box_length * cm_to_nm  # nm

    return Float(box_length)

box_length = calc_simulation_box_length(polymer_data['polymer_molecular_weight'], polymer_count)

In [9]:
import nglview as nv
import tempfile

In [10]:
import nglview as nv
import tempfile

def visualize(node: SinglefileData) -> nv.NGLWidget:
    with node.open(mode='rb') as source:
        with tempfile.NamedTemporaryFile(suffix=node.filename) as target:
            target.write(source.read())
            print(target.name)
            view = nv.NGLWidget()
            view.add_component(target.name)
            view.clear_representations()
            view.add_ball_and_stick()
            return view

In [11]:
visualize(polymer_data['polymer'])

/mnt/scratch/tmp-aamir/tmpdbjc0bwppolymer.pdb


NGLWidget()

In [14]:
from aiida.orm import Computer, InstalledCode, load_code

In [15]:
computer = Computer.collection.get(label='ondemand')

In [16]:
from aiida.orm import Computer, InstalledCode, load_code, load_computer

In [17]:
gmx_gpu = load_code('gmx-gpu@ondemand')
gmx_gpu

<InstalledCode: Remote code 'gmx-gpu' on ondemand pk: 3, uuid: 1257560f-2c9d-4e53-949c-5cdbcf17c330>

In [19]:
hpc = load_computer('ondemand')

In [20]:
hpc

<Computer: ondemand (ondemand), pk: 2>

In [21]:
#! verdi process list -a -D desc
#! verdi node delete 27641 -f
#! verdi process report 27732
#! verdi node graph generate 32574

In [22]:
# Default Parameters
ffdir = os.getcwd() + '/oplsaa.ff'
oplsaa = FolderData(tree=ffdir)                       

In [23]:
# Run `gmx pdb2gmx` to convert the PDB to GROMACS .gro format.
results_pdb2gmx, node_pdb2gmx = launch_shell_job(
    'gmx',
    arguments='pdb2gmx -f {polymer} -o polymer_out.pdb -water spce -ff oplsaa',
    nodes={
        'polymer': polymer_data['polymer'],
        'folder': oplsaa,
    },
    filenames={
        'folder': 'oplsaa.ff'
    },
    outputs=['polymer_out.pdb', 'topol.top', 'posre.itp'],
    metadata={'options': {'redirect_stderr': True}},
)
#print(results_pdb2gmx['stdout'].get_content())

<div class="alert alert-info">
`gmx` is the GROMACS executable which will be executed with several parameters. The `nodes` argument takes a dictionary of `SinglefileData` and `FolderData` which will be copied to the working directory where the `gmx` command is executed. The `oplsaa` in `nodes` are copied to the `oplsaa.ff` folder in the working directory. If the key of the node appears as a placeholder in the `arguments` argument, it is replaced with its filename. 
</div>

The `gmx pdb2gmx` is expected to generate three outputs of interest:
- `polymer_out.pdb`: `.pdb` file of the polymer.
- `topol.top`: The topology file.
- `posre.itp`: The position restraint file included in the topology file.

<div class="alert alert-info">
These output files will be used in the upcoming steps in the pipeline. To capture these output files in the AiiDA's provenance graph, these filenames are declared in the `output` argunent. `aiida-shell` will wrap these output files in a `SinglefileDtata`.
</div>

Similar to the previous steps, GROMACS creates a simulation box where mutiple chains of the polymer will be inserted for the MD simulations. From the previous step, `.pdb` file of the single polymer chain is used to replicate the polymer chain inside the simulation box. In the output, we get a simulation box containing `polymer_count` number of polymers.

In [24]:
@calcfunction
def check_insert_molecules(log: SinglefileData, polymer_count: Int) -> Bool:
    lines = log.get_content().split('\n')
    check_lines = [line for line in lines if line.startswith('Added')]
    
    if len(check_lines) > 1:
        raise Exception('ERROR: Multiple lines got selected in the check_lines variable', check_lines)

    wordlist = check_lines[0].split()
    polymer_count_inserted = Int(wordlist[1])
    
    if polymer_count_inserted.value == polymer_count.value:
        return Bool(False)
    else:
        return Bool(True)

run = Bool(True)
while run.value:
    results_insert, node_insert = launch_shell_job(
        'gmx',
        arguments='insert-molecules -box {box_length} -ci {polymer} -nmol {polymer_count} -try 999 -o melt.pdb',
        nodes={
            'box_length': box_length,
            'polymer': polymer_data['polymer'],
            'polymer_count': polymer_count,
        },
        outputs=['melt.pdb'],
        metadata={'options': {'redirect_stderr': True}},
    )
    #print(results_insert['stdout'].get_content())
    run = check_insert_molecules(results_insert['stdout'], polymer_count)
    print(run.value)
    if run.value:
        
        run = Bool(False)
    else:
        run = Bool(False)
    print(run.value)

False
False


While running the `pdb2gmx` command we have created the topology for one single polymer chain. However, in the `topol.top` file we have to provide the number of polymer chain present in the simulation box. Therefore, we are updating the number of molecules line from 1 to `polymer_count` using the `sed` command.

In [25]:
results_updatetop, node_updatetop = launch_shell_job(
    'sed',
    arguments='-i "s/Other               1/Other               {polymer_count}/g" {top}',
    nodes={
        'polymer_count': polymer_count,
        'top': results_pdb2gmx['topol_top'],
    },
    outputs=['topol.top'],
    metadata={'options': {'redirect_stderr': True}},
)
#print(results_updatetop['stdout'].get_content())

<div class="alert alert-info">
The output `topol.top` file is declared in the `output` argunent where `aiida-shell` will wrap these output files in a `SinglefileDtata`.
</div>

**Prerequisite for MD simulations and simulation box preparation is completed and we can proceed to run the molecular dynamics simulation.**

In [26]:
em_mdp = SinglefileData.from_string(
    """
    integrator      = steep
    emtol           = 1000.0
    emstep          = 0.01
    nsteps          = 50000
    nstlist         = 1
    cutoff-scheme   = Verlet
    ns_type         = grid
    coulombtype     = PME
    rcoulomb        = 1.0
    rvdw            = 1.0
    pbc             = xyz
    ld_seed         = 1
    gen_seed        = 1
    """,
    filename='em.mdp',
)

# Run `gmx grompp` to pre-process the parameters for energy minimization.
results_grompp_em, node_grompp_em = launch_shell_job(
    'gmx',
    arguments='grompp -f {mdp} -c {gro} -p {top} -o output.tpr',
    nodes={
        'mdp': em_mdp,
        'gro': results_insert['melt_pdb'],
        'top': results_updatetop['topol_top'],
        'folder': oplsaa,
    },
    filenames={
        'folder': 'oplsaa.ff'
    },
    outputs=['output.tpr'],
    metadata={'options': {'redirect_stderr': True}},
)
#print(results_grompp_em['stdout'].get_content())

The `gmx grompp` takes the input of three necesary files to generate the `.tpr` file.
- [x] `.mdp`: The molecular dynamics parameters.
- [x] `.gro`: The coordinate information of all the polymer chains present in the simulation box.
- [x] `.top`: The tolology file of the polymer. Also, this has the information of number of polymers present in the simulation box.

The generated `.tpr` file is used for running the molecular dynamics simulation. We can run the `gmx mdrun` to run the energy minimization step.

In [27]:
# Run `gmx mdrun` to run the energy minimization.
results_em, node_em = launch_shell_job(
    'gmx',
    arguments='mdrun -v -deffnm output -s {tpr} -ntmpi 1 -ntomp 16',
    nodes={
        'tpr': results_grompp_em['output_tpr'],
    },
    outputs=['output.edr', 'output.gro', 'output.log'],
    metadata={'options': {'redirect_stderr': True, 'computer': computer}},
    #metadata={'options': {'redirect_stderr': True, 'computer': computer, 'resources': 1}},
)
#print(results_em['stdout'].get_content())

  "cipher": algorithms.TripleDES,
  "class": algorithms.TripleDES,


In [28]:
print(results_em['stdout'].get_content())

                   :-) GROMACS - gmx mdrun, 2023.3-spack (-:

Executable:   /anfhome/spack/opt/spack/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placehold/linux-almalinux8-zen3/aocc-4.1.0/gromacs-2023.3-upt5rsow4zsttzs63kqercg7her4e3e2/bin/gmx_mpi
Data prefix:  /anfhome/spack/opt/spack/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placehold/linux-almalinux8-zen3/aocc-4.1.0/gromacs-2023.3-upt5rsow4zsttzs63kqercg7her4e3e2
Working dir:  /anfhome/aamir/scratch/aiida/4e/ac/a8a4-72d9-4daf-8a18-8e707db65bb1
Command line:
  gmx_mpi mdrun -v -deffnm output -s output.tpr -ntmpi 1 -ntomp 16

Reading file output.tpr, VERSION 2023.3-spack (single precision)

-------------------------------------------------------
Program:     gmx mdrun, version 2023.3-spack
Source file: src/gromacs/taskassignment/resourcedivision.cpp (line 718)

Fatal error:
Setting the number of thread-MPI ranks is only supported with

Similar to the previous `gmx grompp` step, we can proceed with NPT equilibration step where the molecular dynamics simulation will be performed at the given thermodynamic conditions given by the user.

In [29]:
eqnpt_mdp = orm.SinglefileData.from_string(
    """
    title                   = NPT Equilibration
    ;define                 = -DPOSRES
    integrator              = md
    dt                      = 0.002
    nsteps                  = 50000
    nstenergy               = 2000
    nstxout-compressed      = 10000
    nstvout                 = 0
    nstlog                  = 1000
    gen_vel                 = yes
    gen_temp                = 298.15
    pbc                     = xyz
    cutoff-scheme           = Verlet
    rlist                   = 1.0
    ns_type                 = grid
    nstlist                 = 10
    coulombtype             = PME
    fourierspacing          = 0.12
    pme_order               = 4
    rcoulomb                = 1.0
    vdwtype                 = Cut-Off
    rvdw                    = 1.0
    DispCorr                = EnerPres
    constraints             = h-bonds
    constraint_algorithm    = lincs
    lincs_iter              = 1
    lincs_order             = 4
    tcoupl                  = v-rescale
    tc-grps                 = System
    ref_t                   = 298.15
    tau_t                   = 0.1
    pcoupl                  = c-rescale
    pcoupltype              = isotropic
    ref_p                   = 1.0
    tau_p                   = 2.0
    ;refcoord-scaling        = com
    compressibility         = 4.5e-5
    """,
    filename='eqnpt.mdp',
)

# Run `gmx grompp` to pre-process the parameters for energy minimization.
results_grompp_eqnpt, node_grompp_eqnpt = launch_shell_job(
    'gmx',
    arguments='grompp -f {mdp} -c {gro} -p {top} -o npt.tpr',
    nodes={
        'mdp': eqnpt_mdp,
        'gro': results_em['output_gro'],
        'top': results_updatetop['topol_top'],
        'folder': oplsaa,
    },
    filenames={
        'folder': 'oplsaa.ff'
    },
    outputs=['npt.tpr'],
    metadata={'options': {'redirect_stderr': True}},
)
#print(results_grompp_eqnpt['stdout'].get_content())

NameError: name 'orm' is not defined

The generated `.tpr` file is used for running the molecular dynamics simulation for NPT equilibration. We can run the `gmx mdrun` to run the NPT equilibration step.

In [None]:
# Run `gmx mdrun` to run the energy minimization.
results_eqnpt, node_eqnpt = launch_shell_job(
    'gmx',
    arguments='mdrun -v -deffnm output -s {tpr}',
    nodes={
        'tpr': results_grompp_eqnpt['npt_tpr'],
    },
    outputs=['output.edr', 'output.gro', 'output.log', 'output.xtc'],
    metadata={'options': {'redirect_stderr': True}},
)
#print(results_eqnpt['stdout'].get_content())

**MD simulation is completed. Therefore, we can go for analysis of the simulated system.**

<div class="alert alert-info">
Using `gmx energy` command we can calculate and plot the primary properties for the simulated polymer system.
</div>

In [None]:
# Run `gmx energy` to extract the potential energy during the energy minimization.
results_energy, node_energy = launch_shell_job(
    'gmx',
    arguments='energy -f {edr} -o potential.xvg',
    nodes={
        'edr': results_eqnpt['output_edr'],
        'stdin': orm.SinglefileData.from_string('10\n0'),
    },
    outputs=['potential.xvg'],
    metadata={'options': {'redirect_stderr': True, 'filename_stdin': 'stdin'}},
)

<div class="alert alert-info">
The input for `gmx energy` is the `output.edr` from the `results_eqnpt`. Here we first pass `10` which corresponds to the potential energy, followed by `0` which finalizes the selection and completes the prompt. The `output` file of this step saved as GROMACS generated `.xvg` format.
</div>

To visualize the data, first we need to import necessary python packages to be installed.

In [None]:
import io

import matplotlib.pyplot as plt
import numpy as np

from IPython.display import display, IFrame

We define the `create_plot` function and pass the `potential.xvg` from the previous step. The `create_plot` will create a plot which is saved to a stream in memory which is then passed to a `SinglefileData` node to store it in AiiDA's provecance graph.

In [None]:
@engine.calcfunction
def create_plot(xvg: orm.SinglefileData) -> orm.SinglefileData:
    """Plot the data of a XVG output file."""
    lines = xvg.get_content().split('\n')
    data = np.loadtxt(lines, comments=['#', '@']).T
    plt.figure(figsize=(6,4))
    plt.plot(*data)
    plt.xlabel('Time (ps)')
    plt.ylabel('Potential energy [kJ/mol]')
    stream = io.BytesIO()
    plt.savefig(stream, format='png', bbox_inches='tight', dpi=150)
    #stream.seek(0)
    return orm.SinglefileData(stream, filename='potential.png')
    
# Create a plot from the extracted potential energy of the system
plot = create_plot(results_energy['potential_xvg'])

In the same way we can get the density data from the `output.edr` using the `gmx energy` command and plot it.

In [None]:
# Run `gmx energy` to extract the potential energy during the energy minimization.
results_energy, node_energy = launch_shell_job(
    'gmx',
    arguments='energy -f {edr} -o density.xvg',
    nodes={
        'edr': results_eqnpt['output_edr'],
        'stdin': orm.SinglefileData.from_string('22\n0'),
    },
    outputs=['density.xvg'],
    metadata={'options': {'redirect_stderr': True, 'filename_stdin': 'stdin'}},
)

In [None]:
@engine.calcfunction
def create_plot(xvg: orm.SinglefileData) -> orm.SinglefileData:
    """Plot the data of a XVG output file."""
    lines = xvg.get_content().split('\n')
    data = np.loadtxt(lines, comments=['#', '@']).T
    plt.plot(*data)
    plt.xlabel('Time (ps)')
    plt.ylabel('Density $[kg/m^3]$')
    stream = io.BytesIO()
    plt.savefig(stream, format='png', bbox_inches='tight', dpi=300)
    #stream.seek(0)
    return orm.SinglefileData(stream, filename='density.png')

# Create a plot from the extracted density of the system
plot = create_plot(results_energy['density_xvg'])

300.15


In [38]:
thermo_T_count = 5
thermo_T_list = orm.ArrayData(np.array([thermo_T.value-10.0, thermo_T.value-5.0]))

In [39]:
from aiida.engine.daemon.client import get_daemon_client

client = get_daemon_client()
client.start_daemon()

In [40]:
@engine.calcfunction
def set_mdp_parameter(id: int=0, thermo_T: np.float64=298.15, thermo_P: np.float64=1.0, ) -> orm.SinglefileData:
    mdp_str = f"""
    title                   = NPT Equilibration
    ;define                 = -DPOSRES
    integrator              = md
    dt                      = 0.002
    nsteps                  = 50000
    nstenergy               = 2000
    nstxout-compressed      = 10000
    nstvout                 = 0
    nstlog                  = 1000
    gen_vel                 = yes
    gen_temp                = 298.15
    pbc                     = xyz
    cutoff-scheme           = Verlet
    rlist                   = 1.0
    ns_type                 = grid
    nstlist                 = 10
    coulombtype             = PME
    fourierspacing          = 0.12
    pme_order               = 4
    rcoulomb                = 1.0
    vdwtype                 = Cut-Off
    rvdw                    = 1.0
    DispCorr                = EnerPres
    constraints             = h-bonds
    constraint_algorithm    = lincs
    lincs_iter              = 1
    lincs_order             = 4
    tcoupl                  = v-rescale
    tc-grps                 = System
    ref_t                   = {thermo_T.value}
    tau_t                   = 0.1
    pcoupl                  = c-rescale
    pcoupltype              = isotropic
    ref_p                   = {thermo_P.value}
    tau_p                   = 2.0
    ;refcoord-scaling        = com
    compressibility         = 4.5e-5
    """

    #print(mdp_str)
    #print('T = ', thermo_T.value)
    #print('P = ', thermo_P.value)

    return orm.SinglefileData.from_string(mdp_str, filename=f'eqnpt-{id.value}.mdp')

nodelist = []
resultslist = []

for i, thermo_T_ele in enumerate(thermo_T_list.get_array()):
    print(i, thermo_T_ele, type(thermo_T_ele), type(i))
    eqnpt_mdp = set_mdp_parameter(i, thermo_T_ele, thermo_P)
    print(eqnpt_mdp.filename)
    #print(eqnpt_mdp.get_content())

    print(i)
    tpr = f'npt-{i}.tpr'
    results_grompp_eqnpt, node_grompp_eqnpt = launch_shell_job(
        'gmx',
        arguments='grompp -f {mdp} -c {gro} -p {top} -o {tpr}',
        nodes={
            'mdp': eqnpt_mdp,
            'gro': results_em['output_gro'],
            'top': results_updatetop['topol_top'],
            'tpr': orm.Str(tpr),
            'folder': oplsaa,
        },
        filenames={
            'folder': 'oplsaa.ff'
        },
        outputs=[tpr],
        metadata={'options': {'redirect_stderr': True}},
    )
    nodelist.append(node_grompp_eqnpt)
    resultslist.append(results_grompp_eqnpt)
    #print(node_grompp_eqnpt.outputs.stdout.get_content())
    print('done')

0 290.15 <class 'numpy.float64'> <class 'int'>
eqnpt-0.mdp
0
done
1 295.15 <class 'numpy.float64'> <class 'int'>
eqnpt-1.mdp
1
done


In [41]:
print(nodelist)

[<CalcJobNode: uuid: 32603611-d2b0-4ffa-9323-967db18d2c90 (pk: 4152) (aiida.calculations:core.shell)>, <CalcJobNode: uuid: 09aa274d-4f72-4794-be7d-701ecb8cee63 (pk: 4165) (aiida.calculations:core.shell)>]


In [42]:
print(resultslist)

[{'npt_0_tpr': <SinglefileData: uuid: 4d22018d-3dad-435f-90ec-052092796a34 (pk: 4155)>, 'stdout': <SinglefileData: uuid: 02a1740b-9287-45af-a231-b3db5135c2ee (pk: 4156)>}, {'npt_1_tpr': <SinglefileData: uuid: 6a505202-9e9a-4060-bb10-84c07f20480a (pk: 4168)>, 'stdout': <SinglefileData: uuid: 059df435-7a17-461e-9705-fa1fef59f56c (pk: 4169)>}]


In [43]:
for i, result in enumerate(resultslist):
    ou = f'npt_{i}_tpr'
    print(type(result[ou]))

<class 'aiida.orm.nodes.data.singlefile.SinglefileData'>
<class 'aiida.orm.nodes.data.singlefile.SinglefileData'>


In [44]:
node_dumplist = []
results_dumplist = []

for i, thermo_T_ele in enumerate(thermo_T_list.get_array()):
    print(i, thermo_T_ele, type(thermo_T_ele), type(i))
    tpr = f'npt_{i}_tpr'
    dump = f'npt-{i}.dump'
    print(tpr)
    results_dump, node_dump = launch_shell_job(
        'gmx',
        arguments='dump -s {tpr}',
        nodes={
            'tpr': resultslist[i][tpr],
        },
        metadata={'options': {'redirect_stderr': True}},
    )
    node_dumplist.append(node_dump)
    results_dumplist.append(results_dump)
    print('done')

0 290.15 <class 'numpy.float64'> <class 'int'>
npt_0_tpr
done
1 295.15 <class 'numpy.float64'> <class 'int'>
npt_1_tpr
done


In [45]:
#print(results_dumplist[0]['stdout'].get_content())

In [58]:
import aiida_aqe

In [59]:
import aiida_aqe.tools

In [64]:
dir(aiida_aqe.tools)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__']