In [None]:
####################
## Initial stuff
####################a

# Load functions, modules and global variables required in our pipeline
from simulate_structures_functions import *
from importlib import reload
from htmd.builder.charmm import _recoverProtonations

# Passwords for paramchem
username = None
password = None
if not (username and password):
    raise Exception("Please define your password and username for CGenFF app. It is required to parameterize small molecules")

# Paths to ACEMD3 and its license (REPLACE WITH YOUR OWN)
acemd_path = "acemd4"
acemd_license = "SG_LICENSE_FILE=28000@tolkien.prib.upf.edu,ACELLERA_LICENSE_SERVER=28000@tolkien.prib.upf.edu"

# Our main path
basepath = os.getcwd()+'/'
# Other Paths
strucpath = basepath + 'input_structures/'
resultspath = basepath + 'simulation_output/'
membranepdb = basepath + 'membrane/popc36_box_renumbered.pdb'
topparpath = basepath + 'toppar/TOP_PARAMS_ACE3/'#toppar= topology+parameters
ligandsdict_path = basepath + 'ligands.json'
modres_path = basepath + 'modified_residues.json'
slurmpath = basepath+'fake_slurm/'
# Path to slurm queing system binaries
# In our case, Ismael designed a bunch of small bash scripts (fake_slurm) which do ssh to Hydra and execute slurm there
# REmember to modify the "fake_slurm" commands by adding your username in them

# USER INPUTS: introudce a json dictionary with the data for your system in the folowing format
"""
Example:
[
    {
        "name" : "NAME",
        "pdbfile" : "PATH/TO/YOUR_PDB.pdb", # Relative to basepath. NOT ABSOLUTE PATH
        "modres" : ["MODIFIED_RESNAME1", "MODIFIED_RESNAME"],
        "ligands" : [{
                        "resname":"SMALMOL_RESNAME1",
                        "name" : "SMALMOL_NAME1",
                        "covalently_bound" : true, # If the SMALMOL is covalently bount to peptide
                        "inchikey" : "RLDFVSFNVOLFEU-UHFFFAOYSA-N"
                    }],
        "apo": False, #do you wish to simulate an apo-version of this structure, by removing all ligands and not-main-proteins of it??),
        "prot_chain" : "R", #chain id of the main protein in this system (a GPCR, for us),
        "pdbcode" : "PDBCODE", # CLosest-ressembling PDB structure of this GPCR. Put False if there is none
        "curated" : true, # If system has been already properly protonated
        "sod2x50" : true, # If system requires addition of sodium near 2x50
        "isgpcr" : true # If system is indeed a GPCR
    }
]
"""
input_dict = json_dict(basepath+"demo/inputs.json")
# IMPORTANT: Peptide ligands must have L, L0, L1, L2 or PEP as its segid for 
#this pipeline to work properly
    
# Load topology, parameter and stream files with our current basepath
topos = [os.path.join(topparpath,file) for file in toposfilenames] 
params = [os.path.join(topparpath,file) for file in paramsfilenames]
streams = [os.path.join(topparpath,file) for file in streamsfilenames]

# Add Chimera's and vmd paths (replace by your own)
chimera_path = "/soft/system/software/Chimera/1.16/bin/"
vmd_path = "/soft/system/software/VMD/1.9.4a57/bin/vmd"
psfgenpath = None # Download and use NAMDs psfgen for optimal results. 
# Otherwise you can always use the default HTMD one, but has problems with organic hallogens
path= os.environ['PATH']
%env PATH=$path:$chimera_path:$vmd_path$slurmpath

%load_ext autoreload
%autoreload 2

2025-03-07 16:05:13,731 - numexpr.utils - INFO - NumExpr defaulting to 12 threads.


env: PATH=/soft/system/software/VMD/1.9.4a57/bin:/home/daranda/.conda/envs/sim_pipe/bin:/home/daranda/.conda/envs/sim_pipe/condabin:/soft/system/software/Miniconda3/4.9.2:/soft/system/software/Miniconda3/4.9.2/bin:/soft/system/software/Chimera/1.16/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/daranda/bin:/home/daranda/software/getcontacts-master:/home/daranda/VMD:/home/daranda/VMD2/bin:/opt/zotero:/soft/system/software/Chimera/1.16/bin/:/soft/system/software/VMD/1.9.4a57/bin/vmd/data/gpcr/daranda/doctorat/simulation_pipeline_standard/fake_slurm/


In [2]:
################################
# Part 1: Ligand parametrization
################################

# Save mol2 files of ligand molecules and modres present in systems
(modresdict,ligandsdict,pdbfilesdict) = save_smalmol_mol2(input_dict, basepath, hydrogenate_ligands=False)

# Get topology-parameter files for ligands
get_lig_toppar(ligandsdict, basepath, username, password, pdbfiles = pdbfilesdict)

# Get topology-parameter files for modified residues
get_modres_toppar(modresdict, basepath, username, password, pdbfiles = pdbfilesdict)

You have used an unregistered copy of Chimera for 74 days.
You can either register now by visiting:
   http://www.cgl.ucsf.edu/cgi-bin/chimera_registration.py
or by choosing 'Registration...' from the 'Help' menu next
time you start Chimera with the gui enabled.

Registration is free.  By providing the information requested
you will be helping us document the impact this software is
having in the scientific community. The information you supply
will only be used for reporting summary statistics to NIH.


Compiling /data/gpcr/daranda/doctorat/simulation_pipeline_standard/toppar/Ligands/EJ4/delta_opioid_naltrindole_EJ4.py...
Compiling /data/gpcr/daranda/doctorat/simulation_pipeline_standard/toppar/Ligands/EJ4/delta_opioid_naltrindole_EJ4.py succeeded
Executing /data/gpcr/daranda/doctorat/simulation_pipeline_standard/toppar/Ligands/EJ4/delta_opioid_naltrindole_EJ4.py...
Opening delta_opioid_naltrindole_EJ4.pdb...
delta_opioid_naltrindole_EJ4.pdb opened
Executed /data/gpcr/daranda/doctorat/simulation_pipeline_standard/toppar/Ligands/EJ4/delta_opioid_naltrindole_EJ4.py
Getting toppar file for ligand EJ4 
submited_mol <Response [200]>
submitting into cgenff....


In [None]:
###########################
## Part 2: Build the models
###########################

# Iterate by GPCRdb structures to simulate
for entry in input_dict:    
    try:

        # Entry's data
        name = entry['name']
        isgpcr = entry['isgpcr']
        pdbcode = entry['pdbcode']
        pdbfile = entry['pdbfile']
        curated = entry['curated']
        sod = "sod_yes" if entry['sod2x50'] else "sod_no"
        prot_chain = entry['prot_chain']
        gpcr_chain = entry['prot_chain'] if isgpcr else False
        apo = entry['apo']
        
        #Starting simulation
        start_time = time.time()        
        sysname = name+'_apo' if apo else name
        mystrucpath = strucpath+sysname+'/'
        os.makedirs(mystrucpath, exist_ok=True)

        # Skip if there is already a model build for this
        if os.path.exists(resultspath+'build/'+sysname+'/structure.pdb'):
            print('Build model for '+sysname+' already exists. Skipping...')
            continue

        # Check if simulation is aminergic
        if isgpcr:
            aminergic = gpcrdb_dict[pdbcode]['family'].startswith('001_001')
            adenosine = gpcrdb_dict[pdbcode]['family'].startswith('001_006_001')
        else:
            aminergic = None; adenosine = None
        
        # Add waters with homolwat if protein is a GPCR. Sodium 2x50 will also be added if a 
        # non-false pdbcode is added
        if isgpcr:
            (sod2x50, watered_filename) = internal_waters(pdbfile, pdbcode, gpcrdb_dict, apo,sod=sod)
        else:
            watered_filename=pdbfile
            sod2x50=False
        mol = Molecule(watered_filename)
            
        # Remove unnecessary ligand molecules: mostly crystalization detergents, quelants, buffers,
        # or post-traductional glicosilations
        mol.remove('resname '+' '.join(blacklist))
        
        # Remove 2x50Sodium from non-A-class GPCRs
        if isgpcr:
            if not gpcrdb_dict[pdbcode]['family'].startswith('001'):
                mol.remove('element NA')

        
        # Ismael's function to add labels (segid) for 'ligand' and 'protein' parts of the system
        mol_fixed,prot_segids = fix_and_prepare_input(mol,name,pdbcode,
                                                        modresdict,
                                                        isgpcr=isgpcr,
                                                        prot_chain=prot_chain)
        
        # If the pipeline is running in 'apoform mode', remove any non-protein, non-ion, non-water thing on the system      
        # Delete also non-receptor proteins
        # If there's any, parameterize and rename covalent-bound ligands
        if apo:
            (mod_mol,prot_segids) = make_apo(mol_fixed,prot_chain)    
            covligs = []
        else:
            (mod_mol, covligs) = covalent_ligands(mol_fixed, name, ligandsdict)

        # Get aligned OPM structure
        thickness,opm_mol = get_opm(pdbcode)

        # Superimpose fixed molecule onto OPM counterpart for proper membrane fitting
        # Bit dirty, I know, but the best option avaliable
        modfile = 'mod_mol.pdb'
        mod_mol.write(modfile)
        opmfile = 'opm_mol.pdb'
        opm_mol.write(opmfile)
        aligfile = 'aligned_mol.pdb'
        chimera_superimpose(opmfile,modfile,aligfile)            
        mol_aligned = Molecule(aligfile)
        os.remove(modfile);os.remove(opmfile);os.remove(aligfile)
        
        #Center to receptor XY
        center = np.mean(mol_aligned.get('coords',sel='chain '+prot_chain),axis=0)
        mol_aligned.moveBy([-center[0],-center[1],0])
        
        # Prepare protein: asign titration states, flipping side chains of HIS, ASN and GLN; rotate some sidechains, optimize waters, etc.
        # Most of this is done with a HTMD function called proteinPrepare()
        # Skip step if we are working with curators structures
        prepared_mol = mol_aligned if curated else prepare_system(mol_aligned, pdbcode, thickness, gpcr_chain, sod2x50, aminergic, adenosine)
        
        #Add membrane
        print('Adding membrane...')
        membranemol = Molecule(membranepdb)
        mol_membraned, membrane_resnames, membrane_segids, xreps, yreps = add_membrane(prepared_mol, membranemol,prot_segids,membrane_distance)

        #Solvate
        print('Solvating...')
        mol_solvated = solvate_pdbmol(mol_membraned,membrane_segids,water_thickness,water_margin,buffer=buffer,coldist=coldist,prefix='WT')

        #Obtain extra parameters for ligands and modified residues 
        ligstreams=extra_parameters(name, ligandsdict, modresdict, blacklist, covligs, basepath)
        
        # Assignign terminology for cap atoms of protein chain, depending if it is the receptor protein or not
        caps = get_caps(prot_segids, mol_solvated)
        #e.g.: {'P0': ['first ACE', 'last CT3'], 'P1': ['first ACE', 'last CT3']}

        #Pre-build model
        print('Pre-build...')
        prebuildmol = charmm.build(mol_solvated, 
                                    topo=topos, 
                                    param=params,
                                    stream=streams+ligstreams,
                                    caps=caps,
                                    outdir=resultspath+'/pre-build/'+sysname,
                                    ionize=False,
                                    psfgen=psfgenpath)

        # Save prebuild model topologies in files, and  store prebuild model in molecule object
        prebuild_psffile = prebuildmol.topoloc
        prebuild_pdbfile = os.path.splitext(prebuildmol.topoloc)[0]+'.pdb'
        prebuildmol = Molecule(prebuild_pdbfile)
        _recoverProtonations(prebuildmol)

        # Checking of water/lipid ratio
        lipid_num = len(set(prebuildmol.get('resid',sel='segid '+membrane_lipid_segid)))
        solv_num = len(prebuildmol.get('index',sel='resname TIP3 and name OH2'))
        if float(solv_num) / lipid_num < 35:
            raise ValueError('Water/lipid ratio lower than 35.')

        #Renumber residues
        print('Renumbering...')
        mol_renumbered = renumber_resid_vmd(prebuildmol,'segid '+' '.join(membrane_segids),by=2)

        # Ionizing system
        print('Ionizing...')
        molbuilt = charmm.build(prebuildmol,
                                topo=topos, 
                                param=params,
                                stream=streams+ligstreams,                        
                                outdir=resultspath+'/ionize/'+sysname,
                                saltconc=0.15,
                                caps=caps,
                                psfgen=psfgenpath)

        build_psffile = molbuilt.topoloc
        build_pdbfile = os.path.splitext(molbuilt.topoloc)[0]+'.pdb'
        molbuilt = Molecule(build_pdbfile)
        _recoverProtonations(molbuilt)

        #Building system
        print('Building...')
        molbuilt = renumber_resid_vmd(molbuilt,'segid "WT.*" or segid I',by=2)
        molbuilt = charmm.build(molbuilt, 
                                topo=topos, 
                                param=params,
                                stream=streams+ligstreams,                        
                                outdir=resultspath+'/build/'+sysname,
                                ionize=False,
                                psfgen=psfgenpath)

        print('End of %s after %s seconds\n' % (sysname, time.time() - start_time))

    except Exception as e:
        print("model "+sysname+" could not be build because ",e)
        print(traceback.format_exc())


2025-03-07 16:07:40,542 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.


Structure 4EJ4 already has a watered version. Skipping...


2025-03-07 16:07:40,836 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.
2025-03-07 16:07:41,018 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.
2025-03-07 16:07:41,115 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.
2025-03-07 16:07:41,343 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.
2025-03-07 16:07:46,314 - moleculekit.molecule - INFO - Removed 0 atoms. 2521 atoms remaining in the molecule.
You have used an unregistered copy of Chimera for 74 days.
You can either register now by visiting:
   http://www.cgl.ucsf.edu/cgi-bin/chimera_registration.py
or by choosing 'Registration...' from the 'Help' menu next
time you start Chimera with the gui enabled.

Registration is free.  By providing the information requested
you will be helping us document the impact this software is
having in the scientific community. The information you sup

Opening opm_mol.pdb...
Model 0 (opm_mol.pdb) appears to be a protein without secondary structure assignments.
Automatically computing assignments using 'ksdssp' and parameter values:
  energy cutoff -0.5
  minimum helix length 3
  minimum strand length 3
Use command 'help ksdssp' for more information.

Computing secondary structure assignments...
Computed secondary structure assignments (see reply log)
opm_mol.pdb opened
Opening mod_mol.pdb...
Model 1 (mod_mol.pdb) appears to be a protein without secondary structure assignments.
Automatically computing assignments using 'ksdssp' and parameter values:
  energy cutoff -0.5
  minimum helix length 3
  minimum strand length 3
Use command 'help ksdssp' for more information.

Computing secondary structure assignments...
Computed secondary structure assignments (see reply log)
mod_mol.pdb opened
Computing secondary structure assignments for model(s) #0, #1
using ksdssp (Kabsch and Sander Define Secondary Structure
of Proteins) with the paramet

2025-03-07 16:08:00,214 - moleculekit.tools.preparation - INFO - Forcing protonation of residue A:95 to ASP
2025-03-07 16:08:01,581 - moleculekit.tools.preparation - INFO - Skipping titration of residue ASP:A:95
2025-03-07 16:08:04,607 - moleculekit.tools.preparation - INFO - Modified residue CYS   121 A to CYX
2025-03-07 16:08:04,609 - moleculekit.tools.preparation - INFO - Modified residue HIS   152 A to HIE
2025-03-07 16:08:04,610 - moleculekit.tools.preparation - INFO - Modified residue CYS   198 A to CYX
2025-03-07 16:08:04,612 - moleculekit.tools.preparation - INFO - Modified residue HIS   278 A to HID
2025-03-07 16:08:04,613 - moleculekit.tools.preparation - INFO - Modified residue HIS   301 A to HID
2025-03-07 16:08:04,615 - moleculekit.tools.preparation - INFO - Modified residue TIP3    1 X to TIP
2025-03-07 16:08:04,617 - moleculekit.tools.preparation - INFO - Modified residue TIP3    2 X to TIP
2025-03-07 16:08:04,618 - moleculekit.tools.preparation - INFO - Modified residue

Adding membrane...


2025-03-07 16:08:06,603 - htmd.builder.builder - INFO - Replicating Membrane 3x3

2025-03-07 16:09:48,329 - moleculekit.molecule - INFO - Removed 2079 atoms. 41253 atoms remaining in the molecule.
2025-03-07 16:09:50,936 - moleculekit.molecule - INFO - Removed 132 residues from appended Molecule due to collisions.


Solvating...


2025-03-07 16:09:52,422 - htmd.builder.solvate - INFO - Using water pdb file at: /home/daranda/.conda/envs/sim_pipe/lib/python3.10/site-packages/htmd/share/solvate/wat.pdb


wataerbox Max and min:  [48.48986  49.81642  28.174822] [-46.144142 -46.31058  -27.620178]


2025-03-07 16:09:54,178 - htmd.builder.solvate - INFO - Replicating 8 water segments, 2 by 2 by 2

2025-03-07 16:10:46,256 - htmd.builder.solvate - INFO - 10309 water molecules were added to the system.
███████████████████████████████████████████████████████| 8/8 [00:49<00:00,  6.21s/it]

In [5]:
#########################
## Part 3: Equillibration
#########################

# Iterate by GPCRdb structures to simulate
for entry in input_dict:    
    name = entry['name']
    pdbcode = entry['pdbcode']
    apo = entry['apo']
    # Entry's data
    try:
        modelname = name+'_apo' if apo else name
        equildir = resultspath+'equil/'+modelname+'/'
        if os.path.exists(equildir+'output.xtc') or os.path.exists(equildir+'simrunning'):
            print(" structure %s already has been equilibrated" % modelname)
            continue

        if not os.path.exists(equildir):
            os.makedirs(equildir)

        # Define equillibration parameters
        md = define_equilibration(
            const_sel=const_sel,
            simtime = equil_simtime,
            minimize = minim_steps,
            timestep = equil_timestep, 
            temperature = temperature
            )
        md.write(resultspath+'build/'+modelname,equildir)

        #Substitute run.sh generated by HTMD by a different one, adapted to the specified path of ACEMD
        with open(equildir + 'run.sh', 'w') as f:
            f.write('#!/bin/bash\n%s > %slog.txt 2>&1' % (acemd_path, equildir))
    
        #Prepare slurm job  
        sq = SlurmQueue()
        sq.envvars = acemd_license
        sq.jobname = modelname+'_eql'
        sq.datadir = None
        sq.partition = 'gpu_gpcr'
        sq.ngpu = 1
        sq.ncpu = 1
        sq.prerun = job_commands(equildir, './'+modelname+'/')
        sq.memory = 2000
        
        # Submit
        sq.submit(equildir)
    except Exception as e:
        print("model "+modelname+" could not be send to equilibrate because of ",e)



model delta_opioid_naltrindole could not be send to equilibrate because of  Could not find required executable [sbatch]


In [3]:
#####################
## Part 4: Production
#####################

# Production protocol
md = define_production(timestep, trajperiod, temperature, prod_simtime)

# For each structure 
for entry in input_dict:    
    name = entry['name']
    pdbcode = entry['pdbcode']
    apo = entry['apo']
    # must match with equildir in equilibration launcher code and contain input and output of equilibration.
    modelname = name+'_apo' if apo else name
    equildir = '%s/equil/%s/' % (resultspath, modelname)
    if not os.path.exists(equildir):
        print("structure %s has not been yet equillibrated. Skipping...")
        continue
    for rep in range(1,repnum+1):
        
        try: 
            # If simulation for this PDB has already been run
            proddir='%sproduction/%s/rep_%d/' % (resultspath, modelname, rep)
            if os.path.exists(proddir+'/output.xtc') or os.path.exists(proddir+'simrunning'):
                print("replicate %d of structure %s already has been simulated" %(rep, modelname))
                continue

            print('submitting replicate %d of %s' % (rep, modelname))
            # directory copy output of equilibration to production input (initial working directory for run_prod.sh).
            md.write(equildir,proddir)

            sq = SlurmQueue()
            sq.envvars = acemd_license
            sq.jobname = modelname+'_pr'+str(rep)
            sq.datadir = None
            sq.partition = 'gpcr_gpu'
            sq.prerun = job_commands(proddir, './%s_pr_%d/'%(modelname,rep))
            sq.ngpu = 1
            sq.ncpu = 1

            #Substitute run.sh generated by HTMD by a different one, adapted to the specified path of ACEMD
            with open(proddir + 'run.sh', 'w') as f:
                f.write('#!/bin/bash\n%s >%slog.txt 2>&1' % (acemd_path,proddir))

            sq.submit(proddir)
            
        except Exception as e:
            print("model "+modelname+" could not be send to production because of ",e)


replicate 1 of structure metI already has been simulated
replicate 2 of structure metI already has been simulated
replicate 3 of structure metI already has been simulated
replicate 1 of structure metI_mut already has been simulated
replicate 2 of structure metI_mut already has been simulated
replicate 3 of structure metI_mut already has been simulated
replicate 1 of structure NitroI already has been simulated
replicate 2 of structure NitroI already has been simulated
submitting replicate 3 of NitroI


2022-01-10 09:23:50,693 - jobqueues.slurmqueue - INFO - Queueing /gpcr/users/daranda/doctorat/PSYBIAS_simulations/simulation_output/production/NitroI/rep_3/


submitting replicate 1 of NitroI_mut


2022-01-10 09:24:19,816 - jobqueues.slurmqueue - INFO - Queueing /gpcr/users/daranda/doctorat/PSYBIAS_simulations/simulation_output/production/NitroI_mut/rep_1/


submitting replicate 2 of NitroI_mut


2022-01-10 09:24:43,123 - jobqueues.slurmqueue - INFO - Queueing /gpcr/users/daranda/doctorat/PSYBIAS_simulations/simulation_output/production/NitroI_mut/rep_2/


submitting replicate 3 of NitroI_mut


2022-01-10 09:25:05,026 - jobqueues.slurmqueue - INFO - Queueing /gpcr/users/daranda/doctorat/PSYBIAS_simulations/simulation_output/production/NitroI_mut/rep_3/


replicate 1 of structure otava35 already has been simulated
replicate 2 of structure otava35 already has been simulated
replicate 3 of structure otava35 already has been simulated
replicate 1 of structure otava35_mut already has been simulated
replicate 2 of structure otava35_mut already has been simulated
replicate 3 of structure otava35_mut already has been simulated
replicate 1 of structure otava37 already has been simulated
replicate 2 of structure otava37 already has been simulated
replicate 3 of structure otava37 already has been simulated
replicate 1 of structure otava37_mut already has been simulated
replicate 2 of structure otava37_mut already has been simulated
replicate 3 of structure otava37_mut already has been simulated


In [None]:
##########################
## Part 5: Wrap Structures
##########################

# Wrap trajectories obtained during production with an htmd command
prot_sel = "protein and segid P P0 P1 P2 P3 P4 P5 P6 P7 P8 P9" #Every chain in our system is assigned a PX segid. I dont think there will ever be more than 10
for entry in input_dict:    
    name = entry['name']
    pdbcode = entry['pdbcode']
    isgpcr = entry['isgpcr']
    prot_chain = entry['prot_chain']

    # Create a transmembrane selection (for alignment) 
    topopath = '%sproduction/%s/rep_1/structure.pdb' % (resultspath, modelname)
    transmem_sel = transmem_atoms(strucpath)
        
    modelname = name
    simpath = '%sproduction/%s/' % (resultspath, modelname)
    # Skip if all replicates are ready
    alldone=True
    for rep in range(1,repnum+1):
        trajpath = '%s/rep_%d/output_wrapped.xtc' % (simpath, rep)
        if not os.path.exists(trajpath):
            alldone=False
    if alldone:
        print("All replicates in dystem %s already wrapped. Skipping...."%(modelname))
        continue

    try:
        # Wrap individual replicates one by one
        pool = mp.Pool(3)
        for rep in range(1,repnum+1):
            
            trajpath = '%s/rep_%d/output.xtc' % (simpath, rep)
            if not os.path.exists(trajpath):
                print("System %s_%d not yet simulated. Skipping...."%(modelname,rep))
                continue
            
            outrajpath = '%s/rep_%d/output_wrapped' % (simpath, rep)
            if os.path.exists(outrajpath+'.xtc'):
                print("System %s_%d already wrapped. Skipping...."%(modelname,rep))
                continue
            
            # Wrap and align
            print("Wrapping and aligning %s..."% modelname)
            x = pool.apply_async(wrap_alig_vmd, args=(topopath, strucpath, trajpath, outrajpath, prot_sel, transmem_sel))
            print(x.get()) # Print errors

        pool.close()
        pool.join() 


    except Exception as e:
        print("System %s could not be wrapped because of %s" % (modelname,e))

wrapping replicate 1 of metI
replicate already wrapped. Skipping...
wrapping replicate 2 of metI
replicate already wrapped. Skipping...
wrapping replicate 3 of metI
replicate already wrapped. Skipping...
End of metI after 0.014490127563476562 seconds

wrapping replicate 1 of metI_mut
replicate already wrapped. Skipping...
wrapping replicate 2 of metI_mut
replicate already wrapped. Skipping...
wrapping replicate 3 of metI_mut
replicate already wrapped. Skipping...
End of metI_mut after 0.011120319366455078 seconds

wrapping replicate 1 of NitroI
replicate already wrapped. Skipping...
wrapping replicate 2 of NitroI
replicate already wrapped. Skipping...
wrapping replicate 3 of NitroI
replicate already wrapped. Skipping...
End of NitroI after 0.010163545608520508 seconds

wrapping replicate 1 of NitroI_mut
replicate already wrapped. Skipping...
wrapping replicate 2 of NitroI_mut
replicate already wrapped. Skipping...
wrapping replicate 3 of NitroI_mut
replicate already wrapped. Skipping..

  covariance = np.dot(P.T, Q)
  all1 = np.dot(all1, rot.T)


In [8]:
#####################
## EXTRA: ligand RMSD
#####################
### Assess quality of systems by calculating the RMSD of the ligand/s molecule/s

# Set paths and files
outpath = resultspath+'rmsd_lig_probe.tsv'
if os.path.exists(outpath):
    print('RMSDs already computed. SKipping...')
else:

    # Iterate over pdbcodes 
    results = []
    for entry in input_dict:    
        name = entry['name']
        pdbcode = entry['pdbcode']
        # Iterate over production replicates
        for mytrajid in ["1","2","3"]:
            
            try:
                sysname = name

                # Input files of simulation
                files_path = resultspath+'production/%s/rep_%s/'%(name, mytrajid)
                mypdbpath = files_path+'structure.pdb'
                mypsfpath = files_path+'structure.psf'
                mytrajpath = files_path+'output_wrapped.xtc'

                # Skip if no trajectory
                if not os.path.exists(mytrajpath):
                    print("no trajectory replicate %s for system %s avalible. Skipping..."%(mytrajid, sysname))
                    continue

                print('computing ligand RMSD for trajectory %s of system %s' % (mytrajid, sysname))

                # Load trajectory and topology into MDA universe, and select protein atoms
                u = mda.Universe(mypsfpath, mytrajpath)
                ligsel = u.select_atoms("segid LIG and not resname CLR")

                # Compute rmsd, extract its values and put them in corresponding lists and dicts
                R = rms.RMSD(ligsel)
                R.run()
                rmsd = np.mean([ a[2] for a in R.rmsd ])
                results.append((sysname,rmsd)) 

            except Exception as e:
                print("error: system %s failed becasue %s"%(sysname,e))

    # Once everything is done, sort and write RMSD results
    out = open(outpath,'w')
    out.write("Simulated_system\tRMSD_ligand\n")
    results_sorted = sorted(results, key=lambda tup: tup[0])
    for line in results_sorted:
        out.write("%s\t%f\n"%(line[0],line[1])) 
    out.close()

computing ligand RMSD for trajectory 1 of system metI


2021-12-30 15:51:14,957 - MDAnalysis.analysis.rmsd - INFO - RMS calculation for 0 atoms.
2021-12-30 15:51:14,957 - MDAnalysis.analysis.base - INFO - Choosing frames to analyze
2021-12-30 15:51:14,958 - MDAnalysis.analysis.base - INFO - Starting preparation


ZeroDivisionError: float division

In [None]:
#################################
## Submit simulation into GPCRmd
#################################
mainurl = 'https://www.gpcrmd.org'

# Load/create json file for submission ids
subm_file = basepath+'subm_ids.json'
dyn_file = basepath+'dyn_ids.json'
subm_dict = json_dict(subm_file) if os.path.exists(subm_file) else {}
dyn_dict = json_dict(dyn_file) if os.path.exists(dyn_file) else {}

# For each of the currently-working-with systems defined in Part 1
# for pdbcode in pdb_set:
for entry in input_dict:    

    try:
        name = entry['name']
        pdbcode = entry['pdbcode']
        modelpath = basepath+entry['pdbfile']
        apo = not len(entry['ligands'])

        # Set the path to the folder containing this simulation's files
        sysname = name
        prodpath = resultspath+'production/'+name+'/'
        repath = prodpath+'rep_1/'# For files no changing across representations

        # Skip if not all trajectories    
        incomplete = check_completeness(prodpath)
        if incomplete:
            continue            

        # Skip already submited
        if sysname in subm_dict:
            print('system %s already submitted. Skipping'%sysname)
            subm_id = subm_dict[sysname]
            continue
        
        # Load molecule
        mymol = Molecule(repath+'structure.pdb')
        print('Submitting '+sysname+' simulation...')        
        
        ## Step -2: Login into GPCRmd
        with requests.Session() as s:
            login(s)

        ## Step -1: Get information of protein chains and ligand molecules from PDB web
        (protdict,ligdict,segtochain,method_id) = get_pdb_info(pdbcode, mymol, entry['ligands'])
        dynname = find_dyn_name(protdict,pdbcode,ligdict,apo)

        ## Step 0: Create a new submission
        subm_id = new_submission(s, mainurl)

        ## Step 1: General information
        new_step1(s, subm_id, dynname, pdbcode, trajperiod, timestep, repath, modelpath, method_id, apo)

        ## Step 2: Small molecules 
        new_step2(s, subm_id, ligdict, basepath)   

        ## Step 3: Protein chains
        new_step3(s, subm_id, protdict)

        ## Step 4: Dynamics information
        new_step4(s, subm_id, prodpath, repath)

        ## Step 5: Provisional references
        new_step5(s, subm_id, 'Your_code_here',00000000)
        subm_dict[sysname] = subm_id

        ## Get Dynid and save it
        dynid = dyn_dict[sysname] if sysname in dyn_dict else get_dynid(subm_id)

    except Exception as e:
        print("Simulation "+sysname+" could not be submitted because ",e)            
        print(traceback.format_exc())


# Save update submson ids dictionary
with open(subm_file, 'w') as jsonfile:
    json.dump(subm_dict, jsonfile, ensure_ascii=False, indent = 4)            

# Save update dynids dictionary
with open(dyn_file, 'w') as jsonfile:
    json.dump(dyn_dict, jsonfile, ensure_ascii=False, indent = 4)            


In [None]:
# Check if GPCRmd's workbench and report open properly in your submission
dyn_file = basepath+'dyn_ids.json'
dyn_dict = json_dict(dyn_file) if os.path.exists(dyn_file) else {}
for dyn_id in dyn_dict.values(): 
    workbench_reponse = requests.get('https://www.gpcrmd.org/view/'+str(dyn_id))
    report_reponse = requests.get('https://www.gpcrmd.org/dynadb/dynamics/id/'+str(dyn_id))                       
    if not (workbench_reponse.ok and report_reponse.ok):
        print("Dyn_id: ",dyn_id)
        print("Workbench: ", workbench_reponse)
        print("Report: ", report_reponse)
        print("\n")
