In [10]:
####################
## Initial stuff
####################

# Load functions, modules and global variables required in our pipeline
from simulate_structures_functions import *
from importlib import reload  
from htmd.builder.charmm import _recoverProtonations
%load_ext autoreload
%autoreload 2

# Are we running curated structures??
curated = False

# Paramchem passwords
username = 'paramoid'
# username = 'ameboid'
#password = 'euglenoid-123'
#password = 'stendoroid-123'
password = 'paramoid-123'


# PDB codes of the GPCRs to be simulated. 
# If no codes are provided, all avalible structures in GPCRdb will be used (except the ones already simulated)
pdb_set = {'6WGT','7WC7'}

# Our main path
basepath = basepath = os.getcwd()+'/'
# Other Paths
strucpath = basepath + 'data_structures/'
resultspath = basepath + 'simulation_output/'
membranepdb = basepath + 'membrane/popc36_box_renumbered.pdb'
topparpath = basepath + 'toppar/TOP_PARAMS_ACE3/'#toppar= topology+parameters
ligandsdict_path = basepath + 'ligands.json'
modres_path = basepath + 'modified_residues.json'
slurmpath = basepath+'fake_slurm/'
# Path to slurm queing system binaries
# In our case, Ismael designed a bunch of small bash scripts (fake_slurm) which do ssh to Hydra and execute slurm there
path= os.environ['PATH']

# Assign a file to each pdbcode (in this case, the downloaded refined PDB structures)
struc_dict = { pdbcode : "%s%s/%s_refined.pdb"%(strucpath,pdbcode,pdbcode) for pdbcode in pdb_set }

# Modify path to include fake slurm
%env PATH=$path:$slurmpath

# Load topology, parameter and stream files with our current basepath
topos = [os.path.join(topparpath,file) for file in toposfilenames] 
params = [os.path.join(topparpath,file) for file in paramsfilenames]
streams = [os.path.join(topparpath,file) for file in streamsfilenames]


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
env: PATH=/soft/system/easybuild/software/Miniconda3/4.7.10:/soft/system/easybuild/software/Miniconda3/4.7.10/bin:/home/daranda/miniconda3/bin:/home/daranda/miniconda3/condabin:/soft/EB_repo/bio/structure/programs/noarch/Chimera/1.16/bin:/soft/EB_repo/bio/structure/programs/foss/2016b/VMD/1.9.4a9/bin:/soft/system/lua/bin:/soft/system/luarocks/bin:/usr/lib64/qt-3.3/bin:/soft/system/lua/bin:/soft/system/luarocks/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin:/opt/puppetlabs/bin:/home/daranda/.local/bin:/home/daranda/bin:/home/daranda/bin:/home/daranda/software/getcontacts-master:/gpcr/users/daranda/doctorat/simulate_6WGT_7WC7/fake_slurm/:/gpcr/users/daranda/doctorat/simulate_6WGT_7WC7/fake_slurm/


In [2]:
################################################
# Part 1: Download data and prepare dictionaries
################################################

# Download and store refined structures from GPCRdb
download_GPCRdb_structures(pdb_set, strucpath)

#Create or moidfy the ligands dictionary
(ligandsdict, ligandsset, modresdict) = ligand_dictionary(struc_dict, ligandsdict_path, modres_path, basepath, blacklist)

# Get topology-parameter files for ligands
get_lig_toppar(ligandsdict, basepath, username, password)

# Get topology-parameter files for modified residues
get_modres_toppar(modresdict, basepath, username, password)

Downloading 6WGT structure (1/2)
Structure for 6WGT already present. Skipping...
Downloading 7WC7 structure (2/2)
Structure for 7WC7 already present. Skipping...
Getting toppar file for ligand H8G 
legacy toppar for ligand H8G already exists. Skipping...
latest toppar for ligand H8G already exists. Skipping...
Getting toppar file for ligand 7LD 
legacy toppar for ligand 7LD already exists. Skipping...
latest toppar for ligand 7LD already exists. Skipping...


In [55]:
###########################
## Part 2: Build the models 
###########################
# Iterate by GPCRdb structures to simulate
pdbs_number = len(pdb_set)
i = 0
for (pdbcode,pdbfile) in struc_dict.items():
    
    # Skip if input pdbfile is not yet avaliable
    if not os.path.exists(pdbfile):
        print('file %s not avaliable. Skipping system...'%pdbfile)
        continue
#     try:
    for apo in [False]:

        #Starting simulation
        start_time = time.time()        
        i += 1
        sysname = pdbcode+'_apo' if apo else pdbcode
        mystrucpath = strucpath+pdbcode+'/'

        # Skip if there is already a model build for this
        if os.path.exists(resultspath+'build/'+sysname+'/structure.pdb'):
            print('Build model for '+sysname+' already exists. Skipping...')
            continue
        else:
            print("Building model for %s ..."%pdbfile)

        # Check if simulation is aminergic
        aminergic = gpcrdb_dict[pdbcode]['family'].startswith('001_001')
        adenosine = gpcrdb_dict[pdbcode]['family'].startswith('001_006_001')

        ## Load curated structures
        if curated:
            apo_name = "apo" if apo else "complex"
            curated_filename = str("%sreceptor2curate_output/%s/%s_%s_curated.pdb"%(basepath, pdbcode, pdbcode, apo_name))
            gpcrdb_mol = Molecule(curated_filename)
            # Remove ligand hidrogens in case curators put any
            gpcrdb_mol.remove("not (protein or water or lipid) and element H")
        ## Load non-curated structure
        else:
            (sod2x50, watered_filename) = internal_waters(pdbfile, pdbcode, gpcrdb_dict, apo)
            gpcrdb_mol = Molecule(watered_filename)

        # Add peptide ligand to structure if necessary
        if any([ True for lig in gpcrdb_dict[pdbcode]['ligands'] if lig['type'] in {'protein', 'peptide'} ]) and not (apo):
            add_peplig(watered_filename, pdbcode)

        # SKip complex if is natural apoform (AKA: has no other proteins beside the receptor)
        if (not apo) and (len(set(gpcrdb_mol.get('chain','protein')))<1):
            print('System %s is natural apoform. Skipping complex sim...'%sysname)
            continue

        # Remove unnecessary ligand molecules: mostly crystalization detergents, quelants, buffers,
        # or post-traductional glicosilations
        gpcrdb_mol.remove('resname '+' '.join(blacklist))

        # Remove 2x50Sodium from non-A-class GPCRs
        if not gpcrdb_dict[pdbcode]['family'].startswith('001'):
            gpcrdb_mol.remove('element NA')

        # Get aligned OPM structure
        thickness,opm_mol = get_opm(pdbcode)

        # Ismael's function to add labels (segid) for 'ligand' and 'protein' parts of the system
        gpcrdb_mol_fixed,prot_segids = fix_and_prepare_input(gpcrdb_mol,pdbcode,modresdict,new_pdb_chain)

        # If the pipeline is running in 'apoform mode', remove any non-protein, non-ion, non-water thing on the system      
        # Delete also non-receptor proteins
        # If there's any, parameterize and rename covalent-bound ligands
        if apo:
            (mod_mol,prot_segids) = make_apo(gpcrdb_mol_fixed,'R')    
            covligs = []
        else:
            (mod_mol, covligs) = covalent_ligands(gpcrdb_mol_fixed, pdbcode, ligandsdict)

        # Align structrues using sequences, and take first one
        rec_segid = mod_mol.get('segid','chain R')[0]
        alignment_results = sequenceStructureAlignment(mod_mol, opm_mol, rec_segid, maxalignments = 1)
        mol_aligned = alignment_results[0]

        #Center to receptor XY
        center = np.mean(mol_aligned.get('coords',sel='chain R'),axis=0)
        mol_aligned.moveBy([-center[0],-center[1],0])

        # Prepare protein: asign titration states, flipping side chains of HIS, ASN and GLN; rotate some sidechains, optimize waters, etc.
        # Most of this is done with a HTMD function called proteinPrepare()
        # Skip step if we are working with curators structures
        prepared_mol = mol_aligned if curated else prepare_system(mol_aligned, pdbcode, thickness, sod2x50, aminergic, adenosine)

        #Add membrane
        print('Adding membrane...')
        membranemol = Molecule(membranepdb)
        mol_membraned, membrane_resnames, membrane_segids, xreps, yreps = add_membrane(prepared_mol, membranemol,prot_segids,membrane_distance)

        # Needed later for equilibration
        with open(mystrucpath+"const_sel.txt",'w') as out: 
            const_sel = 'segid '+' '.join(prot_segids)+' and name C CA N O or not (segid ' + \
              ' '.join(prot_segids)+' or lipid or water or ions ) and noh or segid ION WAT and noh'
            out.write(const_sel)

        #Solvate
        print('Solvating...')
        mol_solvated = solvate_pdbmol(mol_membraned,membrane_segids,water_thickness,water_margin,buffer=buffer,coldist=coldist,prefix='WT')

        # Check if system has lone-pair hallogen atoms. If it does, use legacy CGenFF parameters
        (cgenff_par, cgenff_top, has_halo) = cgenff_params(mol_solvated, topparpath)

        #Obtain extra parameters for ligands and modified residues 
        ligstreams=extra_parameters(pdbcode, ligandsdict, modresdict, blacklist, covligs, basepath, has_halo)

        # Assignign terminology for cap atoms of protein chain, depending if it is the receptor protein or not
        caps = get_caps(prot_segids, mol_solvated)
        #{'P0': ['first ACE', 'last CT3'], 'P1': ['first ACE', 'last CT3']}

        #Pre-build model
        print('Pre-build...')
        prebuildmol = charmm.build(mol_solvated, 
                                   topo=topos+cgenff_top, 
                                   param=params+cgenff_par,
                                   stream=streams+ligstreams,
                                   caps=caps,
                                   outdir=resultspath+'/pre-build/'+sysname,
                                   ionize=False)

        # Save prebuild model topologies in files, and  store prebuild model in molecule object
        prebuild_psffile = prebuildmol.topoloc
        prebuild_pdbfile = os.path.splitext(prebuildmol.topoloc)[0]+'.pdb'
        prebuildmol = Molecule(prebuild_pdbfile)
        _recoverProtonations(prebuildmol)

        # Checking of aromatic insertions (takes quite a lot fo time)
        print('Checking aromatic insertions...')
#             mol_removed,removed_indexes = remove_aromatic_insertions(prebuildmol,prot_segids, outpdb=resultspath+'/pre-build/'+sysname+'/aromatic_check.pdb')

        # Checking of water/lipid ratio
        lipid_num = len(set(prebuildmol.get('resid',sel='segid '+membrane_lipid_segid)))
        solv_num = len(prebuildmol.get('index',sel='resname TIP3 and name OH2'))
        if float(solv_num) / lipid_num < 35:
            raise ValueError('Water/lipid ratio lower than 35.')

        #Renumber residues
        print('Renumbering...')
        mol_renumbered = renumber_resid_vmd(prebuildmol,'segid '+' '.join(membrane_segids),by=2)

        # Ionizing system
        print('Ionizing...')
        molbuilt = charmm.build(prebuildmol,
                                topo=topos+cgenff_top, 
                                param=params+cgenff_par,
                                stream=streams+ligstreams,                        
                                outdir=resultspath+'/ionize/'+sysname,
                                saltconc=0.15,
                                caps=caps)
        build_psffile = molbuilt.topoloc
        build_pdbfile = os.path.splitext(molbuilt.topoloc)[0]+'.pdb'
        molbuilt = Molecule(build_pdbfile)
        _recoverProtonations(molbuilt)

        # Remove clashing rebuild waters
        molbuilt.remove("same residue as (water and within 1 of protein)")

        #Building system
        print('Building...')
        molbuilt = renumber_resid_vmd(molbuilt,'segid "WT.*" or segid I',by=2)
        molbuilt = charmm.build(molbuilt, 
                                topo=topos+cgenff_top, 
                                param=params+cgenff_par,
                                stream=streams+ligstreams,                        
                                outdir=resultspath+'/build/'+sysname,
                                caps=caps,ionize=False)

        print('End of %s after %s seconds\n' % (sysname, time.time() - start_time))

#     except Exception as e:
#         print("model "+pdbcode+" could not be build because ",e)

Build model for 7WC7 already exists. Skipping...
Building model for /gpcr/users/daranda/doctorat/simulate_6WGT_7WC7/data_structures/6WGT/6WGT_refined.pdb ...
Structure 6WGT already has a watered version. Skipping...


2022-08-22 17:58:48,261 - moleculekit.molecule - INFO - Removed 20 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:50,592 - moleculekit.molecule - INFO - Removed 0 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:50,646 - moleculekit.molecule - INFO - Removed 0 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:50,800 - moleculekit.molecule - INFO - Removed 0 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:50,952 - moleculekit.molecule - INFO - Removed 0 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:51,290 - moleculekit.molecule - INFO - Removed 0 atoms. 2562 atoms remaining in the molecule.
2022-08-22 17:58:51,465 - moleculekit.tools.autosegment - INFO - Created segment P0 between resid 64 and 277.
2022-08-22 17:58:51,466 - moleculekit.tools.autosegment - INFO - Created segment P1 between resid 307 and 402.
2022-08-22 17:58:55,960 - moleculekit.readers - INFO - Attempting PDB query for 6WGT
2022-08-22 17:59:02,505 - m


---- Molecule chain report ----
Chain L:
    First residue: 7LD:1:
    Final residue: 7LD:1:
Chain R:
    First residue: SER:64:
    Final residue: ASN:402:
Chain X:
    First residue: TIP3:1:
    Final residue: TIP3:86:
---- End of chain report ----



2022-08-22 17:59:28,947 - moleculekit.tools.preparationdata - INFO - The following residues are in a non-standard state: HIS    70  A (HID), CYS   148  A (CYX), HIS   165  A (HID), HIS   182  A (HID), HIS   183  A (HID), CYS   227  A (CYX), CYS   349  R (CYX), CYS   353  R (CYX)
2022-08-22 17:59:30,522 - pdb2pqr.src.routines - INFO - In residue SER A 64 removing atom H instead of H1
2022-08-22 17:59:30,530 - pdb2pqr.src.routines - INFO - In residue THR R 307 removing atom H instead of H1


Adding membrane...


2022-08-22 17:59:52,967 - htmd.builder.builder - INFO - Replicating Membrane 3x3
Replicating Membrane: 100%|██████████| 9/9 [00:06<00:00,  1.33it/s]
2022-08-22 18:03:36,647 - moleculekit.molecule - INFO - Removed 1831 atoms. 49420 atoms remaining in the molecule.
2022-08-22 18:03:42,892 - moleculekit.molecule - INFO - Removed 110 residues from appended Molecule due to collisions.


Solvating...
wataerbox Max and min:  [55.84947  55.255943 28.081614] [-49.753532 -50.354057 -27.713387]


2022-08-22 18:03:44,989 - htmd.builder.solvate - INFO - Using water pdb file at: /soft/system/easybuild/software/Miniconda3/4.7.10/lib/python3.6/site-packages/htmd/share/solvate/wat.pdb
2022-08-22 18:03:47,247 - htmd.builder.solvate - INFO - Replicating 8 water segments, 2 by 2 by 2
Solvating: 100%|██████████| 8/8 [00:25<00:00,  3.13s/it]
2022-08-22 18:04:17,778 - htmd.builder.solvate - INFO - 17710 water molecules were added to the system.
2022-08-22 18:04:38,471 - moleculekit.molecule - INFO - Removed 885 atoms. 101079 atoms remaining in the molecule.


Pre-build...


2022-08-22 18:04:53,795 - htmd.builder.charmm - INFO - Writing out segments.
2022-08-22 18:05:12,911 - htmd.builder.builder - INFO - 2 disulfide bonds were added


Disulfide Bond between: UniqueResidueID<resname: 'CYX', chain: 'A', resid: 148, insertion: '', segid: 'P0'>
                   and: UniqueResidueID<resname: 'CYX', chain: 'A', resid: 227, insertion: '', segid: 'P0'>

Disulfide Bond between: UniqueResidueID<resname: 'CYX', chain: 'R', resid: 349, insertion: '', segid: 'P1'>
                   and: UniqueResidueID<resname: 'CYX', chain: 'R', resid: 353, insertion: '', segid: 'P1'>



2022-08-22 18:05:14,313 - htmd.builder.charmm - INFO - Starting the build.
2022-08-22 18:05:16,399 - htmd.builder.charmm - INFO - Finished building.


Checking aromatic insertions...
Renumbering...




Ionizing...


2022-08-22 18:07:10,141 - htmd.builder.charmm - INFO - Writing out segments.
2022-08-22 18:07:29,527 - htmd.builder.builder - INFO - 2 disulfide bonds were added


Disulfide Bond between: UniqueResidueID<resname: 'CYS', chain: 'A', resid: 148, insertion: '', segid: 'P0'>
                   and: UniqueResidueID<resname: 'CYS', chain: 'A', resid: 227, insertion: '', segid: 'P0'>

Disulfide Bond between: UniqueResidueID<resname: 'CYS', chain: 'R', resid: 349, insertion: '', segid: 'P1'>
                   and: UniqueResidueID<resname: 'CYS', chain: 'R', resid: 353, insertion: '', segid: 'P1'>



2022-08-22 18:07:30,924 - htmd.builder.charmm - INFO - Starting the build.
2022-08-22 18:07:33,008 - htmd.builder.charmm - INFO - Finished building.
2022-08-22 18:07:43,889 - htmd.builder.ionize - INFO - Adding 10 anions + 0 cations for neutralizing and 124 ions for the given salt concentration.
2022-08-22 18:09:49,832 - htmd.builder.charmm - INFO - Writing out segments.
2022-08-22 18:10:11,787 - htmd.builder.charmm - INFO - Starting the build.
2022-08-22 18:10:13,893 - htmd.builder.charmm - INFO - Finished building.
2022-08-22 18:11:38,241 - moleculekit.molecule - INFO - Removed 15 atoms. 101012 atoms remaining in the molecule.


Building...


2022-08-22 18:12:53,806 - htmd.builder.charmm - INFO - Writing out segments.
2022-08-22 18:13:14,067 - htmd.builder.builder - INFO - 2 disulfide bonds were added


Disulfide Bond between: UniqueResidueID<resname: 'CYS', chain: 'A', resid: 148, insertion: '', segid: 'P0'>
                   and: UniqueResidueID<resname: 'CYS', chain: 'A', resid: 227, insertion: '', segid: 'P0'>

Disulfide Bond between: UniqueResidueID<resname: 'CYS', chain: 'R', resid: 349, insertion: '', segid: 'P1'>
                   and: UniqueResidueID<resname: 'CYS', chain: 'R', resid: 353, insertion: '', segid: 'P1'>



2022-08-22 18:13:15,462 - htmd.builder.charmm - INFO - Starting the build.
2022-08-22 18:13:17,541 - htmd.builder.charmm - INFO - Finished building.


End of 6WGT after 987.0957036018372 seconds



In [56]:
#########################
## Part 3: Equillibration
#########################

for pdbcode in pdb_set:
# for pdbcode in ['5XR8']:
    for apo in [False, True]:
        try:
            modelname = pdbcode+'_apo' if apo else pdbcode
            strucfile = resultspath+'build/'+modelname+'/structure.pdb'
            equildir = resultspath+'equil/'+modelname+'/'
            if not os.path.exists(strucfile):
                print("system %s has not been build yet. Skipping..." % modelname)
                continue

            if os.path.exists(equildir+'output.xtc') or os.path.exists(equildir+'simrunning'):
                print(" structure %s already has been equilibrated" % pdbcode)
                continue

            if not os.path.exists(equildir):
                os.makedirs(equildir)

            md = define_equilibration()
            md.write(resultspath+'build/'+modelname,equildir)

            #Substitute run.sh generated by HTMD by a different one, adapted to the specified path of ACEMD
            with open(equildir + 'run.sh', 'w') as f:
                f.write('#!/bin/bash\n%s > %slog.txt 2>&1' % (acemd_path, equildir))
        
            #Prepare slurm job  
            sq = SlurmQueue()
            sq.envvars = acemd_license
            sq.jobname = 'eql_'+pdbcode
            sq.datadir = None
            sq.partition = 'gpcr_gpu'
            sq.ngpu = 1
            sq.ncpu = 1
            sq.prerun = job_commands(equildir, '/home/daranda/'+modelname+'/')
            sq.memory = 2000
#             sq.exclude = ['arwen','aragorn','bifur']
            sq.nodelist = ['aragorn']
    
            # Submit
            sq.submit(equildir)
            
        except Exception as e:
            print("model "+modelname+" could not be send to equilibrate because of ",e)

 structure 7WC7 already has been equilibrated
system 7WC7_apo has not been build yet. Skipping...


2022-08-22 18:15:18,596 - jobqueues.slurmqueue - INFO - Queueing /gpcr/users/daranda/doctorat/simulate_6WGT_7WC7/simulation_output/equil/6WGT/


system 6WGT_apo has not been build yet. Skipping...


In [3]:
#####################
## Part 4: Production
#####################

# Production protocol
md = define_production(timestep, trajperiod)

# If some model should be skipped, put its name here
modelname_skip = {}

# For each PDB 
# for pdbcode in ['4EJ4','6LRY']:
for pdbcode in pdb_set:
    for apo in [False]:
#     for apo in [True, False]:
        # must match with equildir in equilibration launcher code and contain input and output of equilibration.
        modelname = pdbcode+'_apo' if apo else pdbcode
        equildir = '%s/equil/%s/' % (resultspath, modelname)
        # IF equillibration is not avalible
        if not os.path.exists(equildir+'output.xtc'):
            print("replicate %d of structure %s has no avalible equillibration" %(rep, pdbcode))
            continue
        for rep in range(1,repnum+1):
            
            try: 
                    
                # If simulation for this PDB has already been run
                proddir='%sproduction/%s/rep_%d/' % (resultspath, modelname, rep)
                if os.path.exists(proddir+'/output.xtc') or os.path.exists(proddir+'simrunning'):
                    print("replicate %d of structure %s already has been simulated" %(rep, pdbcode))
                    continue

                print('submitting replicate %d of %s' % (rep, pdbcode))
                # directory copy output of equilibration to production input (initial working directory for run_prod.sh).
                md.write(equildir,proddir)

                sq = SlurmQueue()
                sq.envvars = acemd_license
                sq.jobname = modelname+'_pr'+str(rep)
                sq.datadir = None
                sq.partition = 'gpcr_gpu'
                sq.prerun = job_commands(proddir, '/home/daranda/%s_pr_%d/'%(modelname,rep))
                sq.ngpu = 1
                sq.ncpu = 1
                sq.exclude = ['aragorn','bifur']

                #Substitute run.sh generated by HTMD by a different one, adapted to the specified path of ACEMD
                with open(proddir + 'run.sh', 'w') as f:
                    f.write('#!/bin/bash\n%s >%slog.txt 2>&1' % (acemd_path,proddir))

                sq.submit(proddir)
                
            except Exception as e:
                print("model "+modelname+" could not be send to equilibrate because of ",e)            


replicate 1 of structure 6WGT already has been simulated
replicate 2 of structure 6WGT already has been simulated
replicate 3 of structure 6WGT already has been simulated
replicate 1 of structure 7WC7 already has been simulated
replicate 2 of structure 7WC7 already has been simulated
replicate 3 of structure 7WC7 already has been simulated


In [None]:

##########################
## Part 5: Wrap Structures
##########################

# Wrap trajectories obtained during production with an htmd command
gpcr_sel = "protein and chain "+new_pdb_chain
for pdbcode in pdb_set:
    
    # Get standard GPCR nomenclature
    (gennum_dict,resid_dict) = find_gennum(pdbcode)

    # Find residues from helices
    resids_helices = resids_helix(gennum_dict)    
    
    for apo in [False]:
#     for apo in [True, False]:
        try:
            modelname = pdbcode+'_apo' if apo else pdbcode
            mymol_pdb = Molecule('%sproduction/%s/rep_1/structure.pdb' % (resultspath, modelname))
            for rep in range(1,repnum+1):
            #for rep in [3]:
                start_time = time.time()        
                print('wrapping replicate %d of %s' % (rep, modelname))
                proddir='%sproduction/%s/rep_%d/' % (resultspath, modelname, rep)
                rep = str(rep)

                # To avoid repeating wrapping in Trajectories already wrapped, check the existance of this file
                outname = proddir+'output_wrapped.xtc'
                if os.path.exists(outname):
                    print('replicate already wrapped. Skipping...')
#                     continue
                
                # Wrap system
                mymol = Molecule('%sproduction/%s/rep_1/structure.psf' % (resultspath, modelname))
                mymol.read(proddir+'output.xtc')
                mymol.wrap(gpcr_sel)

                # Align frames and write
                mymol.align('segid P P0 P1 P2 P3 P4 and resid '+resids_helices, refmol=mymol_pdb)
                mymol.write(outname)

            print('End of %s after %s seconds\n' % (modelname, time.time() - start_time))
        except Exception as e:
            print("model "+modelname+" could not be wrapped because of ",e)            






wrapping replicate 1 of 6WGT
replicate already wrapped. Skipping...


  covariance = np.dot(P.T, Q)
  all1 = np.dot(all1, rot.T)


wrapping replicate 2 of 6WGT
replicate already wrapped. Skipping...
wrapping replicate 3 of 6WGT
replicate already wrapped. Skipping...
End of 6WGT after 144.42993021011353 seconds

wrapping replicate 1 of 7WC7
replicate already wrapped. Skipping...
wrapping replicate 2 of 7WC7
replicate already wrapped. Skipping...
wrapping replicate 3 of 7WC7
replicate already wrapped. Skipping...


In [None]:

#####################
## EXTRA: ligand RMSD
#####################
### Assess quality of systems by calculating the RMSD of the ligand/s molecule/s

# Set paths and files
outpath = resultspath+'rmsd_lig.tsv'
if os.path(outpath):
    print('RMSDs already computed. SKipping...')
else:
    # Iterate over pdbcodes 
    results = []
    for pdbcode in pdbcodes: 

        # Iterate over production replicates
        for mytrajid in ["1","2","3"]:

            try:
                sysname = "%s_rep%s"%(pdbcode,mytrajid)

                # Input files of simulation
                files_path = resultspath+'production/%s/rep_%s/'%(name, mytrajid)
                mypdbpath = files_path+'structure.pdb'
                mypsfpath = files_path+'structure.psf'
                mytrajpath = files_path+'output_wrapped.xtc'

                # Skip if no trajectory
                if not os.path.exists(mytrajpath):
                    print("no trajectory replicate %s for system %s avalible. Skipping..."%(mytrajid, sysname))
                    continue

                print('computing ligand RMSD for trajectory %s of system %s' % (mytrajid, sysname))

                # Load trajectory and topology into MDA universe, and select protein atoms
                u = mda.Universe(mypsfpath, mytrajpath)
                ligsel = u.select_atoms("segid LIG and not resname CLR")

                # Compute rmsd, extract its values and put them in corresponding lists and dicts
                R = rms.RMSD(ligsel)
                R.run()
                rmsd = np.mean([ a[2] for a in R.rmsd ])
                results.append((sysname,rmsd)) 

            except Exception as e:
                print("error: system %s failed becasue %s"%(sysname,e))

    # Once everything is done, sort and write RMSD results
    out = open(outpath,'w')
    out.write("Simulated_system\tRMSD_ligand\n")
    results_sorted = sorted(results, key=lambda tup: tup[0])
    for line in results_sorted:
        out.write("%s\t%f\n"%(line[0],line[1])) 
    out.close()

In [15]:
#### Actual Simulation submission
# mainurl = 'http://localhost:8000' 
mainurl = 'https://submission.gpcrmd.org'

## Step -2: Login into GPCRmd
with requests.Session() as s:
    login(s,'david','Ameboid')
# For each of the currently-working-with systems defined in Part 1
# pdb_set = ['6WGT']
for pdbcode in pdb_set:
    for apo in [False]:
        
        try:
            # Set the path to the folder containing this simulation's files
            sysname = pdbcode+"_apo" if apo else pdbcode+"_complex" 
            prodpath = resultspath+'production/'+pdbcode+'_apo/' if apo else resultspath+'production/'+pdbcode+'/' 
            repath = prodpath+'rep_1/'# For files no changing across representations
            
            # Skip if not all trajectories or already submitted or if already submitted
            incomplete = check_completeness(prodpath)
            submitted = os.path.exists(prodpath+'submitted.txt')
            if incomplete or submitted:
                pass
#                 continue            
            
            # Load molecule
            if not os.path.exists(repath+'structure.pdb'):
                continue
            mymol = Molecule(repath+'structure.pdb')
            print('\n###############Submitting '+sysname+' simulation ########################')            

            ## Step -1: Get information of protein chains and ligand molecules from PDB web
            (protdict,ligdict,segtochain,method_id) = get_pdb_info(pdbcode, mymol, ligandsdict)
            
            ## Step 0: Create a new submission
            subm_id = new_submission(s, mainurl)
#             subm_id = "1331"

            ## Step 1: General information
            new_step1(s, subm_id, pdbcode, trajperiod, timestep, repath, method_id, sysname, apo)
            
            ## Step 2: Small molecules 
            new_step2(s, subm_id, ligdict, apo)   

            ## Step 3: Protein chains
            new_step3(s, subm_id, pdbcode, protdict)

            ## Step 4: Dynamics information
            new_step4(s, subm_id, prodpath, repath)
        
    
        except Exception as e:
            print("Simulation "+sysname+" could not be submitted because ",e)            


loging into GPCRmd


2022-09-05 16:17:05,027 - moleculekit.readers - INFO - Attempting PDB query for 6WGT



###############Submitting 6WGT_complex simulation ########################
new submission 1336 created
1336 6WGT False
step1 finalized  <Response [200]>
initiating step 2: small molecule data
step2 finalized  <Response [200]>
initiating step 3: protein chains
P28223
step3 finalized  <Response [200]>
initiating step 4: simulation files
step4 finalized  <Response [200]>


2022-09-05 16:18:44,593 - moleculekit.readers - INFO - Attempting PDB query for 7WC7



###############Submitting 7WC7_complex simulation ########################
new submission 1337 created
1337 7WC7 False
step1 finalized  <Response [200]>
initiating step 2: small molecule data
step2 finalized  <Response [200]>
initiating step 3: protein chains
P28223
step3 finalized  <Response [200]>
initiating step 4: simulation files
step4 finalized  <Response [200]>


In [4]:
for name,dyn_id in dynids.items(): 
    workbench_reponse = requests.get('https://submission.gpcrmd.org/view/'+str(dyn_id))
    report_reponse = requests.get('https://submission.gpcrmd.org/dynadb/dynamics/id/'+str(dyn_id))                       
    if not (workbench_reponse.ok and report_reponse.ok):
        print('Name: ',name)
        print("Dyn_id: ",dyn_id)
        print("Workbench: ", workbench_reponse)
        print("Report: ", report_reponse)
        print("\n")



Name:  4Z9G
Dyn_id:  1031
Workbench:  <Response [500]>
Report:  <Response [500]>


