In [1]:
import glob
import re
import os
import shutil
import numpy as np
import reciprocalspaceship as rs
from multiprocessing import Pool
from itertools import repeat
import subprocess
import valdo



In [8]:
bGPU, ncpu=valdo.helper.configure_session()
tmp=!echo $SLURM_CPUS_PER_TASK
ncpu = np.amin([int(tmp[0]),ncpu])

There are 64 CPUs available.
For multiprocessing, we will use 63 CPUs.


In [9]:
def start_apo_refinement(mtz_file, additional_args):
    apo_file       =additional_args[0]+" "
    refine_eff     =additional_args[1]+" "
    change_of_basis=additional_args[2]+" "

    file_prefix    = os.path.basename(mtz_file)[0:4]
    path_prefix    =additional_args[3]
    verbose        =additional_args[4]

    output_prefix_flag =  'output.prefix=' +path_prefix + "refine_" + file_prefix + "/refine_" + file_prefix + " " 
    refine_script      =  path_prefix + "refine_" + file_prefix + '/refine_script_'+ file_prefix + '.sh'
    if not os.path.exists(path_prefix + "refine_" + file_prefix):
        os.makedirs(      path_prefix + "refine_" + file_prefix)
        
    change_of_basis_flag="modify_start_model.modify.change_of_basis="+change_of_basis+" "
    shebang             ='#!/bin/bash'
    source_file         ='/n/hekstra_lab_tier0/Lab/garden/phenix/phenix-1.20.1-4487/phenix_env.sh'

    refine_args = change_of_basis_flag + output_prefix_flag + apo_file + mtz_file + " " + refine_eff
    lines = [shebang, 
             'source ' + source_file, 
             'phenix.refine ' + refine_args]

    with open(refine_script, 'w') as f:
        for line in lines:
            f.write(line)
            f.write('\n')
            
    os.chmod(refine_script, 0o755)
    if ~verbose:
        result=subprocess.call(refine_script,shell=False,stdout=subprocess.DEVNULL,stderr=subprocess.STDOUT)
    else:
        result=subprocess.call(refine_script,shell=False)

    return result

In [10]:
def start_dimple_refinement(mtz_file, additional_args):
    ccp4_source_file='/n/hekstra_lab_tier0/Lab/garden/ccp4/ccp4-7.1/bin/ccp4.setup-sh'
    apo_file        =additional_args[0]
    # refine_eff     =additional_args[1]+" "
    # change_of_basis=additional_args[2]+" "

    file_prefix     = os.path.basename(mtz_file)[0:4]
    path_prefix     =additional_args[3]
    verbose         =additional_args[4]

    output_folder   =     path_prefix + "dimple_" + file_prefix + "/" 
    # rbr_script      = 
    dimple_script   =     output_folder + 'dimple_script_'+ file_prefix + '.sh'
    if not os.path.exists(output_folder):
        os.makedirs(      output_folder)
    unique_mtz      = os.path.basename(mtz_file)[:-4] + '-unique.mtz'    
    
    # change_of_basis_flag="modify_start_model.modify.change_of_basis="+change_of_basis+" "
    shebang            ='#!/bin/bash'

    # -M xx sets threshold for Molecular Replacement. We won't do so.
    lines = [shebang, 
             'source ' + ccp4_source_file,
             'cd ' + output_folder,
             'uniqueify -s ' + mtz_file + " " + unique_mtz,
             '../rbr_refmac.sh ' + unique_mtz + " " + '../1sug.pdb ' + '\"F-obs\"',
             'dimple ' + mtz_file + " rbr_only1.pdb " + output_folder + ' --fcolum=\"F-obs\" -M 0.9 --no-blob-search']

    with open(dimple_script, 'w') as f:
        for line in lines:
            f.write(line)
            f.write('\n')
            
    os.chmod(dimple_script, 0o755)
    if ~verbose:
        result=subprocess.call(dimple_script,shell=False,stdout=subprocess.DEVNULL,stderr=subprocess.STDOUT)
    else:
        result=subprocess.call(dimple_script,shell=False)

    return result

In [11]:
def refine_mtzs(file_list, apo_file, refine_eff, path_prefix, bases=['a,b,c'], verbose=True, ncpu=1):
    """
    to do
    """

    for basis in bases:
        additional_args=[apo_file, refine_eff, basis, path_prefix, verbose]
        # print(repeat(additional_args))
        with Pool(ncpu) as pool:
            # result = pool.starmap(start_apo_refinement, zip(file_list, repeat(additional_args)))
            result = pool.starmap(start_dimple_refinement, zip(file_list, repeat(additional_args)))


In [12]:
apo_file  ="/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/refine/1sug.pdb"
refine_eff="/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/refine/refine_drug.eff"

In [13]:
phyllis_dir="/n/holyscratch01/hekstra_lab/phyllis/"
my_dir     ="/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/"
original_data_path = phyllis_dir + 'pipeline/data/original_data/'

basepath = my_dir + 'pipeline/'
data_path      = basepath + 'data/'
input_mtz_path = basepath + 'data/input_mtzs/'

mtzs_to_ignore=['0003.mtz']

In [16]:
# List of MTZ files to refine against
file_list = glob.glob(input_mtz_path + "01*.mtz")
file_list.sort()

# print(file_list)
for mtz in mtzs_to_ignore:
    try:
        file_list.remove(input_mtz_path+mtz)
    except Exception as e:
        print(e)
print("Working with " + str(len(file_list)) + " MTZ files.")

list.remove(x): x not in list
Working with 92 MTZ files.


In [17]:
%%time
tmp=refine_mtzs(file_list, apo_file, refine_eff, path_prefix=my_dir+"refine/", bases=['a,b,c'], verbose=True, ncpu=ncpu)

CPU times: user 812 ms, sys: 559 ms, total: 1.37 s
Wall time: 2h 10min 45s


In [30]:
print(tmp)

None
