<font size="+3">HPC-Setup : BindCraft Designs in Funnel Metadynamics  </font>


In [1]:
import os
import glob
import shutil
import csv
from Bio.PDB import PDBParser, PPBuilder, MMCIFParser, PDBIO 
import pandas as pd

# Setup

In [7]:
# Root folder of the AF3 predictions; each batch has a `selected/` sub-folder of .cif files.
topdir = '/mnt/DATA8To/rita/251227_duo_design_CXCR4/output-CXCR4/AF3_Prediction/output'
batches = ['human', 'mouse']

# Built once and reused for every file (they were previously re-created per iteration).
cif_parser = MMCIFParser(QUIET=True)
pdb_writer = PDBIO()

for batch in batches:
    # Sorted so that the processing order is reproducible from run to run.
    cif_files = sorted(glob.glob(f'{topdir}/{batch}/selected/*.cif'))
    if not cif_files:
        # An empty or missing folder would otherwise be skipped without any notice.
        print(f'WARNING: no .cif files found in {topdir}/{batch}/selected')

    for file in cif_files:
        # Design name = file name up to its first '.'.
        basename = os.path.basename(file).split('.')[0]
        design_dir = f'{batch}/{basename}'
        os.makedirs(design_dir, exist_ok=True)

        # Keep a copy of the original mmCIF, tagged with the first letter of the batch.
        shutil.copy(file, f'{design_dir}/{basename}_{batch[0]}.cif')

        # Save the same structure in PDB format as complex.pdb.
        structure = cif_parser.get_structure("CXCR4", file)
        pdb_writer.set_structure(structure)
        pdb_writer.save(f'{design_dir}/complex.pdb')

In [13]:
def seq_chain_B(pdb_path):
    """Return the sequence of chain 'B' from the first model of a PDB file.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file to parse.

    Returns
    -------
    str
        The sequences of all peptides that ``PPBuilder`` builds for chain 'B',
        joined in order with no separator. An empty string is returned when
        the structure contains no model or the first model has no chain 'B'.
    """
    structure = PDBParser(QUIET=True).get_structure("s", pdb_path)

    # A default of None avoids a StopIteration when the file holds no model at all.
    model = next(structure.get_models(), None)
    if model is None or 'B' not in model:
        return ""

    peptides = PPBuilder().build_peptides(model['B'])
    return "".join(str(pp.get_sequence()) for pp in peptides)

batches = ['human', 'mouse']

for batch in batches:
    # Every sub-directory of the batch folder counts as one design, taken in name order.
    systems = sorted(
        entry for entry in os.listdir(batch)
        if os.path.isdir(os.path.join(batch, entry))
    )

    # One CSV per batch: a header row, then (design name, chain-B sequence) per design.
    out_csv = os.path.join(batch, f'CXCR4-{batch}.csv')
    with open(out_csv, 'w', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(['Design', 'Sequence'])
        writer.writerows(
            [system, seq_chain_B(f'{batch}/{system}/complex.pdb')]
            for system in systems
        )


In [None]:
# NOTE(review): the two lines below are shell commands, not Python; as written this
# cell raises a SyntaxError in a Python kernel. Run them from a terminal, or prefix
# each line with `!` (or turn the cell into a `%%bash` cell).
# The CSV-writing cell above saves its output as human/CXCR4-human.csv and
# mouse/CXCR4-mouse.csv, so the bare file names used here only resolve when the
# command is launched from inside the matching batch directory.
# TODO confirm the intended working directory.
python3 /mnt/DATA20To/rita/BindCraft-Funnel-Design-Filter/Design_Filter_GPCR.py CXCR4-human.csv 
python3 /mnt/DATA20To/rita/BindCraft-Funnel-Design-Filter/Design_Filter_GPCR.py CXCR4-mouse.csv 

# Submission scripts

In [18]:
# Write job.sh: the SLURM script that runs NVT -> NPT -> production (or continues an
# existing production run when production.tpr is present) and resubmits itself.
#
# The script text contains non-ASCII characters, so the encoding is fixed to UTF-8
# instead of depending on the locale; the `with` block closes the file even if the
# write fails.
#
# NOTE(review): on the first cycle production runs with `-maxh 20`, but the script only
# resubmits when the whole job lasted at least 23 h (num2=23). Unless equilibration
# takes about 3 h, the first cycle is reported as finished and is not resubmitted.
# Confirm this is intended.
with open("job.sh", 'w', encoding="utf-8") as fo:
    fo.write("""#!/bin/bash
#SBATCH --job-name=MD-BindCraft
#SBATCH --nodes=1
################
#SBATCH --ntasks-per-node=1         # (number of mpi tasks) / 4 MPI par replica  
#SBATCH --cpus-per-task=8          # (Number of OpenMP threads per MPI task) / nombre de coeurs à réserver 
#SBATCH --gres=gpu:1                # Number of GPUs per node à réserver
#SBATCH --hint=nomultithread               
#SBATCH -C MI250                
#SBATCH -A c1615115
##SBATCH --exclusive # temporary until module error is fixed
######
#SBATCH --time=24:00:00
#SBATCH -o job.out
#SBATCH -e job.err
######
module purge
module load archive CCE-GPU-3.1.0
module load gromacs/2023_amd-omp-plumed-rocm-mpi-gpu

module list

nodes="${SLURM_JOB_NUM_NODES}"
threads="${SLURM_CPUS_PER_TASK}"
ranks="${SLURM_NTASKS}"

# OpenMP thread binding
export OMP_NUM_THREADS="$threads"
export OMP_PLACES=cores
export OMP_PROC_BIND=close

# NIC POLICY
export MPICH_OFI_NIC_POLICY=GPU

# Stack limits
ulimit -c unlimited
ulimit -s unlimited      


# gromacs settings
export GMX_DISABLE_GPU_TIMING=1
export GMX_GPU_DD_COMMS=1
export GMX_GPU_PME_PP_COMMS=1
export GMX_NO_QUOTES=1
export GMX_ENABLE_DIRECT_GPU_COMM=1

#-----------------

pwd=$(pwd)
parent_dir=$(dirname "$pwd")
grandparent_dir=$(dirname "$parent_dir")

export SUBMIT="job.sh"
START=$(date +"%s")

if [ -e production.tpr ];then
    env OMP_NUM_THREADS=$threads srun -K1 --mpi=cray_shasta -N "$nodes" -n "$ranks" -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi mdrun -deffnm production -cpi production.cpt -ntomp $threads -plumed plumed.dat -maxh 23.9
else
    # nvt
    srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi grompp -f ${grandparent_dir}/nvt.mdp -o nvt.tpr -c ${parent_dir}/minimization.gro -r ${parent_dir}/minimization.gro -p ${parent_dir}/topol.top -n ${parent_dir}/index.ndx -maxwarn 1

    env OMP_NUM_THREADS=$threads srun -K1 --mpi=cray_shasta -N "$nodes" -n "$ranks" -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi mdrun -deffnm nvt -ntomp $threads

    # npt
    srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi grompp -f ${grandparent_dir}/npt.mdp -o npt.tpr -c nvt.gro -r nvt.gro -p ${parent_dir}/topol.top -n ${parent_dir}/index.ndx -maxwarn 2

    env OMP_NUM_THREADS=$threads srun -K1 --mpi=cray_shasta -N "$nodes" -n "$ranks" -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi mdrun -deffnm npt -ntomp $threads

    # production
    srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi grompp -f ${grandparent_dir}/production.mdp -o production.tpr -c npt.gro -p ${parent_dir}/topol.top -n ${parent_dir}/index.ndx -maxwarn 2

    env OMP_NUM_THREADS=$threads srun -K1 --mpi=cray_shasta -N "$nodes" -n "$ranks" -c "$threads" -m block:block --cpu-bind cores \\
    gmx_mpi mdrun -deffnm production -cpi production.cpt -ntomp $threads -plumed plumed.dat -maxh 20
fi


END=$(date +"%s")
echo "$(((END-START))) seconds ran"
echo "$(((END-START)/3600)) full hours ran"

num1=$(bc <<< "scale=4;($END - $START) / 3600")
num2=23

if (( $(echo "$num1 < $num2" |bc -l) )); then
  echo "last cycle was just $(((END-START)/60))min long and therefore finito"
  exit 3
else
  echo "cycle resubmitting"
  sbatch ${SUBMIT}
  exit 2
fi

""")

In [15]:
# Write process.sh: a short SLURM job that builds an index file from npt.gro, writes a
# `-pbc nojump` trajectory sampled with `-dt 1000`, fits it on the protein
# (reimage.xtc), writes a protein-only reference structure (reimage.gro) and removes
# the `#...#` files left behind.
#
# The script text contains non-ASCII characters, so the encoding is fixed to UTF-8
# instead of depending on the locale; the `with` block closes the file even if the
# write fails. The backslashes in `rm \#*\#` are doubled because `\#` is not a valid
# Python escape sequence; the text written to the file is unchanged.
#
# NOTE(review): this script logs to job.out / job.err, the same names job.sh uses, so
# running both in one directory overwrites the earlier log. Confirm this is intended.
with open("process.sh", 'w', encoding="utf-8") as fo:
    fo.write("""#!/bin/bash
#SBATCH --job-name=MD-BindCraft
#SBATCH --nodes=1
################
#SBATCH --ntasks-per-node=1         # (number of mpi tasks) / 4 MPI par replica  
#SBATCH --cpus-per-task=8          # (Number of OpenMP threads per MPI task) / nombre de coeurs à réserver 
#SBATCH --gres=gpu:1                # Number of GPUs per node à réserver
#SBATCH --hint=nomultithread               
#SBATCH -C MI250                
#SBATCH -A c1615115
#SBATCH --exclusive # temporary until module error is fixed
######
#SBATCH --time=00:10:00
#SBATCH -o job.out
#SBATCH -e job.err
######
module purge
module load archive CCE-GPU-3.1.0
module load gromacs/2023_amd-omp-plumed-rocm-mpi-gpu


module list

nodes="${SLURM_JOB_NUM_NODES}"
threads="${SLURM_CPUS_PER_TASK}"
ranks="${SLURM_NTASKS}"

# OpenMP thread binding
export OMP_NUM_THREADS="$threads"
export OMP_PLACES=cores
export OMP_PROC_BIND=close

# NIC POLICY
export MPICH_OFI_NIC_POLICY=GPU

# Stack limits
ulimit -c unlimited
ulimit -s unlimited      


# gromacs settings
export GMX_DISABLE_GPU_TIMING=1
export GMX_GPU_DD_COMMS=1
export GMX_GPU_PME_PP_COMMS=1
export GMX_NO_QUOTES=1
export GMX_ENABLE_DIRECT_GPU_COMM=1

#-----------------

pwd=$(pwd)
parent_dir=$(dirname "$pwd")
grandparent_dir=$(dirname $(dirname "$pwd"))

echo q | srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
gmx_mpi make_ndx -f npt.gro

echo System | srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
gmx_mpi trjconv -f production.xtc -s npt.gro -n index.ndx -pbc nojump -o tmp.xtc -dt 1000

echo Protein Protein | srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
gmx_mpi trjconv -f tmp.xtc -s production.tpr -n index.ndx -fit rot+trans -o reimage.xtc

echo Protein | srun -K1 --mpi=cray_shasta -N 1 -n 1 -c "$threads" -m block:block --cpu-bind cores \\
gmx_mpi editconf -f npt.gro -n index.ndx -o reimage.gro 

rm \\#*\\#

""")

In [16]:
# Write submit.sh: for each batch (human, mouse), each binder directory and each
# replica directory 0..2, copy job.sh from the base directory into the replica
# directory and submit it with sbatch. Directories that cannot be entered are skipped.
#
# The file name is a plain literal (the former f-prefix had no placeholders), and the
# `with` block closes the file even if the write fails.
with open("submit.sh", 'w', encoding="utf-8") as fo:
    fo.write("""#!/bin/bash

base=$(pwd)
declare -a systems=( human mouse )

for system in "${systems[@]}";do
    cd "$base/$system" || continue
    for binder in */; do
        binder=${binder%/}
        for j in {0..2}; do
            cd "$base/$system/$binder/$j" || continue
            cp "$base/job.sh" .
            pwd
            sbatch job.sh
        done
    done
done

""")