In [1]:
import os
import subprocess


## Run binder diffusion to hotspots!
We used to thread each diffused binder with its sequence and run FastRelax before ProteinMPNN, but that did not yield more candidates.

In [40]:
# --- Run identity and inputs -------------------------------------------------
prefix = "TEVp-test"  # names the output folder and the Slurm job
scaffolds = "/home/tsatler/RFdif/ClusterProteinDesign/scaffolds/rfdiff_scaffolds"
target = "input/1lvb-clean.pdb"
hotspots = "ppi.hotspot_res=[A172,A206,A210,A217]"  # passed verbatim to RFdiffusion

# --- RFdiffusion / ProteinMPNN sampling --------------------------------------
num_of_diffusions = 1   # RF diffusions per Slurm array task
num_seqs = 10           # MPNN sequences generated per RF diffusion
num_filtered_seqs = 10  # MPNN sequences per RF diffusion kept for AF2
diff_steps = 50         # value given to diffuser.T

# --- AF2 / ProteinMPNN parameters --------------------------------------------
num_recycles = 3        # AF2 recycles
sampling_temp = 0.0001  # ProteinMPNN sampling temperature

# --- Slurm parameters ---------------------------------------------------------
num_jobs = 2     # number of array tasks to submit
array_limit = 3  # value after "%" in the --array specification

# Summarise the total workload before anything is submitted.
print(
    f"This will diffuse {num_jobs * num_of_diffusions} binders "
    f"and design {num_jobs * num_of_diffusions * num_seqs} sequences in total"
)

This will diffuse 2 binders and design 20 sequences in total


## Run the Slurm array script

In [41]:
# Echo the run prefix, which names the output folder and the Slurm job.
print(prefix)

TEVp-test


In [42]:

# Paths of the secondary-structure (ss) and adjacency (adj) files that the job
# looks for next to the target PDB; the job generates them if either is missing.
target_dir = os.path.dirname(target)
target_base = os.path.basename(os.path.splitext(target)[0])
ss = os.path.join(target_dir, f"{target_base}_ss.pt")
adj = os.path.join(target_dir, f"{target_base}_adj.pt")

# Everything produced by this run is written under output/<prefix>.
output_folder = f"output/{prefix}"
os.makedirs(output_folder, exist_ok=True)

# Path stem for RFdiffusion results; each array task appends its own task id.
rf_out_folder = f"{output_folder}/rf_dock/{prefix}"


# Slurm array job: RFdiffusion docking followed by ProteinMPNN + AF2.
# This is a regular (non-raw) f-string, so a backslash at the end of a line is
# consumed by Python and the continued shell command is written to the file as
# one long line. Literal bash braces are doubled to escape them.
script = f"""#!/bin/bash
#SBATCH --partition=gpu
#SBATCH --gres=gpu:A40:1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --job-name={prefix}_binder_docking
#SBATCH --array=0-{num_jobs-1}%{array_limit}

set -e
# Load Anaconda environment
source /home/tsatler/anaconda3/etc/profile.d/conda.sh


########################
# RF diffusion
########################

# Generate ss, adj, and out_dir variables based on target_pdb
# Check if ss and adj files exist
if [ ! -f "{ss}" ] || [ ! -f "{adj}" ]; then
  echo "One or both of the SS and ADJ files do not exist. Running Python script to generate them..."
  
  conda activate pyro
  # Run Python script to generate ss and adj files
  python helper_scripts/make_secstruc_adj.py --input_pdb "{target}" --out_dir "{target_dir}"
else
  echo "SS and ADJ files already exist."
fi


echo Running RFdiffusion for docking scaffolds to the target...
conda activate SE3nv

rf_out={rf_out_folder}_$SLURM_ARRAY_TASK_ID/{prefix}_$SLURM_ARRAY_TASK_ID
echo rf_out: $rf_out

# The hotspot override is single-quoted so bash passes its square brackets
# through literally instead of treating them as a glob pattern.
python /home/tsatler/RFdif/RFdiffusion/scripts/run_inference.py \
diffuser.T={diff_steps} \
scaffoldguided.target_path={target} \
inference.output_prefix=$rf_out \
scaffoldguided.scaffoldguided=True \
'{hotspots}' \
scaffoldguided.target_pdb=True \
scaffoldguided.target_ss={ss} \
scaffoldguided.target_adj={adj} \
scaffoldguided.scaffold_dir={scaffolds} \
'potentials.guiding_potentials=["type:binder_ROG,weight:3","type:interface_ncontacts","type:binder_distance_ReLU"]' \
potentials.guide_scale=2 \
potentials.guide_decay="quadratic" \
inference.num_designs={num_of_diffusions} \
denoiser.noise_scale_ca=0 \
denoiser.noise_scale_frame=0

# Remove some files
# The glob has to sit outside the quotes to be expanded. rm -f also exits
# successfully when nothing matches, so 'set -e' does not abort the job.
rm -f "$rf_out"*.trb # delete trb files
if [ -d "{rf_out_folder}_$SLURM_ARRAY_TASK_ID/traj" ]; then
    rm -r "{rf_out_folder}_$SLURM_ARRAY_TASK_ID/traj" # delete trajectories
fi


########################
# ProteinMPNN and AF2
########################
echo "Running AFDesign with MPNN sampling"

conda activate colabthread

input_files=($rf_out*.pdb)
script="helper_scripts/colabinder.py"

for ((i=0; i<${{#input_files[@]}}; i++)); do
  pdb_file=${{input_files[$i]}}
  echo $pdb_file - ProteinMPNN and AF2
  af_out={output_folder}/mpnn_af2/{prefix}_$SLURM_ARRAY_TASK_ID
  python $script $pdb_file $af_out B A --sampling_temp {sampling_temp} \
  --num_recycles {num_recycles} --num_seqs {num_seqs} --num_filt_seq {num_filtered_seqs} \
  --results_dataframe {output_folder} --save_best_only
done
"""

# Write the script to a file
script_file = f"{output_folder}/run_docking.sh"
with open(script_file, "w") as f:
    f.write(script)

In [43]:
# Submit the script to Slurm. check=True raises CalledProcessError when sbatch
# exits with a non-zero status, so a failed submission is not silently ignored.
subprocess.run(["sbatch", script_file], check=True)

Submitted batch job 463345


CompletedProcess(args=['sbatch', 'output/TEVp-test/run_docking.sh'], returncode=0)

In [45]:
# Show this user's jobs in the Slurm queue to check on the submitted array.
!squeue --me

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
