In [None]:
# ---------------------------------------------------------------
# INPUT PARAMETERS
# ---------------------------------------------------------------
#True: add this job to the existing batch file; False: start a new batch file
APPEND_JOB = False

#OmegaFold
NAME = "test"
#A value ending in ".fasta" is used as a path relative to the current working
#directory; any other value is written as the sequence of a new fasta file.
#IMPORTANT: only fasta files are accepted as file input, unlike AlphaFold
SEQUENCE = ""

#Advanced
#Model 2 at the moment crashes due to lack of GPU memory
MODEL = 1

# ---------------------------------------------------------------
# CODE
# ---------------------------------------------------------------
ONE_JOB = not APPEND_JOB
import os #WE USE ABSOLUTE PATHS
import shlex
#CUDA
ENVIRONMENT = "ProteinEnv" #Name given to "source activate" in the generated pipeline script
#GENERAL FOLDERS
NOTEBOOKS_FOLDER = os.getcwd()
PIPELINES_FOLDER = os.path.join(NOTEBOOKS_FOLDER,"Pipelines")
PDBs_FOLDER = os.path.join(NOTEBOOKS_FOLDER,"PDBs")
HELP_FOLDER = os.path.join(NOTEBOOKS_FOLDER,"HelpScripts")
#The user name is the name of the directory that contains the notebooks folder
USER_NAME = os.path.basename(os.path.dirname(NOTEBOOKS_FOLDER))
HOME_FOLDER = f"/home/{USER_NAME}"
DATA_FOLDER = f"/data/{USER_NAME}"
SCRATCH_FOLDER = f"/scratch/{USER_NAME}"
#MODIFIABLE FOLDERS
INSTALLATION_FOLDER = DATA_FOLDER
#Where to store output (subfolders will be created inside)
MODELS_OUTPUT_FOLDER = os.path.join(SCRATCH_FOLDER,"ProteinOutput")
#MODELS
OMEGA_FOLDER = os.path.join(INSTALLATION_FOLDER,"OmegaFold")
OUT_FOLDER = os.path.join(MODELS_OUTPUT_FOLDER,"OmegaFold")
#OUT_FOLDER lives inside MODELS_OUTPUT_FOLDER, so one call creates both.
#exist_ok avoids the check-then-create race and missing parents no longer fail.
os.makedirs(OUT_FOLDER, exist_ok=True)

#Used throughout to generate file and directory names that do not collide with old outputs
def unique_name(directory,root,ext = "",fullpath=0,w=3):
    """Return the first unused name of the form <root>_<counter><ext>.

    The counter starts at 1 and is left-padded with zeros to `w` characters.
    A name counts as used when it already exists inside `directory`.

    directory: folder in which existence is checked
    root:      prefix of the generated name
    ext:       suffix appended after the counter (e.g. ".sh")
    fullpath:  truthy -> return the name joined to `directory`,
               falsy  -> return only the name (with `ext`)
    w:         minimum width of the zero-padded counter
    """
    counter = 1
    while True:
        candidate = root + "_" + format(counter, f"0>{w}")
        candidate_path = os.path.join(directory, candidate + ext)
        if not os.path.exists(candidate_path):
            break
        counter += 1
    if fullpath:
        return candidate_path
    return candidate + ext

#SET JOB NAME AND CREATE OUTPUT DIRECTORIES
#Every run gets its own numbered folder inside OUT_FOLDER.
#The generated shell scripts are kept in a "bash_files" subfolder of that folder.
JOB = unique_name(OUT_FOLDER, NAME)
JOB_FOLDER = os.path.join(OUT_FOLDER, JOB)
os.mkdir(JOB_FOLDER)
BASH_FOLDER = os.path.join(JOB_FOLDER, "bash_files")
if not os.path.exists(BASH_FOLDER): os.mkdir(BASH_FOLDER)
#Output from console will also be redirected to these log files
of_log_file, of_pse_log_file = (
    os.path.join(JOB_FOLDER, log_name) for log_name in ("of.log", "of_pse.log")
)

#OMEGAFOLD
if not SEQUENCE.endswith(".fasta"):
    #Raw sequence: store it as a single-record fasta inside the job folder
    queries_fasta_file = os.path.join(JOB_FOLDER, "of_queries.fasta") #This will contain the queries
    with open(queries_fasta_file, "w") as queries_fasta:
        queries_fasta.write(f">{NAME}\n{SEQUENCE}")
else:
    #Existing fasta file: resolve it against the current working directory
    queries_fasta_file = os.path.join(os.getcwd(), SEQUENCE)

#Command-line options passed to omegafold
of_options = f"--model {MODEL}"
# OPTIONS NOT IMPLEMENTED
# --num_cycle NUM_CYCLE
# --device DEVICE
# --subbatch_size {SUBBATCH_SIZE}
# --weights WEIGHTS
# --pseudo_msa_mask_rate PSEUDO_MSA_MASK_RATE
# --num_pseudo_msa NUM_PSEUDO_MSA
# --allow_tf32 ALLOW_TF32

#Map the model number to its weights file. An explicit lookup rejects values
#that list indexing would silently accept (0 -> empty name, negatives wrap).
try:
    weights_pt = {1: "release1.pt", 2: "release2.pt"}[MODEL]
except KeyError:
    raise ValueError(f"MODEL must be 1 or 2, got {MODEL!r}") from None
weights_pt_file = os.path.join(OMEGA_FOLDER,weights_pt)
#Quoted because of_options is pasted verbatim into a shell script
of_options += f" --weights_file {shlex.quote(weights_pt_file)}"
#Shell script that runs omegafold on the query fasta and writes into the job folder
of_sh_file = unique_name(BASH_FOLDER,"of",".sh",1)
#Paths are shell-quoted so names with spaces or special characters stay one argument.
#of_options is inserted as is: it is already a string of several arguments.
of_cmd = f"""
echo Initializing model...
omegafold {shlex.quote(queries_fasta_file)} {shlex.quote(JOB_FOLDER)} {of_options}
"""
with open(of_sh_file,'w') as of_sh:
    of_sh.write(of_cmd)
os.chmod(of_sh_file, 0o755) #Give execution rights

#PSE CREATION
#Shell script that calls the make_pse_of.py helper with the job folder and the
#path of the .pse file to produce
of_pse_py_file = os.path.join(HELP_FOLDER,"make_pse_of.py")
of_pse_file = os.path.join(JOB_FOLDER,NAME+".pse")
#Arguments are shell-quoted so paths with spaces or special characters survive
of_pse_cmd = (
    f"python {shlex.quote(of_pse_py_file)} "
    f"{shlex.quote(JOB_FOLDER)} {shlex.quote(of_pse_file)}"
)
#NOTE(review): the "pmpnn_of_rank" prefix looks inherited from another pipeline;
#kept unchanged so existing script names stay the same - confirm before renaming
of_pse_sh_file = unique_name(BASH_FOLDER,"pmpnn_of_rank",".sh",1)
with open(of_pse_sh_file,'w') as pse_sh:
    pse_sh.write(of_pse_cmd)
os.chmod(of_pse_sh_file, 0o755) #Give execution rights

#COMBINE ALL IN A UNIQUE PIPELINE
#The pipeline script activates the environment, then runs the OmegaFold script
#and the PSE script one after the other, copying their console output to the logs.
pipeline_sh_file = unique_name(BASH_FOLDER,"pipeline",".sh",1)
#Every interpolated value is shell-quoted: NAME and SEQUENCE come from the user
#and the paths contain NAME, so unquoted text could split or be run as shell syntax
pipeline_cmd = f"""
echo Pipeline: {shlex.quote(os.path.basename(pipeline_sh_file))}
echo
echo Name: {shlex.quote(NAME)}
echo SEQUENCE: {shlex.quote(SEQUENCE)}

source activate {shlex.quote(ENVIRONMENT)}
echo
echo OmegaFold
{shlex.quote(of_sh_file)} | tee {shlex.quote(of_log_file)}
echo
echo PyMol
{shlex.quote(of_pse_sh_file)} | tee {shlex.quote(of_pse_log_file)}
echo
echo Job done
echo {shlex.quote(JOB_FOLDER)}
"""
with open(pipeline_sh_file,'w') as pipeline_sh:
    pipeline_sh.write(pipeline_cmd)
os.chmod(pipeline_sh_file, 0o755) #Give execution rights

#The batch file lists one pipeline script per line
batch_sh_file = os.path.join(PIPELINES_FOLDER,"of_batch.sh")
#Make sure the folder exists so that the first run does not fail on open()
os.makedirs(PIPELINES_FOLDER, exist_ok=True)
if not ONE_JOB and os.path.exists(batch_sh_file):
    #Append mode adds the new pipeline without reading and rewriting the old ones
    with open(batch_sh_file,"a") as of_batch_sh:
        of_batch_sh.write("\n" + shlex.quote(pipeline_sh_file))
else:
    #Single job, or no batch file yet: start a fresh batch file
    with open(batch_sh_file,"w") as of_batch_sh:
        of_batch_sh.write(shlex.quote(pipeline_sh_file))
os.chmod(batch_sh_file, 0o755) #Give execution rights

print("Run using next cell")
print(f"Single job:\n{pipeline_sh_file}")
print(f"Batch:\n{batch_sh_file}")
print(f"\nOutput of this pipeline will be in:\n{JOB_FOLDER}")

In [None]:
%%bash
# Runs the batch file written by the Python cell above.
# NOTE(review): this path is hard-coded to /home/$USER/ProteinNotebooks, while the
# Python cell builds it from the current working directory - confirm they match.
/home/$USER/ProteinNotebooks/Pipelines/of_batch.sh

In [None]:
#MAKE SLURM FILE
#Copies the pipelines listed in the batch file into a new numbered script that
#first loads the package manager and GPU modules, then prints how to submit it.
PACKAGE_MANAGER = "mamba"
GPU = "gpu" #options: "gpu" (first available), "t4", "v100", "v100-32g", "a100"

slurm_file = unique_name(PIPELINES_FOLDER, "of_slurm", ".sh", fullpath=1)
with open(batch_sh_file) as of_batch_sh:
    all_pipelines = of_batch_sh.readlines()
with open(slurm_file, "w") as slurm_bash:
    slurm_bash.writelines(
        [f"module load {PACKAGE_MANAGER}\n", f"module load {GPU}\n"] + all_pipelines
    )
os.chmod(slurm_file, 0o755)

#Instructions are built line by line so the trailing blanks of the original text are kept
print("\n".join([
    "",
    "ScienceApps > Jobs > JobComposer > New Job > From Default Template    ",
    "Edit Job name from Job Options.",
    "Replace job.sh (Open in Editor) with the following (adapt required time hh:mm:ss) then Save:",
    "      ",
    "#!/usr/bin/bash",
    "#SBATCH --gpus=1",
    "#SBATCH --mem=7800",
    "#SBATCH --time=23:59:00",
    "#SBATCH --output=job.out      ",
    f"{slurm_file}",
    "",
]))

In [None]:
"""
Conventions for names
Paths to files end with _type_file
Paths to folders end with _FOLDER
Opened files .type end with _type
Content of .sh end with _cmd
Content of .py end with _script
"""

"""
Cleanup instructions
Delete data/bash_files
Delete outputs folder: contains log files
"""

**Instructions**
---
---

Use `contigs` to define continuous chains. Use a `:` to define multiple contigs and a `/` to define multiple segments within a contig.
For example:

**unconditional**
- `contigs='100'` - diffuse **monomer** of length 100
- `contigs='50:100'` - diffuse **hetero-oligomer** of lengths 50 and 100
- `contigs='50'` `symmetry='cyclic'` `order=2` - make two copies of the defined contig(s) and add a symmetry constraint, for **homo-oligomeric** diffusion.

**binder design**
- `contigs='A:50'` `pdb='4N5T'` - diffuse a **binder** of length 50 to chain A of defined PDB.
- `contigs='E6-155:70-100'` `pdb='5KQV'` `hotspot='E64,E88,E96'` - diffuse a **binder** of length 70 to 100 (sampled randomly) to chain E and defined hotspot(s).

**motif scaffolding**
 - `contigs='40/A163-181/40'` `pdb='5TPN'`
 - `contigs='A3-30/36/A33-68'` `pdb='6MRR'` - diffuse a loop of length 36 between two segments of defined PDB ranges.

**partial diffusion**
- `contigs=''` `pdb='6MRR'` - noise all coordinates
- `contigs='A1-10'` `pdb='6MRR'` - keep first 10 positions fixed, noise the rest
- `contigs='A'` `pdb='1SSC'` - fix chain A, noise the rest

*hints and tips*
- `pdb=''` leave blank to get an upload prompt
- `contigs='50-100'` use dash to specify a range of lengths to sample from