In [4]:
import os
import sys
import glob
import multiprocessing as mp
from functools import partial
sys.path.append('../')

from tools.usalign import run_usalign, usalign_parser
USALIGN_BINARY = '/home/nfs/rmadaj/bins/usalign/USalign'

# Run usalign to obtain dataset

In [5]:
targets = glob.glob('files/file*.pdb') # target pdb files
ref = 'files/ref.pdb' # reference pdb file

run = run_usalign.RunUSAlign(usalign_binary=USALIGN_BINARY) # initialize the RunUSAlign object

for target in targets:
    run.run_usalign(
        reference_pdb=ref,
        target_pdb=target,
        output_filename='usal_output.txt', # keep one to have all results in one file
        mm=1, # default for multimers
        ter=0,
        outfmt=2, # easily parsable output
        pymol=False, # no pymol session file
        matrix=False, # no rotation matrix file
        pdb_rank=None, # no rank for AF2
    )


# Analyze results

In [7]:
usal_parser = usalign_parser.USalign_parser('usal_output.txt') # initialize the parser object and load the output file
usal_parser.add_column('test_col','test') # add a column to the DataFrame
df = usal_parser.read_usalign_output() # process the output file
df

Unnamed: 0,target,template,tm1,tm2,rmsd,id1,id2,idali,docked_seqlength,template_seqlength,aligned_length,test_col,target_path
0,file2,"files/ref.pdb:1,A:1,B",0.5111,0.5111,6.6,0.514,0.514,0.675,500,500,381,test,files/file2.pdb
1,file1,"files/ref.pdb:1,B:1,A",0.4456,0.4456,5.97,0.45,0.45,0.714,500,500,315,test,files/file1.pdb
2,file2,"files/ref.pdb:1,A:1,B",0.5111,0.5111,6.6,0.514,0.514,0.675,500,500,381,test,files/file2.pdb
3,file1,"files/ref.pdb:1,B:1,A",0.4456,0.4456,5.97,0.45,0.45,0.714,500,500,315,test,files/file1.pdb


# Run multiple usalign runs to save time

In [6]:
def worker(ref, sample):
    instance = run_usalign.RunUSAlign(USALIGN_BINARY)
    instance.run_usalign(
        reference_pdb=ref,
        target_pdb=sample,
        output_filename='usal_output.txt', # keep one to have all results in one file
        mm=1, # default for multimers
        ter=0,
        outfmt=2, # easily parsable output
        pymol=False, # no pymol session file
        matrix=False, # no rotation matrix file
        pdb_rank=None, # no rank for AF2
    )

args_list = list(zip([ref] * len(targets), targets))
pool = mp.Pool(processes=2)
args_list = [(ref, target) for target in targets]
pool.starmap(worker, args_list)

pool.close()
pool.join()


Processing files/ref.pdb with files/file1.pdb...Processing files/ref.pdb with files/file2.pdb...



/home/nfs/rmadaj/bins/usalign/USalign files/file2.pdb files/ref.pdb -mm 1 -ter 0 -outfmt 2 -o file2.pdb/usalign_None.dat >> usal_output.txt
Analysis ran successfully
/home/nfs/rmadaj/bins/usalign/USalign files/file1.pdb files/ref.pdb -mm 1 -ter 0 -outfmt 2 -o file1.pdb/usalign_None.dat >> usal_output.txt
Analysis ran successfully
