In [92]:
import random

import pandas as pd
import time
import random
import subprocess
import io
import numpy as np

In [16]:
source = pd.read_csv("source.csv", index_col=0)
target = pd.read_csv("target.csv", index_col=0)

In [115]:
import pymol.cmd as cmd

def run_aln_cmd(command, row_source, row_target, fast=False):
    command.append(row_source["pdb_id"])
    command.append(row_target["pdb_id"])
    command.append("-outfmt")
    command.append("2")

    if fast:
        command.append("-fast")

    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err  = p.communicate()
    p.terminate()
    output = pd.read_csv(io.BytesIO(output), sep="\t").iloc[0]

    return output["RMSD"], output["Lali"]

def align_tm(row_source, row_target):
    command = ["./structure_alignment_algorithms/TMAlign.exe"]
    return run_aln_cmd(command, row_source, row_target)


def align_tm_fast(row_source, row_target):
    command = ["./structure_alignment_algorithms/TMAlign.exe"]
    return run_aln_cmd(command, row_source, row_target, True)


def align_us(row_source, row_target):
    command = ["./structure_alignment_algorithms/USAlign.exe"]
    return run_aln_cmd(command, row_source, row_target)

def align_us_fast(row_source, row_target):
    command = ["./structure_alignment_algorithms/USAlign.exe"]
    return run_aln_cmd(command, row_source, row_target, True)

def align_pymol(row_source, row_target):
    cmd.reinitialize()
    cmd.load(row_source["pdb_id"], "prot1")
    cmd.load(row_target["pdb_id"], "prot2")
    ce_dict = cmd.cealign("prot1", "prot2")
    cmd.delete("prot1")
    cmd.delete("prot2")
    return ce_dict["RMSD"], ce_dict["alignment_length"]

def time_it(fn):
    start = time.time()
    ret = fn()
    end = time.time()
    return ret, end-start

In [117]:
num = 100
samples = [(random.choice(range(len(source.index))), random.choice(range(len(target.index)))) for _ in range(num)]

rets = {}
times = {}
for fn in [align_pymol, align_tm, align_tm_fast, align_us, align_us_fast]:
    ts = times.setdefault(fn, [])
    returns = rets.setdefault(fn, [])

    for i1, i2 in samples:
        r1 = source.iloc[i1]
        r2 = source.iloc[i2]
        ret, t = time_it(lambda: fn(r1, r2))
        returns.append(ret)
        ts.append(t)

In [118]:
for key, values in times.items():
    print(key.__name__, np.asarray(values).mean())

align_pymol 0.03743949174880981
align_tm 0.04302718877792358
align_tm_fast 0.022360396385192872
align_us 0.04332980394363403
align_us_fast 0.023163235187530516


In [119]:
df_rets = []
for key, values in rets.items():
    for (a,b),( rmsd, length) in zip(samples, values):
        df_rets.append((key.__name__,a,b, rmsd,length))

df_rets = pd.DataFrame(df_rets)
df_rets.set_index(keys=[1,2]).sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,0,3,4
1,2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,53,align_pymol,10.135159,64.0
1,53,align_tm,5.620000,76.0
1,53,align_tm_fast,5.660000,78.0
1,53,align_us,5.620000,76.0
1,53,align_us_fast,5.660000,78.0
...,...,...,...,...
99,40,align_pymol,9.243867,56.0
99,40,align_tm,5.060000,78.0
99,40,align_tm_fast,6.390000,79.0
99,40,align_us,5.060000,78.0
