# Vectron Benchmark
This notebook goes over all of vectron's tests presented in the article. Note that some runtimes might differ from the ones presented in the article because the notebook runs in a virtual container and because the hardware configurations of the systems the simulation is run on differ. However, if the speedups are compared, the same results will be achieved.

Also note that the input files for codon, vectron, C++ and cuda (seqx.txt and seqy.txt) all contain identical sequences. Because C++ and cuda handle pairs of sequences better, the sequences fed to codon and vectron are additionally prepared as ready-made pairs before being fed to C++ and cuda. This preparation takes place in the build process of the vectron Dockerfile by running seq_modifier.python.

In [None]:
import subprocess
import os
from tabulate import tabulate
# Build vectron.codon with the codon compiler. The compile cells further down pass
# '/vectron/experiments_docker/source/vectron' as vectron_path, presumably the
# executable produced by this build -- TODO confirm.
# As the last expression of the cell, the shell exit status returned by os.system
# is what the notebook displays for this cell.
os.system("codon build /vectron/experiments_docker/source/vectron.codon")

In [None]:
def compile(mode, src, vectron_path = ''):
    """Compile one benchmark source file with the toolchain selected by ``mode``.

    Note that this function shadows the built-in ``compile`` inside the notebook.

    Args:
        mode: One of 'vectron', 'codon', 'cpp' or 'cuda'.
        src: Path of the source file to compile.
        vectron_path: Executable that is invoked for 'vectron' mode; the other
            modes ignore it.

    Returns:
        None. A compilation that exits with a non-zero status is reported on
        stdout (exit code plus the tool's stderr) instead of being silently
        discarded.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """
    source = f'{src}'
    # clang++ and nvcc write the binary next to the source, minus the extension.
    binary = f'{os.path.splitext(src)[0]}'

    if mode == 'vectron':
        command = [vectron_path, '/codon-seq', '/vectron', source]
    elif mode == 'codon':
        command = ['codon', 'build', '-plugin', '/codon-seq', source, '-release']
    elif mode == 'cpp':
        command = [
            'clang++', '-O3', '-msse4.2', '-funroll-loops', '-mfpmath=sse', '-march=native',
            source, '-o', binary
        ]
    elif mode == 'cuda':
        command = ['nvcc', '-o', binary, source]
    else:
        raise ValueError(
            f"unsupported compile mode {mode!r}; expected 'vectron', 'codon', 'cpp' or 'cuda'"
        )

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        # Output is captured, so without this a broken build would only show up
        # later as a missing or stale benchmark binary.
        print(f"[{mode}] compilation of {source} failed (exit code {result.returncode}):")
        print(result.stderr.strip())

def exec(mode, src, ds_type):
    """Run one compiled benchmark on a dataset and return what it reported.

    The command ``src seqy.txt seqx.txt`` is run through the shell with its
    stdout redirected to ``{mode}_out.txt`` in the current working directory.
    Note that this function shadows the built-in ``exec`` inside the notebook.

    Args:
        mode: 'vectron', 'codon', 'cpp' or 'cuda'.
        src: Path of the executable to run.
        ds_type: Dataset folder name of the form '<type>_<size>', e.g.
            'int_small'. For 'cpp' and 'cuda' the part before the first
            underscore is replaced by the mode ('int_small' -> 'cpp_small').

    Returns:
        For 'vectron' and 'codon': the captured stderr text, unmodified
        (presumably where those binaries print their timing -- verify).
        For every other mode: the stripped last line of ``{mode}_out.txt``, or
        a diagnostic string starting with 'no output' when the file is empty
        (previously an IndexError).
    """
    if mode == 'cpp' or mode == 'cuda':
        ds_type = mode + "_" + ds_type.split("_", 1)[1]
    seq_x = f'/vectron/experiments_docker/data/{ds_type}/seqx.txt'
    seq_y = f'/vectron/experiments_docker/data/{ds_type}/seqy.txt'
    out_path = f'{mode}_out.txt'
    # seqy is passed before seqx, exactly as the benchmark binaries are invoked upstream.
    result = subprocess.run(f'{src} {seq_y} {seq_x} >{out_path}', capture_output=True, text=True, shell=True)
    if mode == 'vectron' or mode == 'codon':
        return result.stderr

    # Stream the file so a large per-pair output is never held in memory at once;
    # only the final line is needed.
    last_line = None
    with open(out_path, 'r') as file:
        for line in file:
            last_line = line
    if last_line is None:
        # The binary wrote nothing (e.g. it failed to start): surface why.
        return f'no output (exit code {result.returncode}): {result.stderr.strip()}'
    return last_line.strip()
        
def batch_exec(mode, ds_type):
    """Run every CPU benchmark for one toolchain and print the results as tables.

    A header-only table is printed first, followed by one single-row table per
    algorithm as soon as that algorithm's run has finished.

    Args:
        mode: Toolchain name; it selects the source folder and is used as the
            first column header.
        ds_type: Dataset name of the form '<type>_<size>' (e.g. 'int_small'),
            forwarded to ``exec``.

    Returns:
        None; all results are printed.
    """
    benchmarks = (
        ("Levenshtein Distance", "levenshtein_distance"),
        ("Longest Common Subsequence", "lcs"),
        ("Hamming Distance", "hamming_distance"),
        ("Manhattan Tourist", "manhattan_tourist"),
        ("Minimum Cost Path", "min_cost_path"),
        ("Needleman Wunsch", "needleman_wunsch"),
        ("Smith Waterman", "smith_waterman"),
    )
    name_width = 40
    time_width = 40

    # vectron binaries sit in a folder named after the dataset's type prefix
    # (e.g. 'vectron_int'); every other mode uses a folder named after the mode.
    source_dir = mode + '_' + ds_type.split("_", 1)[0] if mode == 'vectron' else mode
    # cuda executables carry a '_cuda' suffix.
    binary_suffix = '_cuda' if mode == "cuda" else ''

    time_label = "Execution Time:"
    headers = [f"{mode:<{name_width}}", f"{time_label:<{time_width}}"]
    print(tabulate([], headers=headers, tablefmt="pretty"))

    for title, binary in benchmarks:
        binary_path = f'/vectron/experiments_docker/source/{source_dir}/{binary}{binary_suffix}'
        measurement = exec(mode, binary_path, ds_type)
        row = (f"{title:<{name_width}}", f"{measurement:<{time_width}}")
        print(tabulate([row], headers=headers, tablefmt="pretty"))

The following module will compile vectron, codon and C++ benchmarks on CPU in integer mode.
The path to source codes for each script can be found in its compile command.

In [None]:
# The same seven algorithms are built once per CPU toolchain, in this order.
cpu_algorithms = [
    'smith_waterman',
    'needleman_wunsch',
    'levenshtein_distance',
    'lcs',
    'hamming_distance',
    'manhattan_tourist',
    'min_cost_path',
]

## COMPILING VECTRON EXPERIMENTS:
for algorithm in cpu_algorithms:
    compile(
        'vectron',
        f'/vectron/experiments_docker/source/vectron_int/{algorithm}.codon',
        '/vectron/experiments_docker/source/vectron',
    )

## COMPILING codon EXPERIMENTS:
for algorithm in cpu_algorithms:
    compile('codon', f'/vectron/experiments_docker/source/codon/{algorithm}.codon')

## COMPILING C++ EXPERIMENTS:
for algorithm in cpu_algorithms:
    compile('cpp', f'/vectron/experiments_docker/source/cpp/{algorithm}.cpp')

The following module will execute vectron, codon and C++ respectively, and benchmark their runtimes for the small dataset (4096 sequence pairs)

In [None]:
# Small integer dataset: run vectron, then codon, then C++.
for toolchain in ('vectron', 'codon', 'cpp'):
    batch_exec(toolchain, 'int_small')

The following module will execute vectron, codon and C++ respectively, and benchmark their runtimes for the medium dataset (262,144 sequence pairs)

In [None]:
# Medium integer dataset: run vectron, then codon, then C++.
for toolchain in ('vectron', 'codon', 'cpp'):
    batch_exec(toolchain, 'int_medium')

The following module will execute vectron, codon and C++ respectively, and benchmark their runtimes for the large dataset (4,194,304 sequence pairs)

In [None]:
# Large integer dataset, vectron build (kept in its own cell so it can be run separately).
batch_exec(mode='vectron', ds_type='int_large')

In [None]:
# Large integer dataset, plain codon build (kept in its own cell so it can be run separately).
batch_exec(mode='codon', ds_type='int_large')

In [None]:
# Large integer dataset, C++ build (kept in its own cell so it can be run separately).
batch_exec(mode='cpp', ds_type='int_large')

### The following module will compile vectron, cuda and C++ benchmarks on GPU in floating-point mode

In [None]:
# One Smith-Waterman build per toolchain; each tuple holds the arguments for compile().
gpu_builds = [
    ## COMPILING VECTRON EXPERIMENT:
    (
        'vectron',
        '/vectron/experiments_docker/source/vectron_float/smith_waterman.codon',
        '/vectron/experiments_docker/source/vectron',
    ),
    ## COMPILING cuda EXPERIMENT:
    ('cuda', '/vectron/experiments_docker/source/cuda/smith_waterman_cuda.cu'),
    ## COMPILING C++ EXPERIMENT (its source sits in the cuda folder):
    ('cpp', '/vectron/experiments_docker/source/cuda/smith_waterman.cpp'),
]

for build_args in gpu_builds:
    compile(*build_args)

### The following module will execute vectron, cuda and C++ respectively, and benchmark their runtimes for the small GPU dataset (256 sequence pairs)

In [None]:
# exec() only returns the measurement. A notebook displays just the value of the
# cell's last expression, so every result is printed explicitly; otherwise the
# vectron and cuda timings would be dropped.

#RUNNING VECTRON EXPERIMENT:
print("vectron Smith Waterman Time:")
print(exec('vectron', '/vectron/experiments_docker/source/vectron_float/smith_waterman', 'float_small'))

#RUNNING cuda EXPERIMENT:
print("cuda Smith Waterman Time:")
print(exec('cuda', '/vectron/experiments_docker/source/cuda/smith_waterman_cuda', 'float_small'))

#RUNNING C++ EXPERIMENT:
print("C++ Float Smith Waterman Time:")
print(exec('cpp', '/vectron/experiments_docker/source/cuda/smith_waterman', 'float_small'))

### The following module will execute vectron, cuda and C++ respectively, and benchmark their runtimes for the medium GPU dataset (1024 sequence pairs)

In [None]:
# exec() only returns the measurement. A notebook displays just the value of the
# cell's last expression, so every result is printed explicitly; otherwise the
# vectron and cuda timings would be dropped.

#RUNNING VECTRON EXPERIMENT:
print("vectron Smith Waterman Time:")
print(exec('vectron', '/vectron/experiments_docker/source/vectron_float/smith_waterman', 'float_medium'))

#RUNNING cuda EXPERIMENT:
print("cuda Smith Waterman Time:")
print(exec('cuda', '/vectron/experiments_docker/source/cuda/smith_waterman_cuda', 'float_medium'))

#RUNNING C++ EXPERIMENT:
print("C++ Float Smith Waterman Time:")
print(exec('cpp', '/vectron/experiments_docker/source/cuda/smith_waterman', 'float_medium'))

### The following module will execute vectron, cuda and C++ respectively, and benchmark their runtimes for the large GPU dataset (4096 sequence pairs)

In [None]:
# exec() only returns the measurement. A notebook displays just the value of the
# cell's last expression, so every result is printed explicitly; otherwise the
# vectron and cuda timings would be dropped.

#RUNNING VECTRON EXPERIMENT:
print("vectron Smith Waterman Time:")
print(exec('vectron', '/vectron/experiments_docker/source/vectron_float/smith_waterman', 'float_large'))

#RUNNING cuda EXPERIMENT:
print("cuda Smith Waterman Time:")
print(exec('cuda', '/vectron/experiments_docker/source/cuda/smith_waterman_cuda', 'float_large'))

#RUNNING C++ EXPERIMENT:
print("C++ Float Smith Waterman Time:")
print(exec('cpp', '/vectron/experiments_docker/source/cuda/smith_waterman', 'float_large'))