In [2]:
import covalent as ct
import numpy as np
import time
import random
import platform

In [3]:
# Single nested dict that accumulates every measurement taken in this notebook.
benchmark_results = {}

# Describe the host the benchmarks run on so results can be compared across machines.
benchmark_results['platform'] = {}
benchmark_results['platform']['arch'] = platform.architecture()
benchmark_results['platform']['system'] = platform.system()
benchmark_results['platform']['machine'] = platform.machine()
# platform.node() is the network hostname, not the operating system;
# platform.platform() is the string that actually identifies the OS.
benchmark_results['platform']['os'] = platform.platform()
# Keep the hostname, but under a key that says what it is.
benchmark_results['platform']['node'] = platform.node()
benchmark_results['platform']['python_version'] = platform.python_version()

# Setup for workflows
benchmark_results['workflows'] = {}

In [4]:
@ct.electron
def add(x: int, y: int):
    """Return the sum of ``x`` and ``y``."""
    total = x + y
    return total

@ct.electron
def multiply(x: int, y: int):
    """Return the product of ``x`` and ``y``."""
    product = x * y
    return product

@ct.lattice
def workflow(x: int, y: int):
    """Two-step workflow: ``add(x, y)``, then ``multiply`` of that sum with ``y``.

    Returns the output of the ``add`` step; the ``multiply`` output is not returned.
    """
    summed = add(x, y)
    # NOTE(review): the product is computed and then discarded -- confirm that
    # returning the sum (rather than the product) is intended.
    multiply(summed, y)
    return summed

# Dispatch the workflow once through Covalent with inputs (2, 3).
dispatch_workflow = ct.dispatch_sync(workflow)
res = dispatch_workflow(2, 3)

#### Basic 2 node workflow (measure overhead of covalent)

In [5]:
# Start a fresh results bucket for the two-node workflow benchmark.
workflow_label = 'simple_two_node_workflow'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def add(x: int, y: int):
    """Add ``y`` to ``x`` and return the result."""
    result_sum = x + y
    return result_sum

@ct.electron
def multiply(x: int, y: int):
    """Multiply ``x`` by ``y`` and return the result."""
    result_product = x * y
    return result_product

@ct.lattice
def workflow(x, y):
    """Two-step workflow: ``add(x, y)``, then ``multiply`` of that sum with ``y``.

    Returns the output of the ``add`` step; the ``multiply`` output is not returned.
    """
    summed = add(x, y)
    # NOTE(review): the product is computed and then discarded -- confirm that
    # returning the sum (rather than the product) is intended.
    multiply(summed, y)
    return summed

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() can be too coarse for a call this small and may jump
    # if the system clock is adjusted.
    start = time.perf_counter()
    res = workflow(2, 3)
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): start_time/end_time presumably bound the workflow as recorded by
# Covalent and may not include client-side dispatch overhead -- confirm before
# comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(workflow)(2, 3)
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

#### Workflow with compute intensive electrons (primality testing)

In [6]:
# Start a fresh results bucket for the primality-testing benchmark.
workflow_label = 'primality_tests'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def is_prime(n: int) -> bool:
    """Trial-division primality check over candidates of the form 6k +/- 1.

    Values up to 3 are handled directly, multiples of 2 and 3 are rejected,
    and the remaining divisor candidates 5, 11, 17, ... (each paired with
    candidate + 2) are tried up to ``int(n**0.5)``.
    """
    if n <= 3:
        return n > 1
    if n % 2 == 0 or n % 3 == 0:
        return False
    limit = int(n**0.5)
    for candidate in range(5, limit + 1, 6):
        if n % candidate == 0 or n % (candidate + 2) == 0:
            return False
    return True

# numbers to test: 50 random integers, each drawn with random.randint(1000, 10000)
nums_to_test = [random.randint(1000, 10000) for _ in range(50)]

@ct.lattice
def primality_tests():
    """Run ``is_prime`` on every entry of the module-level ``nums_to_test`` list.

    Returns a list with one ``{'num': ..., 'is_prime': ...}`` dict per number,
    in the same order as ``nums_to_test``.
    """
    return [
        {'num': candidate, 'is_prime': is_prime(candidate)}
        for candidate in nums_to_test
    ]

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = primality_tests()
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(primality_tests)()
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

### Memory intensive workflow (matrix multiplication)

In [8]:
# Start a fresh results bucket for the matrix-multiplication benchmark.
workflow_label = 'matrix_multiplication'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def create_matrix(arraysize: int):
    """Return an ``arraysize`` x ``arraysize`` array filled by ``np.random.random``."""
    shape = (arraysize, arraysize)
    return np.random.random(shape)

@ct.electron
def matrix_multiply(a: np.ndarray, b: np.ndarray):
    """Return the matrix product of ``a`` and ``b`` computed with ``np.matmul``."""
    product = np.matmul(a, b)
    return product

@ct.lattice
def matrix_multiplication():
    """Multiply two freshly created square matrices for each size 256, 512, 1024, 2048.

    The products are not collected; the function returns ``None``.
    """
    sizes = (256, 512, 1024, 2048)
    for size in sizes:
        left = create_matrix(size)
        right = create_matrix(size)
        matrix_multiply(left, right)

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = matrix_multiplication()
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(matrix_multiplication)()
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

### Workflow with chain dependencies (simple electrons)

In [9]:
import numpy as np

# Start a fresh results bucket for the chained-add benchmark.
workflow_label = 'linear_add_workflow'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def add(x: int, y: int):
    """Return ``x`` plus ``y``."""
    total = x + y
    return total

@ct.lattice
def add_workflow(N: int):
    """Chain ``N`` dependent ``add`` calls.

    The chain starts with ``add(1, 1)`` and every later step adds 1 to the
    previous step's output, so each call depends on the one before it.

    Args:
        N: Number of ``add`` calls in the chain; must be at least 1.

    Returns:
        The output of the last ``add`` call in the chain.

    Raises:
        ValueError: If ``N`` is less than 1. Previously this case left the
            result unbound and failed with ``UnboundLocalError``.
    """
    if N < 1:
        raise ValueError(f"N must be at least 1, got {N}")
    r1 = add(1, 1)
    # The first call is already made above, so N - 1 links remain.
    for _ in range(N - 1):
        r1 = add(r1, 1)
    return r1

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = add_workflow(50)
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(add_workflow)(50)
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

# Start a fresh results bucket for the chained-multiply benchmark.
workflow_label = 'linear_multiply_workflow'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def multiply(x: int, y: int):
    """Return ``x`` times ``y``."""
    product = x * y
    return product

# The result doubles at every step (2**N overall). Python ints are arbitrary
# precision, so this grows without overflowing in plain Python.
@ct.lattice
def multiply_workflow(N: int):
    """Chain ``N`` dependent ``multiply`` calls.

    The chain starts with ``multiply(2, 1)`` and every later step multiplies
    the previous step's output by 2, so each call depends on the one before it.

    Args:
        N: Number of ``multiply`` calls in the chain; must be at least 1.

    Returns:
        The output of the last ``multiply`` call in the chain.

    Raises:
        ValueError: If ``N`` is less than 1. Previously this case left the
            result unbound and failed with ``UnboundLocalError``.
    """
    if N < 1:
        raise ValueError(f"N must be at least 1, got {N}")
    r1 = multiply(2, 1)
    # The first call is already made above, so N - 1 links remain.
    for _ in range(N - 1):
        r1 = multiply(r1, 2)
    return r1

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = multiply_workflow(50)
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(multiply_workflow)(50)
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

### Embarrassingly parallel workload (fileio)

In [10]:
# Start a fresh results bucket for the parallel file I/O benchmark.
workflow_label = 'parallel_fileio'
benchmark_results['workflows'][workflow_label] = dict()

import tempfile
import secrets

@ct.electron
def create_delete_tempfile():
    """Write about 16 MB of random bytes to a temporary file, then discard it.

    The file is opened with ``delete=True``, so closing it also removes it.
    The ``with`` block guarantees that close happens even if a write raises,
    so a failed run does not leave the handle open.

    Returns:
        None
    """
    with tempfile.NamedTemporaryFile(delete=True) as fp:
        # 1000 chunks of 16384 random bytes each (binary blobs, not text lines)
        for _ in range(1000):
            fp.write(secrets.token_bytes(16384))

@ct.lattice
def parallel_fileio(N: int):
    """Call ``create_delete_tempfile`` ``N`` times; no call uses another's output."""
    for _ in range(N):
        create_delete_tempfile()

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = parallel_fileio(50)
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(parallel_fileio)(50)
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

### Sublattices

In [11]:
# Start a fresh results bucket for the sublattice benchmark.
workflow_label = 'sublattice'  + 's'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
@ct.lattice
def sublattice(x: int, y: int):
    """Lattice wrapped as an electron: returns ``add(x, y)`` plus ``multiply`` of that sum with ``y``."""
    summed = add(x, y)
    scaled = multiply(summed, y)
    return summed + scaled

@ct.lattice
def sublattice_workflow(num_tasks: int):
    """Call ``sublattice(i, i + 2)`` for each ``i`` in ``range(num_tasks)``.

    Returns the outputs as a list, in call order.
    """
    return [sublattice(task, task + 2) for task in range(num_tasks)]

# Non covalent run: call the lattice directly as a Python function, five times.
direct_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['direct'] = direct_stats
for _ in range(5):
    # perf_counter() is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() is a wall clock that can be coarse and can jump.
    start = time.perf_counter()
    res = sublattice_workflow(10)
    end = time.perf_counter()
    direct_stats['duration'].append(end - start)
direct_stats['avg_duration'] = np.mean(direct_stats['duration'])

# Covalent run: duration is taken from the timestamps stored on the result object.
# NOTE(review): these timestamps may not include client-side dispatch overhead --
# confirm before comparing against the direct timings above.
covalent_stats = {'duration': []}
benchmark_results['workflows'][workflow_label]['covalent'] = covalent_stats
for _ in range(5):
    result = ct.dispatch_sync(sublattice_workflow)(10)
    duration = (result.end_time - result.start_time).total_seconds()
    covalent_stats['duration'].append(duration)
covalent_stats['avg_duration'] = np.mean(covalent_stats['duration'])

#### Scaling analysis for sample compute intensive workflow

In [13]:
# Start a fresh results bucket for the compute-intensive scaling benchmark.
workflow_label = 'compute_intensive_scaling'
benchmark_results['workflows'][workflow_label] = dict()

@ct.electron
def is_prime(n: int) -> bool:
    """Trial-division primality check over candidates of the form 6k +/- 1.

    Values up to 3 are handled directly, multiples of 2 and 3 are rejected,
    and the remaining divisor candidates 5, 11, 17, ... (each paired with
    candidate + 2) are tried up to ``int(n**0.5)``.
    """
    if n <= 3:
        return n > 1
    if n % 2 == 0 or n % 3 == 0:
        return False
    limit = int(n**0.5)
    for candidate in range(5, limit + 1, 6):
        if n % candidate == 0 or n % (candidate + 2) == 0:
            return False
    return True
    
# Measure scaling of the covalent execution
@ct.lattice
def primality_tests_scaling(num_nodes: int):
    """Draw ``num_nodes`` random integers and run ``is_prime`` on each of them.

    Each number comes from ``random.randint(1000, 10000)``. Returns a list
    with one ``{'num': ..., 'is_prime': ...}`` dict per number, in draw order.
    """
    nums_to_test = [random.randint(1000, 10000) for _ in range(num_nodes)]
    return [
        {'num': candidate, 'is_prime': is_prime(candidate)}
        for candidate in nums_to_test
    ]

# Covalent scaling: dispatch the lattice five times at each node count and
# record every duration plus the per-count average.
scaling_stats = {}
benchmark_results['workflows'][workflow_label]['scaling'] = scaling_stats
for j in [2, 4, 8, 16, 32, 64]:
    bucket = {}
    scaling_stats[f"{j}"] = bucket
    bucket['duration'] = []
    for i in range(5):
        result = ct.dispatch_sync(primality_tests_scaling)(j)
        elapsed = result.end_time - result.start_time
        duration = elapsed.total_seconds()
        bucket['duration'].append(duration)
    bucket['avg_duration'] = np.mean(bucket['duration'])

#### Scaling analysis for parallel file i/o tasks

In [14]:
# Start a fresh results bucket for the parallel file I/O scaling benchmark.
workflow_label = 'parallel_fileio_scaling'
benchmark_results['workflows'][workflow_label] = dict()

import tempfile
import secrets

@ct.electron
def create_delete_tempfile():
    """Write about 16 MB of random bytes to a temporary file, then discard it.

    The file is opened with ``delete=True``, so closing it also removes it.
    The ``with`` block guarantees that close happens even if a write raises,
    so a failed run does not leave the handle open.

    Returns:
        None
    """
    with tempfile.NamedTemporaryFile(delete=True) as fp:
        # 1000 chunks of 16384 random bytes each (binary blobs, not text lines)
        for _ in range(1000):
            fp.write(secrets.token_bytes(16384))

@ct.lattice
def parallel_fileio(N: int):
    """Call ``create_delete_tempfile`` ``N`` times; no call uses another's output."""
    for _ in range(N):
        create_delete_tempfile()

# Covalent run scaling: dispatch the lattice five times at each task count and
# record every duration plus the per-count average.
scaling_stats = {}
benchmark_results['workflows'][workflow_label]['scaling'] = scaling_stats
for j in [2, 4, 8, 16, 32, 64]:
    bucket = {}
    scaling_stats[f"{j}"] = bucket
    bucket['duration'] = []
    for i in range(5):
        result = ct.dispatch_sync(parallel_fileio)(j)
        elapsed = result.end_time - result.start_time
        duration = elapsed.total_seconds()
        bucket['duration'].append(duration)
    bucket['avg_duration'] = np.mean(bucket['duration'])