# Parallel Computing Project

### Run the following cell to define the auxiliary functions

In [None]:
import subprocess
import statistics
import re
from math import sqrt

def execute_class(class_, num_iters, num_devices):
    """Run the FT benchmark binary repeatedly and collect runtime statistics.

    Launches ``./bin/ft.<class_> <num_devices>`` ``num_iters`` times. For each
    run the captured stdout must contain ``Verification = SUCCESSFUL`` and a
    ``Time in seconds = <float>`` line; the parsed time is appended to the
    list of runtimes and the running statistics are printed.

    Args:
        class_: Suffix of the benchmark binary name (e.g. ``'A'``).
        num_iters: Number of times to run the binary; must be at least 1.
        num_devices: Value passed as the only command-line argument.

    Returns:
        Tuple ``(runtime_mean, runtime_err)`` as computed by ``calc_stats``
        over all collected runtimes.

    Raises:
        ValueError: If ``num_iters`` is smaller than 1 (there would be no
            measurement to report).
        AssertionError: If the output lacks the verification or timing line,
            or the verification result is not ``SUCCESSFUL``.
        subprocess.CalledProcessError: If the binary exits with a non-zero
            status.
    """
    # Without at least one run there is nothing to return; fail early with a
    # clear message instead of hitting an unbound local at the return below.
    if num_iters < 1:
        raise ValueError(f'num_iters must be >= 1, got {num_iters}')

    # Build argv directly so no string splitting is involved.
    cmd = [f'./bin/ft.{class_}', str(num_devices)]

    runtimes = []

    print(f'Class: {class_}, num_devices: {num_devices}, Iterations: {num_iters}')

    for i in range(num_iters):
        res = subprocess.check_output(cmd).decode('utf-8')

        # The benchmark verifies its own result; reject runs that failed.
        match = re.search(r'Verification\s+=\s+(\w+)', res)
        assert match, 'No match found!'
        assert match.group(1) == 'SUCCESSFUL', f'Verification = {match.group(1)}'

        # Extract the runtime reported by the benchmark itself.
        match = re.search(r' Time in seconds\s+=\s+(\d+\.\d+)', res)
        assert match, 'No match found!'
        runtime = float(match.group(1))
        runtimes.append(runtime)

        # Recompute the statistics over all runs so far for live feedback.
        runtime_mean, runtime_err = calc_stats(runtimes)
        print(f'  [{(i+1):03d}] {runtime:>8.6f} [sec] | Average Runtime: {runtime_mean:>8.6f} ± {runtime_err:.6f} [sec]')

    print()

    return runtime_mean, runtime_err

def calc_stats(runtimes):
    """Compute the mean and sample standard deviation of a list of runtimes.

    For more than two samples, values whose distance from the mean is not
    strictly below one standard deviation are treated as outliers and
    dropped before the statistics are recomputed. The filtered statistics
    are only used when at least two samples survive the filter; otherwise
    the unfiltered statistics are returned.

    Args:
        runtimes: Non-empty sequence of runtimes.

    Returns:
        Tuple ``(runtime_mean, runtime_err)``. ``runtime_err`` is ``0.0``
        for a single sample, otherwise the sample standard deviation.

    Raises:
        ValueError: If ``runtimes`` is empty.
    """
    if len(runtimes) == 0:
        raise ValueError("runtimes array is empty")
    if len(runtimes) == 1:
        # A single sample has no spread.
        return runtimes[0], 0.0

    runtime_mean = statistics.mean(runtimes)
    runtime_stdev = statistics.stdev(runtimes)

    # Filter out outliers
    if len(runtimes) > 2:
        runtimes_filtered = [
            runtime for runtime in runtimes
            if abs(runtime - runtime_mean) < runtime_stdev
        ]
        # statistics.stdev needs at least two samples and statistics.mean at
        # least one. The filter can leave fewer (e.g. [1, 2, 3] keeps only
        # the middle value, identical samples keep none), so only switch to
        # the filtered statistics when they are well defined.
        if 2 <= len(runtimes_filtered) < len(runtimes):
            runtime_mean = statistics.mean(runtimes_filtered)
            runtime_stdev = statistics.stdev(runtimes_filtered)

    return runtime_mean, runtime_stdev

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x149d6b98ffa0>>
Traceback (most recent call last):
  File "/opt/intel/oneapi/intelpython/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 770, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


### Build the multi-GPU implementation for all problem sizes

In [None]:
# Run the `cleanall` make target first, then build the FT benchmark once per
# problem class (S, W, A, B, C, D) with IMPL=multiple_gpu.
# NOTE(review): presumably each build produces the ./bin/ft.<CLASS> binary
# that execute_class launches — confirm against the Makefile.
!make cleanall
!make FT CLASS=S IMPL=multiple_gpu
!make FT CLASS=W IMPL=multiple_gpu
!make FT CLASS=A IMPL=multiple_gpu
!make FT CLASS=B IMPL=multiple_gpu
!make FT CLASS=C IMPL=multiple_gpu
!make FT CLASS=D IMPL=multiple_gpu

In [10]:
# Sweep configuration: one problem class, measured for several device counts.
CLASS = 'A'
REPETITIONS = 5
GPU_TILES = [1, 2, 4]  # max 224

# Per-device-count results, in the same order as GPU_TILES.
runtime_means, runtime_errs = [], []
speedup_means, speedup_errs = [], []

ref_mean = ref_err = None

# Measure every device count and keep its mean runtime and error.
for num_gpu_tiles in GPU_TILES:
    runtime_mean, runtime_err = execute_class(CLASS, REPETITIONS, num_gpu_tiles)
    runtime_means.append(runtime_mean)
    runtime_errs.append(runtime_err)

# Human-readable table of the results.
print("\nSummary:")
for tiles, tile_mean, tile_err in zip(GPU_TILES, runtime_means, runtime_errs):
    print(f'num_devices: {tiles:>3}, Runtime: {tile_mean:>8.6f} ± {tile_err:>8.6f} [sec]')

# Compact "(devices,runtime)" pairs on a single line.
print("\nRuntimes:")
print(''.join(
    f"({tiles},{tile_mean:.6f})"
    for tiles, tile_mean in zip(GPU_TILES, runtime_means)
))



Class: A, num_devices: 1, Iterations: 5


  [001] 0.590294 [sec] | Average Runtime: 0.590294 ± 0.000000 [sec]
  [002] 0.656487 [sec] | Average Runtime: 0.623390 ± 0.046806 [sec]
  [003] 0.678927 [sec] | Average Runtime: 0.667707 ± 0.015867 [sec]
  [004] 0.660895 [sec] | Average Runtime: 0.665436 ± 0.011889 [sec]
  [005] 0.568818 [sec] | Average Runtime: 0.646651 ± 0.038805 [sec]

Class: A, num_devices: 2, Iterations: 5
  [001] 0.761439 [sec] | Average Runtime: 0.761439 ± 0.000000 [sec]
  [002] 0.713487 [sec] | Average Runtime: 0.737463 ± 0.033907 [sec]
  [003] 0.674956 [sec] | Average Runtime: 0.694222 ± 0.027246 [sec]
  [004] 0.782653 [sec] | Average Runtime: 0.737463 ± 0.033907 [sec]
  [005] 0.770551 [sec] | Average Runtime: 0.757032 ± 0.030303 [sec]

Class: A, num_devices: 4, Iterations: 5
  [001] 1.113340 [sec] | Average Runtime: 1.113340 ± 0.000000 [sec]
  [002] 1.132784 [sec] | Average Runtime: 1.123062 ± 0.013749 [sec]
  [003] 1.063486 [sec] | Average Runtime: 1.123062 ± 0.013749 [sec]
  [004] 1.163828 [sec] | Average R

In [None]:
# Benchmark several problem classes with a fixed iteration and device count,
# then print the collected means and errors.
# NOTE(review): execute_classes is not defined in the visible part of this
# notebook (only execute_class is) — confirm it exists before running this
# cell on a fresh kernel.
CLASSES = ['C', 'D']
# CLASSES = ['S', 'W', 'A', 'B', 'C', 'D']
num_iters=5
num_devices =2
runtime_means, runtime_errs = execute_classes(CLASSES, num_iters, num_devices)

print(f'runtime_means: {runtime_means}')
print(f'runtime_errs : {runtime_errs}')

In [16]:
# Long benchmark run of a single problem class.
class_ = 'C'
num_iters = 100
# execute_class requires the device count as its third argument; the call
# previously omitted it and would raise a TypeError.
# NOTE(review): a single device is assumed here — adjust if this measurement
# was meant for a different device count.
num_devices = 1

runtime_mean, runtime_err = execute_class(class_, num_iters, num_devices)

print(f'runtime_mean: {runtime_mean:.6}')
print(f'runtime_err : {runtime_err:.6}')

# ours: 3.304523 ± 0.630665

Class: C, Iterations: 100


  [001] Runtime: 12.340323 ± 0.000000 [sec] (12.340323)
  [002] Runtime: 12.148646 ± 0.542143 [sec] (11.956970)
  [003] Runtime: 12.074489 ± 0.461467 [sec] (11.926174)
  [004] Runtime: 13.071470 ± 4.005685 [sec] (16.062414)


KeyboardInterrupt: 

In [10]:
# Reference runtime (mean and error) per problem class, looked up for the
# selected class and passed to benchmark_class. Only class 'B' has non-zero
# reference values here.
# NOTE(review): benchmark_class is not defined in the visible part of this
# notebook — confirm it exists before running this cell on a fresh kernel.
# Iterations:        1000           1000           200            100            50              2
ref_mean_by_class = {'S': 0.000000, 'W': 0.000000, 'A': 0.000000, 'B': 2.913119, 'C': 0.000000, 'D': 0.000000}
ref_err_by_class  = {'S': 0.000000, 'W': 0.000000, 'A': 0.000000, 'B': 0.629813, 'C': 0.000000, 'D': 0.000000}

class_ = 'B'
num_iters = 50

# Pick the reference values that belong to the selected class.
ref_mean = ref_mean_by_class[class_]
ref_err = ref_err_by_class[class_]

_ = benchmark_class(class_, num_iters, ref_mean, ref_err)

Class: B, Iterations: 50


  [001] Runtime: 3.298517 ± 0.000000 [sec] (3.298517)
  [002] Runtime: 3.089596 ± 0.590916 [sec] (2.880676)
  [003] Runtime: 3.125022 ± 0.435489 [sec] (3.195872)
  [004] Runtime: 3.032199 ± 0.514090 [sec] (2.753733)
  [005] Runtime: 3.110229 ± 0.565677 [sec] (3.422349)


KeyboardInterrupt: 