# Parallel Computing Project

### Run the following cell to define the auxiliary functions

In [30]:
import subprocess
import statistics
import re
from math import sqrt

# def benchmark_classes(classes, num_iters, ref_means, ref_errs):
#     speedup_means = []
#     speedup_err = []

#     for i, class_ in enumerate(classes):
#         speedup_mean, speedup_err = benchmark_class(class_, num_iters, ref_means[i], ref_errs[i])
#         speedup_means.append(speedup_mean)
#         speedup_err.append(speedup_err)

#     return speedup_means, speedup_err


# def benchmark_class(class_, num_iters, ref_mean, ref_err):
#     runtime_mean, runtime_err = execute_class(class_, num_iters)
#     speedup_means, speedup_err = calc_speedup(runtime_mean, runtime_err, ref_mean, ref_err)

#     return speedup_means, speedup_err, runtime_mean, runtime_err


# def execute_classes(classes, num_iters):
#     runtime_means = []
#     runtime_errs = []

#     for i, class_ in enumerate(classes):
#         runtime_mean, runtime_err = execute_class(class_, num_iters)
#         runtime_means.append(runtime_mean)
#         runtime_errs.append(runtime_err)

#     return runtime_means, runtime_errs


def execute_class(class_, num_iters):
    """Run the benchmark binary for one problem class and collect its timings.

    Executes ``./bin/ft.<class_>`` ``num_iters`` times. After every run the
    program output is checked for a ``Verification = SUCCESSFUL`` line and the
    value of the ``Time in seconds`` line is recorded. A progress line with the
    running statistics (as computed by ``calc_stats``) is printed per run.

    Args:
        class_: Problem-class suffix of the binary (e.g. ``'S'``, ``'B'``).
        num_iters: Number of times to run the binary; must be at least 1.

    Returns:
        Tuple ``(runtime_mean, runtime_err)`` as returned by ``calc_stats`` for
        all collected runtimes.

    Raises:
        ValueError: If ``num_iters`` is smaller than 1.
        AssertionError: If the output lacks the expected lines or the
            verification result is not ``SUCCESSFUL``.
        subprocess.CalledProcessError: If the binary exits with a non-zero status.
    """
    # Without at least one run there are no statistics to return; previously this
    # case fell through to the final return and failed with UnboundLocalError.
    if num_iters < 1:
        raise ValueError(f'num_iters must be at least 1, got {num_iters}')

    # One-element argv list: the path is never split on spaces and no shell is involved.
    cmd = [f'./bin/ft.{class_}']

    runtimes = []

    print(f'Class: {class_}, Iterations: {num_iters}')

    for i in range(num_iters):
        res = subprocess.check_output(cmd).decode('utf-8')

        # The run only counts if the benchmark verified its own result.
        match = re.search(r'Verification\s+=\s+(\w+)', res)
        assert match, 'No match found!'
        assert match.group(1) == 'SUCCESSFUL', f'Verification = {match.group(1)}'

        match = re.search(r' Time in seconds\s+=\s+(\d+\.\d+)', res)
        assert match, 'No match found!'
        runtime = float(match.group(1))
        runtimes.append(runtime)

        # Recompute the running statistics so progress is visible during long runs.
        runtime_mean, runtime_err = calc_stats(runtimes)
        print(f'  [{(i+1):03d}] {runtime:>8.6f} [sec] | Average Runtime: {runtime_mean:>8.6f} ± {runtime_err:.6f} [sec]')

    print()

    return runtime_mean, runtime_err


def calc_speedup(runtime_mean, runtime_err, ref_mean, ref_err):
    """Compute the speedup relative to a reference runtime, with its uncertainty.

    The speedup is ``ref_mean / runtime_mean``. Its error combines the relative
    errors of both runtimes in quadrature and scales the result by the absolute
    speedup. The runtime and the speedup are printed before returning.

    Args:
        runtime_mean: Mean runtime of the measured configuration.
        runtime_err: Error of ``runtime_mean``.
        ref_mean: Mean runtime of the reference configuration.
        ref_err: Error of ``ref_mean``.

    Returns:
        Tuple ``(speedup_mean, speedup_err)``.
    """
    ratio = ref_mean / runtime_mean

    # Relative uncertainties of the two inputs.
    rel_runtime = runtime_err / runtime_mean
    rel_ref = ref_err / ref_mean
    ratio_err = sqrt(rel_runtime**2 + rel_ref**2) * abs(ratio)

    print(f'Runtime: {runtime_mean:.6f} ± {runtime_err:.6f} [sec]')
    print(f'Speedup: {ratio:>8.2f} ± {ratio_err:>8.2f}')

    return ratio, ratio_err


def calc_stats(runtimes):
    """Return the mean and sample standard deviation of a list of runtimes.

    With more than two samples, values that lie one standard deviation or more
    away from the mean are treated as outliers and dropped before the statistics
    are recomputed. The filtered sample is only used when at least two values
    survive; otherwise the unfiltered statistics are returned.

    Args:
        runtimes: Non-empty sequence of runtimes.

    Returns:
        Tuple ``(runtime_mean, runtime_err)`` where ``runtime_err`` is the sample
        standard deviation (``0.0`` for a single measurement).

    Raises:
        ValueError: If ``runtimes`` is empty.
    """
    if len(runtimes) == 0:
        raise ValueError("runtimes array is empty")

    if len(runtimes) == 1:
        # A single sample has no spread to estimate.
        return runtimes[0], 0.0

    runtime_mean = statistics.mean(runtimes)
    runtime_stdev = statistics.stdev(runtimes)

    # Filter out outliers
    if len(runtimes) > 2:
        runtimes_filtered = [runtime for runtime in runtimes if abs(runtime - runtime_mean) < runtime_stdev]
        # statistics.stdev needs at least two points. The filter can leave fewer,
        # e.g. [1, 2, 3] keeps only [2], and identical samples (stdev == 0) keep
        # nothing. In those cases the unfiltered statistics are kept.
        if 2 <= len(runtimes_filtered) < len(runtimes):
            runtime_mean = statistics.mean(runtimes_filtered)
            runtime_stdev = statistics.stdev(runtimes_filtered)

    return runtime_mean, runtime_stdev

### Build CPU implementation for all problem sizes

In [None]:
# Clean previous build artifacts, then build the FT benchmark once per problem
# class (S, W, A, B, C, D). execute_class later runs ./bin/ft.<CLASS>, so each
# build presumably produces that binary — TODO confirm against the Makefile.
!make cleanall
!make FT CLASS=S
!make FT CLASS=W
!make FT CLASS=A
!make FT CLASS=B
!make FT CLASS=C
!make FT CLASS=D

In [33]:
CLASS = 'S'
REPETITIONS = 2
NUM_THREADS=[1, 2, 4, 8, 16, 32, 56, 112, 224] # max 224

runtime_means = []
runtime_errs = []
speedup_means = []
speedup_errs = []

ref_mean = None
ref_err = None

for num_threads in NUM_THREADS:
  %env OMP_NUM_THREADS={num_threads}
  runtime_mean, runtime_err = execute_class(CLASS, REPETITIONS)

  if num_threads == 1:
    ref_mean, ref_err = runtime_mean, runtime_err
    speedup_mean, speedup_err = 1.0, 0.0
  else:
    speedup_mean, speedup_err = calc_speedup(runtime_mean, runtime_err, ref_mean, ref_err)

  runtime_means.append(runtime_mean)
  runtime_errs.append(runtime_err)
  speedup_means.append(speedup_mean)
  speedup_errs.append(speedup_err)

  print()

print()
for i in range(len(NUM_THREADS)):
  print(f'#Threads: {NUM_THREADS[i]:>3}, Runtime: {runtime_means[i]:>8.2f} ± {runtime_errs[i]:>8.2f} [sec], Speedup: {speedup_means[i]:>8.2f} ± {speedup_errs[i]:>8.2f}')


env: OMP_NUM_THREADS=1
Class: S, Iterations: 2


  [001] 0.029768 [sec] | Average Runtime: 0.029768 ± 0.000000 [sec]
  [002] 0.029777 [sec] | Average Runtime: 0.029773 ± 0.000006 [sec]


env: OMP_NUM_THREADS=2
Class: S, Iterations: 2
  [001] 0.014991 [sec] | Average Runtime: 0.014991 ± 0.000000 [sec]
  [002] 0.017820 [sec] | Average Runtime: 0.016405 ± 0.002000 [sec]

Runtime: 0.016405 ± 0.002000 [sec]
Speedup:     1.81 ±     0.22

env: OMP_NUM_THREADS=4
Class: S, Iterations: 2
  [001] 0.007818 [sec] | Average Runtime: 0.007818 ± 0.000000 [sec]
  [002] 0.008120 [sec] | Average Runtime: 0.007969 ± 0.000214 [sec]

Runtime: 0.007969 ± 0.000214 [sec]
Speedup:     3.74 ±     0.10

env: OMP_NUM_THREADS=8
Class: S, Iterations: 2
  [001] 0.004872 [sec] | Average Runtime: 0.004872 ± 0.000000 [sec]
  [002] 0.005095 [sec] | Average Runtime: 0.004984 ± 0.000158 [sec]

Runtime: 0.004984 ± 0.000158 [sec]
Speedup:     5.97 ±     0.19

env: OMP_NUM_THREADS=16
Class: S, Iterations: 2
  [001] 0.003060 [sec] | Average Runtime: 0.003060 ± 0.000000 [sec]

TypeError: 'int' object is not subscriptable

In [None]:
# Measure the runtime of several problem classes back to back.
CLASSES = ['C', 'D']
# CLASSES = ['S', 'W', 'A', 'B', 'C', 'D']
num_iters=50

# The former execute_classes helper only exists as commented-out code, so calling
# it raises NameError on a fresh kernel; loop over execute_class directly instead.
runtime_means = []
runtime_errs = []
for class_ in CLASSES:
    runtime_mean, runtime_err = execute_class(class_, num_iters)
    runtime_means.append(runtime_mean)
    runtime_errs.append(runtime_err)

print(f'runtime_means: {runtime_means}')
print(f'runtime_errs : {runtime_errs}')

In [16]:
# Measure a single problem class and print its mean runtime and error.
class_ = 'C'
num_iters = 100

# execute_class prints one progress line per run and returns the final statistics.
runtime_mean, runtime_err = execute_class(class_, num_iters)

# '.6' without a type code formats the float with six significant digits.
print(f'runtime_mean: {runtime_mean:.6}')
print(f'runtime_err : {runtime_err:.6}')

# ours: 3.304523 ± 0.630665

Class: C, Iterations: 100


  [001] Runtime: 12.340323 ± 0.000000 [sec] (12.340323)
  [002] Runtime: 12.148646 ± 0.542143 [sec] (11.956970)
  [003] Runtime: 12.074489 ± 0.461467 [sec] (11.926174)
  [004] Runtime: 13.071470 ± 4.005685 [sec] (16.062414)


KeyboardInterrupt: 

In [10]:
# Reference runtimes (mean and error, in seconds) per problem class; the header
# row gives the iteration count listed for each class. Entries of 0.000000 are
# placeholders for classes without a recorded reference.
# Iterations:        1000           1000           200            100            50              2
ref_mean_by_class = {'S': 0.000000, 'W': 0.000000, 'A': 0.000000, 'B': 2.913119, 'C': 0.000000, 'D': 0.000000}
ref_err_by_class  = {'S': 0.000000, 'W': 0.000000, 'A': 0.000000, 'B': 0.629813, 'C': 0.000000, 'D': 0.000000}

class_ = 'B'
num_iters = 50

ref_mean = ref_mean_by_class[class_]
ref_err = ref_err_by_class[class_]

# A placeholder reference of 0 would make calc_speedup divide by zero, and only
# after the whole benchmark has run; reject it up front.
assert ref_mean > 0, f'No reference runtime recorded for class {class_}'

# The former benchmark_class helper only exists as commented-out code, so calling
# it raises NameError on a fresh kernel; perform its two steps directly.
runtime_mean, runtime_err = execute_class(class_, num_iters)
speedup_mean, speedup_err = calc_speedup(runtime_mean, runtime_err, ref_mean, ref_err)

Class: B, Iterations: 50


  [001] Runtime: 3.298517 ± 0.000000 [sec] (3.298517)
  [002] Runtime: 3.089596 ± 0.590916 [sec] (2.880676)
  [003] Runtime: 3.125022 ± 0.435489 [sec] (3.195872)
  [004] Runtime: 3.032199 ± 0.514090 [sec] (2.753733)
  [005] Runtime: 3.110229 ± 0.565677 [sec] (3.422349)


KeyboardInterrupt: 