## Load environment and check for folder

In [None]:
# Mount the user's Google Drive at /content/gdrive so the notebook can reach
# files stored there (only works inside a Google Colab runtime).
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import subprocess
import os
# Root folder of the exercise collection on the mounted Drive.
base_dir = "/content/gdrive/MyDrive/cuda-exercises-02"
# Switch the notebook's working directory with the IPython %cd magic; the
# os.listdir() call below has no argument, so it lists this directory.
get_ipython().run_line_magic('cd', base_dir)

# assert that we have the exercise folder
assert '03-Smoothing' in os.listdir(), "Exercise folder not found"

# Exercise 03

In [None]:
# Absolute path of this exercise's folder inside the exercise collection.
exercise_dir = os.path.join(base_dir, "03-Smoothing")

def run_command(command: str, work_dir: str) -> subprocess.CompletedProcess:
    """Run a shell command inside a given directory and capture its output.

    Args:
        command: Command line handed to the shell as a single string.
        work_dir: Directory the command is executed in.

    Returns:
        The finished process; ``stdout`` and ``stderr`` are captured as text
        and ``returncode`` holds the exit status (no exception is raised on a
        non-zero status).
    """
    completed = subprocess.run(
        command,
        cwd=work_dir,
        shell=True,
        capture_output=True,  # same as stdout=PIPE, stderr=PIPE
        text=True,
    )
    return completed

# Create and prepare the build directory, then configure and compile.
build_dir = os.path.join(exercise_dir, 'build')


def _run_build_step(label, command, work_dir):
    """Run one build command, echo its stdout and abort on failure.

    Args:
        label: Name used in the printed messages ("Cmake" or "Compilation").
        command: Shell command line to execute.
        work_dir: Directory the command is executed in.

    Returns:
        The completed process returned by ``run_command``.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            command's stderr is printed before raising.
    """
    step = run_command(command, work_dir)
    print(f"{label} output:\n{step.stdout}")
    if step.returncode != 0:
        print(f"{label} failed with error:\n{step.stderr}")
        raise RuntimeError(f"{label} step failed: {command}")
    return step


# Start from a clean tree. `cmake -E remove -f` only deletes files, and with
# `-f` it reports success even when nothing was removed, so the old build
# directory (and its CMake cache) survived. `remove_directory` deletes the
# directory recursively and ignores a directory that does not exist.
cmd_return = _run_build_step('Cmake', 'cmake -E remove_directory build', exercise_dir)
cmd_return = _run_build_step('Cmake', 'cmake -E make_directory build', exercise_dir)

# Configure an optimized (Release) build from inside the build directory.
cmd_return = _run_build_step('Cmake', 'cmake ../ -DCMAKE_BUILD_TYPE=Release', build_dir)

# Compile the programs
cmd_return = _run_build_step('Compilation', 'make', build_dir)

# Execute tests and collect outputs
build_dir = os.path.join(exercise_dir, 'build')

# Workaround for GDrive permissions: make both binaries executable. This does
# not depend on the test number, so it is done once before the loop.
subprocess.run("chmod 755 ./parallel ./serial", shell=True, cwd=build_dir)

for i in range(1, 6):
    test_input = f"tests/{i}.in"
    parallel_output = f"parallel.{i}.out"
    serial_output = f"serial.{i}.out"
    parallel_time = f"parallel.{i}.time"
    serial_time = f"serial.{i}.time"

    # Run parallel and serial programs from the exercise folder so the
    # relative test path resolves. stdout goes to the .out file and stderr to
    # the .time file.
    # NOTE(review): this assumes each program writes nothing but its runtime
    # to stderr; anything else makes the float() parsing below fail.
    subprocess.run(f"build/parallel {test_input} > build/{parallel_output} 2> build/{parallel_time}", shell=True, cwd=exercise_dir)
    subprocess.run(f"build/serial {test_input} > build/{serial_output} 2> build/{serial_time}", shell=True, cwd=exercise_dir)

    # Compare outputs byte by byte. cmp reports the first difference on
    # stdout and problems such as a missing file on stderr, so both streams
    # are captured (as text) for the report below.
    diff_result = subprocess.run(
        f"cmp {serial_output} {parallel_output}",
        shell=True,
        cwd=build_dir,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    diff_status = diff_result.returncode

    # Read timings and calculate speedup
    with open(os.path.join(build_dir, serial_time), 'r') as f:
        serial_runtime = float(f.read().strip())

    test_status = "[CORRECT OUTPUT]" if diff_status == 0 else "[INCORRECT OUTPUT]"

    # Read parallel runtime only if the output is correct
    if diff_status == 0:
        with open(os.path.join(build_dir, parallel_time), 'r') as f:
            parallel_runtime = float(f.read().strip())
        # Guard against a zero reading to avoid a division by zero.
        speedup = serial_runtime / parallel_runtime if parallel_runtime > 0 else float('inf')
        print(f"Test {i}: {test_status} Serial time = {serial_runtime}s, Parallel time = {parallel_runtime}s, Speedup = {speedup:.4f}x\n")

    else:
        diff_report = (diff_result.stdout + diff_result.stderr).strip()
        print(f"Test {i}: {test_status} Serial time = {serial_runtime}s, Parallel time = --, Speedup = --")
        print(f"Diff output: {diff_report}\n\n")