### Importing necessary modules


In [1]:
import time
import matplotlib.pyplot as plt
import seaborn as sns
from diffusion import (
    SequentialDiffusionEquation,
    OMPdiffusionEquation,
    CUDADiffusionEquation,
    BaseDiffusionEquation,
)
import pandas as pd
import os
from IPython.display import display
from glob import glob

### Utility Functions


In [17]:
def standard_deviation(arr: list) -> float:
    """Return the population standard deviation of the values in ``arr``.

    The squared deviations from the arithmetic mean are averaged over
    ``len(arr)`` (not ``len(arr) - 1``) and the square root of that average
    is returned. An empty ``arr`` raises ``ZeroDivisionError``.
    """
    count = len(arr)
    mean = sum(arr) / count
    squared_deviations = [(value - mean) ** 2 for value in arr]
    variance = sum(squared_deviations) / count
    return variance ** 0.5

In [18]:
def measure_execution_time(
    Solver: "type[BaseDiffusionEquation]",
    N: int,
    total_eval: int,
    steps: int,
    n_threads: int = None,
    block_dim: tuple[int, int] = None,
    lib_path: str = "../build/libDiffusionEquation.so",
) -> list[float]:
    """Time ``steps`` calls to ``solver.step()``, repeated ``total_eval`` times.

    A fresh solver is created (and used as a context manager) for every
    repetition, so solver construction and teardown are excluded from the
    measured interval.

    Args:
        Solver: Solver class (not an instance); it is called as
            ``Solver(lib_path, N=N)``.
        N: Value forwarded to the solver constructor as ``N``.
        total_eval: Number of timed repetitions.
        steps: Number of ``step()`` calls inside each timed repetition.
        n_threads: If truthy, forwarded to ``solver.set_num_threads`` before
            timing starts.
        block_dim: If truthy, unpacked into ``solver.set_block_dim`` before
            timing starts.
        lib_path: Path of the shared library handed to the solver.

    Returns:
        A list with ``total_eval`` elapsed times, in seconds.

    NOTE(review): if ``step()`` only enqueues asynchronous work (e.g. a GPU
    kernel launch) without waiting for it, the interval may under-report the
    real cost -- confirm against the solver implementation.
    """
    time_list = []
    for _ in range(total_eval):
        with Solver(lib_path, N=N) as solver:
            if n_threads:
                solver.set_num_threads(n_threads)

            if block_dim:
                solver.set_block_dim(*block_dim)

            # perf_counter is monotonic and high resolution; time.time() can
            # jump when the system clock is adjusted.
            start = time.perf_counter()
            for _ in range(steps):
                solver.step()
            end = time.perf_counter()

            time_list.append(end - start)
    return time_list

In [19]:
def get_thread_num(N: int, block_dim_x: int, block_dim_y: int) -> int:
    """Return the total number of threads in a 2D launch covering ``N - 2`` points per axis.

    The number of blocks along each axis is ``ceil((N - 2) / block_dim)``,
    computed with integer arithmetic; the result is
    ``blocks_x * blocks_y * block_dim_x * block_dim_y``.

    Args:
        N: Grid size per axis; ``N - 2`` points per axis are covered
            (presumably the interior without the boundary -- confirm against
            the CUDA kernel).
        block_dim_x: Threads per block along x.
        block_dim_y: Threads per block along y.

    Returns:
        The whole number of threads launched.
    """
    # Floor division is required here: with "/" the ceil-division idiom
    # yields a fractional block count and an inflated, non-integer result.
    blocks_x = (N - 2 + block_dim_x - 1) // block_dim_x
    blocks_y = (N - 2 + block_dim_y - 1) // block_dim_y
    return block_dim_x * block_dim_y * blocks_x * blocks_y

In [2]:
def read_execution_file(file_path: str, column_name: str) -> pd.DataFrame:
    """Load a text file holding one number per line into a one-column DataFrame.

    Args:
        file_path: Path of the text file to read.
        column_name: Name given to the single column of the result.

    Returns:
        A DataFrame whose ``column_name`` column holds the parsed values in
        file order.

    Raises:
        ValueError: If a non-blank line cannot be parsed by ``float``.
    """
    with open(file_path, "r") as file:
        # Blank / whitespace-only lines (e.g. stray newlines at the end of the
        # file) are skipped instead of crashing float("").
        values = [float(line) for line in file if line.strip()]
    return pd.DataFrame({column_name: values})

### Measure Execution time of the Different Solutions


#### Sequential implementation


In [20]:
# times = measure_execution_time(SequentialDiffusionEquation, 500, 10, 1000)
# print("Time elapsed: ", sum(times) / len(times), "+/-", standard_deviation(times))

#### OpenMP implementation


In [21]:
# times = measure_execution_time(OMPdiffusionEquation, 500, 10, 1000)
# print("Time elapsed: ", sum(times) / len(times), "+/-", standard_deviation(times))

#### CUDA implementation


In [22]:
# times = measure_execution_time(CUDADiffusionEquation, 500, 10, 1000)
# print("Time elapsed: ", sum(times) / len(times), "+/-", standard_deviation(times))

### First Results


Now that we can measure the execution time of each implementation, let's compare them and check whether the OpenMP and CUDA implementations are faster than the sequential one.


In [9]:
# Configuration shared by the benchmark, table and plotting cells below.

# Value passed to every solver constructor as `N`
# (presumably the grid side length -- confirm against the solver).
N = 3000

# Number of timed repetitions per experiment.
total_evaluations = 15
# Number of `step()` calls inside each timed repetition.
total_steps = 1000
# Thread counts benchmarked with the OpenMP solver.
num_threads_omp = [4]
# CUDA block shapes (x, y) to benchmark.
cuda_block_dims = [(16, 16), (32, 32)]
# Output folder and base file name; the CSVs are written as
# "times_<file_name>" and "metrics_<file_name>" inside `folder_path`.
folder_path = "../data/performance/final_report"
file_name = "All_EduardoNotebook.csv"

# Output data: experiment label -> list of measured times.
times_data = {}

In [None]:
# # Wait so the notebook screen can turn off before the benchmarks start
# time.sleep(80)

In [10]:
# Sequential Solver
# Runs `total_evaluations` timed repetitions of `total_steps` steps each and
# stores the list of elapsed times under the "Sequential" key of `times_data`.
times_data["Sequential"] = measure_execution_time(
    SequentialDiffusionEquation, N, total_evaluations, total_steps
)

In [11]:
# OMP Solver
# One benchmark per entry of `num_threads_omp`; each list of elapsed times is
# stored in `times_data` under the key "OMP <num_threads>" (e.g. "OMP 4").
for num_threads in num_threads_omp:
    times_data[f"OMP {num_threads}"] = measure_execution_time(
        OMPdiffusionEquation, N, total_evaluations, total_steps, n_threads=num_threads
    )

In [12]:
# CUDA solver
# One benchmark per block shape in `cuda_block_dims` (config cell); each list
# of elapsed times is stored under the key "CUDA (<x>, <y>)". Iterating the
# config list keeps the benchmark and the configuration from drifting apart.
for block_dim in cuda_block_dims:
    times_data[f"CUDA {block_dim}"] = measure_execution_time(
        CUDADiffusionEquation, N, total_evaluations, total_steps, block_dim=block_dim
    )

In [13]:
# Save data into a csv file
# `to_csv` does not create missing directories, so make sure the output
# folder exists before writing.
os.makedirs(folder_path, exist_ok=True)
pd.DataFrame(times_data).to_csv(
    os.path.join(folder_path, f"times_{file_name}"), index=False
)

In [10]:
# Merge the timings stored as text files (one value per line) into
# `times_data`, then write the raw times and a per-experiment summary
# (mean, std, speedup relative to the "seq" column) as CSV files.
path = "../build/results/*.txt"

for file in sorted(glob(path)):
    # Column name = file name without its directory and extension.
    column_name = os.path.splitext(os.path.basename(file))[0]
    data = read_execution_file(file, column_name)
    times_data[column_name] = data[column_name]

times_data = pd.DataFrame(times_data)

# `to_csv` does not create missing directories.
os.makedirs(folder_path, exist_ok=True)
times_data.to_csv(
    os.path.join(folder_path, f"times_{file_name}"), index=False
)

# The speedup is measured against the sequential baseline, which must come
# from a results file named "seq.txt".
if "seq" not in times_data.columns:
    raise KeyError(
        f"no 'seq' column found; expected a 'seq.txt' file matching {path!r}"
    )

# Compute mean, std and speedup
mean_data = times_data.mean()
std_data = times_data.std()
speedup_data = mean_data["seq"] / mean_data

# Create csv table with metrics
# NOTE(review): this writes the same "metrics_<file_name>" file as the
# metrics-table cell below, but with different column names
# ("experiment"/"mean"/"std"/"speedup"); whichever cell runs last wins.
pd.DataFrame({
    "experiment": times_data.columns,
    "mean": mean_data,
    "std": std_data,
    "speedup": speedup_data
}).to_csv(os.path.join(folder_path, f"metrics_{file_name}"), index=False)

In [None]:
# Build the summary table (one row per experiment) from the saved raw times:
# thread count, mean time, standard deviation, speedup relative to the
# sequential run, and efficiency (speedup divided by thread count).
times_df = pd.read_csv(os.path.join(folder_path, f"times_{file_name}"))

# Row order: sequential baseline, then the OMP runs, then the CUDA runs.
# Labels are derived from the config lists so they always match the columns
# written by the benchmark cells.
omp_labels = [f"OMP {num_threads}" for num_threads in num_threads_omp]
cuda_labels = [f"CUDA {block_dim}" for block_dim in cuda_block_dims]
experiments = ["Sequential"] + omp_labels + cuda_labels

thread_counts = (
    [1]
    + list(num_threads_omp)
    + [get_thread_num(N, *block_dim) for block_dim in cuda_block_dims]
)

# Each statistic is computed once per experiment and reused below.
mean_times = [times_df[label].mean() for label in experiments]
std_times = [times_df[label].std() for label in experiments]
sequential_mean = mean_times[0]
speedups = [sequential_mean / mean_time for mean_time in mean_times]

table = {
    "Experiment": experiments,
    "Num Threads": thread_counts,
    "Time": mean_times,
    "STD": std_times,
    "Speedup": speedups,
    "Efficiency": [
        speedup / threads for speedup, threads in zip(speedups, thread_counts)
    ],
}

metrics_df = pd.DataFrame(table)
display(metrics_df)

metrics_df.to_csv(os.path.join(folder_path, f"metrics_{file_name}"), index=False)

#### Plot the results


In [None]:
# Plot the measured times as box plots (one figure each for OMP, sequential
# and CUDA) and save every figure as a PNG inside `folder_path`.
times_df = pd.read_csv(os.path.join(folder_path, f"times_{file_name}"))

# The sequential baseline is taken from an earlier report only when the
# current times file has none; concatenating unconditionally would create a
# duplicate "Sequential" column and turn df["Sequential"] into a DataFrame.
if "Sequential" in times_df.columns:
    df = times_df
else:
    times_sequential = pd.read_csv(
        "../data/performance/report2/times_CUDA_EduardoNotebook.csv"
    )["Sequential"]
    df = pd.concat([times_df, times_sequential], axis=1)


# Calculate the speedup
mean_time_omp = df["OMP 4"].mean()
std_time_omp = df["OMP 4"].std()
speedup = df["Sequential"].mean() / mean_time_omp

print(f"Speedup: {speedup}")
print(f"OMP Time: {mean_time_omp} +/- {std_time_omp}")


display(df)
# Apply a style
sns.set_theme(style="whitegrid")


# OMP times
plt.figure(figsize=(4, 6))
b = sns.boxplot(data=df["OMP 4"], palette="Set1")
b.set(xlabel=None, ylabel=None)
plt.title("Tempo de Execução Sequencial/OMP")
plt.savefig(os.path.join(folder_path, "times_boxplot_omp.png"), dpi=300)
plt.show()


# Sequential times
plt.figure(figsize=(4, 6))
b = sns.boxplot(data=df["Sequential"], palette="Set2")
b.set(xlabel=None, ylabel=None)
plt.title("Tempo de Execução Sequencial")
plt.savefig(os.path.join(folder_path, "times_boxplot_sequential.png"), dpi=300)
plt.show()

# CUDA times, one box per block shape
plt.figure(figsize=(6, 6))
sns.boxplot(data=times_df[["CUDA (16, 16)", "CUDA (32, 32)"]], palette="Set3")
plt.ylabel("Tempo (s)")
plt.xlabel("Experimento")
plt.title("Tempo de Execução CUDA")
plt.savefig(os.path.join(folder_path, "times_boxplot_cuda.png"), dpi=300)
plt.show()

In [None]:
# Plot the MPI and OMP speedup against the number of processes/threads,
# together with the ideal linear speedup and the CUDA speedup as a
# horizontal reference line.
# NOTE(review): this cell expects the metrics CSV with the lowercase
# "experiment"/"speedup" columns and experiment names such as "mpi_4",
# "omp_4" and "cuda" -- confirm which cell wrote the file last.
metrics_df = pd.read_csv(os.path.join(folder_path, f"metrics_{file_name}"))


def _speedup_by_procs(metrics: pd.DataFrame, tag: str) -> pd.DataFrame:
    """Return the rows whose experiment name contains ``tag``, sorted by process count.

    The count is parsed from the second "_"-separated field of the experiment
    name (e.g. "mpi_4" -> 4) and stored in a new "n_procs" column.
    """
    # .copy() makes the slice independent, so adding a column below cannot
    # trigger SettingWithCopyWarning.
    subset = metrics.loc[
        metrics["experiment"].str.contains(tag), ["experiment", "speedup"]
    ].copy()
    subset["n_procs"] = subset["experiment"].apply(
        lambda name: int(name.split("_")[1])
    )
    return subset.sort_values("n_procs")


mpi_speedup_df = _speedup_by_procs(metrics_df, "mpi")
omp_speedup_df = _speedup_by_procs(metrics_df, "omp")


# Plot the actual speedup
plt.plot(
    mpi_speedup_df["n_procs"],
    mpi_speedup_df["speedup"],
    label="MPI Speedup",
    marker="x",
)

# Plot OMP speedup
plt.plot(
    omp_speedup_df["n_procs"],
    omp_speedup_df["speedup"],
    label="OMP Speedup",
    marker="o",
)

# Plot the ideal (linear) speedup
plt.plot(
    mpi_speedup_df["n_procs"],
    mpi_speedup_df["n_procs"],
    label="Speedup Linear",
    linestyle="--",
    marker="*",
)


cuda_speedup = metrics_df.loc[metrics_df["experiment"] == "cuda", "speedup"].values[0]

# Draw the CUDA speedup as a horizontal reference line
plt.axhline(cuda_speedup, color="red", linestyle="--", label="CUDA Speedup")

# Set y-axis limits
plt.ylim(1, metrics_df["speedup"].max() + 0.5)

# Set the x-axis ticks to only those values present in n_procs
unique_n_procs = sorted(mpi_speedup_df["n_procs"].unique())
plt.xticks(unique_n_procs)

# Add grid, labels, title, and legend
plt.grid(True)
plt.title("Speedup vs Nº Threads")
plt.xlabel("Nº Threads / Processos")
plt.ylabel("Speedup")
plt.legend()

plt.show()

In [None]:
# plot the results together in a single graph
metrics_df = pd.read_csv(os.path.join(folder_path, f"metrics_{file_name}"))

# Plot the efficiency
plt.plot(
    metrics_df["Num Threads"], metrics_df["Efficiency"], label="Eficiência", marker="o"
)
# Add the linear efficiency line
plt.plot(
    metrics_df["Num Threads"],
    [1] * len(metrics_df["Num Threads"]),
    label="Eficiência Linear",
    linestyle="--",
    marker="o",
)

plt.grid()
plt.title("Eficiência vs Nº Threads")
plt.xlabel("Nº Threads")
plt.ylabel("Eficiência")
plt.legend()
plt.show()

In [None]:
# Plot how much of the ideal (linear) speedup each experiment achieves,
# as a percentage, against the number of threads.
# NOTE(review): this cell expects the metrics CSV that has the "Speedup" and
# "Num Threads" columns -- confirm which cell wrote the file last.
metrics_df = pd.read_csv(os.path.join(folder_path, f"metrics_{file_name}"))

# Calculate percentage of linear speedup achieved
metrics_df["Percent of Linear Speedup"] = (
    metrics_df["Speedup"] / metrics_df["Num Threads"]
) * 100

# Plot the percentage
plt.plot(
    metrics_df["Num Threads"],
    metrics_df["Percent of Linear Speedup"],
    label="Percent of Linear Speedup",
    marker="o",
)

# Pass True explicitly: a bare plt.grid() toggles the grid, which switches it
# OFF when the active style (e.g. seaborn "whitegrid") already enables it.
plt.grid(True)
plt.title("Percentage of Linear Speedup Achieved vs Number of Threads")
plt.xlabel("Number of Threads")
plt.ylabel("Percentage of Linear Speedup Achieved (%)")
plt.legend()
plt.show()

In [None]:
# Plot the measured speedup (left y-axis) and the linear speedup (right
# y-axis) against the number of threads on a shared x-axis.
# NOTE(review): this cell expects the metrics CSV that has the "Speedup" and
# "Num Threads" columns -- confirm which cell wrote the file last.
metrics_df = pd.read_csv(os.path.join(folder_path, f"metrics_{file_name}"))

fig, ax1 = plt.subplots()

color = "tab:blue"
ax1.set_xlabel("Number of Threads")
ax1.set_ylabel("Measured Speedup", color=color)
ax1.plot(
    metrics_df["Num Threads"],
    metrics_df["Speedup"],
    label="Measured Speedup",
    color=color,
    marker="o",
)
ax1.tick_params(axis="y", labelcolor=color)

ax2 = ax1.twinx()  # Instantiate a second axes sharing the same x-axis

color = "tab:red"
ax2.set_ylabel("Linear Speedup", color=color)
ax2.plot(
    metrics_df["Num Threads"],
    metrics_df["Num Threads"],
    label="Linear Speedup",
    linestyle="--",
    color=color,
)
ax2.tick_params(axis="y", labelcolor=color)

# Combined legend: handles and labels of both axes, left axis first
handles_left, labels_left = ax1.get_legend_handles_labels()
handles_right, labels_right = ax2.get_legend_handles_labels()
fig.legend(
    handles_left + handles_right, labels_left + labels_right, loc="upper left"
)

plt.title("Speedup vs Number of Threads")
# Pass True explicitly: a bare plt.grid() toggles the grid, which switches it
# OFF when the active style (e.g. seaborn "whitegrid") already enables it.
plt.grid(True)
plt.show()