# TP CUDA - October 2022

Florian Rascoussier

Bastien Barbe

Ce notebook constitue une étude comparative des méthodes de parallélisation CUDA sur GPU pour différentes applications.

## Part 2 : Matrix-Vector operation

Ce programme fait du calcul vectoriel sur des matrices et des vecteurs.
### Compilation

In [1]:
# Script parameters.
# EXEC gates the benchmark cell: when True, the old results CSV is deleted,
# a new one is created and the compiled programs are executed; when False,
# that cell only prints a warning and runs nothing.
EXEC: bool = True

In [7]:
# create the local ./bin output directory (relative to the notebook's working
# directory) if it does not exist; -p makes this a no-op when it already does
!mkdir -p bin

# compile the four program variants with nvcc at -O3
# (only the shared-atomics build additionally passes -g)
!nvcc -o bin/tp_cuda_part_2_vector_sequential tp_cuda_part_2_vector_sequential.cu -O3
!nvcc -o bin/tp_cuda_part_2_vector_1_thread_per_1_block tp_cuda_part_2_vector_1_thread_per_1_block.cu -O3
!nvcc -o bin/tp_cuda_part_2_vector_multithread_atomics tp_cuda_part_2_vector_multithread_atomics.cu -O3
!nvcc -o bin/tp_cuda_part_2_vector_multithread_shared_atomics tp_cuda_part_2_vector_multithread_shared_atomics.cu -O3 -g

# memory-checker invocation kept for reference; NOTE(review): it targets a
# part 1 binary that this cell does not build
# cuda-memcheck ./bin/tp_cuda_part_1_pi_basic -T 1000 -N 100000

Testing with S=31 fails with an out-of-memory error, as it requires more than 2 GB (2 Go) of device memory.

### Performance evaluation

In [8]:
# Benchmark configuration shared by the cells below.

# Results file and the dtype of each of its columns (also used as CSV header).
csv_result_file = "stats.csv"
csv_headers = {"version": str, "S": int, "T": int, "runtime": float}

# Values forwarded to the programs as -N, -M and -T.
N_values = [2, 4, 8, 10, 12, 14]
M_values = [1, 3, 7, 9, 11, 13]
T_values = [1, 32, 64, 128, 256]

# S = 2^(N + M), pairing the k-th N with the k-th M.
S_values = [2 ** (n_exp + m_exp) for n_exp, m_exp in zip(N_values, M_values)]
print("S values:", S_values)

# Plot colour assigned to each program version.
color_class = {
    "sequential": "purple",
    "onethreadperblock": "red",
    "multithread_atomics": "orange",
    "multithread_shared": "blue",
}

# Number of times each configuration is executed.
repeats = 10

S values: [8, 128, 32768, 524288, 8388608, 134217728]


In [10]:
import os
import subprocess

def delete_old_csv_file() -> None:
    """Remove the results CSV named by ``csv_result_file``, if possible.

    Deletion is best effort: any ``OSError`` raised by ``os.remove`` (for
    example because the file does not exist) is swallowed.
    """
    try:
        os.remove(csv_result_file)
    except OSError:
        # nothing to clean up, or the file could not be removed: carry on
        return

def create_csv_file() -> None:
    """Create (or truncate) the results CSV and write its header row.

    The header is the comma-joined keys of ``csv_headers`` followed by a
    newline; the target path is ``csv_result_file``.
    """
    print(f"Creating NEW CSV file ({csv_result_file})")
    # the context manager closes the handle even if the write raises
    with open(csv_result_file, "w") as csv_file:
        csv_file.write(",".join(csv_headers.keys()) + "\n")

# execute one benchmark program
def run_program(
    program_filepath: str, N: int, M: int, T: int
) -> None:
    """Run one compiled benchmark program and block until it exits.

    The program is invoked as
    ``<program_filepath> -N <N> -M <M> -S <N + M> -T <T>``.
    Its standard output is discarded and its exit status is not checked.

    Args:
        program_filepath: path of the executable to launch.
        N: value forwarded as ``-N``.
        M: value forwarded as ``-M``.
        T: value forwarded as ``-T`` (plotted elsewhere in the notebook as
            the number of threads).
    """
    S: int = N + M
    args = (program_filepath, 
        "-N", str(N), 
        "-M", str(M),
        "-S", str(S),
        "-T", str(T),
    )
    # Discard stdout rather than piping it: Popen(stdout=PIPE) followed by
    # wait() never drains the pipe, so a program that prints more than the
    # OS pipe buffer would block forever.
    subprocess.run(args, stdout=subprocess.DEVNULL, check=False)

print("Program executions")
def exec_instances():
    """Run every program variant for each (N, M, T) combination.

    For each combination, the four binaries are launched in a fixed order,
    ``repeats`` times over; one ``*`` is printed after each repeat as a
    progress marker.

    NOTE(review): this iterates over the full cartesian product of
    ``N_values`` and ``M_values``, whereas ``S_values`` pairs them
    index-wise; confirm which of the two is intended.
    """
    programs = (
        "bin/tp_cuda_part_2_vector_sequential",
        "bin/tp_cuda_part_2_vector_1_thread_per_1_block",
        "bin/tp_cuda_part_2_vector_multithread_atomics",
        "bin/tp_cuda_part_2_vector_multithread_shared_atomics",
    )
    for N in N_values:
        for M in M_values:
            for T in T_values:
                print(f" + Running programs (N: {N}, M: {M}) with T: {T}")
                for _ in range(repeats):
                    for binary in programs:
                        run_program(binary, N, M, T)
                    print("*", end = '')
                # terminate the line of progress markers
                print()

# Run the benchmark only when enabled by the EXEC flag.
if not EXEC:
    print(f"WARN: EXEC = {EXEC}. No C++ program execution.")
else:
    # start from a fresh CSV, then launch every program instance
    for step in (delete_old_csv_file, create_csv_file, exec_instances):
        step()



Program executions
Creating NEW CSV file (stats.csv)
 + Running programs (N: 2, M: 1) with T: 1
**********
 + Running programs (N: 2, M: 1) with T: 32
**********
 + Running programs (N: 2, M: 1) with T: 64
*****

### Performance analysis

In [None]:
# superposed big graph
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
from numpy import genfromtxt
import pandas as pd
import seaborn as sns

sns.set()

# Load the measurements: the first line of the file is the header row and
# each column is parsed with the dtype declared in csv_headers.
df = pd.read_csv(csv_result_file, header=0, dtype=csv_headers)
display(df.head(10))

# keep only rows with a strictly positive runtime
df = df.loc[df["runtime"] > 0]



# display the graph
def display_graph(df: pd.DataFrame):
    """Draw one runtime-versus-S panel per program version on a 2x2 grid.

    The versions (and their colours) are the keys of ``color_class``, taken
    in dictionary order; the k-th version is drawn at row ``k // COLUMNS``,
    column ``k % COLUMNS``. Each panel holds a seaborn line plot with the
    individual measurements overlaid as a scatter plot, on log-log axes.

    NOTE(review): this presumes the ``version`` strings written to the CSV
    are exactly the keys of ``color_class``; a version with no matching
    rows gets an empty panel titled "(no data)".

    Args:
        df: measurements with at least the columns ``version``, ``S`` and
            ``runtime``.
    """
    ROWS = 2
    COLUMNS = 2

    # plt.subplots takes (nrows, ncols), in that order
    fig, ax = plt.subplots(ROWS, COLUMNS, figsize=(18,16))
    fig.suptitle("Superposed Graphs of Matrix-Vector operations")

    versions = list(color_class.keys())
    for i in range(ROWS):
        for j in range(COLUMNS):
            index = i*COLUMNS + j
            if index >= len(versions):
                # fewer versions than panels: leave the remaining panels blank
                continue

            # print version graph data
            graph_version = versions[index]
            df_plot_0 = df[df['version'] == graph_version]
            panel = ax[i][j]

            if df_plot_0.empty:
                panel.set(title=f"{graph_version} (no data)")
                continue

            # x is S so that the data matches the axis label and the scatter
            # overlay; hue is "version" because color_class is keyed by
            # version name. Rows sharing the same S (other T values,
            # repeated runs) are aggregated by lineplot into one estimate.
            g0 = sns.lineplot(
                ax=panel,
                data=df_plot_0, 
                x="S",
                y="runtime", 
                hue="version",
                palette=color_class,
                style="version",
            )
            # the title can only be set once the plot object exists
            g0.set(title=graph_version)
            # S grows as powers of two, hence the log scale on x as well
            g0.set(xscale="log", yscale="log")
            g0.set(xlabel="Matrix dimensions S=2^(N+M)", ylabel="runtime (s)")
            # overlay the raw points on the same panel (not the whole grid)
            sns.scatterplot(
                ax=panel,
                data=df_plot_0, 
                x="S",
                y="runtime", 
                hue="version",
                palette=color_class,
                legend=False,
            )

    # display the graphs (each panel already carries the legend seaborn built)
    plt.show()


display_graph(df)

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# superposed big graph
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
from numpy import genfromtxt
import itertools
import pandas as pd
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

sns.set()

# Re-read the results CSV (header on its first line), parsing each column
# with the dtype declared in csv_headers, and preview the first ten rows.
df = pd.read_csv(csv_result_file, dtype=csv_headers, header=0)
display(df.head(n=10))

# a log-scaled runtime axis cannot show zero values, so drop them first
df_plot = df[df["runtime"] > 0]

# create palette for each version actually present in the results:
# colours come from color_class, and any version it does not list is drawn
# in gray (the line style still tells such versions apart)
color_versions = {
    version: color_class.get(version, "gray")
    for version in df_plot["version"].unique()
}


# display graphs
fig, ax = plt.subplots(figsize=(18,16))
fig.suptitle("Superposed Graphs of custom matrix computing, for different parallelism techniques")

if df_plot.empty:
    print("WARN: no positive runtime to plot.")
else:
    # a single hue-mapped call draws every version on the shared axes and
    # builds one legend listing all of them
    g0 = sns.lineplot(
        ax=ax,
        data=df_plot, 
        x="T",
        y="runtime", 
        hue="version",
        palette=color_versions,
        style="version",
    )
    g0.set(xscale="log")
    g0.set(yscale="log")
    g0.set(xlabel="number of threads", ylabel="runtime (s)")
    # overlay the individual measurements; the legend comes from the line plot
    g00 = sns.scatterplot(
        ax=ax,
        data=df_plot, 
        x="T",
        y="runtime", 
        hue="version",
        palette=color_versions,
        legend=False,
    )

plt.show()

### Discussion