In [1]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import pandas as pd
from pathlib import Path
import colorsys
import math
import numpy as np
# print(pd.__version__)

In [2]:
# --- Paths ---------------------------------------------------------------
# All locations are relative to the notebook's working directory.
path_to_data: Path = Path("./../data")
path_to_img_folder: Path = Path("./../report/img")
# LaTeX tables live next to the image folder. The path is defined here, in the
# configuration cell, so that every later cell that exports a table works
# after "Restart Kernel -> Run All".
path_to_tables: Path = path_to_img_folder.parent.joinpath("tables")
# Create the export folders up front so later writes cannot fail on a fresh checkout.
for out_dir in (path_to_img_folder, path_to_tables):
    out_dir.mkdir(parents=True, exist_ok=True)

# --- Input files ---------------------------------------------------------
data_seq_gcc = path_to_data.joinpath("mandelbrot_g++_seq_.csv")
data_seq = path_to_data.joinpath("mandelbrot_amd_seq_.csv")
data_openmp = path_to_data.joinpath("mandelbrot_amd_openmp_.csv")
data_cuda = path_to_data.joinpath("mandelbrot_cuda_cuda_.csv")
data_mpi = path_to_data.joinpath("mandelbrot_mpi.csv")

# Fail early, and say which file is the problem, if an input is missing.
for x in [data_seq, data_seq_gcc, data_openmp, data_cuda, data_mpi]:
    print(x)
    assert x.exists(), f"input file not found: {x}"
    assert x.is_file(), f"input path is not a file: {x}"
    assert x.suffix == ".csv", f"input file is not a CSV: {x}"

# --- Load ----------------------------------------------------------------
df_openmp: pd.DataFrame = pd.read_csv(data_openmp)
df_cuda: pd.DataFrame = pd.read_csv(data_cuda)
df_mpi: pd.DataFrame = pd.read_csv(data_mpi)
df_seq: pd.DataFrame = pd.read_csv(data_seq)
df_seq_gcc: pd.DataFrame = pd.read_csv(data_seq_gcc)

# Tag each frame with the implementation it was measured with
# (df_seq_gcc is labelled later, where it is compared against AOCC).
df_seq['Implementation'] = 'Seq'
df_openmp['Implementation'] = 'OpenMP'
df_cuda['Implementation'] = 'CUDA'
df_mpi['Implementation'] = 'MPI'


..\data\mandelbrot_amd_seq_.csv
..\data\mandelbrot_g++_seq_.csv
..\data\mandelbrot_amd_openmp_.csv
..\data\mandelbrot_cuda_cuda_.csv
..\data\mandelbrot_mpi.csv


# Seq

#### G++ 13.3.0

#### AOCC 5.0.0

### Seq exec heatmap

In [55]:
# Build two labelled copies of the sequential timings: one per compiler.
# The *_str columns are display labels used as pivot axes.
df_seq_temp = df_seq.loc[:, ['Resolution', 'Iterations', 'Time (seconds)']].assign(
    Implementation='AOCC',
    Resolution_str=lambda frame: frame['Resolution'].astype(str),
    Iterations_str=lambda frame: frame['Iterations'].astype(str) + ' iterations',
)
df_seq_gcc_temp = df_seq_gcc.loc[:, ['Resolution', 'Iterations', 'Time (seconds)']].assign(
    Implementation='G++',
    Resolution_str=lambda frame: frame['Resolution'].astype(str),
    Iterations_str=lambda frame: frame['Iterations'].astype(str) + ' iterations',
)

# One grid per compiler (rows: iterations, columns: resolution), then the
# element-wise difference G++ - AOCC.
pivot_spec = dict(index='Iterations_str', columns='Resolution_str', values='Time (seconds)')
aocc_pivot = df_seq_temp.pivot(**pivot_spec)
gpp_pivot = df_seq_gcc_temp.pivot(**pivot_spec)
diff_pivot = gpp_pivot.sub(aocc_pivot)
diff_pivot.to_latex(path_to_tables.joinpath("mandelbrot_gcc_vs_aocc.tex"))

# Annotated heatmap of the time difference.
diff_heatmap = go.Heatmap(
    z=diff_pivot.values,
    x=diff_pivot.columns,
    y=diff_pivot.index,
    text=diff_pivot.values.round(3),
    texttemplate='%{text}',
    textfont={"size": 10},
    colorbar_title='Time Difference (s)<br>G++ - AOCC',
)
fig = go.Figure(data=diff_heatmap)
fig.update_layout(
    title='Performance Difference between G++ and AOCC (seconds)',
    xaxis_title='Resolution',
    yaxis_title='Iterations',
    width=800,
    height=600,
)

fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_gcc_vs_aocc.png"))

In [4]:

# Reshape the AOCC sequential timings into a Resolution x Iterations grid.
# The key columns are cast to strings so the heatmap axes are categorical.
melted_df = (
    df_seq_temp[["Resolution", "Iterations"]]
    .astype(str)
    .assign(**{"Time (seconds)": df_seq_temp["Time (seconds)"]})
)
heatmap_df_seq = melted_df.pivot(
    index='Resolution',
    columns='Iterations',
    values='Time (seconds)',
)

# Annotated heatmap of the raw execution times.
fig = px.imshow(
    heatmap_df_seq,
    labels={"x": "Iterations", "y": "Resolution", "color": "Time (seconds)"},
    title="Sequential Execution Time Heatmap",
    aspect="auto",
    text_auto=True,
    width=800,
    height=600,
)

fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_aocc_seq_heatmap.png"))


In [59]:
# Ratio of G++ to AOCC run time per configuration (> 1 means G++ took longer).
speedup_pivot = gpp_pivot / aocc_pivot


def create_latex_gpp_aocc_table():
    """Write a LaTeX table comparing G++ and AOCC sequential timings.

    Reads the notebook-level frames ``df_seq_temp`` (AOCC) and
    ``df_seq_gcc_temp`` (G++) and pairs their rows on
    ``(Resolution, Iterations)``. Pairing on the keys, rather than on row
    position, keeps the table correct even if the two CSV files list the
    configurations in a different order, and keeps it consistent with
    ``speedup_pivot``.

    The table is written to ``mandelbrot_gcc_vs_aocc_comparison.tex`` inside
    ``path_to_tables``.

    Raises:
        pandas.errors.MergeError: if a configuration occurs more than once in
            either frame.
    """
    keys = ['Resolution', 'Iterations']
    # Normalise key dtypes so the merge also works if a key column was
    # converted to strings by another cell.
    key_dtypes = {key: int for key in keys}
    aocc_times = (
        df_seq_temp[keys + ['Time (seconds)']]
        .astype(key_dtypes)
        .rename(columns={'Time (seconds)': 'AOCC Time (s)'})
    )
    gpp_times = (
        df_seq_gcc_temp[keys + ['Time (seconds)']]
        .astype(key_dtypes)
        .rename(columns={'Time (seconds)': 'G++ Time (s)'})
    )

    # Left join: every AOCC configuration is kept; a missing G++ run shows as NaN.
    comparison_df = aocc_times.merge(gpp_times, on=keys, how='left', validate='one_to_one')
    comparison_df['Speedup'] = comparison_df['G++ Time (s)'] / comparison_df['AOCC Time (s)']
    comparison_df = comparison_df[
        ['Resolution', 'Iterations', 'G++ Time (s)', 'AOCC Time (s)', 'Speedup']
    ].sort_values(['Iterations', 'Resolution'])

    # Export to LaTeX with three decimals for every float column.
    latex_table = comparison_df.to_latex(
        index=False,
        float_format="%.3f",
    )

    with open(path_to_tables.joinpath("mandelbrot_gcc_vs_aocc_comparison.tex"), 'w') as f:
        f.write(latex_table)


create_latex_gpp_aocc_table()

# Annotated heatmap of the G++/AOCC time ratio.
fig = go.Figure(data=go.Heatmap(
    z=speedup_pivot.values,
    x=speedup_pivot.columns,
    y=speedup_pivot.index,
    text=speedup_pivot.values.round(3),
    texttemplate='%{text}',
    textfont={"size": 10},
    colorbar_title='Speedup<br>G++/AOCC'
))

fig.update_layout(
    title='Speedup of AOCC vs G++ (GCC/AOCC)',
    xaxis_title='Resolution',
    yaxis_title='Iterations',
    width=800,
    height=600,
)

fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_gcc_vs_aocc_speedup.png"))

# OpenMP

### Split scheduling into separate csv files

In [41]:
thread_counts = [2, 4, 8, 16]
scheduling_types = ['DYNAMIC', 'STATIC', 'GUIDED', 'RUNTIME']
path_to_tables = path_to_img_folder.parent.joinpath("tables")
# Create the output folder up front so the writes below cannot fail on a fresh checkout.
path_to_tables.mkdir(parents=True, exist_ok=True)


def highlight_times(row):
    """Return the scheduling types with the slowest and the fastest time in ``row``.

    Args:
        row: One row of the per-thread pivot table; it must hold one entry per
            name in ``scheduling_types``. Missing or non-numeric entries are
            ignored.

    Returns:
        A tuple ``(max_sched, min_sched)`` of lists of scheduling names. They
        are lists because several scheduling types can tie.
    """
    times_numeric = pd.to_numeric(row[scheduling_types], errors='coerce')
    max_sched = times_numeric[times_numeric == times_numeric.max()].index.tolist()
    min_sched = times_numeric[times_numeric == times_numeric.min()].index.tolist()
    return max_sched, min_sched


def format_time(row, sched):
    """Format one timing cell as LaTeX, boxing the row's extremes.

    Args:
        row: A pivot-table row that also carries the helper entries
            ``Max_Sched`` and ``Min_Sched`` produced by ``highlight_times``.
        sched: Name of the scheduling column to format.

    Returns:
        The time with three decimals (``'-'`` when missing), wrapped in a
        yellow ``\\fcolorbox`` for the slowest entry of the row or a green one
        for the fastest. On a tie between slowest and fastest, yellow wins.
    """
    time = row[sched]
    if pd.isna(time):
        time = '-'
    elif isinstance(time, (int, float)):
        time = f"{time:.3f}"
    if sched in row['Max_Sched']:
        return f'\\fcolorbox{{yellow}}{{white}}{{{time}}}'
    if sched in row['Min_Sched']:
        return f'\\fcolorbox{{green}}{{white}}{{{time}}}'
    return f'{time}'


for threads in thread_counts:
    # Rows measured with this thread count, restricted to the columns we need.
    threads_df = df_openmp.loc[
        df_openmp['Threads'] == threads,
        ['Iterations', 'Resolution', 'Scheduling', 'Threads', 'Time (seconds)'],
    ]

    # One row per (Iterations, Resolution), one column per scheduling type.
    # pivot_table returns the scheduling columns in alphabetical order, so they
    # are selected BY NAME with reindex instead of being relabelled by
    # position; relabelling would put the wrong header on the timings.
    # A scheduling type with no data becomes an all-NaN column.
    pivot_df = (
        threads_df.pivot_table(
            index=['Iterations', 'Resolution'],
            columns='Scheduling',
            values='Time (seconds)',
        )
        .reindex(columns=scheduling_types)
        .reset_index()
        .sort_values(by=['Iterations', 'Resolution'])
    )
    pivot_df.columns.name = None  # Remove the categorization name

    # Record, per row, which scheduling types are slowest / fastest.
    extremes = [highlight_times(row) for _, row in pivot_df.iterrows()]
    pivot_df['Max_Sched'] = [slowest for slowest, _ in extremes]
    pivot_df['Min_Sched'] = [fastest for _, fastest in extremes]

    # Replace the numeric timings with their LaTeX-formatted strings.
    for sched in scheduling_types:
        pivot_df[sched] = pivot_df.apply(lambda row, sched=sched: format_time(row, sched), axis=1)
    # The helper columns are no longer needed.
    pivot_df = pivot_df.drop(columns=['Max_Sched', 'Min_Sched'])

    # Generate the bare tabular (no table environment); the report wraps it.
    latex_tabular = pivot_df.to_latex(
        index=False,
        escape=False,  # Allows LaTeX commands like \fcolorbox
        column_format='llcccc',  # Two left-aligned columns and four centered columns
        float_format="%.2f",
        header=True
    )

    # Save the LaTeX table to a .tex file
    table_filename = path_to_tables / f'table_threads_{threads}.tex'
    with open(table_filename, 'w') as f:
        f.write(latex_tabular)


### OpenMP with 1 thread

In [6]:
# Keep only the OpenMP runs that used a single thread.
thread_1_df = df_openmp.loc[df_openmp['Threads'] == 1].copy()

# Reshape into a Resolution x Iterations grid; keys are cast to strings so
# the heatmap axes are categorical.
melted_df = (
    thread_1_df[["Resolution", "Iterations"]]
    .astype(str)
    .assign(**{"Time (seconds)": thread_1_df["Time (seconds)"]})
)
heatmap_df_openmp = melted_df.pivot(
    index='Resolution',
    columns='Iterations',
    values='Time (seconds)',
)

# Annotated heatmap, with the x-axis labels on top.
fig = px.imshow(
    heatmap_df_openmp,
    labels={"x": "Iterations", "y": "Resolution", "color": "Time (seconds)"},
    title="OpenMP 1 thread Execution Time Heatmap",
    aspect="auto",
    text_auto=True,
    width=800,
    height=600,
)
fig.update_xaxes(side="top")
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_1_thread_heatmap.png"))


In [7]:
# Speedup = sequential time / OpenMP single-thread time, per configuration.
#
# The two frames are matched on (Resolution, Iterations), NOT on row labels:
# thread_1_df is a filtered slice of df_openmp and keeps that frame's original
# row labels, so a plain Series division would pair unrelated rows or give NaN.
config_keys = ["Resolution", "Iterations"]
openmp_time_by_config = (
    thread_1_df.assign(**{key: thread_1_df[key].astype(str) for key in config_keys})
    .set_index(config_keys)["Time (seconds)"]
)
assert openmp_time_by_config.index.is_unique, "expected one 1-thread run per configuration"

# Keys are compared as strings so the lookup works whether or not another
# cell has already converted df_seq_temp's key columns to strings.
seq_config_index = pd.MultiIndex.from_frame(df_seq_temp[config_keys].astype(str))
speedup_seq_openmp = pd.Series(
    df_seq_temp["Time (seconds)"].to_numpy()
    / openmp_time_by_config.reindex(seq_config_index).to_numpy(),
    index=df_seq_temp.index,
    name="Speedup",
)

# Sequential frame plus the speedup column.
df_speedup_seq_openmp = df_seq_temp.copy()
df_speedup_seq_openmp["Speedup"] = speedup_seq_openmp

# Prepare the DataFrame for the heatmap (string keys -> categorical axes).
melted_df = df_speedup_seq_openmp[["Resolution", "Iterations"]].astype(str)
melted_df["Speedup"] = df_speedup_seq_openmp["Speedup"]

# Pivot the DataFrame to get the format suitable for a heatmap
heatmap_df_openmp_speedup = melted_df.pivot(index='Resolution', columns='Iterations', values='Speedup')

# Create the heatmap using Plotly Express
fig = px.imshow(
    heatmap_df_openmp_speedup,
    labels=dict(x="Iterations", y="Resolution", color="Speedup"),
    title="Speedup: Sequential Time / OpenMP 1 Thread Execution Time Heatmap",
    aspect="auto",
    text_auto=True,
    width=800,
    height=600,
)

# Move the x-axis labels to the top
fig.update_xaxes(side="top")

# Show the speedup with two decimals in the hover box.
fig.update_traces(
    hovertemplate='Iterations: %{x}<br>Resolution: %{y}<br>Speedup: %{z:.2f}'
)

# Display the heatmap
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_1_thread_speedup_heatmap.png"))

### Best OpenMP scheduling Solution

To select the best scheduling type on a consistent basis, we fix the number of iterations at 4000 and the resolution at 8000, since this is the most computationally expensive configuration. We run every scheduling type on this input and measure its execution time. The scheduling type with the shortest execution time is selected.

In [8]:
# Compare the OpenMP scheduling types on the most expensive configuration
# (8000 resolution, 4000 iterations) and pick the fastest one.
types_of_scheduling_openmp = ["DYNAMIC", "STATIC", "GUIDED", "RUNTIME"]

# 1. Multi-threaded OpenMP runs only. The filtered frame is copied so the
#    column assignments below do not write into a slice of another frame.
df_openmp_temp = df_openmp[['Threads', 'Resolution', 'Iterations', 'Time (seconds)', 'Scheduling']].copy()
df_openmp_temp = df_openmp_temp[df_openmp_temp['Threads'] != 1].copy()
# Resolution and Iterations are used as string merge keys.
df_openmp_temp['Resolution'] = df_openmp_temp['Resolution'].astype(str)
df_openmp_temp['Iterations'] = df_openmp_temp['Iterations'].astype(str)

# NOTE: this converts the key columns of the shared df_seq_temp frame to
# strings in place, so cells run after this one see string keys.
df_seq_temp['Resolution'] = df_seq_temp['Resolution'].astype(str)
df_seq_temp['Iterations'] = df_seq_temp['Iterations'].astype(str)

# 2. One frame per scheduling type.
df_openmp_dict = {}
for i in types_of_scheduling_openmp:
    df_openmp_dict[i] = df_openmp_temp[df_openmp_temp['Scheduling'] == i].copy()

# 3. Attach the sequential time to every OpenMP run and derive speedup/efficiency.
for sched in types_of_scheduling_openmp:
    # Merge OpenMP dataframe with sequential dataframe on Resolution and Iterations
    merged_df = pd.merge(
        df_openmp_dict[sched],
        df_seq_temp[['Resolution', 'Iterations', 'Time (seconds)']],
        on=['Resolution', 'Iterations'],
        suffixes=('_openmp', '_seq'),
        how="inner",
        validate="many_to_many",
    )

    # Speedup: sequential time over parallel time.
    merged_df['Speedup'] = merged_df['Time (seconds)_seq'] / merged_df['Time (seconds)_openmp']

    # Efficiency: speedup per thread.
    merged_df['Efficiency'] = merged_df['Speedup'] / merged_df['Threads']

    # Update the dictionary with the new dataframe
    df_openmp_dict[sched] = merged_df

# 4. All scheduling types in one frame, then only the 8000 / 4000 configuration.
df_openmp_all = pd.concat(df_openmp_dict.values(), ignore_index=True)
df_openmp_all_max_res_iter = df_openmp_all[(df_openmp_all['Resolution'] == '8000') & (df_openmp_all['Iterations'] == '4000')]

# 5. Line plot of execution time per thread count, one line per scheduling type.
#    The label keys must match the plotted column names to take effect.
fig = px.line(
    df_openmp_all_max_res_iter,
    x='Threads',
    y='Time (seconds)_openmp',
    color='Scheduling',
    markers=True,
    title='Execution Time: OpenMP scheduling types - 8000x8000 resolution, 4000 iterations',
    labels={
        'Threads': 'Number of Threads',
        'Time (seconds)_openmp': 'Execution Time (seconds)',
        'Scheduling': 'Scheduling',
    },
    width=800,
    height=600,
)

# Set x-axis to display only the actual thread values
fig.update_xaxes(type='category')

# Change marker symbols to crosses and increase size for better visibility
fig.update_traces(marker=dict(symbol='cross', size=10, line=dict(width=1, color='Black')))
fig.update_layout(
    legend=dict(
        x=0.69,
        y=0.99,
        bgcolor='rgba(255,255,255,0.5)',
        bordercolor='Black',
        borderwidth=1
    )
)

# Ideal scaling reference: the time halves every time the thread count doubles,
# anchored at the first 2-thread measurement in the filtered frame.
ideal_threads = [2, 4, 8, 16]
base_time = df_openmp_all_max_res_iter[df_openmp_all_max_res_iter['Threads'] == 2]['Time (seconds)_openmp'].values[0]
ideal_times = [base_time / (t / 2) for t in ideal_threads]

# Add ideal speedup line
fig.add_trace(
    go.Scatter(
        x=ideal_threads,
        y=ideal_times,
        mode='lines+markers',
        name='Ideal Speedup',
        line=dict(dash='dot', color='Red'),
        marker=dict(symbol='cross-thin', size=10)
    )
)
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_aocc_openmp_scheduling_types.png"))

# 6. Row(s) with the overall shortest execution time; kept as a DataFrame
#    because later cells read its "Scheduling" column.
best_openmp_speedup = df_openmp_all_max_res_iter[
    df_openmp_all_max_res_iter["Time (seconds)_openmp"]
    == df_openmp_all_max_res_iter["Time (seconds)_openmp"].min()
]

# Single quotes inside the f-string keep this valid on Python versions before 3.12.
print(f"The best time exectuion is for: {best_openmp_speedup['Scheduling'].values} at {best_openmp_speedup['Time (seconds)_openmp'].values} therefore it will be used for calculating speedup")

The best time exectuion is for: ['DYNAMIC'] at [105.083] therefore it will be used for calculating speedup


In [9]:
# Metric shown in the heatmap; 'Efficiency' is the other column available.
heatmap_metric = 'Speedup'  # Change to 'Efficiency' if desired

# One row per scheduling type, one column per thread count.
heatmap_data = df_openmp_all_max_res_iter.pivot(
    index='Scheduling',
    columns='Threads',
    values=heatmap_metric
)

# Sort rows alphabetically and columns by numeric thread count.
heatmap_data = heatmap_data.sort_index()
heatmap_data = heatmap_data[sorted(heatmap_data.columns, key=lambda x: int(x))]

# --- Create the Heatmap ---

fig_heatmap = px.imshow(
    heatmap_data,
    labels=dict(x="Number of Threads", y="Scheduling Type", color=heatmap_metric),
    x=heatmap_data.columns.astype(str),
    y=heatmap_data.index,
    title=f'Heatmap of {heatmap_metric} for OpenMP Scheduling Types',
    text_auto=True
)

# Enhance the layout
fig_heatmap.update_layout(
    xaxis_title='Number of Threads',
    yaxis_title='Scheduling Type',
    title={
        'text': f'Heatmap of {heatmap_metric} for OpenMP Scheduling Types',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    width=800,
    height=600,
    template='plotly_white'
)

# Adjust x-axis tick labels
fig_heatmap.update_xaxes(
    tickmode='array',
    tickvals=heatmap_data.columns,
    ticktext=[str(int(t)) for t in heatmap_data.columns],
    tickangle=45
)
fig_heatmap.show()
# Save the heatmap itself. `fig` still refers to the line plot of the previous
# cell, so writing `fig` here would store the wrong image under this name.
fig_heatmap.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_scheduling_types_heatmap_speedup.png"))


### Calculating speedup (base time is the sequential execution)

In [10]:
# Sequential time per (Resolution, Iterations), as a Series named 'Sequential'.
seq_times = (
    df_seq.pivot(
        index=['Resolution', 'Iterations'],
        columns='Implementation',
        values='Time (seconds)',
    )
    .rename(columns={'Seq': 'Sequential'})
    .loc[:, 'Sequential']
)

# Runs of the winning scheduling type, with integer keys so they line up
# with the index of seq_times.
best_open_mp_df = (
    df_openmp_all[df_openmp_all["Scheduling"] == best_openmp_speedup["Scheduling"].values[0]]
    .copy()
    .astype({'Resolution': int, 'Iterations': int})
)
best_open_mp_df_pivoted = best_open_mp_df.pivot(
    index=['Resolution', 'Iterations'],
    columns='Threads',
    values='Time (seconds)_openmp'
)
best_open_mp_df_pivoted.columns.name = 'Threads'

# Speedup per thread count: sequential time divided by parallel time,
# restricted to the configurations present in the OpenMP pivot.
speedup_df = pd.DataFrame(index=best_open_mp_df_pivoted.index)
for col, parallel_times in best_open_mp_df_pivoted.items():
    speedup_df[f'{col} threads (speedup)'] = seq_times.div(parallel_times)
speedup_df.columns.name = 'Threads'

# Report sequential configurations that have no OpenMP counterpart.
mismatch = ~seq_times.index.isin(best_open_mp_df_pivoted.index)
if mismatch.any():
    print("Mismatched indices:", seq_times.index[mismatch])

# Sequential time | OpenMP times per thread count | speedups per thread count.
combined_results = pd.concat(
    [seq_times.to_frame('Sequential'), best_open_mp_df_pivoted, speedup_df],
    axis=1,
)
# Mark the sequential column as seconds; all other labels stay as they are.
combined_results.columns = [
    c if c != 'Sequential' else f'{c} (s)' for c in combined_results.columns
]

display(combined_results)


Unnamed: 0_level_0,Unnamed: 1_level_0,Sequential (s),2,4,8,16,2 threads (speedup),4 threads (speedup),8 threads (speedup),16 threads (speedup)
Resolution,Iterations,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1000,1000,3.69722,2.12938,1.21906,0.697415,0.491327,1.736289,3.032845,5.30132,7.524968
1000,2000,7.18457,4.08609,2.37241,1.33811,0.911466,1.758299,3.028385,5.369192,7.882433
1000,4000,14.3001,8.53279,4.60581,2.66628,1.69085,1.6759,3.104796,5.363315,8.457344
2000,1000,14.5485,8.62719,4.94131,2.71778,1.76156,1.686354,2.94426,5.353082,8.258873
2000,2000,28.2592,17.0755,9.48052,5.28057,3.35556,1.654956,2.980765,5.351543,8.421605
2000,4000,56.4105,33.5716,19.1055,10.5509,6.5744,1.680304,2.952579,5.346511,8.580327
4000,1000,57.5584,35.0532,19.6306,10.936,6.85384,1.64203,2.932075,5.263204,8.397978
4000,2000,113.408,68.0894,38.2738,21.3868,13.2057,1.665575,2.963071,5.30271,8.587807
4000,4000,226.171,135.755,75.2523,42.276,26.1094,1.666023,3.005503,5.349868,8.662436
8000,1000,228.167,143.602,79.0571,43.6605,26.8861,1.588885,2.886104,5.225936,8.48643


In [11]:
# Bring 'Resolution' and 'Iterations' back as ordinary columns.
df_temp = combined_results.reset_index()

# Speedup columns are the string labels ending in 'threads (speedup)'.
speedup_cols = [
    col for col in df_temp.columns
    if isinstance(col, str) and 'threads (speedup)' in col
]

# Long format: one row per (configuration, thread count).
df_melted = df_temp.melt(
    id_vars=['Resolution', 'Iterations'],
    value_vars=speedup_cols,
    var_name='Threads',
    value_name='Speedup',
)

# Numeric thread count parsed from the column label, plus a readable
# per-configuration label used for the legend.
df_melted['Thread Count'] = df_melted['Threads'].str.extract(r'(\d+)', expand=False).astype(int)
df_melted['Scenario'] = (
    'Res=' + df_melted['Resolution'].astype(str)
    + ', It=' + df_melted['Iterations'].astype(str)
)

# One line per scenario.
fig = px.line(
    df_melted,
    x='Thread Count',
    y='Speedup',
    color='Scenario',
    markers=True,
    title='Speedup vs Number of Threads',
    labels={
        'Thread Count': 'Number of Threads',
        'Speedup': 'Speedup (Sequential / Parallel)',
        'Scenario': 'Scenario (Resolution & Iterations)',
    },
)

# Categorical x-axis so only the measured thread counts appear.
fig.update_layout(
    xaxis={'type': 'category', 'title': 'Number of Threads'},
    yaxis={'title': 'Speedup (Sequential / Parallel)'},
    legend_title_text='Scenario',
    width=800,
    height=600,
)

fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_best_speedup_vs_threads.png"))

### OpenMP heatmap

In [12]:
df_temp = combined_results.reset_index()

# Identify speedup columns (columns containing 'threads (speedup)')
speedup_cols = [col for col in df_temp.columns if isinstance(col, str) and 'threads (speedup)' in col]

# Melt the dataframe to long format for speedup data
df_melted = df_temp.melt(
    id_vars=['Resolution', 'Iterations'],
    value_vars=speedup_cols,
    var_name='Threads',
    value_name='Speedup'
)

# Extract thread count from the 'Threads' column
df_melted['Thread Count'] = df_melted['Threads'].str.extract(r'(\d+)').astype(int)

# Create a 'Scenario' column combining Resolution and Iterations
df_melted['Scenario'] = 'Res=' + df_melted['Resolution'].astype(str) + ', It=' + df_melted['Iterations'].astype(str)

# Keep only the columns needed for the heatmap (and by the bar plot that follows).
df_melted = df_melted[['Scenario', 'Thread Count', 'Speedup']]

# Scenarios as rows, thread counts as columns; the mean collapses any
# repeated (Scenario, Thread Count) pair into a single cell.
heatmap_df = df_melted.groupby(['Scenario', 'Thread Count'])['Speedup'].mean().unstack()
heatmap_df.columns = heatmap_df.columns.astype(str)
heatmap_df = heatmap_df.fillna(0)  # cells without a measurement are shown as 0

# Create the heatmap using Plotly Express
fig = px.imshow(
    heatmap_df,
    labels=dict(x="Number of Threads", y="Scenario", color="Speedup"),
    title="Heatmap of Speedup across Scenarios and Number of Threads",
    aspect="auto",
    text_auto=True,
    color_continuous_scale='Viridis'
)
# The hover template is set AFTER the heatmap is created; setting it earlier
# would modify the figure left over from the previous cell.
fig.update_traces(
    hovertemplate='Scenario: %{y}<br>Threads: %{x}<br>Speedup: %{z:.2f}'
)

# Customize the layout
fig.update_layout(
    xaxis_title='Number of Threads',
    yaxis_title='Scenario (Resolution & Iterations)',
    title={
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'bottom'
    },
    coloraxis_colorbar=dict(
        title="Speedup",
        tickvals=[heatmap_df.min().min(), heatmap_df.max().max()],
        ticktext=["Min", "Max"]
    )
)

# Move the x-axis labels to the top for better readability
fig.update_xaxes(side="top")

# Keep the (long) scenario names horizontal
fig.update_yaxes(tickangle=0)

# Fixed figure size, matching the other plots.
fig.update_layout(
    width=800,
    height=600
)

# Display the heatmap
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_best_speedup_vs_threads_heatmap.png"))


In [13]:
# 1. Ensure 'Thread Count' is treated as an ordered categorical variable
df_melted['Thread Count'] = pd.Categorical(
    df_melted['Thread Count'],
    categories=sorted(df_melted['Thread Count'].unique()),
    ordered=True
)

# 2. Row with the maximum speedup for each thread count.
#    observed=True is spelled out because the grouping key is categorical:
#    it avoids the pandas FutureWarning about the changing default and gives
#    the same groups here, since every category comes from the data itself.
max_speedups = df_melted.loc[df_melted.groupby('Thread Count', observed=True)['Speedup'].idxmax()]

# 3. Combine multiple qualitative color palettes for increased variety
combined_palette = (
    px.colors.qualitative.Plotly +
    px.colors.qualitative.D3 +
    px.colors.qualitative.Set1 +
    px.colors.qualitative.Set3 +
    px.colors.qualitative.Dark2 +
    px.colors.qualitative.Pastel1
)
scenarios = df_melted['Scenario'].unique()
if len(scenarios) > len(combined_palette):
    raise ValueError(f"Number of scenarios ({len(scenarios)}) exceeds the number of available colors ({len(combined_palette)}). Consider generating more colors programmatically.")

# One fixed color per scenario, shared by the bars and the "max" markers.
color_discrete_map = {scenario: combined_palette[i] for i, scenario in enumerate(scenarios)}

# 4. Assign colors to max_speedups based on their Scenario
max_speedups_colors = max_speedups['Scenario'].map(color_discrete_map).tolist()

# 5. Create the bar plot with the extended color mapping
fig_bar = px.bar(
    df_melted,
    x='Thread Count',
    y='Speedup',
    color='Scenario',
    barmode='group',  # Groups bars side by side for each thread count
    title='Speedup vs Number of Threads',
    labels={
        'Thread Count': 'Number of Threads',
        'Speedup': 'Speedup (Sequential / Parallel)',
        'Scenario': 'Scenario (Resolution & Iterations)'
    },
    text='Speedup',  # Show speedup values on bars
    color_discrete_map=color_discrete_map
)

# 6. Calculate an offset for the markers to position them above the bars
offset = df_melted['Speedup'].max() * 0.15  # 15% of the maximum speedup

# 7. Add a diamond marker above the best bar of every thread count
fig_bar.add_trace(
    go.Scatter(
        x=max_speedups['Thread Count'],
        y=max_speedups['Speedup'] + offset,  # Position above the bar
        mode='markers+text',
        marker=dict(
            size=12,
            color=max_speedups_colors,  # Use the same color as the corresponding bar
            symbol='diamond'
        ),
        # Label each marker with the maximum speedup value (4 decimals)
        text=max_speedups['Speedup'].round(4).astype(str),
        textposition="top center",
        showlegend=False
    )
)

# 8. Enhance the layout for better readability
fig_bar.update_layout(
    width=1000,    # Set the desired width
    height=600,   # Set the desired height
    xaxis_title='Number of Threads',
    yaxis_title='Speedup (Sequential / Parallel)',
    legend_title='Scenario',
    xaxis=dict(type='category'),  # Ensure x-axis is treated as categorical
    template='plotly_white',      # Clean white background
    title={
        'text': "Speedup per thread configuration",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'auto',
        'yanchor': 'auto'
    },
    margin=dict(t=100, l=50, r=50, b=50),  # Adjust margins as needed
    annotations=[
        dict(
            x=0.5,
            y=1.1,
            xref='paper',
            yref='paper',
            text='Diamond markers (♦) indicate maximum speedup',
            showarrow=False,
            font=dict(size=12)
        )
    ]
)

# 9. Show the text labels with three decimals
fig_bar.update_traces(texttemplate='%{text:.3f}')

# 10. Adjust y-axis range to ensure annotations are visible
fig_bar.update_yaxes(range=[0, df_melted['Speedup'].max() * 1.3])
fig_bar.update_xaxes(tickvals=df_melted['Thread Count'].unique())

# 11. Display the bar plot
fig_bar.show()
fig_bar.write_image(path_to_img_folder.joinpath("mandelbrot_openmp_best_speedup_vs_threads_bar.png"))






# MPI

### The MPI runs did not use the same processor as the SEQ and OMP runs, so the results are not directly comparable 1:1. We therefore calculate the speedup for the best OMP solution and compare it with the MPI solution.
Nodes used in full:
hpcocapie01
hpcocapie03
hpcocapie04
hpcocapie05
hpcocapie06
hpcocapie07
hpcocapie08

7 in total, meaning that only 4 hosts are used for the MPI solution to keep the comparison fair

Also, because the code uses
```c++
int threads_used = omp_get_max_threads();
```
instead of limiting the number of threads to 4, each process uses the maximum number of threads available on its machine.
It takes ALL possible threads of the given machine, so what the graphs actually show is a comparison of increasing threads vs. processes.


### Table for MPI

In [14]:
# --- MPI speedup vs. number of processes (line plot) -------------------------
# Builds `df_mpi_temp` (one row per MPI run, with Speedup and Machines columns),
# reduces it to the largest process count per machine configuration
# (`df_mpi_simple`) and plots measured speedup against an ideal linear line.

df_mpi_temp = df_mpi[['Resolution', 'Iterations', 'Processes', 'Threads', 'Time (seconds)']].copy()

# Sequential baseline for the 8000 x 4000 benchmark case.
filtered_df_seq = df_seq[
    (df_seq['Resolution'] == 8000) &
    (df_seq['Iterations'] == 4000)
]
# Same guard as the CUDA cell: without it an empty match surfaces as an opaque
# IndexError and a duplicated match would silently use the first row.
if filtered_df_seq.shape[0] != 1:
    raise ValueError("Expected exactly one matching row in df_seq for the given Resolution and Iterations.")

# Machine count per row is derived purely from row position: the first five rows
# are labelled 1 machine, the next five 2, the next five 4.
# NOTE(review): assumes the CSV rows are ordered exactly like that — confirm.
rows_per_machine = 5
machine_levels = [1, 2, 4]
total_rows = df_mpi.shape[0]
machine_counts = [
    machine_levels[row // rows_per_machine]
    if row // rows_per_machine < len(machine_levels)
    else None  # rows beyond the three known configurations stay unlabelled
    for row in range(total_rows)
]

# Speedup of every MPI run relative to the sequential baseline.
df_mpi_temp['Speedup'] = filtered_df_seq['Time (seconds)'].iloc[0] / df_mpi_temp['Time (seconds)']

# Scale the per-row thread count by the number of machines, then turn Machines
# into a string so it is handled as a discrete label downstream.
df_mpi_temp['Machines'] = machine_counts
df_mpi_temp['Threads'] = df_mpi_temp['Threads'] * df_mpi_temp['Machines']
df_mpi_temp['Machines'] = df_mpi_temp['Machines'].astype(str)

# Keep, for each machine configuration, the row(s) with the highest process count.
max_processes_per_config = df_mpi_temp.groupby('Machines')['Processes'].transform("max")
df_mpi_simple = df_mpi_temp[max_processes_per_config == df_mpi_temp['Processes']]

fig_simple = px.line(
    df_mpi_simple,
    x='Processes',
    y='Speedup',
    markers=True,
    title='Speedup vs Number of Processes',
    labels={
        'Processes': 'Number of Processes',
        'Speedup': 'Speedup (Sequential / Parallel)',
    },
    width=600,
)
fig_simple.update_layout(
    xaxis=dict(type='category'),
    yaxis_title='Speedup (Sequential / Parallel)',
    template='plotly_white',
    title={
        'text': "Speedup per number of processes",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'auto',
        'yanchor': 'auto'
    },
    margin=dict(t=100, l=50, r=50, b=50),
    # Legend sits above the plotting area.
    legend=dict(
        x=0.2,
        y=1.15,
    )
)

# Ideal (linear) scaling anchored at the first measured point: the line passes
# through (first_processes, first_speedup) and grows proportionally with the
# process count. The baseline is taken from the data instead of a literal 64,
# so the line stays correct if the first configuration changes.
first_speedup = df_mpi_simple['Speedup'].iloc[0]
first_processes = df_mpi_simple['Processes'].iloc[0]
ideal_speedup = first_speedup  # ideal value at the anchor point itself
ideal_speedups = [first_speedup * (p / first_processes) for p in df_mpi_simple['Processes']]

fig_simple.add_trace(
    go.Scatter(
        x=df_mpi_simple['Processes'],
        y=ideal_speedups,
        mode='lines',
        name=f'Ideal Speedup starting from {first_processes} processes',
        line=dict(dash='dot', color='Red')
    )
)
# 30 % headroom above the best measured speedup.
fig_simple.update_yaxes(range=[0, df_mpi_simple['Speedup'].max() * 1.3])

display(df_mpi_simple)
fig_simple.show()
fig_simple.write_image(path_to_img_folder.joinpath("mandelbrot_mpi_speedup_vs_processes.png"))

Unnamed: 0,Resolution,Iterations,Processes,Threads,Time (seconds),Speedup,Machines
4,8000,4000,64,4,104.814,8.550814,1
9,8000,4000,128,8,61.2547,14.631449,2
14,8000,4000,256,16,34.7099,25.821019,4


In [60]:

# Grouped bar chart of the speedup of every row in df_mpi_temp, coloured by
# machine count, followed by a LaTeX export of the same rows.
_mpi_bar_labels = {
    'Processes': 'Number of Processes',
    'Speedup': 'Speedup (Sequential / Parallel)',
    'Machines': 'Number of Machines'
}
fig = px.bar(
    df_mpi_temp,
    x='Processes',
    y='Speedup',
    color='Machines',
    title='Speedup vs Number of Processes',
    labels=_mpi_bar_labels,
    text='Speedup',
    barmode='group',
    width=600,
)

# Horizontal legend placed above the plotting area.
_mpi_bar_legend = dict(
    orientation="h",
    x=0.2,
    y=1.15,
)
_mpi_bar_title = {
    'text': "Speedup per machine configuration",
    'y': 0.95,
    'x': 0.5,
    'xanchor': 'auto',
    'yanchor': 'auto'
}
fig.update_layout(
    xaxis=dict(type='category'),
    yaxis_title='Speedup (Sequential / Parallel)',
    legend=_mpi_bar_legend,
    legend_title='Number of Machines',
    template='plotly_white',
    title=_mpi_bar_title,
    margin=dict(t=100, l=50, r=50, b=50),
)

# Bar labels with five decimals; 30 % headroom on the y-axis; one tick per
# distinct process count.
fig.update_traces(texttemplate='%{text:.5f}')
_mpi_bar_y_top = df_mpi_temp['Speedup'].max() * 1.3
fig.update_yaxes(range=[0, _mpi_bar_y_top])
fig.update_xaxes(tickvals=df_mpi_temp['Processes'].unique())

# Report table: Resolution/Iterations dropped, Machines moved to the front.
_mpi_table_columns = ['Machines', 'Processes', 'Threads', 'Time (seconds)', 'Speedup']
df_mpi_temp_1 = df_mpi_temp[_mpi_table_columns]

display(df_mpi_temp_1)
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_mpi_speedup_vs_processes_bar.png"))
df_mpi_temp_1.to_latex(
    path_to_tables.joinpath("mandelbrot_mpi_speedup_vs_processes.tex"),
    index=False,
    float_format="%.3f",
)

Unnamed: 0,Machines,Processes,Threads,Time (seconds),Speedup
0,1,4,64,84.8702,10.560185
1,1,8,32,99.5405,9.003823
2,1,16,16,104.066,8.612275
3,1,32,8,106.892,8.384584
4,1,64,4,104.814,8.550814
5,2,8,64,99.4507,9.011953
6,2,16,32,104.229,8.598806
7,2,32,16,106.746,8.396052
8,2,64,8,104.571,8.570684
9,2,128,8,61.2547,14.631449


# CUDA

In [54]:
# Working copy of the CUDA timings restricted to the columns used in this cell.
df_cuda_temp = df_cuda[['Resolution', 'Iterations', 'CUDAThreads', 'Time (seconds)']].copy()


def create_latex_cuda_tables():
    """Write a LaTeX table of CUDA run times, one column per thread configuration.

    Reads the module-level ``df_cuda_temp`` at call time and pivots it so that
    each (Iterations, Resolution) pair is a row and each ``CUDAThreads`` value
    is a column labelled with its square (``"<n*n> Threads"``). Missing
    combinations are shown as ``-``. The result is written to
    ``table_cuda_performance.tex`` inside ``path_to_tables``.

    Returns:
        None. The table is written to disk and a confirmation is printed.
    """
    pivot_table = df_cuda_temp.pivot_table(
        index=['Iterations', 'Resolution'],
        columns='CUDAThreads',
        values='Time (seconds)',
        aggfunc='first',  # takes the first entry if a combination occurs more than once
    )

    # Column headers show the squared CUDAThreads value.
    pivot_table.columns = [f'{int(col) ** 2} Threads' for col in pivot_table.columns]

    # Turn the index levels back into ordinary columns and order the rows.
    pivot_table = pivot_table.reset_index()
    pivot_table = pivot_table.sort_values(by=['Iterations', 'Resolution'])

    # Placeholder for configurations that were not measured.
    pivot_table = pivot_table.fillna('-')

    latex_table = pivot_table.to_latex(
        index=False,
        caption='CUDA Mandelbrot Program Performance',
        label='tab:cuda_performance',
        float_format="%.3f",
        # One left-aligned column spec per actual column, so the tabular stays
        # valid when the number of thread configurations changes.
        column_format='l' * len(pivot_table.columns),
        longtable=False,
        multicolumn=True,
        multicolumn_format='c',
    )

    path_to_cuda_table = path_to_tables.joinpath("table_cuda_performance.tex")
    with open(path_to_cuda_table, 'w') as f:
        f.write(latex_table)
    # Report the file name that was actually written.
    print(f"LaTeX table generated and saved as '{path_to_cuda_table.name}'.")
# Export the tables while df_cuda_temp still holds every measured configuration;
# it is narrowed to a single benchmark case right below.
create_latex_cuda_tables()
# NOTE(review): despite the file name, this export contains the timing columns
# only (Speedup is computed further down) — confirm that this is intended.
df_cuda_temp.to_latex(path_to_tables.joinpath("table_cuda_speedup.tex"), index=False)

# Keep only the 8000 x 4000 case and the two columns needed for the speedup.
# The mask is built from df_cuda_temp itself and applied once, so its index
# always lines up with the frame being filtered (the earlier double filter used
# a mask from df_cuda and made pandas warn about reindexing the boolean key).
is_benchmark_case = (
    (df_cuda_temp['Resolution'] == 8000) &
    (df_cuda_temp['Iterations'] == 4000)
)
df_cuda_temp = df_cuda_temp.loc[is_benchmark_case, ['CUDAThreads', 'Time (seconds)']].copy()

# Total CUDA threads (assuming a 2D thread block): CUDAThreads squared.
df_cuda_temp['TotalCUDAThreads'] = pd.to_numeric(
    df_cuda_temp['CUDAThreads'] * df_cuda_temp['CUDAThreads']
)

# Fresh 0..n-1 index after filtering.
df_cuda_temp = df_cuda_temp.reset_index(drop=True)

# Sequential execution time for the same resolution and iteration count.
filtered_df_seq = df_seq[
    (df_seq['Resolution'] == 8000) &
    (df_seq['Iterations'] == 4000)
]

# The baseline must be unambiguous: exactly one sequential measurement.
if filtered_df_seq.shape[0] != 1:
    raise ValueError("Expected exactly one matching row in df_seq for the given Resolution and Iterations.")

sequential_time = filtered_df_seq['Time (seconds)'].iloc[0]

# Measured speedup of each CUDA configuration over the sequential run.
df_cuda_temp['Speedup'] = sequential_time / df_cuda_temp['Time (seconds)']

# --- Amdahl's Law reference curves ---

# Serial fractions for which a reference curve is drawn (example values).
serial_fractions = [0.05, 0.1, 0.15]

# Distinct total thread counts, ascending; shared x positions for every curve.
unique_threads = np.sort(df_cuda_temp['TotalCUDAThreads'].unique())

# serial fraction -> array of 1 / (s + (1 - s) / N), evaluated at each N in
# unique_threads.
amdahl_speedups = {
    fraction: 1 / (fraction + (1 - fraction) / unique_threads)
    for fraction in serial_fractions
}

# --- Plotting ---

_cuda_plot_title = "Speedup vs Number of CUDA Threads with Amdahl's Law"

# Measured speedup as the base trace.
fig = px.line(
    df_cuda_temp,
    x='TotalCUDAThreads',
    y='Speedup',
    markers=True,
    title=_cuda_plot_title,
    labels={
        'TotalCUDAThreads': 'Number of CUDA Threads',
        'Speedup': 'Speedup (Sequential / Parallel)',
    },
    width=800,
    height=600,
)

# One dashed, x-marked trace per serial fraction.
for s, speedup in amdahl_speedups.items():
    amdahl_trace = go.Scatter(
        x=unique_threads,
        y=speedup,
        mode='lines+markers',
        name=f"Amdahl's Ideal (s={s})",
        line=dict(dash='dash'),
        marker=dict(symbol='x'),
    )
    fig.add_trace(amdahl_trace)

# Layout: centred title, horizontal legend, and a categorical x-axis whose
# category order follows the sorted thread counts.
_cuda_title_cfg = {
    'text': _cuda_plot_title,
    'y': 0.95,
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
}
_cuda_xaxis_cfg = dict(
    type='category',
    categoryorder='array',
    categoryarray=unique_threads,
)
fig.update_layout(
    xaxis_title='Number of CUDA Threads',
    yaxis_title='Speedup (Sequential / Parallel)',
    title=_cuda_title_cfg,
    legend=dict(
        x=0.01,
        y=1.02,
        orientation="h",
    ),
    template='plotly_white',
    xaxis=_cuda_xaxis_cfg,
)

# Explicit ticks: one per thread count, printed as integers and rotated 45 degrees.
_cuda_tick_labels = [str(int(count)) for count in unique_threads]
fig.update_xaxes(
    tickmode='array',
    tickvals=unique_threads,
    ticktext=_cuda_tick_labels,
    tickangle=45,
)

# Render inline, then export a static PNG for the report.
fig.show()
fig.write_image(path_to_img_folder.joinpath("mandelbrot_cuda_speedup_vs_threads_amdahl.png"))


LaTeX table generated and saved as 'cuda_performance_table.tex'.



Boolean Series key will be reindexed to match DataFrame index.

