# MPI messaging

This script will analyse `broadcast` and `barrier` messging, each with 3 different algorithms.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import plotly.express as px
import plotly.graph_objects as go

path = os.getcwd()

## `broadcast` messaging

Let's first load the data

In [7]:
# Set path to the data
bcast_path = path + '/bcast/results_bcast/'  # Replace with your actual path

# Get the list of files in the directory
files = os.listdir(bcast_path)

# Create a list to store the dataframes
dfs = []

# Read the data from the files; for each file, check if 
# the name contains binary, default or chain and set the
# corresponding value in the dataframe

for file in files:
    if 'binary' in file:
        df = pd.read_csv(os.path.join(bcast_path, file))
        df['algorithm'] = 'Binary Tree'
        dfs.append(df)
    elif 'default' in file:
        df = pd.read_csv(os.path.join(bcast_path, file))
        df['algorithm'] = 'Default'
        dfs.append(df)
    elif 'chain' in file:
        df = pd.read_csv(os.path.join(bcast_path, file))
        df['algorithm'] = 'Chain'
        dfs.append(df)

# Filter out dataframes where the 'Algorithm' column contains '${algorithm}' or is null
valid_dfs = []
for df in dfs:
    if 'Algorithm' in df.columns:
        df['Algorithm'] = df['Algorithm'].astype(str)
        if not df['Algorithm'].str.contains(r'\$\{algorithm\}').any():
            valid_dfs.append(df)

# Split the data into separate dataframes for each algorithm
default_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'default']
binary_tree_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'binary_tree']
chain_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'chain']

# Select an allocation strategy
default_df = [df[df['Allocation']=='core'] for df in default_df]
binary_tree_df = [df[df['Allocation']=='core'] for df in binary_tree_df]
chain_df = [df[df['Allocation']=='core'] for df in chain_df]

message_sizes = default_df[0]['MessageSize'].unique()
allocations = default_df[0]['Allocation'].unique()


# Initialize the figure
fig = go.Figure()
# Draw a vertical line at x = 12 and x = 24
fig.add_shape(type="line", x0=12, y0=0, x1=12, y1=1040, line=dict(color="LightSeaGreen", width=3))
fig.add_shape(type="line", x0=24, y0=0, x1=24, y1=1040, line=dict(color="LightSeaGreen", width=3))
# Start plotting the data
for selected in range(len(default_df)):
	# The two runs
	used_df = default_df[selected]
	for message_size in message_sizes:
		# Only plot for 512KB and 1MB, a.k.a. one fitting entirely in L1(<32 kB per core), one comfortably fitting in L2(1 MB per core) and the max default size (1 MB))
		if message_size in [16384, 524288, 1048576]:
			# Plot the data
			fig.add_trace(go.Scatter(x=used_df[used_df['MessageSize']==message_size]['Processes'], 
									y=used_df[used_df['MessageSize']==message_size]['Latency'],
									mode='lines+markers', name=str(message_size)))
# Update the layout by naming the appropirate tings and highlighting some places of interest in the x-axis
fig.update_layout(title='Latency vs Processes for the broadcast using Default Algorithm', 
					xaxis_title='Processes', 
					yaxis_title='Latency (us)', 
					legend_title='Message Size',
					xaxis=dict(tickvals=[2, 7, 12, 15, 16, 24, 31, 36, 40, 48])
				)
fig.show()

fig=go.Figure()
# Start plotting the data
for selected in range(len(default_df)):
	# The two runs
	used_df = default_df[selected]
	for message_size in message_sizes:
		# Only plot for 512KB and 1MB, a.k.a. one fitting entirely in L1(<32 kB per core), one comfortably fitting in L2(1 MB per core) and the max default size (1 MB))
		if message_size in [16384, 524288, 1048576]:
			# Plot the data
			fig.add_trace(go.Scatter(x=used_df[used_df['MessageSize']==message_size]['Processes'], 
									y=used_df[used_df['MessageSize']==message_size]['Latency'],
									mode='lines+markers', name=str(message_size)))
# Update the layout by naming the appropirate tings and highlighting some places of interest in the x-axis
fig.update_layout(title='Latency vs Processes for the broadcast using Default Algorithm', 
					xaxis_title='Processes', 
					yaxis_title='Latency (us)', 
					legend_title='Message Size',
					xaxis=dict(tickvals=[2, 7, 12, 15, 16, 24, 31, 36, 40, 48]),
                    yaxis=dict(type='log')
)
fig.show()

In [3]:
# Initialize the figure
fig = go.Figure()
# Draw a vertical line at x = 12 and x = 24
fig.add_shape(type="line", x0=12, y0=0, x1=12, y1=1720, line=dict(color="LightSeaGreen", width=3))
fig.add_shape(type="line", x0=24, y0=0, x1=24, y1=1720, line=dict(color="LightSeaGreen", width=3))
# Start plotting the data
for selected in range(len(binary_tree_df)):
	# The two runs
	used_df = binary_tree_df[selected]
	for message_size in message_sizes:
		# Only plot for 512KB and 1MB, a.k.a. one fitting entirely in L1 (and one not fitting
		if message_size in [16384, 524288, 1048576]:
			# Plot the data
			fig.add_trace(go.Scatter(x=used_df[used_df['MessageSize']==message_size]['Processes'], 
									y=used_df[used_df['MessageSize']==message_size]['Latency'],
									mode='lines+markers', name=str(message_size)))
# Update the layout by naming the appropirate tings and highlighting some places of interest in the x-axis
fig.update_layout(title='Latency vs Processes for the Binary Tree using Default Algorithm', 
					xaxis_title='Processes', 
					yaxis_title='Latency (us)', 
					legend_title='Message Size',
					xaxis=dict(tickvals=[2, 12, 20, 24, 29, 36, 40, 47, 48])
				)
fig.show()

In [4]:
# Initialize the figure
fig = go.Figure()
# Draw a vertical line at x = 12 and x = 24
fig.add_shape(type="line", x0=12, y0=0, x1=12, y1=1230, line=dict(color="LightSeaGreen", width=3))
fig.add_shape(type="line", x0=24, y0=0, x1=24, y1=1230, line=dict(color="LightSeaGreen", width=3))
# Start plotting the data
for selected in range(len(chain_df)):
	# The two runs
	used_df = chain_df[selected]
	for message_size in message_sizes:
		# Only plot for 512KB and 1MB, a.k.a. one fitting entirely in L1 and one not fitting
		if message_size in [16384, 524288, 1048576]:
			# Plot the data
			fig.add_trace(go.Scatter(x=used_df[used_df['MessageSize']==message_size]['Processes'], 
									y=used_df[used_df['MessageSize']==message_size]['Latency'],
									mode='lines+markers', name=str(message_size)))
# Update the layout by naming the appropirate tings and highlighting some places of interest in the x-axis
fig.update_layout(title='Latency vs Processes for the Chain using the Default Algorithm', 
					xaxis_title='Processes', 
					yaxis_title='Latency (us)', 
					legend_title='Message Size',
					xaxis=dict(tickvals=[2, 6, 12, 18, 24, 30, 36, 42, 48])
				)
fig.show()

## `barrier` messaging

Lore ipsum etc etc

In [5]:
# Set path to the data
barrier_path = path + '/barrier/results_barrier/'  # Replace with your actual path

# Get the list of files in the directory
files = os.listdir(barrier_path)

# Create a list to store the dataframes
dfs = []

# Read the data from the files; for each file, check if 
# the name contains binary, default or chain and set the
# corresponding value in the dataframe

for file in files:
    if 'tree' in file:
        df = pd.read_csv(os.path.join(barrier_path, file))
        df['algorithm'] = 'Tree'
        dfs.append(df)
    elif 'default' in file:
        df = pd.read_csv(os.path.join(barrier_path, file))
        df['algorithm'] = 'Default'
        dfs.append(df)
    elif 'linear' in file:
        df = pd.read_csv(os.path.join(barrier_path, file))
        df['algorithm'] = 'Linear'
        dfs.append(df)

# Filter out dataframes where the 'Algorithm' column contains '${algorithm}' or is null
valid_dfs = []
for df in dfs:
    if 'Algorithm' in df.columns:
        df['Algorithm'] = df['Algorithm'].astype(str)
        if not df['Algorithm'].str.contains(r'\$\{algorithm\}').any():
            valid_dfs.append(df)

# Split the data into separate dataframes for each algorithm
default_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'Default']
binary_tree_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'binary_tree']
chain_df = [df for df in valid_dfs if df['Algorithm'].iloc[0] == 'chain']

# Select an allocation strategy
default_df = [df[df['Allocation']=='core'] for df in default_df]
binary_tree_df = [df[df['Allocation']=='core'] for df in binary_tree_df]
chain_df = [df[df['Allocation']=='core'] for df in chain_df]

message_sizes = default_df[0]['MessageSize'].unique()
allocations = default_df[0]['Allocation'].unique()


# Initialize the figure
fig = go.Figure()
# Draw a vertical line at x = 12 and x = 24
fig.add_shape(type="line", x0=12, y0=0, x1=12, y1=1040, line=dict(color="LightSeaGreen", width=3))
fig.add_shape(type="line", x0=24, y0=0, x1=24, y1=1040, line=dict(color="LightSeaGreen", width=3))
# Start plotting the data
for selected in range(len(default_df)):
	# The two runs
	used_df = default_df[selected]
	for message_size in message_sizes:
		# Only plot for 512KB and 1MB, a.k.a. one fitting entirely in L1 and one not fitting
		if message_size in [524288, 1048576]:
			# Plot the data
			fig.add_trace(go.Scatter(x=used_df[used_df['MessageSize']==message_size]['Processes'], 
									y=used_df[used_df['MessageSize']==message_size]['Latency'],
									mode='lines+markers', name=str(message_size)))
# Update the layout by naming the appropirate tings and highlighting some places of interest in the x-axis
fig.update_layout(title='Latency vs Processes for the broadcast using Default Algorithm', 
					xaxis_title='Processes', 
					yaxis_title='Latency (us)', 
					legend_title='Message Size',
					xaxis=dict(tickvals=[2, 7, 12, 15, 16, 24, 31, 36, 40, 48])
				)
fig.show()

KeyError: 'MessageSize'