# MPI messaging

This notebook analyses `broadcast` and `barrier` messaging, each with 3 different algorithms.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import plotly.express as px
import plotly.graph_objects as go

# Base directory for the result folders: the bcast and barrier data paths below
# are built by appending to the current working directory.
path = os.getcwd()

In [2]:
# Point-to-point latency table, gathered using the latency test within 1 socket.
# Each pair is (MessageSize, Latency); every measurement was taken between
# Start_CPU 0 and End_CPU 2.
# NOTE(review): sizes are presumably bytes and latencies microseconds — confirm.
_latency_samples = [
	(1,       0.19),
	(2,       0.19),
	(4,       0.19),
	(8,       0.19),
	(16,      0.19),
	(32,      0.24),
	(64,      0.24),
	(128,     0.33),
	(256,     0.37),
	(512,     0.44),
	(1024,    0.54),
	(2048,    0.75),
	(4096,    1.21),
	(8192,    2.04),
	(16384,   3.15),
	(32768,   4.97),
	(65536,   8.40),
	(131072,  14.95),
	(262144,  16.22),
	(524288,  30.01),
	(1048576, 61.65),
	(2097152, 155.70),
	(4194304, 356.84),
]

latency_by_size = pd.DataFrame(
	data=[[0, 2, size, latency] for size, latency in _latency_samples],
	columns=['Start_CPU', 'End_CPU', 'MessageSize', 'Latency'],
)

In [3]:
def plot_latency_vs_processes(df, title, scale):
	"""Plot measured and modelled latency against the number of processes.

	For every run in ``df`` and every message size in the notebook-level
	``selected_sizes``, two traces are added to one Plotly figure: the measured
	latency (lines + markers) and a dashed red model curve. The figure is then
	displayed with ``fig.show()``.

	The model curve is ``alpha + b * message_size * (processes - 2)``, where
	``alpha`` is the latency stored for that message size in ``latency_by_size``
	and ``b`` is ``beta`` for sizes up to the small-message threshold and
	``beta2`` above it.

	The function reads these notebook globals, which must be defined before it
	is called: ``selected_sizes``, ``latency_by_size``, ``beta``, ``beta2`` and
	``tickvals``.

	Parameters
	----------
	df : list of pandas.DataFrame
		One frame per run, each with 'Processes', 'MessageSize' and 'Latency'
		columns. An empty list produces an empty figure.
	title : str
		Figure title.
	scale : str
		Plotly y-axis type, passed through unchanged (the notebook uses
		'linear' and 'log').

	Raises
	------
	IndexError
		If a selected message size has no row in ``latency_by_size``.
	"""
	# Sizes up to this threshold use beta, larger ones use beta2.
	# NOTE(review): value kept from the original; 32 KiB would be 32768 — confirm.
	small_message_limit = 32384

	fig = go.Figure()

	for used_df in df:
		for message_size in selected_sizes:
			# Rows of this run that belong to the current message size.
			subset = used_df[used_df['MessageSize'] == message_size]

			# Measured latency
			fig.add_trace(go.Scatter(x=subset['Processes'],
									y=subset['Latency'],
									mode='lines+markers', name=str(message_size)))

			# Point-to-point latency for this size, used as the model's constant term
			alpha = latency_by_size[latency_by_size['MessageSize'] == message_size]['Latency'].values[0]
			per_byte = beta if message_size <= small_message_limit else beta2

			# Model curve, evaluated only at this size's process counts so that the
			# x values are not repeated once per message size present in the run.
			fig.add_trace(go.Scatter(x=subset['Processes'],
									y=alpha + per_byte * message_size * (subset['Processes'] - 2),
									mode='lines',
									name='Theoretical Latency for size ' + str(message_size),
									line=dict(color='red', dash='dash')))

	# Name the axes and legend, and pin the x-axis ticks to the process counts of interest
	fig.update_layout(title=title,
						xaxis_title='Processes',
						yaxis_title='Latency (us)',
						legend_title='Message Size',
						xaxis=dict(tickvals=tickvals),
						yaxis=dict(type=scale)
					)
	fig.show()

## `broadcast` messaging

Let's first load the data

In [4]:
# Directory holding the broadcast measurements
bcast_path = path + '/bcast/results_bcast/'  # Replace with your actual path

# Every entry found in that directory
files = os.listdir(bcast_path)

# Filename keyword -> label stored in the 'algorithm' column.
# Keywords are tried in this order and the first one found in the filename wins;
# files matching none of them are ignored.
bcast_labels = (
    ('binary', 'Binary Tree'),
    ('default', 'Default'),
    ('chain', 'Chain'),
)

# One dataframe per recognised file
dfs = []
for filename in files:
    label = next((text for keyword, text in bcast_labels if keyword in filename), None)
    if label is None:
        continue
    frame = pd.read_csv(os.path.join(bcast_path, filename))
    frame['algorithm'] = label
    dfs.append(frame)

# Keep only frames that have an 'Algorithm' column in which no value contains the
# literal placeholder '${algorithm}'. The column is converted to str in place first.
valid_dfs = []
for frame in dfs:
    if 'Algorithm' not in frame.columns:
        continue
    frame['Algorithm'] = frame['Algorithm'].astype(str)
    has_placeholder = frame['Algorithm'].str.contains(r'\$\{algorithm\}').any()
    if not has_placeholder:
        valid_dfs.append(frame)

# Group the runs by the algorithm named in their first row
runs_by_algorithm = {
    name: [frame for frame in valid_dfs if frame['Algorithm'].iloc[0] == name]
    for name in ('default', 'binary_tree', 'chain')
}

# Within each run keep only the rows of the 'core' allocation strategy
default_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['default']]
binary_tree_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['binary_tree']]
chain_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['chain']]

# Values present in the first default run, plus the plotting configuration
# read by plot_latency_vs_processes
message_sizes = default_df[0]['MessageSize'].unique()
allocations = default_df[0]['Allocation'].unique()
tickvals = [2, 6, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48]
selected_sizes = [8192, 16384, 524288, 1048576]

# Per-byte cost used by the model curves in plot_latency_vs_processes:
# beta for messages up to that function's small-message threshold, beta2 above it.
# Both are currently identical and shared by all three algorithms.
# NOTE(review): 10**6 presumably converts seconds to microseconds, and
# 3.7 * 10**9 with the factor 4 looks like a 3.7 GHz clock moving 4 bytes
# per cycle — confirm.
beta  = 10**6 * (1 / ((3.7 * 10**9 )*4))
beta2 = 10**6 * (1 / ((3.7 * 10**9 )*4))

# Each algorithm is drawn twice: once on a linear and once on a logarithmic y-axis.
# The Chain figures use chain_df; they previously re-plotted binary_tree_df.
bcast_figures = (
    (default_df, 'Latency vs Processes for the broadcast using Default Algorithm'),
    (binary_tree_df, 'Latency vs Processes for the bcast using binary tree Algorithm'),
    (chain_df, 'Latency vs Processes for the bcast using the Chain Algorithm'),
)

for runs, figure_title in bcast_figures:
    for y_scale in ('linear', 'log'):
        plot_latency_vs_processes(runs, figure_title, y_scale)

### Chain algorithm

Latency Model:  
$T = \alpha + \beta n $  
Where:

* $\alpha$ is the latency (startup time) per message.
* $\beta$ is the time per byte.
* $n$ is the number of bytes in the message.

Thus, I expect the total time to depend on the number of CPUs ($np$) as follows:  
$T = \alpha + np(\alpha + \beta n )$  

## `barrier` messaging

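As with the broadcast, the cells below load the barrier measurements and plot latency against the number of processes for the default, tree and linear algorithms.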

In [5]:
# Directory holding the barrier measurements
barrier_path = path + '/barrier/results_barrier/'  # Replace with your actual path

# Every entry found in that directory
files = os.listdir(barrier_path)

# Start from empty containers
dfs = []
default_df, tree_df, linear_df = [], [], []

# Filename keyword -> label stored in the 'algorithm' column.
# Keywords are tried in this order and the first one found in the filename wins;
# files matching none of them are ignored.
barrier_labels = (
    ('tree', 'Tree'),
    ('default', 'Default'),
    ('linear', 'Linear'),
)

for filename in files:
    label = next((text for keyword, text in barrier_labels if keyword in filename), None)
    if label is None:
        continue
    frame = pd.read_csv(os.path.join(barrier_path, filename))
    frame['algorithm'] = label
    dfs.append(frame)

# Keep only frames that have an 'Algorithm' column in which no value contains the
# literal placeholder '${algorithm}'. The column is converted to str in place first.
valid_dfs = []
for frame in dfs:
    if 'Algorithm' not in frame.columns:
        continue
    frame['Algorithm'] = frame['Algorithm'].astype(str)
    has_placeholder = frame['Algorithm'].str.contains(r'\$\{algorithm\}').any()
    if not has_placeholder:
        valid_dfs.append(frame)

# Group the runs by the algorithm named in their first row
runs_by_algorithm = {
    name: [frame for frame in valid_dfs if frame['Algorithm'].iloc[0] == name]
    for name in ('default', 'tree', 'linear')
}

# Within each run keep only the rows of the 'core' allocation strategy
default_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['default']]
tree_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['tree']]
linear_df = [run[run['Allocation'] == 'core'] for run in runs_by_algorithm['linear']]

# Allocation values left in the first default run
allocations = default_df[0]['Allocation'].unique()


# One figure for the default barrier algorithm, with one trace per run
fig = go.Figure()

for run_number, run in enumerate(default_df):
	# Latency of this run against the number of processes
	run_trace = go.Scatter(
		x=run['Processes'],
		y=run['Latency'],
		mode='lines+markers',
		name="Run "+str(run_number),
	)
	fig.add_trace(run_trace)

# Title, axis labels, and x-axis ticks pinned to the process counts in tickvals
fig.update_layout(
	title='Latency vs Processes for the barrier using the Default Algorithm',
	xaxis_title='Processes',
	yaxis_title='Latency (us)',
	xaxis=dict(tickvals=tickvals),
)

fig.show()

In [6]:
# Position (within tree_df) of the run that is left out of the figure.
# NOTE(review): the reason for excluding this run is not recorded here, and the
# position depends on the order in which os.listdir returned the files — confirm.
excluded_tree_run = 5

# Build a separate list instead of overwriting tree_df, so that re-running this
# cell does not drop one more run each time.
tree_runs = [run for position, run in enumerate(tree_df) if position != excluded_tree_run]

# One figure for the tree barrier algorithm, with one trace per remaining run
fig = go.Figure()

for run_number, run in enumerate(tree_runs):
	# Latency of this run against the number of processes
	fig.add_trace(go.Scatter(x=run['Processes'],
							y=run['Latency'],
							mode='lines+markers',
							name="Run "+str(run_number)))

# Title, axis labels, and x-axis ticks pinned to the process counts in tickvals
fig.update_layout(title='Latency vs Processes for the barrier using the tree Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()



In [7]:
# One figure for the linear barrier algorithm, with one trace per run
fig = go.Figure()

for run_number, run in enumerate(linear_df):
	# Latency of this run against the number of processes
	fig.add_trace(go.Scatter(x=run['Processes'],
							y=run['Latency'],
							mode='lines+markers',
							name="Run "+str(run_number)))

# Title (doubled "the" removed), axis labels, and x-axis ticks pinned to tickvals
fig.update_layout(title='Latency vs Processes for the Barrier using the Linear Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()

