# MPI messaging

This script will analyse `broadcast` and `barrier` messaging, each with 3 different algorithms.


In [313]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import plotly.express as px
import plotly.graph_objects as go

# Base directory of the analysis: the result folders read further down are
# built by appending their relative location to the current working directory
path = os.getcwd()

In [314]:
# Reference latencies gathered with the latency test within 1 socket.
# Every row describes the same CPU pair (0 -> 2); the message sizes are the
# powers of two from 2**0 up to 2**22 and each latency is listed in that order.
latency_by_size = pd.DataFrame({
	'Start_CPU': 0,
	'End_CPU': 2,
	'MessageSize': [2**exponent for exponent in range(23)],
	'Latency': [
		0.19, 0.19, 0.19, 0.19, 0.19, 0.24, 0.24, 0.33,
		0.37, 0.44, 0.54, 0.75, 1.21, 2.04, 3.15, 4.97,
		8.40, 14.95, 16.22, 30.01, 61.65, 155.70, 356.84,
	],
})

In [315]:

def plot_latency_vs_processes(df, algorithm, scale):
	'''
	Plots the measured broadcast latency against the number of processes and,
	for the 'binary tree', 'chain' and 'linear' algorithms, overlays a dashed
	theoretical curve.

	One measured trace is drawn for every run in `df` and every message size
	in the module-level `selected_sizes`. The theoretical curve takes its
	startup term alpha from the module-level `latency_by_size` table and the
	x-axis ticks come from the module-level `tickvals`, so these three names
	must be defined before the function is called.

	Parameters
	----------
	df : sequence of pandas.DataFrame
		One dataframe per benchmark run; the columns 'MessageSize',
		'Processes' and 'Latency' are read.
	algorithm : str
		Inserted into the figure title. The values 'binary tree', 'chain' and
		'linear' additionally select the theoretical model; any other value
		(e.g. 'Default') plots the measurements only.
	scale : str
		Passed to plotly as the type of the y axis (the callers use 'linear'
		and 'log').

	Returns
	-------
	None
		The figure is displayed with `fig.show()`.
	'''

	def time_per_byte(bytes_per_cycle):
		# Microseconds needed per byte on a 3.7 GHz core moving `bytes_per_cycle`
		# bytes each cycle; the 10**6 factor converts seconds to microseconds.
		# The divisors passed in below are empirical guesses (64 bytes per
		# cycle would be the best case scenario; the real figure is lower).
		return 10**6 * (1 / ((3.7 * 10**9) * bytes_per_cycle))

	# Initialize the figure
	fig = go.Figure()

	for used_df in df:
		for message_size in selected_sizes:
			used_size_df = used_df[used_df['MessageSize'] == message_size]
			processes = used_size_df['Processes']

			# Measured latency
			fig.add_trace(go.Scatter(x=processes,
									y=used_size_df['Latency'],
									mode='lines+markers', name=str(message_size)))

			# alpha is the latency measured experimentally for in-socket
			# communication at this message size
			alpha = latency_by_size[latency_by_size['MessageSize'] == message_size]['Latency'].values[0]

			if algorithm == 'binary tree':
				beta = time_per_byte(15)
				beta2 = time_per_byte(8)

				head = processes[0:12]
				tail = processes[13:]
				# NOTE(review): `processes[13]` looks the value up by index label
				# when the index is integer-valued, whereas the linear model below
				# uses `.iloc[13]` (position). For filtered frames the label 13
				# may be missing or may not be the first point of `tail`. Confirm
				# the intended reference point before changing it: the divisors
				# above were presumably tuned against the current output.
				tail_reference = processes[13]
				# NOTE(review): position 12 is covered by neither slice, so the
				# concatenated model has one point fewer than the x values.
				head_model = np.ceil(np.log2(head))*(alpha/2) + beta * message_size * head
				tail_model = np.ceil(np.log2(tail))*(alpha/2) + beta2 * message_size * tail - (np.ceil(np.log2(tail_reference))*(alpha/2) + beta2 * message_size * tail_reference)
				small_size_model = pd.concat([head_model, tail_model], ignore_index=True)

			elif algorithm == 'chain':
				beta = time_per_byte(5)

				# The pure latency for the size, halved because it's a broadcast
				# operation, plus one transfer for each of the (processes - 1)
				# messages that need to be sent
				small_size_model = alpha/2 + beta * message_size * (processes - 1)

			elif algorithm == 'linear':
				beta = time_per_byte(4)
				beta2 = time_per_byte(2)
				beta3 = time_per_byte(2)
				beta4 = time_per_byte(2)

				# Number of messages that need to be sent for each process count
				messages = processes - 1

				# Piecewise model: every segment starts from the last value of the
				# previous segment and grows with its own per-byte cost.
				# NOTE(review): position 12 is covered by neither the first nor the
				# second slice, so the concatenated model has one point fewer than
				# the x values.
				first = alpha/2 + beta * message_size * messages[0:12]
				second = alpha/2 + first.iloc[11] - beta2 * message_size * messages.iloc[13] + beta2 * message_size * messages[13:25]
				third = alpha/2 + second.iloc[11] - beta3 * message_size * messages.iloc[25] + beta3 * message_size * messages[25:37]
				fourth = alpha/2 + third.iloc[11] - beta4 * message_size * messages.iloc[37] + beta4 * message_size * messages[37:]
				small_size_model = pd.concat([first, second, third, fourth], ignore_index=True)

			else:
				# No theoretical model for any other algorithm name
				continue

			# Differentiate for sizes up to 32KB and larger than 32KB, which is
			# the size of the L1 cache: larger messages always use the plain
			# alpha/2 + beta * size * (processes - 1) model and are drawn in red
			if message_size <= 32768:
				model_latency = small_size_model
				model_color = 'black'
			else:
				model_latency = alpha/2 + beta * message_size * (processes - 1)
				model_color = 'red'

			fig.add_trace(go.Scatter(x=processes,
									y=model_latency,
									mode='lines',
									# The size goes after the text; the previous order
									# produced labels such as "8192Theoretical Latency for size "
									name='Theoretical Latency for size ' + str(message_size),
									line=dict(color=model_color, dash='dash')))

	# Set the layout of the figure, title, axis labels, etc.
	# Select the scale type of the y axis
	fig.update_layout(title='Latency vs Processes for the broadcast using '+algorithm+' Algorithm',
						xaxis_title='Processes',
						yaxis_title='Latency (us)',
						legend_title='Message Size',
						xaxis=dict(tickvals=tickvals),
						yaxis=dict(type=scale)
					)
	fig.show()



## `broadcast` messaging

Let's first load the data

In [316]:
# Directory holding the broadcast benchmark results
bcast_path = path + '/bcast/results_bcast/'  # Replace with your actual path

# Every entry of that directory is a candidate result file
files = os.listdir(bcast_path)

# One dataframe per recognised result file
dfs = []

# The first keyword (in this order) contained in the file name decides the
# value written to the 'algorithm' column; files matching none are skipped
algorithm_by_keyword = {
	'binary': 'Binary Tree',
	'default': 'Default',
	'chain': 'Chain',
	'linear': 'Linear',
}

for file in files:
	matched_keyword = next((keyword for keyword in algorithm_by_keyword if keyword in file), None)
	if matched_keyword is None:
		continue
	df = pd.read_csv(os.path.join(bcast_path, file))
	df['algorithm'] = algorithm_by_keyword[matched_keyword]
	dfs.append(df)

# Keep only the dataframes that have an 'Algorithm' column in which no value
# contains the unexpanded '${algorithm}' placeholder
valid_dfs = []
for df in dfs:
	if 'Algorithm' not in df.columns:
		continue
	df['Algorithm'] = df['Algorithm'].astype(str)
	has_placeholder = df['Algorithm'].str.contains(r'\$\{algorithm\}').any()
	if not has_placeholder:
		valid_dfs.append(df)

# Group the runs by the algorithm named in their first row and keep only the
# rows of the 'core' allocation strategy
default_df = [run[run['Allocation'] == 'core'] for run in valid_dfs if run['Algorithm'].iloc[0] == 'default']
binary_tree_df = [run[run['Allocation'] == 'core'] for run in valid_dfs if run['Algorithm'].iloc[0] == 'binary_tree']
chain_df = [run[run['Allocation'] == 'core'] for run in valid_dfs if run['Algorithm'].iloc[0] == 'chain']
linear_df = [run[run['Allocation'] == 'core'] for run in valid_dfs if run['Algorithm'].iloc[0] == 'linear']

# Plot parameters: available sizes/allocations are taken from the first default run
message_sizes = default_df[0]['MessageSize'].unique()
allocations = default_df[0]['Allocation'].unique()
# Ticks every 2 processes up to 8, then every 4 up to 48
tickvals = [2, 4, 6, 8] + list(range(12, 49, 4))
selected_sizes = [8192]  # other sizes tried: 16384, 524288, 1048576

# Display the first linear run
linear_df[0]

Unnamed: 0,Algorithm,Allocation,Processes,MessageSize,Latency,algorithm
0,linear,core,2,1,0.19,Linear
1,linear,core,2,2,0.20,Linear
2,linear,core,2,4,0.19,Linear
3,linear,core,2,8,0.19,Linear
4,linear,core,2,16,0.19,Linear
...,...,...,...,...,...,...
982,linear,core,48,65536,105.05,Linear
983,linear,core,48,131072,245.90,Linear
984,linear,core,48,262144,491.28,Linear
985,linear,core,48,524288,990.19,Linear


## Default algorithm

Let's plot the default algorithm

In [317]:
# Default algorithm, once with a linear and once with a logarithmic y axis
for y_scale in ('linear', 'log'):
	plot_latency_vs_processes(default_df, 'Default', y_scale)

## Binary tree algorithm



In [318]:
# Binary tree algorithm, once with a linear and once with a logarithmic y axis
for y_scale in ('linear', 'log'):
	plot_latency_vs_processes(binary_tree_df, 'binary tree', y_scale)

### Chain algorithm

Latency Model:  
$T = \alpha + \beta n $  
Where:

* $\alpha$ is the latency (startup time) per message.
* $\beta$ is the time per byte.
* $n$ is the number of bytes in the message.

Thus, I expect the dependency of the total time on the number of CPUs ($np$) to be:  
$T = \alpha + np(\alpha + \beta n )$  

In [319]:
# Chain algorithm, once with a linear and once with a logarithmic y axis
for y_scale in ('linear', 'log'):
	plot_latency_vs_processes(chain_df, 'chain', y_scale)

In [320]:
# Linear algorithm, once with a linear and once with a logarithmic y axis
for y_scale in ('linear', 'log'):
	plot_latency_vs_processes(linear_df, 'linear', y_scale)

In [321]:
# Compare the broadcast algorithms: average the latency of all runs of each
# algorithm per (Processes, MessageSize) and plot the averages in one figure.

# Label columns are removed before averaging, presumably so that the mean is
# only taken over the numeric columns
label_columns = ['Algorithm', 'Allocation', 'algorithm']

# All runs of one algorithm stacked into a single dataframe
chain_summarized_df = pd.concat(chain_df).drop(columns=label_columns)
binary_tree_summarized_df = pd.concat(binary_tree_df).drop(columns=label_columns)
default_summarized_df = pd.concat(default_df).drop(columns=label_columns)
liner_size_summarized_df = pd.concat(linear_df).drop(columns=label_columns)

# Mean over the runs for every (Processes, MessageSize) pair
summary_default = default_summarized_df.groupby(['Processes', 'MessageSize']).mean().reset_index()
summary_chain = chain_summarized_df.groupby(['Processes', 'MessageSize']).mean().reset_index()
summary_binary_tree = binary_tree_summarized_df.groupby(['Processes', 'MessageSize']).mean().reset_index()
summary_linear = liner_size_summarized_df.groupby(['Processes', 'MessageSize']).mean().reset_index()

fig = go.Figure()

# Traces are added algorithm by algorithm, one per selected message size
summaries_by_label = [
	('Default', summary_default),
	('Binary Tree', summary_binary_tree),
	('Chain', summary_chain),
	('Linear', summary_linear),
]
for algorithm_label, summary in summaries_by_label:
	for message_size in selected_sizes:
		# Filter into a separate frame: overwriting the summary itself would
		# leave nothing to select from for any further message size
		size_summary = summary[summary['MessageSize'] == message_size]
		fig.add_trace(go.Scatter(x=size_summary['Processes'],
					y=size_summary['Latency'],
					mode='lines+markers', name=algorithm_label + ' ' + str(message_size)))

# Set the layout of the figure, title, axis labels, etc.
# The y axis uses a linear scale
fig.update_layout(title='COmparison of the average latency vs Processes for the broadcast using different Algorithms',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					legend_title='Message Size',
					xaxis=dict(tickvals=tickvals),
					yaxis=dict(type='linear')
)

fig.show()


## `barrier` messaging

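Let's now load the `barrier` results and look at how the latency of the default, tree and linear algorithms changes with the number of processes for each allocation strategy.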

In [322]:
# Directory holding the barrier benchmark results
barrier_path = path + '/barrier/results_barrier/'  # Replace with your actual path

# Every entry of that directory is a candidate result file
files = os.listdir(barrier_path)

# Containers: all loaded dataframes and the per-algorithm run lists
dfs = []
default_df, tree_df, linear_df = [], [], []

# The first keyword (in this order) contained in the file name decides the
# value written to the 'algorithm' column; files matching none are skipped
barrier_label_by_keyword = {'tree': 'Tree', 'default': 'Default', 'linear': 'Linear'}

for file in files:
	matched_keyword = next((keyword for keyword in barrier_label_by_keyword if keyword in file), None)
	if matched_keyword is None:
		continue
	df = pd.read_csv(os.path.join(barrier_path, file))
	df['algorithm'] = barrier_label_by_keyword[matched_keyword]
	dfs.append(df)

# Keep only the dataframes that have an 'Algorithm' column in which no value
# contains the unexpanded '${algorithm}' placeholder
valid_dfs = []
for df in dfs:
	if 'Algorithm' not in df.columns:
		continue
	df['Algorithm'] = df['Algorithm'].astype(str)
	has_placeholder = df['Algorithm'].str.contains(r'\$\{algorithm\}').any()
	if not has_placeholder:
		valid_dfs.append(df)

# Group the runs by the algorithm named in their first row
default_df = [run for run in valid_dfs if run['Algorithm'].iloc[0] == 'default']
tree_df = [run for run in valid_dfs if run['Algorithm'].iloc[0] == 'tree']
linear_df = [run for run in valid_dfs if run['Algorithm'].iloc[0] == 'linear']

# The two label columns are no longer needed; they are dropped in place, so
# the same dataframes referenced from dfs / valid_dfs lose them as well
for algorithm_runs in (default_df, tree_df, linear_df):
	for run in algorithm_runs:
		run.drop(columns=['Algorithm', 'algorithm'], inplace=True)

allocations = default_df[0]['Allocation'].unique()

# Both figures of this cell share the same title, axis labels and x ticks
default_barrier_layout = dict(title='Latency vs Processes for the barrier using the Default Algorithm',
								xaxis_title='Processes',
								yaxis_title='Latency (us)',
								xaxis=dict(tickvals=tickvals))

# First figure: one trace per run and allocation strategy
fig = go.Figure()

for used_df in default_df:
	for allocation in allocations:
		used_allocation_df = used_df[used_df['Allocation'] == allocation]
		fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
								y=used_allocation_df['Latency'],
								mode='lines+markers',
								name=allocation))

fig.update_layout(**default_barrier_layout)

fig.show()

# Mean and standard deviation over all runs for every (Processes, Allocation)
# pair, merged side by side with '_mean' / '_std' column suffixes
default_cumulative = pd.concat(default_df, ignore_index=True)
default_grouped = default_cumulative.groupby(['Processes', 'Allocation'], as_index=False)
default_cumulative_mean = default_grouped.mean()
default_cumulative_std = default_grouped.std()
default_cumulative_summarized = default_cumulative_mean.merge(default_cumulative_std, on=['Processes', 'Allocation'], suffixes=('_mean', '_std'))

# Second figure: the mean latency, one trace per allocation strategy
fig = go.Figure()

used_df = default_cumulative_summarized
for allocation in allocations:
	used_allocation_df = used_df[used_df['Allocation'] == allocation]
	fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
							y=used_allocation_df['Latency_mean'],
							mode='lines+markers',
							name=allocation))

fig.update_layout(**default_barrier_layout)

fig.show()


In [323]:
# Barrier latency with the tree algorithm: first every run on its own, then
# the mean over the runs.

# First figure: one trace per run and allocation strategy
fig = go.Figure()

# Leave out the runs at positions 5 and 6 of tree_df.
# NOTE(review): tree_df follows the order in which the result files were
# listed from disk, so these positions may not refer to the same runs on
# another machine; confirm which runs are meant to be excluded.
cleaned_tree_df = [run for position, run in enumerate(tree_df) if position not in (5, 6)]

# Order every run by allocation and process count so the lines are drawn left to right
cleaned_tree_df = [run.sort_values(['Allocation', 'Processes'], ascending=True) for run in cleaned_tree_df]

for selected, used_df in enumerate(cleaned_tree_df):
	for allocation in allocations:
		used_allocation_df = used_df[used_df['Allocation'] == allocation]
		# The run number is appended to the allocation name to tell the runs apart
		fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
								y=used_allocation_df['Latency'],
								mode='lines+markers',
								name=allocation+str(selected)))

# Name the figure and its axes and highlight the process counts of interest on the x axis
fig.update_layout(title='Latency vs Processes for the barrier using the the tree Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()

# Mean and standard deviation over the kept runs for every
# (Processes, Allocation) pair, merged with '_mean' / '_std' column suffixes
tree_cumulative = pd.concat(cleaned_tree_df, ignore_index=True)
tree_grouped = tree_cumulative.groupby(['Processes', 'Allocation'], as_index=False)
tree_cumulative_mean = tree_grouped.mean()
tree_cumulative_std = tree_grouped.std()
tree_cumulative_summarized = tree_cumulative_mean.merge(tree_cumulative_std, on=['Processes', 'Allocation'], suffixes=('_mean', '_std'))

# Second figure: the mean latency, one trace per allocation strategy
fig = go.Figure()

used_df = tree_cumulative_summarized
for allocation in allocations:
	used_allocation_df = used_df[used_df['Allocation'] == allocation]
	fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
							y=used_allocation_df['Latency_mean'],
							mode='lines+markers',
							name=allocation))

# The data shown is the tree algorithm; the title previously named the
# default algorithm
fig.update_layout(title='Latency vs Processes for the barrier using the tree Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()

In [324]:
# Barrier latency with the linear algorithm: first every run on its own, then
# the mean over the runs.

# First figure: one trace per run and allocation strategy
fig = go.Figure()

for selected, used_df in enumerate(linear_df):
	for allocation in allocations:
		used_allocation_df = used_df[used_df['Allocation'] == allocation]
		# The run number is appended to the allocation name to tell the runs apart
		fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
								y=used_allocation_df['Latency'],
								mode='lines+markers',
								name=allocation+str(selected)))

# Name the figure and its axes and highlight the process counts of interest on the x axis
fig.update_layout(title='Latency vs Processes for the Barrier using the the Linear Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()

# Mean and standard deviation over all runs for every (Processes, Allocation)
# pair, merged side by side with '_mean' / '_std' column suffixes
summarized_linear_df = pd.concat(linear_df, ignore_index=True)
linear_grouped = summarized_linear_df.groupby(['Processes', 'Allocation'], as_index=False)
summarized_linear_df_mean = linear_grouped.mean()
summarized_linear_df_std = linear_grouped.std()
summarized_linear_df_summarized = summarized_linear_df_mean.merge(summarized_linear_df_std, on=['Processes', 'Allocation'], suffixes=('_mean', '_std'))

# Second figure: the mean latency, one trace per allocation strategy
fig = go.Figure()

used_df = summarized_linear_df_summarized
for allocation in allocations:
	used_allocation_df = used_df[used_df['Allocation'] == allocation]
	fig.add_trace(go.Scatter(x=used_allocation_df['Processes'],
							y=used_allocation_df['Latency_mean'],
							mode='lines+markers',
							name=allocation))

# Name the figure and its axes and highlight the process counts of interest on the x axis
fig.update_layout(title='Latency vs Processes for the Barrier using the Linear Algorithm',
					xaxis_title='Processes',
					yaxis_title='Latency (us)',
					xaxis=dict(tickvals=tickvals)
				)

fig.show()
