In [None]:
!pip install llnl-thicket==2024.1.0

In [None]:
#import sys
#sys.path.append("/scratch/group/csce-435-f25/python-3.10.8/lib/python3.10/site-packages")
from glob import glob
from tabulate import tabulate

import matplotlib.pyplot as plt
import pandas as pd

import thicket as th
import numpy as np

# Lift pandas' display limits: None means frames are shown with every row and column.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

Read all files

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Folder in Google Drive that holds the Caliper (.cali) profiles
caliper_folder = "/content/drive/My Drive/csce-435/lab-2-files/cali"

# List the folder contents so a wrong path or a missing upload is easy to spot
files_in_folder = os.listdir(caliper_folder)
print("Files in the Folder", caliper_folder, "::")
print(files_in_folder)
print()

# Collect every .cali profile in the folder; sorted so the load order is the same on every run.
# To read a different set of profiles, point caliper_folder at another folder.
cali_files = sorted(glob(os.path.join(caliper_folder, "*.cali")))
if not cali_files:
    # Fail early with a clear message instead of handing the reader an empty list
    raise FileNotFoundError(f"No .cali files found in {caliper_folder}")

# Read all profiles into a single Thicket object
tk = th.Thicket.from_caliperreader(cali_files)

View Calltree

In [None]:
# Print the call tree, annotated with the "Avg time/rank" metric column.
print(tk.tree(metric_column="Avg time/rank"))

Group performance data by `matrix_size` in the Thicket metadata table.

In [None]:

# Show the names of all metadata columns.
print(tk.metadata.columns)

In [None]:
# Merge metadata columns into the performance dataframe (joined on the indexes of both frames)
# NOTE(review): this overwrites tk.dataframe in place, so re-running this cell without
# reloading `tk` presumably does not reproduce the same result — confirm.
tk.dataframe = tk.dataframe.merge(tk.metadata, left_index=True, right_index=True)

# Reset the index, then re-index by 'num_procs' and 'matrix_size' and sort by that index
# NOTE(review): 'node' is not part of the new index; confirm that is intended.
tk.dataframe = tk.dataframe.reset_index().set_index(["num_procs", "matrix_size"]).sort_index()

# Preview the first rows of the re-indexed performance dataframe
tk.dataframe.head()

In [None]:
# Metadata columns used for the plots: run parameters followed by the recorded timings.
# NOTE(review): the average worker receive key is spelled "recieve", unlike the min/max
# keys; it must match the column name stored in the metadata — confirm before renaming.
selected_columns = [
    "opts:output",
    "num_procs",
    "matrix_size",
    "MPI_Reduce-worker_send_time_average",
    "MPI_Reduce-worker_send_time_min",
    "MPI_Reduce-worker_send_time_max",
    "MPI_Reduce-worker_calculation_time_average",
    "MPI_Reduce-worker_calculation_time_min",
    "MPI_Reduce-worker_calculation_time_max",
    "MPI_Reduce-worker_recieve_time_average",
    "MPI_Reduce-worker_receive_time_min",
    "MPI_Reduce-worker_receive_time_max",
    "MPI_Reduce-master_send_receive_time",
    "MPI_Reduce-master_initialization_time",
    "MPI_Reduce-whole_computation_time",
]

# Narrow the Thicket metadata table to just those columns
data = tk.metadata.loc[:, selected_columns]

# Working frame used by the plotting cells
df = pd.DataFrame(data)
display(df.head())

Now that the data is grouped by `matrix_size` in the performance dataframe, you can visualize the performance trends for different matrix sizes.

In [9]:
# Define common variables
processes = [2, 4, 8, 16, 32, 64]
matrix_sizes = [128, 1024, 8192]

In [None]:
# Whole computation time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for msize in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    rows = df[df['matrix_size'] == msize].sort_values(by='num_procs')
    ax.plot(
        rows['num_procs'],
        rows['MPI_Reduce-whole_computation_time'],
        marker='o',
        label=f'Matrix Size {msize}',
    )

ax.set_title('Whole Computation Time vs Number of Processes for Different Matrix Sizes')
ax.set_xlabel('Number of Processes')
ax.set_ylabel('Whole Computation Time (s)')

# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')

ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Master initialization time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-master_initialization_time'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Master Initialization Time (s)')
ax.set_title('Master Initialization Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Master send/receive time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-master_send_receive_time'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Master Send/Receive Time (s)')
ax.set_title('Master Send/Receive Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Average worker send time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_send_time_average'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Average Worker Send Time (s)')
ax.set_title('Average Worker Send Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Minimum worker send time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_send_time_min'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Min Worker Send Time (s)')
ax.set_title('Min Worker Send Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Maximum worker send time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_send_time_max'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Max Worker Send Time (s)')
ax.set_title('Max Worker Send Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Average worker calculation time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_calculation_time_average'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Average Worker Calculation Time (s)')
ax.set_title('Average Worker Calculation Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Minimum worker calculation time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_calculation_time_min'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Min Worker Calculation Time (s)')
ax.set_title('Min Worker Calculation Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Maximum worker calculation time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_calculation_time_max'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Max Worker Calculation Time (s)')
ax.set_title('Max Worker Calculation Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Average worker receive time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    # NOTE(review): the key is spelled "recieve" (unlike the min/max receive keys); it must
    # match the column name present in df — confirm before renaming.
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_recieve_time_average'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Average Worker Receive Time (s)')
ax.set_title('Average Worker Receive Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Minimum worker receive time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_receive_time_min'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Min Worker Receive Time (s)')
ax.set_title('Min Worker Receive Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Maximum worker receive time against process count, one line per matrix size
fig, ax = plt.subplots(figsize=(10, 6))

for size in matrix_sizes:
    # Rows for this matrix size, ordered by process count so the line runs left to right
    subset = df[df['matrix_size'] == size].sort_values(by='num_procs')
    ax.plot(subset['num_procs'], subset['MPI_Reduce-worker_receive_time_max'], marker='o', label=f'Matrix Size {size}')

ax.set_xlabel('Number of Processes')
# The y-axis label names the metric drawn in this figure
ax.set_ylabel('Max Worker Receive Time (s)')
ax.set_title('Max Worker Receive Time vs Number of Processes for Different Matrix Sizes')
# Base-2 log x-axis with a tick at each process count; log y-axis
ax.set_xscale('log', base=2)
ax.set_xticks(processes)
ax.set_xticklabels([str(p) for p in processes])
ax.set_yscale('log')
ax.grid(True)
ax.legend()
plt.show()