In [12]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import math
from binarytree import build as btree
from sklearn.linear_model import LinearRegression

As explained in the report, we hardcode the latencies at the critical points and define a function that returns the CPU latency between Core #0 and Core #p. The function interpolates trivially: each p is assigned the latency of the first critical point that is greater than or equal to p, so the value stays constant until the next critical point is passed (values of p beyond the last critical point reuse its latency).

In [13]:
# Hard-coded latency at each critical point, keyed by the point's number as a string
# (units are not stated here). Keep the entries in ascending key order: the lookup in
# get_latency() scans them in insertion order and returns the first match.
latencies = {"1": 0.10, "4": 0.20, "8": 0.30, "16": 0.50, "24": 0.80, "48": 2.00}

# Step-wise latency lookup for a given number of processes
def get_latency(p):
    """Return the latency of the first critical point whose key is >= p.

    The entries of the module-level ``latencies`` dict are scanned in
    insertion order. When no key (converted to int) is >= p, the value of
    the last entry is returned instead.
    """
    # Value used when p lies beyond every critical point.
    beyond_last = latencies[list(latencies.keys())[-1]]
    candidates = (value for key, value in latencies.items() if int(key) >= p)
    return next(candidates, beyond_last)

# Tabulate the looked-up latency for every process count from 2 to 47
df = (
    pd.DataFrame(data={"cpu": range(2, 48)})
    # One get_latency() call per row, in row order
    .assign(avg_latency=lambda frame: [get_latency(count) for count in frame["cpu"]])
    .dropna()
)

Define a function that computes the execution time of the Binary Tree Broadcast algorithm with `np` communicating processes. As explained in further detail in the report, the idea is to build a balanced binary tree (using the `binarytree` Python module) with `np` nodes, representing the `np` processes involved in the broadcast. The tree is then traversed recursively from the root, accumulating the latency of each parent-to-child hop (counted twice) and keeping the maximum cumulative latency over the traversals of each node's children. This models the parallel propagation from one layer of the tree to the next.

In [8]:
# Function to estimate the broadcast time over a binary tree of np processes
def get_bintree_time(np):
  """Estimate the binary-tree broadcast time for ``np`` processes.

  A tree is built with ``btree`` from the values ``0 .. np-1``. It is then
  walked from the root: every parent-to-child hop adds twice the latency
  between the two node values, and the largest accumulated total over all
  branches is returned. If ``btree`` yields no tree, the initial ``0.0``
  is returned.

  Note: inside this function the parameter ``np`` shadows the module-level
  numpy alias. The ``"48"`` entry of ``latencies`` is never read here.
  """
  # (group width, key into `latencies`) pairs, checked from the narrowest group outward
  tiers = ((4, "1"), (8, "4"), (16, "8"), (24, "16"))

  def hop_latency(src, dst):
    # Two ids share a tier when integer division by the tier width agrees
    for width, key in tiers:
      if src // width == dst // width:
        return latencies[key]
    # No tier in common
    return latencies["24"]

  def longest_path(node, elapsed):
    if not node:
      return elapsed
    branch_totals = []
    for child in (node.left, node.right):
      if child:
        # Each hop is counted twice
        reached = elapsed + 2 * hop_latency(node.value, child.value)
      else:
        reached = elapsed
      branch_totals.append(longest_path(child, reached))
    # Children are served in parallel, so the slower branch dominates
    return max(branch_totals)

  return longest_path(btree(range(0, np)), 0.0)

In [14]:
# Load the recorded broadcast timings
actual = pd.read_csv("20240719_112032_thin_bcast5_msize.txt")
# NOTE(review): this assigns a whole DataFrame to one column; presumably the file
# holds a single column of timings, one per row of df — TODO confirm
df["actual_bcast"] = actual
# Model estimate for every process count in the "cpu" column
df["estimate_bcast"] = [get_bintree_time(int(procs)) for procs in df["cpu"]]

In [15]:
# Display the table: cpu, avg_latency, actual_bcast and estimate_bcast columns
df

Unnamed: 0,cpu,avg_latency,actual_bcast,estimate_bcast
0,2,0.2,0.48,0.2
1,3,0.2,0.34,0.2
2,4,0.2,0.43,0.4
3,5,0.3,0.5,0.6
4,6,0.3,0.57,0.6
5,7,0.3,0.71,0.6
6,8,0.3,0.92,0.8
7,9,0.5,0.96,1.0
8,10,0.5,0.89,1.2
9,11,0.5,0.98,1.2


In [16]:
# Values from the "actual_bcast" column as points, one per cpu count
fig = px.scatter(data_frame=df, x="cpu", y="actual_bcast")
# Overlay the binary-tree estimate as a line
fig.add_scatter(x=df["cpu"], y=df["estimate_bcast"], mode="lines", name="est_bcast")