# Broadcast: Binary Tree

In [1]:
import pandas as pd

# Initialize empty lists to store data
def txt_to_pd(input_txt): 
    run = []
    process = []
    size = []
    avg_latency = []

    # Open the text file
    with open(input_txt, 'r') as file:
        lines = file.readlines()

        # Initialize variables to hold current run and process numbers
        current_run = None
        current_process = None

        # Iterate through each line in the file
        for line in lines:
            if line.startswith("Run number:"):
                current_run = int(line.split(":")[1].strip())
                
            elif line.startswith("Processes:"):
                current_process = int(line.split(":")[1].strip())
                
            
            elif line[0].isdigit():
                run.append(current_run)
                process.append(current_process)
                parts = line.split()
                size.append(int(parts[0]))
                avg_latency.append(float(parts[1]))

    data = {
    'Run': run,
    'Process': process,
    'Size': size,
    'Avg Latency(us)': avg_latency}

    data = pd.DataFrame(data)

    return data 

df = txt_to_pd('btree_bcast.txt')
b_cast_df = df.drop(['Run', 'Size'], axis=1)

In [14]:
# group the entries by the number of processes
processes = [i for i in range(2, 257)]

def summary_stats(df, processes):
    avg = []
    proc = []
    for process in processes: 
        subdf = df[(df['Process'] == process)]
        avg.append(subdf['Avg Latency(us)'].mean())
        proc.append(process)

    d = {'processes': proc,
    'avg time': avg}

    data = pd.DataFrame(d)

    return data

binary_df = summary_stats(b_cast_df, processes)
binary_df = binary_df.drop(binary_df.index[-1])


In [15]:
# CPUs latencies 

ccx = 0.14 # 3
ccd = 0.31 # 7 
numa_ccd = 0.34 # 15 
socket = 0.44 # 63
node = 0.66 # 127
d_node = 1.82 # 255

ccx_l = [ccx] * 3
ccd_l = [ccd] * 4 
numa_ccd_l = [numa_ccd] * 8
socket_l = [socket] * 48 
node_l = [node] * 64 
d_node_l = [d_node] * 127

latency_list = ccx_l + ccd_l + numa_ccd_l + socket_l + node_l + d_node_l

binary_df['cpu latency'] = latency_list

binary_df['effective time'] = binary_df['avg time'] - binary_df['cpu latency']
binary_df

Unnamed: 0,processes,avg time,cpu latency,effective time
0,2,0.155,0.14,0.015
1,3,0.275,0.14,0.135
2,4,0.175,0.14,0.035
3,5,0.315,0.31,0.005
4,6,0.375,0.31,0.065
...,...,...,...,...
249,251,16.540,1.82,14.720
250,252,16.930,1.82,15.110
251,253,17.390,1.82,15.570
252,254,17.570,1.82,15.750


In [16]:
import plotly.express as px
import plotly.graph_objects as go

# Plot the execution time 

fig = px.line(binary_df, x='processes', y='avg time', title="Broadcast: Binary Tree")
fig

In [5]:
# we can see a change in slope of the line a the point processes=150
cutpoint = 150 
segment1 = binary_df[binary_df['processes'] < 150]
segment2 = binary_df[binary_df['processes'] >= 150]

# Assumption: given tha the 
overhead1 = ((segment1['effective time'].iloc[-1] - segment1['effective time'].iloc[0]) / 
                  (segment1['processes'].iloc[-1] - segment1['processes'].iloc[0]))

overhead2 = ((segment2['effective time'].iloc[-1] - segment2['effective time'].iloc[0]) / 
                  (segment2['processes'].iloc[-1] - segment2['processes'].iloc[0]))

print("Slope of segment 1:", overhead1)
print("Slope of segment 2:", overhead2)

binary_df.loc[(binary_df['processes'] > 2) & (binary_df['processes'] < 150), 'overhead'] = 1 + overhead1
binary_df.loc[binary_df['processes']  >= 150, 'overhead'] = 1 + overhead2
binary_df.loc[binary_df['processes'] == 2, 'overhead'] = 0 
binary_df

Slope of segment 1: 0.011768707482993197
Slope of segment 2: 0.13552380952380955


Unnamed: 0,processes,avg time,cpu latency,effective time,overhead
0,2,0.155,0.14,0.015,0.000000
1,3,0.275,0.14,0.135,1.011769
2,4,0.175,0.14,0.035,1.011769
3,5,0.315,0.31,0.005,1.011769
4,6,0.375,0.31,0.065,1.011769
...,...,...,...,...,...
249,251,16.540,1.82,14.720,1.135524
250,252,16.930,1.82,15.110,1.135524
251,253,17.390,1.82,15.570,1.135524
252,254,17.570,1.82,15.750,1.135524


In [6]:
import math
# determine the tree height

binary_df['height'] = (binary_df['processes'] + 1).apply(lambda x: math.floor(math.log2(x)))
binary_df['communication time'] = binary_df.apply(lambda row: (row['effective time']/ row['overhead']) / math.floor(math.log2(row['processes']-1)), axis=1)
binary_df = binary_df.drop(binary_df.index[0])
binary_df


divide by zero encountered in scalar divide



Unnamed: 0,processes,avg time,cpu latency,effective time,overhead,height,communication time
1,3,0.275,0.14,0.135,1.011769,2,0.133430
2,4,0.175,0.14,0.035,1.011769,2,0.034593
3,5,0.315,0.31,0.005,1.011769,2,0.002471
4,6,0.375,0.31,0.065,1.011769,2,0.032122
5,7,0.500,0.31,0.190,1.011769,3,0.093895
...,...,...,...,...,...,...,...
249,251,16.540,1.82,14.720,1.135524,7,1.851883
250,252,16.930,1.82,15.110,1.135524,7,1.900948
251,253,17.390,1.82,15.570,1.135524,7,1.958819
252,254,17.570,1.82,15.750,1.135524,7,1.981464


In [7]:

fig = px.bar(binary_df, x='processes', y='communication time', title='pt2pt communication time')
fig.show()

## Model and Estimation

# LogP

In [8]:
binary_df['LogP'] = binary_df.apply(lambda row: (math.ceil(math.log2(row['processes'] + 1) -1)) * (row['cpu latency'] * row['overhead']), axis=1)
binary_df

Unnamed: 0,processes,avg time,cpu latency,effective time,overhead,height,communication time,LogP
1,3,0.275,0.14,0.135,1.011769,2,0.133430,0.141648
2,4,0.175,0.14,0.035,1.011769,2,0.034593,0.283295
3,5,0.315,0.31,0.005,1.011769,2,0.002471,0.627297
4,6,0.375,0.31,0.065,1.011769,2,0.032122,0.627297
5,7,0.500,0.31,0.190,1.011769,3,0.093895,0.627297
...,...,...,...,...,...,...,...,...
249,251,16.540,1.82,14.720,1.135524,7,1.851883,14.466573
250,252,16.930,1.82,15.110,1.135524,7,1.900948,14.466573
251,253,17.390,1.82,15.570,1.135524,7,1.958819,14.466573
252,254,17.570,1.82,15.750,1.135524,7,1.981464,14.466573


In [9]:
fig = px.line(binary_df, x='processes', y='avg time', title="Broadcast: Binary")
t = go.Line(x=binary_df["processes"], y=binary_df['LogP'], name="logP")
fig.add_trace(t)


fig.add_vline(x=64, line_dash="dash", line_color="red", annotation_text="Process 64")
fig.add_vline(x=128, line_dash="dash", line_color="blue", annotation_text="Process 128")
fig


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


