# Broadcast: Linear

In [1]:
import pandas as pd

In [2]:
# Initialize empty lists to store data
def txt_to_pd(input_txt): 
    run = []
    process = []
    size = []
    avg_latency = []

    # Open the text file
    with open(input_txt, 'r') as file:
        lines = file.readlines()

        # Initialize variables to hold current run and process numbers
        current_run = None
        current_process = None

        # Iterate through each line in the file
        for line in lines:
            if line.startswith("Run number:"):
                current_run = int(line.split(":")[1].strip())
                
            elif line.startswith("Processes:"):
                current_process = int(line.split(":")[1].strip())
                
            
            elif line[0].isdigit():
                run.append(current_run)
                process.append(current_process)
                parts = line.split()
                size.append(int(parts[0]))
                avg_latency.append(float(parts[1]))

    data = {
    'Run': run,
    'Process': process,
    'Size': size,
    'Avg Latency(us)': avg_latency}

    data = pd.DataFrame(data)

    return data 

In [3]:
input_txt = 'linear_bcast.txt'

btree_bcast = txt_to_pd(input_txt)
b_cast_df = btree_bcast.drop(['Run', 'Size'], axis=1)
b_cast_df

Unnamed: 0,Process,Avg Latency(us)
0,2,0.15
1,3,0.25
2,4,0.13
3,5,0.25
4,6,0.28
...,...,...
493,252,38.39
494,253,38.89
495,254,39.13
496,255,37.96


In [4]:
processes = [i for i in range(2, 257)]

def summary_stats(df, processes):
    avg = []
    proc = []
    for process in processes: 
        subdf = df[(df['Process'] == process)]
        avg.append(subdf['Avg Latency(us)'].mean())
        proc.append(process)

    d = {'processes': proc,
    'avg time': avg}

    data = pd.DataFrame(d)

    return data

linear_bc = summary_stats(b_cast_df, processes)
linear_bc


Unnamed: 0,processes,avg time
0,2,0.155
1,3,0.255
2,4,0.130
3,5,0.250
4,6,0.280
...,...,...
250,252,38.915
251,253,38.910
252,254,39.140
253,255,39.250


In [5]:
# CPUs latencies
ccx = 0.14 # 3
ccd = 0.31 # 7 
numa_ccd = 0.34 # 15 
socket = 0.44 # 63
node = 0.66 # 127
d_node = 1.82 # 255

ccx_l = [ccx] * 3
ccd_l = [ccd] * 4 
numa_ccd_l = [numa_ccd] * 8
socket_l = [socket] * 48 
node_l = [node] * 64 
d_node_l = [d_node] * 128

latency_list = ccx_l + ccd_l + numa_ccd_l + socket_l + node_l + d_node_l

linear_bc['cpu latency'] = latency_list
linear_bc['effective time'] = ((linear_bc['avg time'] - (2 * linear_bc['cpu latency']))) / linear_bc['processes']
mask = linear_bc['effective time'] > 0
linear_bc = linear_bc[mask]

pt2pt = 0.073477

### Send Receive

In [6]:
linear_bc['new estimate'] = (linear_bc['cpu latency']) + pt2pt + ((linear_bc['processes'] - 1) * pt2pt * (linear_bc['cpu latency']))
linear_bc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  linear_bc['new estimate'] = (linear_bc['cpu latency']) + pt2pt + ((linear_bc['processes'] - 1) * pt2pt * (linear_bc['cpu latency']))


Unnamed: 0,processes,avg time,cpu latency,effective time,new estimate
6,8,0.930,0.31,0.038750,0.542922
7,9,1.035,0.34,0.039444,0.613334
8,10,1.015,0.34,0.033500,0.638317
9,11,1.130,0.34,0.040909,0.663299
10,12,1.110,0.34,0.035833,0.688281
...,...,...,...,...,...
250,252,38.915,1.82,0.139980,35.459240
251,253,38.910,1.82,0.139407,35.592968
252,254,39.140,1.82,0.139764,35.726696
253,255,39.250,1.82,0.139647,35.860425


### Model and estimation

In [7]:
import plotly.graph_objects as go
import plotly.express as px

fig = px.scatter(linear_bc, x='processes', y='avg time', title="Broadcast: Linear")

t = go.Line(x=linear_bc["processes"], y=linear_bc['new estimate'], name="estimate")
fig.add_trace(t)

fig.add_vline(x=64, line_dash="dash", line_color="red", annotation_text="Process 64")
fig.add_vline(x=128, line_dash="dash", line_color="blue", annotation_text="Process 128")
fig


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


