In [27]:
import getpass
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import chart_studio
import chart_studio.plotly as py

import experiment_helpers

# debug: reload experiment_helpers so that edits to the helper module are picked up
# without restarting the kernel. The star import is repeated after reload() so the
# names in this notebook are rebound to the reloaded definitions.
from importlib import reload
from experiment_helpers import *
reload(experiment_helpers)
from experiment_helpers import *

In [2]:
### Update per hardware setting.
## This label is prepended to every plot title by plot(), including the titles
## of charts uploaded to chart-studio.
PLACE = "CLOUDLAB:"

In [3]:
def add_annotation(text):
    """Wrap `text` in <h3><pre> markup and append it to the global `annotations` list."""
    annotations.append("<h3><pre>%s</pre></h3>" % text)

In [4]:
%%javascript
// Override the output area's scroll check so that it always answers "do not scroll",
// regardless of how many lines the output has.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

### Set up remote plotting

In [11]:
# chart-studio credentials are only configured for the chart owner's login, so
# that other users do not override the owner's uploaded plots.
# getpass.getuser() is used instead of os.getlogin(): the latter raises OSError
# when the kernel process has no controlling terminal.
if getpass.getuser() == 'narekg':
    # The API key is read from the environment instead of being stored in the notebook.
    chart_studio_api_key = os.environ.get("CHART_STUDIO_API_KEY")
    if chart_studio_api_key:
        chart_studio.tools.set_credentials_file(username='ngalstyan', api_key=chart_studio_api_key)
    else:
        print("CHART_STUDIO_API_KEY is not set; chart-studio credentials were left unchanged")

from datetime import date
# Set to True to upload every figure passed to plot() to chart-studio.
UPLOAD_PLOTS = False
def plot(fig):
    """Prefix the figure title with PLACE, optionally upload the figure, then show it.

    When UPLOAD_PLOTS is true the figure is uploaded with `py.plot` under a
    filename built from today's date and the prefixed title (cut to 50
    characters); the returned URL is printed and appended to the title.

    Args:
        fig: figure object exposing `layout.title.text` and `show()`.

    Returns:
        Whatever `fig.show()` returns.
    """
    # The title text may be None for an untitled figure; treat it as empty so
    # the PLACE prefix can still be applied instead of raising TypeError.
    fig.layout.title.text = PLACE + (fig.layout.title.text or "")
    if UPLOAD_PLOTS:
        filename = ("%s:%s" % (date.today(), fig.layout.title.text))[:50]
        # Hand the computed name to the upload call; it used to be built and then dropped.
        url = py.plot(fig, filename=filename, auto_open=False)
        print(url)
        fig.layout.title.text += url

    return fig.show()

# Experiment plots

## Define experiments

In [23]:
# Experiment configurations that are loaded and compared in the cells below.
# Comment entries in or out to change the selection.
EXPERIMENT_TYPES = [
    # "no_prefetching",
    "linux_prefetching_asyncwrites",
    # "linux_prefetching_ssdopt",
    # "linux_prefetching_ssdopt_asyncwrites",
    # "tape_prefetching_syncwrites",
    "tape_prefetching_asyncwrites",
    # "tape_prefetching_asyncwrites_linux",
    # "tape_prefetching_asyncwrites_offload_fetch",
]

# HTML snippets collected by add_annotation().
annotations = []

In [24]:
# Workloads whose results are loaded and plotted; the commented-out names are
# currently disabled.
WORKLOADS = [
    "mmult_eigen",
    "mmult_eigen_vec",
    "mmult_eigen_dot",
    # "vec_eigen",
    # "dot_eigen", "mmap_random_rw", "kissfft", "kmeans", "torch",
    # "bitonic_merge", "native_sort",
    # "bitonic_sort", "torch", "torch_par", "torch_par4", "linpack", "mmult_eigen_par",
    # "alexnet", "vgg16", "batcher_sort",
]

## Load data (a single pandas table of all experiments per workload)

In [35]:
# Map each workload name to its augmented results table, built from the raw
# experiment data found under ../experiment_results_nproc_4/.
data = {}
for workload in WORKLOADS:
    raw_tables = get_experiment_data(
        EXPERIMENT_TYPES,
        1,
        workload,
        "../experiment_results_nproc_4/",
    )
    data[workload] = augment_tables(raw_tables, filter_raw=True)

## Explore & Plot

In [36]:
## TODO:::take_column_named:: VERY VERY HACKY.. assumes all tables have data appropriately sorted
## sanity check later with more data, if these plots become crucial

# Absolute wallclock runtime. The figure is built but not displayed; uncomment
# plot(fig) to show it.
runtime = take_column_named("Measured(wallclock) runtime", data).dropna()
fig = px.line(runtime, title='Absolute(Wallclock) Runtime vs. Local Memory',
              animation_frame="Experiment Name")
fig.update_layout(
    xaxis_title="Ratio(%)",
    yaxis_title="Measured(wallclock) runtime(s)",
)
#plot(fig)

# Degradation, one animation frame per experiment.
degradations = take_column_named("Degradation(%)", data).dropna()
fig = px.line(degradations, title='Normalized Runtime(Wallclock) vs. Local Memory',
              animation_frame="Experiment Name")
fig.update_layout(
    xaxis_title="Ratio(%)",
    yaxis_title="Degradation(%)",
)
plot(fig)

# Degradation with eviction time subtracted. Built but not displayed; uncomment
# plot(fig) to show it.
degradations_no_ev = take_column_named("Degradation w/o Evictions(%)", data).dropna()
fig = px.line(degradations_no_ev, title='Normalized Runtime vs. Local Memory (SUBTRACT time spent in evictions)',
              animation_frame="Experiment Name")
fig.update_layout(
    xaxis_title="Ratio(%)",
    yaxis_title="Degradation w/o Evictions(%)",
)
#plot(fig)

# One figure per workload with one line per experiment: the workload's degradation
# column is pivoted so that every "Experiment Name" becomes its own column.
for workload in WORKLOADS:
    per_experiment = degradations.pivot(columns="Experiment Name",
                                        values="(%s)Degradation(%%)" % workload)
    fig = px.line(per_experiment,
                  title='Performance comparison under different prefetching strategies - %s' % workload)
    fig.update_layout(
        xaxis_title="Ratio(%)",
        yaxis_title="Degradation(%)",
    )
    plot(fig)

In [None]:
# Build one "components of runtime" figure per workload, keep it in
# runtime_components, and display it with the y-axis fixed to 0-15.
runtime_components = []

for workload in WORKLOADS:
    fig = get_components_of_runtime(data[workload], "%s" % workload)
    fig.update_layout(yaxis_range=[0, 15])
    runtime_components.append(fig)
    plot(fig)

In [None]:
# NIC monitor data for the mmult_eigen workload, for the selected experiment types.
# NOTE(review): this reads ../experiment_results while the runtime tables are loaded
# from ../experiment_results_nproc_4/ — confirm the two are meant to differ.
nic_data = get_nic_monitor_data(
    EXPERIMENT_TYPES,
    "mmult_eigen",
    "../experiment_results",
)

In [None]:
# One NIC-traffic figure per experiment type: transmitted and received MB over
# time, animated with one frame per RATIO value.
for exp in EXPERIMENT_TYPES:
    rows_for_exp = nic_data["Experiment Name"] == exp
    exp_traffic = nic_data.loc[rows_for_exp].sort_values(["RATIO", "Time(s)"])
    fig = px.line(
        exp_traffic,
        x='Time(s)',
        y=['Xmit(MB)', 'Recv(MB)'],
        animation_frame="RATIO",
        title="NIC traffic: mmult_eigen, %s" % exp,
    )
    fig.update_layout(xaxis_title="Seconds after App start", yaxis_title="Data (MB)")
    #plot(fig)
    fig.show()

Multi-Switch Analysis
==================

In [None]:
# Load the augmented tables for every hop count as data_by_hop[hop][workload].
# Each hop count is read from its own "../experiment_results_<hop>" directory.
# NOTE(review): get_experiment_data is called with three arguments here, but the
# single-switch loading cell passes four (an extra `1` after EXPERIMENT_TYPES) —
# confirm which signature the helper currently has.
hops = (0, 1, 2, 3, 4)
data_by_hop = {}
for hop in hops:
    tables_for_hop = {}
    data_by_hop[hop] = tables_for_hop
    for workload in WORKLOADS:
        hop_results = get_experiment_data(EXPERIMENT_TYPES, workload, "../experiment_results_{0}".format(hop))
        tables_for_hop[workload] = augment_tables(hop_results, filter_raw=True)

In [None]:
# Ratio (index label of the degradation tables) at which every hop count is sampled.
SLICE_RATIO = 30

# Build one frame per hop count holding the degradation rows at SLICE_RATIO,
# re-indexed by "Hop Count", and stack them into hop_frame.
# Loop-local names are used so the notebook-level `data` and `degradations`
# from the single-switch cells are not rebound by running this cell.
frames_by_hop = []
for hop, tables_for_hop in data_by_hop.items():
    hop_degradations = take_column_named("Degradation(%)", tables_for_hop).dropna()
    # hop_degradations = take_column_named("Measured(wallclock) runtime", tables_for_hop).dropna()
    degradation_for_hop = (
        # A list selector always yields a DataFrame, even if only one row matches;
        # assign() adds the column on a new frame instead of writing into a slice.
        hop_degradations.loc[[SLICE_RATIO]]
        .assign(**{"Hop Count": hop})
        .set_index("Hop Count")
    )
    frames_by_hop.append(degradation_for_hop)

hop_frame = pd.concat(frames_by_hop)
hop_frame

In [None]:
# hop_frame is indexed by "Hop Count", so the x-axis is the hop count; the title
# names that axis rather than local memory.
fig = px.line(hop_frame, title='Normalized Runtime(Wallclock) vs. Hop Count',
              animation_frame="Experiment Name")
# update_layout is the last expression of the cell, so its return value is the cell output.
fig.update_layout(
    xaxis_title="Hop Count",
    yaxis_title="Normalized Runtime(Wallclock)",
    yaxis_range=[0, 800],
)

In [None]:
# Start a fresh list for the per-hop figures; this rebinds the runtime_components
# name that the per-workload runtime-components cell also assigns.
runtime_components = []

def revised_get_components_of_runtime(table, name="unnamed"):
    """Build an animated stacked-area figure of the runtime components in `table`.

    The six component columns are divided by 1e6 before plotting (the y-axis is
    labelled in seconds, so the inputs are presumably microseconds — TODO confirm).
    One animation frame is produced per "Experiment Name".

    Args:
        table: DataFrame holding the component columns and "Experiment Name".
        name: label inserted into the figure title.

    Returns:
        The plotly figure.
    """
    component_columns = [
        "Baseline User Time",
        "Extra User Time",
        "Eviction Time",
        "Baseline minor PF Time",
        "Extra Minor PF Time",
        "Major PF Time",
    ]
    component_colors = ['#ab63fa', '#3c0c73', '#636efa', '#ef553b',  '#9e1700','#00cc96']

    sub_tbl = table[component_columns].div(1e6).assign(**{"Experiment Name": table["Experiment Name"]})
    fig = px.area(
        sub_tbl,
        title='Components of runtime(%s)' % name,
        color_discrete_sequence=component_colors,
        animation_frame="Experiment Name",
    )
    fig.update_layout(xaxis_title="Hop Count", yaxis_title="Time(seconds)")
 #   fig.add_trace(px.line(table["Measured(wallclock) runtime"]).data[0])
  #  fig.add_trace(px.line(table["sys+usr"] / 1e6).data[0])

    def anno(text, posx = 1.1, posy=0.32):
        # Each annotation is placed 0.04 lower than the previous one; every
        # annotation after the first is also shifted right by 0.15.
        vertical_shift = -0.04 * anno.counter
        if anno.counter > 0:
            posx += 0.15
        fig.add_annotation(text=text,
                           xref="paper", yref="paper",
                           x=posx, y=posy + vertical_shift, showarrow=False)
        anno.counter += 1
    anno.counter = 0

    # The annotation calls below are currently disabled.
    #anno("Workload constants:")
    #anno("Baseline System Time(s): %.2f" % (table["Baseline System Time"].values[0]/1e6))
    #anno("Baseline App Time(s): %.2f" % (table["Baseline App Time(us)"].values[0] / 1e6))
    #anno("Baseline Minor PF Time(us): %.2f" % table["Baseline Single Minor PF Time(us)"].values[0])

    return fig

# For every workload: take the rows at SLICE_RATIO from each hop count's table,
# index them by "Hop Count", stack them, and plot the runtime components.
for workload in WORKLOADS:
    hop_data_frames = []
    for hop, tables_for_hop in data_by_hop.items():
        data_for_hop = (
            # A list selector always yields a DataFrame, even if only one row matches;
            # assign() adds the column on a new frame instead of writing into a slice.
            tables_for_hop[workload]
            .loc[[SLICE_RATIO]]
            .assign(**{"Hop Count": hop})
            .set_index("Hop Count")
        )
        hop_data_frames.append(data_for_hop)
    hop_data = pd.concat(hop_data_frames)
    fig = revised_get_components_of_runtime(hop_data, "%s" % workload)
    runtime_components.append(fig)
    fig.update_layout(yaxis_range=[0,20])
    plot(fig)