## Initialize

In [34]:
from importlib import reload
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

import plotutils as pu
reload(pu)

%matplotlib widget

In [64]:
def read(name):
    """Load the jobs table of one logged run.

    Reads ``../log/<name>/jobs.csv`` and appends a ``Latency`` column
    computed as ``Finished - Admitted``.

    Args:
        name: run directory below ``../log``

    Returns:
        The loaded DataFrame, including the derived ``Latency`` column.
    """
    csv_path = Path('../log').joinpath(name, 'jobs.csv')
    frame = pd.read_csv(csv_path)
    frame['Latency'] = frame['Finished'] - frame['Admitted']
    return frame


def show(jobs, figsize=(13, 8)):
    """Draw the five-panel summary figure for one jobs table.

    Panels (keys of the returned dict):
        A: CDF of ``Length``
        B: release timeline, ``Admitted`` .. ``Admitted + Length`` per job
        C: execution timeline, ``Admitted`` .. ``Deadline`` for every job with
           ``Started`` .. ``Finished`` of the 'done' jobs drawn on top
        D: CDF of ``Latency`` over the 'done' jobs only
        E: horizontal bars with the share of 'past_due' and 'done' jobs

    B shares both axes with C, and A shares both axes with D.

    Args:
        jobs: DataFrame with columns JobId, Length, Admitted, Deadline,
            Started, Finished, State and Latency
        figsize: figure size handed to ``plt.figure``

    Returns:
        (fig, axs) where axs maps the panel letter to its axes.
    """
    fig = plt.figure(figsize=figsize, constrained_layout=True)
    # one string per grid row, one letter per panel
    mosaic_rows = ['BAC', 'BAC', 'BDC', 'BDC', 'BEC']
    axs = fig.subplot_mosaic('\n'.join(mosaic_rows))
    for follower, leader in (('B', 'C'), ('A', 'D')):
        axs[follower].sharex(axs[leader])
        axs[follower].sharey(axs[leader])

    # length CDF
    length_ax = pu.cdf(jobs.Length, ax=axs['A'])
    length_ax.set_xlabel('Length')
    length_ax.set_ylabel('CDF')

    # release timeline
    release_end = jobs.Admitted + jobs.Length
    release_ax = pu.job_timeline(jobs.JobId, jobs.Admitted, release_end, ax=axs['B'])
    release_ax.set_title('Release Timeline')
    release_ax.set_xlabel('Time')
    release_ax.set_ylabel('Job')

    # execution: the admitted..deadline window first, the actual run on top
    finished_jobs = jobs[jobs.State == 'done']
    pu.job_timeline(jobs.JobId, jobs.Admitted, jobs.Deadline, ax=axs['C'], numeric_workers=True)
    exec_ax = pu.job_timeline(finished_jobs.JobId, finished_jobs.Started, finished_jobs.Finished,
                              ax=axs['C'], numeric_workers=True)
    exec_ax.set_title('Execution Timeline')
    exec_ax.set_xlabel('Time')
    exec_ax.set_ylabel('Job')

    # latency CDF
    # for the sake of this CDF, the latency of requests that are not done is NaN
    done_latency = jobs.Latency.where(jobs.State == 'done')
    latency_ax = pu.cdf(done_latency, ax=axs['D'])
    latency_ax.set_xlabel('Latency')
    latency_ax.set_ylabel('CDF')

    # percentage of jobs per final state
    share_ax = axs['E']
    states = ['past_due', 'done']
    total = len(jobs)
    shares = [len(jobs[jobs.State == state]) / total for state in states]
    rows = np.arange(len(states))
    share_ax.barh(rows, shares, align='center')
    for share, row in zip(shares, rows):
        pu.bar_show_data(share, row, data_y=share, fmt='{:.0%}', ax=share_ax, xytext=(4, 0),
                         horizontalalignment='left', verticalalignment='center')
    pu.cleanup_axis_categorical(share_ax.yaxis, states)
    pu.cleanup_axis_percent(share_ax.xaxis, xmax=1.0)
    share_ax.set_xlabel('Percentage')

    # fixups: leave a little headroom above CDF == 1
    axs['A'].set_ylim(0, 1.05)
    return fig, axs

## Simulator results

### Rand

In [3]:
# Load the jobs table logged under the 'rand' run directory and display it
rand_jobs = read('rand')
rand_jobs

Unnamed: 0,JobId,Length,Admitted,Deadline,Started,Finished,State,Latency
0,1,32.195560,13.824637,213.824637,13.824637,49.282643,done,35.458006
1,0,35.458006,13.824637,213.824637,13.824637,49.282643,done,35.458006
2,2,29.943253,20.003531,220.003531,49.282643,89.581673,done,69.578142
3,5,16.161537,38.613228,238.613228,49.282643,89.581673,done,50.968445
4,4,40.299029,38.613228,238.613228,49.282643,89.581673,done,50.968445
...,...,...,...,...,...,...,...,...
65,65,29.500122,906.803296,1106.803296,911.389312,966.328626,done,59.525330
66,69,11.488560,964.890443,1164.890443,,,past_due,
67,67,28.204770,918.576290,1118.576290,966.328626,994.533397,done,75.957107
68,66,12.547054,918.576290,1118.576290,966.328626,994.533397,done,75.957107


In [4]:
# Summary figure (length/latency CDFs, timelines, state shares) for the 'rand' run
show(rand_jobs)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(<Figure size 1300x800 with 5 Axes>,
 {'A': <AxesSubplot:label='A', xlabel='Length', ylabel='CDF'>,
  'B': <AxesSubplot:label='B', title={'center':'Release Timeline'}, xlabel='Time', ylabel='Job'>,
  'C': <AxesSubplot:label='C', title={'center':'Execution Timeline'}, xlabel='Time', ylabel='Job'>,
  'E': <AxesSubplot:label='E', xlabel='Percentage'>,
  'D': <AxesSubplot:label='D', xlabel='Latency', ylabel='CDF'>})

### Fifo

In [5]:
# Load the jobs table logged under the 'fifo' run directory and display it
fifo = read('fifo')
fifo

Unnamed: 0,JobId,Length,Admitted,Deadline,Started,Finished,State,Latency
0,0,35.458006,13.824637,213.824637,13.824637,49.282643,done,35.458006
1,1,32.195560,13.824637,213.824637,13.824637,49.282643,done,35.458006
2,2,29.943253,20.003531,220.003531,49.282643,89.581673,done,69.578142
3,3,13.610144,20.003531,220.003531,49.282643,89.581673,done,69.578142
4,4,40.299029,38.613228,238.613228,49.282643,89.581673,done,50.968445
...,...,...,...,...,...,...,...,...
65,65,29.500122,906.803296,1106.803296,906.803296,961.742610,done,54.939314
66,68,10.504058,964.890443,1164.890443,,,past_due,
67,69,11.488560,964.890443,1164.890443,,,past_due,
68,66,12.547054,918.576290,1118.576290,961.742610,989.947380,done,71.371091


In [6]:
# Summary figure (length/latency CDFs, timelines, state shares) for the 'fifo' run
show(fifo)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(<Figure size 1300x800 with 5 Axes>,
 {'A': <AxesSubplot:label='A', xlabel='Length', ylabel='CDF'>,
  'B': <AxesSubplot:label='B', title={'center':'Release Timeline'}, xlabel='Time', ylabel='Job'>,
  'C': <AxesSubplot:label='C', title={'center':'Execution Timeline'}, xlabel='Time', ylabel='Job'>,
  'E': <AxesSubplot:label='E', xlabel='Percentage'>,
  'D': <AxesSubplot:label='D', xlabel='Latency', ylabel='CDF'>})

### My

In [7]:
# Load the jobs table logged under the 'my' run directory and display it
my = read('my')
my

Unnamed: 0,JobId,Length,Admitted,Deadline,Started,Finished,State,Latency
0,0,35.458006,13.824637,213.824637,13.824637,49.282643,done,35.458006
1,1,32.195560,13.824637,213.824637,13.824637,49.282643,done,35.458006
2,2,29.943253,20.003531,220.003531,49.282643,89.581673,done,69.578142
3,3,13.610144,20.003531,220.003531,49.282643,89.581673,done,69.578142
4,4,40.299029,38.613228,238.613228,49.282643,89.581673,done,50.968445
...,...,...,...,...,...,...,...,...
65,65,29.500122,906.803296,1106.803296,906.803296,961.742610,done,54.939314
66,68,10.504058,964.890443,1164.890443,,,past_due,
67,69,11.488560,964.890443,1164.890443,,,past_due,
68,66,12.547054,918.576290,1118.576290,961.742610,989.947380,done,71.371091


In [8]:
# Re-time every executed batch (jobs sharing one finish time) as late as
# possible.  Batches are visited from the last-finished to the first; each one
# must end before the batch after it starts and must start no later than the
# smallest LastStartPoint among its jobs.

# NOTE(review): 60.3 is an unexplained constant subtracted from the deadline;
# it looks like a bound on batch execution time -- confirm and give it a name.
my['LastStartPoint'] = my.Deadline - 60.3

# Snapshot the simulator's own schedule only once, so that re-running this cell
# always starts from the original times instead of compounding earlier shifts.
if 'WouldStart' not in my.columns:
    my['WouldStart'] = my.Started
    my['WouldFinish'] = my.Finished

earliest_available = my.Deadline.max()
# distinct original finish times, latest first; jobs that never ran (NaN) are skipped
finished = np.sort(my.WouldFinish.dropna().unique())[::-1]
for f in finished:
    # select on the snapshot: the live Finished column is rewritten below and
    # must not influence which jobs belong to a batch
    selected = my[my.WouldFinish == f].index
    print(f'Working on {selected} finished at {f}')
    latency = f - my.loc[selected, 'WouldStart'].min()
    # can't start later than this
    start_latest = earliest_available - latency
    # also can't start later than this (label based lookup: `selected` holds index labels)
    last_start_point = my.loc[selected, 'LastStartPoint'].min()
    print(f'Last start point {last_start_point} {start_latest}')
    start_latest = min(start_latest, last_start_point)

    print(f'Earliest must start {start_latest}')
    earliest_available = start_latest

    my.loc[selected, 'Started'] = start_latest
    my.loc[selected, 'Finished'] = start_latest + latency
    my.loc[selected, 'Latency'] = my.loc[selected, 'Finished'] - my.loc[selected, 'Admitted']

Working on Int64Index([68, 69], dtype='int64') finished at 989.9473803284308
Last start point 1058.2762896070521 1136.6856726650067
Earliest must start 1058.2762896070521
Working on Int64Index([64, 65], dtype='int64') finished at 961.7426098826428
Last start point 1046.503295793211 1003.3369755176204
Earliest must start 1003.3369755176204
Working on Int64Index([61, 62, 63], dtype='int64') finished at 904.8685169893877
Last start point 978.8834311176513 965.3362529966121
Earliest must start 965.3362529966121
Working on Int64Index([52, 53], dtype='int64') finished at 866.8677944683793
Last start point 958.9245192898461 931.1362828881564
Earliest must start 931.1362828881564
Working on Int64Index([50, 51], dtype='int64') finished at 832.6678243599237
Last start point 947.0472291074989 905.8156876357316
Earliest must start 905.8156876357316
Working on Int64Index([48, 49], dtype='int64') finished at 803.2031777070198
Last start point 926.3179529867377 889.2304629154493
Earliest must start 8

In [9]:
# Summary figure for the 'my' run; reflects whatever Started/Finished/Latency `my` currently holds
show(my)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(<Figure size 1300x800 with 5 Axes>,
 {'A': <AxesSubplot:label='A', xlabel='Length', ylabel='CDF'>,
  'B': <AxesSubplot:label='B', title={'center':'Release Timeline'}, xlabel='Time', ylabel='Job'>,
  'C': <AxesSubplot:label='C', title={'center':'Execution Timeline'}, xlabel='Time', ylabel='Job'>,
  'E': <AxesSubplot:label='E', xlabel='Percentage'>,
  'D': <AxesSubplot:label='D', xlabel='Latency', ylabel='CDF'>})

#### Interactive

In [10]:
import mpl_interactions.ipyplot as iplt
import matplotlib.collections as mcoll
from matplotlib.lines import Line2D
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

def itimeline(workers, begin, end, colors=None, label=None,
             ax=None,
             marker_begin=None, marker_end=None):
    '''Draw horizontal timelines of jobs that can be redrawn in place

        Args:
            These are usually columns of a dataframe and must be equally long.
            begin and end must provide .min()/.max() (e.g. Series or ndarray).

            workers: list, row key of each job (does not have to be numeric)
            begin: list, x position where each job's segment starts
            end: list, x position where each job's segment ends

            colors: optional color(s) used for the initial draw; when None the
                next color of the axes' line property cycle is used
            label: label given to the line collection
            ax: axes to draw on, defaults to the current axes
            marker_begin: marker for segment starts, defaults to pu.default_marker_begin()
            marker_end: marker for segment ends, defaults to pu.default_marker_end()

        Returns:
            (ax, updater). updater(w, b, e, c=None) replaces the drawn
            segments and markers with new workers/begin/end (and optionally
            colors), extends the data limits and rescales the view.

        Raises:
            ValueError: if workers, begin and end differ in length
    '''
    if marker_begin is None:
        marker_begin = pu.default_marker_begin()
    if marker_end is None:
        marker_end = pu.default_marker_end()

    if not len(workers) == len(begin) == len(end):
        raise ValueError('Length of workers, begin, end should be equal,'
                         f' but got ({len(workers)}, {len(begin)}, {len(end)})')

    if ax is None:
        ax = plt.gca()
    # NOTE(review): `_get_lines.prop_cycler` is private matplotlib API and may
    # not exist in every matplotlib version -- confirm against the pinned one.
    static_c = next(ax._get_lines.prop_cycler)['color']

    lines = mcoll.LineCollection([], label=label)
    ax.add_collection(lines, autolim=True)
    # use scatter to draw markers such that each marker can have different colors.
    # The scatters start empty, so no colors are given here (a per-job color
    # list would not match the zero points); colors are applied by updater.
    markers_begin = ax.scatter(x=[], y=[], marker=marker_begin)
    markers_end = ax.scatter(x=[], y=[], marker=marker_end)

    def updater(w, b, e, c=None):
        # create y_pos according to workers, so workers doesn't have to be numeric
        y_values, y_pos = np.unique(w, return_inverse=True)
        y_pos = y_pos.astype(np.float64)

        if c is None:
            c = [static_c]

        # one segment per job (not per distinct worker): shape (jobs, begin/end, x/y)
        segs = np.zeros((len(y_pos), 2, 2))
        segs[:, :, 1] = y_pos[:, np.newaxis]  # begin/end, y
        segs[:, 0, 0] = b  # begin, x
        segs[:, 1, 0] = e  # end, x
        lines.set_paths(segs)
        lines.set_color(c)

        markers_begin.set_offsets(np.column_stack([b, y_pos]))
        markers_end.set_offsets(np.column_stack([e, y_pos]))
        markers_begin.set_facecolor(c)
        markers_end.set_facecolor(c)

        if len(y_pos) > 0:
            minx = min(b.min(), e.min())
            maxx = max(e.max(), b.max())
            miny = y_pos.min()
            maxy = y_pos.max()

            corners = (minx, miny), (maxx, maxy)

            # the collections were created empty, so feed the limits by hand
            ax.update_datalim(corners)
            ax.update_datalim(markers_begin.get_datalim(ax.transData))
            ax.update_datalim(markers_end.get_datalim(ax.transData))

        ax.autoscale_view()

        # fix yticks to categorical
        pu.cleanup_axis_categorical(ax.yaxis, y_values)

    # initial draw, honouring the caller's colors if any were given
    updater(workers, begin, end, colors)

    # set default axis labels
    ax.set_ylabel('Worker')
    ax.set_xlabel('Time')

    return ax, updater


def ishow(jobs, figsize=(13, 8)):
    """Build the interactive two-panel figure (release | execution).

    ``jobs`` only provides the initial content: the release timeline, the
    Admitted..Deadline budget intervals, and the starting position of the
    redrawable segments (Admitted..Admitted + Length).

    Args:
        jobs: DataFrame with columns JobId, Admitted, Length and Deadline
        figsize: figure size handed to ``plt.figure``

    Returns:
        (fig, axs, updater) where ``updater`` is the redraw callback created
        by ``itimeline`` for the execution panel.
    """
    fig = plt.figure(figsize=figsize, constrained_layout=True)
    # a narrow release panel next to an execution panel three times as wide
    axs = fig.subplot_mosaic('BCCC')
    natural_end = jobs.Admitted + jobs.Length

    # release timeline
    release_ax = pu.job_timeline(jobs.JobId, jobs.Admitted, natural_end, ax=axs['B'])
    release_ax.set_title('Release Timeline')
    release_ax.set_xlabel('Time')
    release_ax.set_ylabel('Job')

    # execution: static budget intervals below, redrawable segments on top
    pu.job_timeline(jobs.JobId, jobs.Admitted, jobs.Deadline, ax=axs['C'])
    exec_ax, updater = itimeline(jobs.JobId, jobs.Admitted, natural_end, ax=axs['C'])
    exec_ax.set_title('Execution Timeline')
    exec_ax.set_xlabel('Time')
    exec_ax.set_ylabel('Job')

    return fig, axs, updater

In [11]:
def push_jobs(offset):
    """Delay the first job by ``offset`` and propagate the delay to later jobs.

    Works on the module-level ``jobs`` frame (expected to be indexed 0..n-1 in
    job order) and redraws through the module-level ``updater``.

    Jobs are visited in index order and packed into batches of at most three.
    The earliest start of a job is the maximum of: the first job's admission
    plus ``offset``, the previous job's adjusted start, the end of the previous
    batch, and the job's own admission.  A job that cannot start together with
    the current batch closes that batch.  The walk stops at the first job whose
    earliest start equals its admission, i.e. that is no longer delayed.

    Columns written to ``jobs``:
        Adjusted: the pushed start time (defaults to Admitted)
        Feasible: False when the pushed start is later than Deadline - Length
        FeasibleColor: 'green' / 'red' for visited jobs, 'yellow' for the rest

    Args:
        offset: delay applied to the admission time of the first job
    """
    jobs['Adjusted'] = jobs.Admitted
    jobs['Feasible'] = True
    jobs['FeasibleColor'] = 'yellow'

    # push and try fix all others
    prev_batch = []  # indices of job of the batch
    curr_batch = []
    batch_size = 3

    def smin(idx):
        # every entry is a lower bound on the start time, so the latest one wins
        sss = [
            # offset push
            jobs.loc[0, 'Admitted'] + offset,
            # previous job's start
            jobs.loc[idx - 1, 'Adjusted'] if idx > 0 else 0,
            # previous batch done
            (jobs.loc[prev_batch, 'Adjusted'] + jobs.loc[prev_batch, 'Length']).max() if len(prev_batch) > 0 else 0,
            jobs.loc[idx, 'Admitted']
        ]
        ss = np.max(sss)
        print(f"{idx}: start_min {ss} = max({sss})")
        return ss

    for idx in jobs.index:
        if len(curr_batch) == batch_size:
            # current batch full
            print(f'{idx}: Cut off full batch curr_batch = {curr_batch}')
            prev_batch = curr_batch
            curr_batch = []

        start_min = smin(idx)
        if len(curr_batch) > 0 and start_min > jobs.loc[curr_batch, 'Adjusted'].max():
            # not possible to put in current batch
            print(f'{idx}: Cut off partial batch curr_batch = {curr_batch}')
            prev_batch = curr_batch
            curr_batch = []
            # recalculate, the end of the batch just closed is now a bound as well
            start_min = smin(idx)
        if start_min == jobs.loc[idx, 'Admitted']:
            # the push no longer reaches this job, so it does not reach later ones
            break

        curr_batch.append(idx)

        # latest start that still meets the deadline
        latest_feasible = jobs.loc[idx, 'Deadline'] - jobs.loc[idx, 'Length']
        start_max = np.min([
            # feasible interval
            latest_feasible
        ])
        print(f"{idx}: first start_max {start_max} = min([{latest_feasible}])")

        infeasible = start_min > start_max
        jobs.loc[idx, 'Feasible'] = not infeasible
        jobs.loc[idx, 'FeasibleColor'] = 'red' if infeasible else 'green'

        jobs.loc[idx, 'Adjusted'] = start_min
    updater(jobs.JobId, jobs.Adjusted, jobs.Adjusted + jobs.Length, jobs.FeasibleColor)

# Interactive mode is switched off while the figure is created; its canvas is
# placed inside the HBox below instead.
plt.ioff()
fig, ax, updater = ishow(my, figsize=(10, 8))
plt.ion()

# push_jobs walks the jobs by position, so order them by JobId with a fresh 0..n-1 index
jobs = my.sort_values(by='JobId').reset_index(drop=True)
jobs['LengthP99'] = 40.5

# the slider drives push_jobs; whatever push_jobs prints is captured in `out`
offsetSlider = widgets.FloatSlider(value=0., min=0., max=200., step=1., continuous_update=True)
out = widgets.interactive_output(push_jobs, {'offset':offsetSlider})
out.layout.overflow = 'scroll scroll'
out.layout.max_height = '700px'
out.layout.max_width = '500px'

# figure on the left, slider above the log on the right
vbox = widgets.VBox([offsetSlider, out])
hbox = widgets.HBox([fig.canvas, vbox])

hbox

HBox(children=(Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Ba…

### NSDI

In [37]:
# Simulator jobs table of the 'nsdi/bs10' run (../log/nsdi/bs10/jobs.csv)
sim_fifo = read('nsdi/bs10')

In [65]:
# Summary figure for the simulator's 'nsdi/bs10' run
show(sim_fifo)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(<Figure size 1300x800 with 5 Axes>,
 {'A': <AxesSubplot:label='A', xlabel='Length', ylabel='CDF'>,
  'D': <AxesSubplot:label='D', xlabel='Latency', ylabel='CDF'>,
  'B': <AxesSubplot:label='B', title={'center':'Release Timeline'}, xlabel='Time', ylabel='Job'>,
  'C': <AxesSubplot:label='C', title={'center':'Execution Timeline'}, xlabel='Time', ylabel='Job'>,
  'E': <AxesSubplot:label='E', xlabel='Percentage'>})

## Nexus

In [45]:
def read_nexus(name):
    """Load a Nexus run and reshape it like the simulator's jobs table.

    Reads ``../log/<name>/nexus/output.csv`` (lines starting with ``#`` are
    comments), orders the rows by ``Timestamp`` and adds the columns
    ``show`` works with.

    Derived columns:
        JobId: position after sorting by Timestamp
        Latency, Length: LatencyUS and LengthUS divided by 1000
        Admitted, Started: Timestamp
        Deadline: Timestamp + Budget
        Finished: Timestamp + Latency
        State: 'done' stays 'done', every other value becomes 'past_due'

    Args:
        name: run directory below ``../log``

    Returns:
        The converted DataFrame.

    Raises:
        ValueError: if Budget or Timestamp hold non-numeric values
            (unparsable LengthUS / LatencyUS entries become NaN instead).
    """
    csv_path = Path('../log/') / name / 'nexus' / 'output.csv'
    raw = pd.read_csv(csv_path, comment='#')

    # measurements may be missing or malformed (-> NaN), bookkeeping columns may not
    conversions = (
        ('LengthUS', 'coerce'),
        ('LatencyUS', 'coerce'),
        ('Budget', 'raise'),
        ('Timestamp', 'raise'),
    )
    for column, on_error in conversions:
        raw[column] = pd.to_numeric(raw[column], errors=on_error)

    ordered = raw.sort_values(by='Timestamp').reset_index(drop=True)

    # make the result compatible with simulator
    jobs = ordered.assign(
        JobId=ordered.index,
        Latency=ordered['LatencyUS'] / 1000,
        Length=ordered['LengthUS'] / 1000,
        Admitted=ordered['Timestamp'],
        Deadline=ordered['Timestamp'] + ordered['Budget'],
        Started=ordered['Timestamp'],
    )
    jobs['Finished'] = jobs['Timestamp'] + jobs['Latency']
    # State: done, past_due
    jobs['State'] = jobs['State'].where(jobs['State'] == 'done', 'past_due')
    return jobs

In [46]:
# Nexus results of the 'nsdi/bs10' run, converted to the simulator's column layout
df = read_nexus('nsdi/bs10')

In [66]:
# Summary figure for the Nexus 'nsdi/bs10' run
show(df)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(<Figure size 1300x800 with 5 Axes>,
 {'A': <AxesSubplot:label='A', xlabel='Length', ylabel='CDF'>,
  'D': <AxesSubplot:label='D', xlabel='Latency', ylabel='CDF'>,
  'B': <AxesSubplot:label='B', title={'center':'Release Timeline'}, xlabel='Time', ylabel='Job'>,
  'C': <AxesSubplot:label='C', title={'center':'Execution Timeline'}, xlabel='Time', ylabel='Job'>,
  'E': <AxesSubplot:label='E', xlabel='Percentage'>})

In [67]:
# Overlay the latency CDFs of the simulated FIFO run and the Nexus run.
fig, ax = plt.subplots()

# Only jobs in state 'done' contribute a latency.  The mask is applied to BOTH
# data sets so the two curves are computed over the same kind of population.
sim_fifo_tmp = sim_fifo.copy()
sim_fifo_tmp.loc[sim_fifo_tmp.State != 'done', 'Latency'] = np.nan
nexus_latency = df.Latency.where(df.State == 'done')

pu.cdf(sim_fifo_tmp.Latency, label='SimFIFO', ax=ax)
pu.cdf(nexus_latency, label="Nexus", ax=ax)
ax.set_xlabel('Latency')
ax.set_ylabel('CDF')
# trailing semicolon keeps the Legend repr out of the cell output
ax.legend();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x7f907c228710>