# Experiment 4

- Sweep over the number of resources for the Azure packing 2020 dataset
- 20 machines
- Fixed number of jobs, with a skip factor of 128
- We fix the total number of jobs (4.096M jobs)
- This looks at jobs with $r_\text{max} = 12.3$
- We observe a varying $p_\text{max}$, but it is bounded below by 73.819 days

Schedulers:
```
schedulers = [
        MRIS(sort='WSJF'),
        PriorityQueueScheduler(sort='WSJF', online=True),
        OnlinePriorityQueueScheduler(sort='WSJF'),
        TetrisScheduler(),
        BFEXECScheduler()
]
```

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import scienceplots
import itertools

In [None]:
# Start from the 'science' style sheet (made available by the `scienceplots` import).
plt.style.use(['science'])
# rcParams overrides for the figures in this notebook; they only take effect
# once the dict is handed to `mpl.rcParams.update`.
params = {
    "font.family": "serif",
    # Render all text through LaTeX so that the preamble below is applied.
    "text.usetex": True,
    # LaTeX packages loaded for text rendering: Libertine plus newtxmath
    # configured with the `libertine` option.
    'text.latex.preamble':
        r"""
        \usepackage{libertine}
        \usepackage[libertine]{newtxmath}
        """,
}
# https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html
def get_linestyles():
    """Return an endless iterator over ``(name, dash_spec)`` line-style pairs.

    Each ``dash_spec`` is an ``(offset, on_off_sequence)`` tuple. The ten
    styles are yielded in a fixed order, starting with the solid line, and
    the sequence restarts once it is exhausted. Every call returns a fresh
    iterator positioned at the first style.
    """
    named_styles = (
        ('solid', (0, ())),
        ('dotted', (0, (1, 1))),
        ('long dash with offset', (5, (10, 3))),
        ('loosely dashed', (0, (5, 10))),
        ('densely dashed', (0, (5, 1))),
        ('dashdotted', (0, (3, 5, 1, 5))),
        ('densely dashdotted', (0, (3, 1, 1, 1))),
        ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))),
        ('loosely dashdotdotted', (0, (3, 10, 1, 10, 1, 10))),
        ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1))),
    )
    # The 'loosely dotted', 'dashed' and 'loosely dashdotted' variants are
    # not part of the cycle.
    return itertools.cycle(named_styles)
# Install the overrides collected in `params` into Matplotlib's global rcParams.
mpl.rcParams.update(params)

In [None]:
# Total number of jobs in the experiment (4.096M); also part of the result file names.
N = 4_096_000
# Skip factor of the runs; also part of the result file names.
skip_factor = 128

# Resource counts covered by the sweep: every integer from the lower to the
# upper bound, both inclusive.
_MIN_RESOURCES, _MAX_RESOURCES = 4, 20
resources = np.arange(_MIN_RESOURCES, _MAX_RESOURCES + 1, dtype=int)

In [None]:
# One result table per resource count, keyed by that count. Each table is read
# from the parquet file whose name encodes the job count, skip factor and
# resource count of the run.
dfs = {
    R: pd.read_parquet(
        f'resource_sweep_azure_packing_2020_{N}_{skip_factor}_{R}.parquet'
    )
    for R in resources
}

In [None]:
# Sanity peek: show the 0:100 slice of column 'd' from the table of the third
# resource count in the sweep (resources[2]).
# NOTE(review): the meaning of column 'd' is not evident from this notebook — confirm.
dfs[resources[2]]['d'][0:100]

In [None]:
# Collect the distinct scheduler names from the table of the first resource
# count; the later cells iterate over these names for every resource count.
first_run = dfs[resources[0]]
schedulers = first_run['scheduler'].unique()
# Bare expression so the notebook displays the names.
schedulers

# Average Weighted Completion Time

In [None]:
# Average weighted completion time per (scheduler, resource count):
#     sum(C * w) / number_of_rows
# computed over the rows of that scheduler in the table for that resource count.
# 'C' and 'w' are presumably the per-job completion time and weight — confirm
# against the code that writes the parquet files.
#
# Result: scheduler name -> float array aligned index-for-index with `resources`.
# The arrays are allocated once up front instead of building a throwaway
# zeros array as a `dict.get` default on every iteration.
weighted_completion_time_data = {
    scheduler: np.zeros(resources.shape, dtype=float) for scheduler in schedulers
}
for i, R in enumerate(resources):
    # The table lookup does not depend on the scheduler, so do it once per R.
    run_df = dfs[R]
    for scheduler in schedulers:
        df = run_df[run_df['scheduler'] == scheduler]
        weighted_completion_time = df['C'].dot(df['w']) / len(df)
        weighted_completion_time_data[scheduler][i] = weighted_completion_time

In [None]:
# Plot the average weighted completion time of every scheduler against the
# number of resource types and write the figure to 'resource_sweep.pdf'.
plt.figure(figsize=(6, 2), dpi=200)
linestyles = get_linestyles()
for scheduler, weighted_completion_times in weighted_completion_time_data.items():
    # The Tetris scheduler gets a small-caps LaTeX legend label; every other
    # scheduler name is used verbatim. A separate `label` variable avoids
    # rebinding the loop variable.
    label = r'\textsc{Tetris}' if scheduler == 'Tetris-instantaneous' else scheduler
    # get_linestyles() yields (name, dash_spec) pairs; only the dash spec is
    # passed to Matplotlib.
    plt.plot(
        resources,
        weighted_completion_times,
        label=label,
        linestyle=next(linestyles)[1],
        linewidth=1,
    )
# Fixed y-axis window and tick positions for this figure.
plt.yticks(list(range(15, 37, 5)))
plt.ylim(11, 37)
plt.ylabel("Average weighted\n completion time")
plt.xlabel("Number of resource types")
plt.legend(loc='upper left', prop={'size': 8}, handlelength=3, ncol=2)
# Write the PDF before displaying the figure.
plt.savefig('resource_sweep.pdf', bbox_inches='tight')
plt.show()