In [23]:
import os
import sys
import pandas as pd
import numpy as np
import time
from collections import defaultdict
from tqdm import tqdm
import torch
from torch import nn
from torch.utils.data import DataLoader
from utils.dataset import *
from size_gru import GRU, SizeToHidden, inputTensor
from size_vae import SizeEncoder, SizeDecoder
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
os.chdir(sys.path[0])
pairs = 1000
pairdata, freqpairs, n_size, n_interval = get_univ_data(pairs)
sizedata = get_data(pairdata, freqpairs, 'size_index', n_size)
size_cdf = pd.read_csv('data/univ/size_cdf.csv')

In [24]:
decoder = torch.load('model/final/vae-decoder.pth')
gru = torch.load('model/final/gru.pth')
s2h = torch.load('model/final/s2h.pth')
vae_latent_dim = decoder.decoder[0][0].in_features
gru_hidden_size = gru.gru.hidden_size

In [32]:
def get_mean_interval(mean_size, load):
    return 1/(100E9*load/8./mean_size) * 1E9

def sample_size(cdf, index):
    if index == 0:
        return cdf.iloc[0]['size']
    return cdf.iloc[index - 1]['size'] + np.random.rand() * (cdf.iloc[index]['size'] - cdf.iloc[index - 1]['size'])

all_size = []
for i in range(pairs):
    all_size.extend(list(pairdata[freqpairs[i]]['size_index'].values))
values, counts = np.unique(all_size, return_counts=True)
all_size = np.zeros(n_size, dtype=np.float64)
all_size[values] = counts
all_size /= all_size.sum()

def gen_real(seed, pair, load, n):
    np.random.seed(seed)
    size_index = pairdata[freqpairs[pair]]['size_index'].values
    sizes = []
    while len(sizes) < n:
        sizes += list(size_index)
    sizes = np.array(sizes)[:n]
    sizes = np.array([sample_size(size_cdf, i) for i in sizes])
    mean_size = int(get_mean_interval(np.mean(sizes), load))
    intervals = np.random.exponential(mean_size, n).astype(int)
    return sizes, intervals


def gen_common(seed, pair, load, n):
    np.random.seed(seed)
    sizes = []
    while len(sizes) < n:
        sizes.append(np.random.choice(n_size, p=all_size))
    sizes = np.array([sample_size(size_cdf, i) for i in sizes])
    mean_size = int(get_mean_interval(np.mean(sizes), load))
    intervals = np.random.exponential(mean_size, n).astype(int)
    return sizes, intervals


def sample_batch(size_data, seq_length, start_size, batch_size):
    gru.eval() 
    s2h.eval()
    softmax = nn.Softmax(dim=2).to(device)
    with torch.no_grad():  # no need to track history in sampling
        size_tensor = torch.tensor(size_data, dtype=torch.float).to(device)
        hn = s2h(size_tensor)
        size = start_size
        output_seq = [size]
        for _ in range(seq_length - 1):
            input = inputTensor(np.array([size])).to(device)
            input = input.float().permute(1,0,2)
            output, hn = gru(input, hn)
            output = softmax(output)
            p_size = output.detach().cpu().numpy().squeeze()
            size = [np.random.choice(n_size, p=p_size[i]) for i in range(batch_size)]
            output_seq.append(size)
        return output_seq


def gen_encore(seed, pair, load, n):
    batch_size = 1000
    torch.manual_seed(0)
    z = torch.randn((batch_size, vae_latent_dim)).to(device)
    size = decoder(z)
    size = size.squeeze().detach().to('cpu').numpy().astype(np.float64)
    size[size < 1e-3] = 0
    size /= size.sum(axis=1).reshape(-1, 1)
    start_size = [np.random.choice(n_size, p=size[i]) for i in range(batch_size)]
    size_index = []
    while len(size_index) < n:
        output_seq = sample_batch(size, 16, start_size, batch_size)
        size_index += output_seq[1:]
        start_size = output_seq[-1]
        print(len(size_index))
    size_index = np.array(size_index).T
    sizes, intervals = [], []
    for i in tqdm(range(batch_size)):
        sizes.append([int(sample_size(size_cdf, j)) for j in size_index[i]])
        mean_size = int(get_mean_interval(np.mean(sizes[i]), load))
        intervals.append(np.random.exponential(mean_size, n).astype(int))
    return sizes, intervals

In [26]:
class Flow:
	def __init__(self, src, dst, size, t):
		self.src, self.dst, self.size, self.t = src, dst, size, t
	def __str__(self):
		return "%d %d 3 100 %d %.9f"%(self.src, self.dst, self.size, self.t)

def gen_traces(dir, func, load, n):
    for trace in tqdm(range(1000)):
        np.random.seed(trace)
        src = 1
        dst = 0
        sizes, intervals = func(trace, trace, load, n)
        base_t = 2e9
        f_list = []
        t = base_t
        for size, interval in zip(sizes, intervals):
            t += interval
            f_list.append(Flow(src, dst, size, t * 1e-9))
            if t >= base_t + 1e9:
                break
                
        flow_file = '{dir}/trace{trace}.txt'.format(trace=trace, dir=dir)
        f = open(flow_file, 'w')
        f_list.sort(key = lambda x: x.t)
        f.write(str(len(f_list)) + '\n')
        for flow in f_list:
            f.write(str(flow) + '\n')
        f.close()

In [28]:
trace_dir = './simulation/data/trace/ground_truth'
if not os.path.exists(trace_dir):
    os.makedirs(trace_dir)
gen_traces(trace_dir, gen_real, 0.03, 5000)

100%|██████████| 1000/1000 [07:13<00:00,  2.30it/s]


In [30]:
trace_dir = './simulation/data/trace/common_practice'
if not os.path.exists(trace_dir):
    os.makedirs(trace_dir)
gen_traces(trace_dir, gen_common, 0.03, 5000)

100%|██████████| 1000/1000 [08:09<00:00,  2.04it/s]


In [33]:
trace_dir = './simulation/data/trace/encore'
if not os.path.exists(trace_dir):
    os.makedirs(trace_dir)
gen_traces(trace_dir, gen_common, 0.03, 5000)

100%|██████████| 1000/1000 [08:34<00:00,  1.94it/s]


In [31]:
mean_fcts_gt = []
gt_sizes = []
for i in range(1000):
    fct = pd.read_csv('simulation/data/result/ground_truth/config0_trace{i}/fct.csv'.format(i=i)).sort_values('fid')
    slowdown = fct['fct'] / fct['standalone_fct']
    slowdown[slowdown < 1] = 1
    print(i, np.mean(slowdown), np.median(slowdown), np.percentile(slowdown, 90), np.percentile(slowdown, 95))
    mean_fcts_gt.append(np.mean(slowdown))
    gt_sizes.append(fct['size'].mean())

0 1.0041243138811307 1.0 1.015590639576223 1.0268213553021888
1 1.099297463117426 1.0 1.439268864914288 1.582697893728348
2 3.8104976200492757 1.0 11.880061891415169 19.03349907227974
3 1.092221133368279 1.0 1.3946760908811195 1.5682317061195092
4 1.9051668635155041 1.0 4.341355124031321 6.973380914810703
5 1.1842273272562003 1.0 1.7955798584603757 2.1224539477557665
6 1.0610740764649451 1.0 1.2750440079156882 1.43957662241003
7 1.0656394022068845 1.0 1.276345758866949 1.394878224293133
8 1.0282144904225545 1.0 1.1144660393909174 1.1912032235254697
9 4.921864765358586 1.0 17.394863849042345 22.358209032751294
10 1.0211847731498593 1.0 1.0558087093226818 1.1982579547320273
11 1.0547073932740045 1.0 1.1826848283134086 1.3362334949022774
12 1.301028008463208 1.0 1.9478704026556004 2.6518822310757786
13 1.445066648123586 1.0 2.368411477331501 3.616679629596051
14 1.0002472664144428 1.0 1.0 1.0
15 2.568991869715555 1.0 8.54363057101976 11.132552253307374
16 15.381675077051264 1.0 55.0419385