In [12]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import networkx as nx
import os
np.random.seed(1024)

# Number of flows per generated problem instance: 10, 40, 70, ..., 220.
# (Earlier alternative kept for reference: list(range(8, 188, 10)).)
NUM_STREAM_SPEC = list(range(10, 221, 30))

# Constant written to the `t_proc` column of every link and added once per hop
# when flow deadlines are derived.
# NOTE(review): the unit is not stated anywhere in this notebook -- confirm.
ERROR = 2000

# Option codes accepted by `period_spec`.
PERIOD_SPEC = [1, 2, 3, 4, 5, 6]
def period_spec(opt):
    """Return a flow period for the given option code.

    Options 1 and 2 map to a fixed period; options 3-6 pick one value from a
    small candidate list through NumPy's global random state (so the result
    depends on the current `np.random` seed).

    Args:
        opt: one of the codes listed in `PERIOD_SPEC`.

    Returns:
        The period as a plain Python int.

    Raises:
        AssertionError: if `opt` is not a known option code.
    """
    constant_period = {1: 2_000_000, 2: 400_000}
    period_pool = {
        3: [500_000, 1_000_000, 2_000_000, 4_000_000],
        4: [100_000, 200_000, 400_000, 800_000],
        5: [250_000, 500_000, 1_250_000, 2_500_000, 4_000_000],
        6: [50_000, 100_000, 250_000, 500_000, 800_000],
    }

    if opt in constant_period:
        return constant_period[opt]
    if opt in period_pool:
        return int(np.random.choice(period_pool[opt]))
    assert False, "Invalid option"

# Option codes accepted by `data_spec`.
SIZE_SPEC = [1,2,3,4,5]
def data_spec(opt):
    """Return a flow payload size for the given option code.

    Option 1 is the constant 50. Options 2-5 pick one multiple of 100 from an
    inclusive range through NumPy's global random state.

    Args:
        opt: one of the codes listed in `SIZE_SPEC`.

    Returns:
        The size as a plain Python int.
        NOTE(review): the caller multiplies it by 8, so this is presumably
        bytes -- confirm.

    Raises:
        AssertionError: if `opt` is not a known option code.
    """
    if opt == 1:
        return 50

    # (first, last) bounds, both inclusive, stepped by 100.
    size_bounds = {
        2: (100, 500),
        3: (200, 1500),
        4: (500, 4500),
        5: (1500, 4500),
    }
    if opt in size_bounds:
        first, last = size_bounds[opt]
        return int(np.random.choice(range(first, last + 1, 100)))
    assert False, "Invalid option"

# Option codes used for the deadline dimension of the parameter grid.
DEADLINE_SPEC = [1,2,3,4,5]
def deadline_spec(opt):
    """Return the extra deadline slack for the given option code.

    Option 1 has no slack value defined here and always trips a bare
    assertion; `generate_flowset` in this notebook never calls this function
    for option 1 and uses the flow period as the deadline instead.
    Options 2-4 pick one value from a candidate list through NumPy's global
    random state; option 5 is the constant 0.

    Args:
        opt: one of the codes listed in `DEADLINE_SPEC`.

    Returns:
        The slack as a plain Python int.

    Raises:
        AssertionError: for option 1 (no message) and for any unknown code
            (message "Invalid option").
    """
    if opt == 1:
        assert False

    slack_pool = {
        2: [100_000, 200_000, 400_000, 800_000, 1_600_000],
        3: [10_000, 25_000, 50_000, 100_000, 200_000, 400_000],
        4: [0, 10_000, 20_000, 25_000, 50_000],
    }
    if opt in slack_pool:
        return int(np.random.choice(slack_pool[opt]))
    if opt == 5:
        return 0
    assert False, "Invalid option"

In [13]:
def bfs_paths(graph, start, goal):
    """Return one shortest path from `start` to `goal` in `graph`.

    Thin wrapper around `networkx.shortest_path` called without an edge
    weight, so whatever networkx returns (and whatever it raises when the
    nodes are unknown or unconnected) is passed through unchanged.

    Args:
        graph: a networkx graph.
        start: source node.
        goal: target node.
    """
    route = nx.shortest_path(graph, source=start, target=goal)
    return route

In [14]:
def line(num_sw, num_queue, data_rate, header):
    """Build a line (chain) topology and export its link table as CSV.

    Nodes ``0 .. num_sw-1`` are switches chained in order; node ``num_sw + k``
    is the end-station attached to switch ``k``. Every connection is stored
    in both directions.

    Args:
        num_sw: number of switches (one end-station is attached to each).
        num_queue: value written to the ``q_num`` column of every link.
        data_rate: value written to the ``rate`` column of every link.
        header: output path without extension; the table goes to
            ``header + '.csv'``.

    Returns:
        The ``(2*num_sw, 2*num_sw)`` NumPy adjacency matrix, 1 where a
        directed link exists and 0 elsewhere.
    """
    node_total = 2 * num_sw
    net = np.zeros(shape=(node_total, node_total))

    # Chain neighbouring switches: k <-> k+1.
    left = np.arange(num_sw - 1)
    net[left, left + 1] = 1
    net[left + 1, left] = 1

    # Attach one end-station per switch: k <-> num_sw + k.
    switches = np.arange(num_sw)
    net[switches, switches + num_sw] = 1
    net[switches + num_sw, switches] = 1

    # One row per directed link, in row-major order of the matrix.
    # int() keeps the tuple rendered as "(i, j)" in the CSV.
    rows = [
        [(int(src), int(dst)), num_queue, data_rate, ERROR, 0]
        for src, dst in np.argwhere(net)
    ]
    table = pd.DataFrame(rows, columns=['link','q_num','rate','t_proc','t_prop'])
    table.to_csv(header + '.csv', index=False)
    return net

In [15]:
def ring(num_sw, num_queue, data_rate, header):
    """Build a ring topology and export its link table as CSV.

    Same layout as the chain of switches ``0 .. num_sw-1`` with end-station
    ``num_sw + k`` on switch ``k``, plus one extra connection between the
    first and the last switch. Every connection is stored in both directions.

    Args:
        num_sw: number of switches (one end-station is attached to each).
        num_queue: value written to the ``q_num`` column of every link.
        data_rate: value written to the ``rate`` column of every link.
        header: output path without extension; the table goes to
            ``header + '.csv'``.

    Returns:
        The ``(2*num_sw, 2*num_sw)`` NumPy adjacency matrix, 1 where a
        directed link exists and 0 elsewhere.
    """
    node_total = 2 * num_sw
    net = np.zeros(shape=(node_total, node_total))

    # Chain neighbouring switches.
    for here, there in zip(range(num_sw - 1), range(1, num_sw)):
        net[here, there] = net[there, here] = 1

    # Attach one end-station per switch.
    for sw in range(num_sw):
        station = sw + num_sw
        net[station, sw] = net[sw, station] = 1

    # Close the ring between the first and the last switch.
    last_sw = num_sw - 1
    net[0, last_sw] = 1
    net[last_sw, 0] = 1

    # One row per directed link, in row-major order of the matrix.
    # int() keeps the tuple rendered as "(i, j)" in the CSV.
    rows = [
        [(int(src), int(dst)), num_queue, data_rate, ERROR, 0]
        for src, dst in zip(*np.nonzero(net))
    ]
    table = pd.DataFrame(rows, columns=['link','q_num','rate','t_proc','t_prop'])
    table.to_csv(header + '.csv', index=False)
    return net

In [16]:
def tree(num_sw, num_queue, data_rate, header):
    """Build a binary-tree topology and export its link table as CSV.

    The graph has ``2*num_sw + 1`` nodes. Each node ``k`` in
    ``0 .. num_sw-1`` is connected to nodes ``2k + 1`` and ``2k + 2``; every
    connection is stored in both directions.

    Args:
        num_sw: number of parent nodes that each receive two children.
        num_queue: value written to the ``q_num`` column of every link.
        data_rate: value written to the ``rate`` column of every link.
        header: output path without extension; the table goes to
            ``header + '.csv'``.

    Returns:
        The ``(2*num_sw + 1, 2*num_sw + 1)`` NumPy adjacency matrix, 1 where a
        directed link exists and 0 elsewhere.
    """
    node_total = 2 * num_sw + 1
    net = np.zeros(shape=(node_total, node_total))

    # Connect every parent k with its children 2k+1 and 2k+2.
    parents = np.arange(num_sw)
    for offset in (1, 2):
        children = 2 * parents + offset
        net[parents, children] = 1
        net[children, parents] = 1

    # One row per directed link, in row-major order of the matrix.
    # int() keeps the tuple rendered as "(i, j)" in the CSV.
    rows = [
        [(int(src), int(dst)), num_queue, data_rate, ERROR, 0]
        for src, dst in np.argwhere(net)
    ]
    table = pd.DataFrame(rows, columns=['link','q_num','rate','t_proc','t_prop'])
    table.to_csv(header + '.csv', index=False)
    return net

In [17]:
def mesh(num_sw, num_queue, data_rate, header):
    """Build a ring-with-chords topology and export its link table as CSV.

    Starts from a ring of switches ``0 .. num_sw-1`` with end-station
    ``num_sw + k`` on switch ``k``, then adds a chord between switch ``k`` and
    switch ``num_sw - 1 - k`` for every ``k < num_sw // 2``. The ``k = 0``
    chord coincides with the ring-closing link. Every connection is stored in
    both directions.

    Args:
        num_sw: number of switches (one end-station is attached to each).
        num_queue: value written to the ``q_num`` column of every link.
        data_rate: value written to the ``rate`` column of every link.
        header: output path without extension; the table goes to
            ``header + '.csv'``.

    Returns:
        The ``(2*num_sw, 2*num_sw)`` NumPy adjacency matrix, 1 where a
        directed link exists and 0 elsewhere.
    """
    node_total = 2 * num_sw
    net = np.zeros(shape=(node_total, node_total))

    # Chain neighbouring switches: k <-> k+1.
    left = np.arange(num_sw - 1)
    net[left, left + 1] = 1
    net[left + 1, left] = 1

    # Attach one end-station per switch: k <-> num_sw + k.
    switches = np.arange(num_sw)
    net[switches + num_sw, switches] = 1
    net[switches, switches + num_sw] = 1

    # Close the ring between the first and the last switch.
    net[0, num_sw - 1] = 1
    net[num_sw - 1, 0] = 1

    # Rungs across the ring ("like DNA"): k <-> num_sw - 1 - k.
    near = np.arange(num_sw // 2)
    far = num_sw - 1 - near
    net[near, far] = 1
    net[far, near] = 1

    # One row per directed link, in row-major order of the matrix.
    # int() keeps the tuple rendered as "(i, j)" in the CSV.
    rows = [
        [(int(src), int(dst)), num_queue, data_rate, ERROR, 0]
        for src, dst in np.argwhere(net)
    ]
    table = pd.DataFrame(rows, columns=['link','q_num','rate','t_proc','t_prop'])
    table.to_csv(header + '.csv', index=False)
    return net

In [18]:
# Topology builders indexed by the integer `topo` code used in the parameter
# grid and recorded in dataset_logs.csv: 0 = line, 1 = ring, 2 = tree, 3 = mesh.
topo_func = [line, ring, tree, mesh]

In [19]:
def generate_flowset(graph, size_param, period_param, deadline_param, num_thres_param, num_sw, num_es, header):
    """Randomly generate a set of flows and write it to ``header + '.csv'``.

    Flows are drawn one at a time (rejection sampling): a source and a
    different destination end-station are picked at random, then a period,
    a size and a deadline. A draw is kept only when ``deadline <= period``;
    otherwise it is discarded and a new one is drawn. All randomness comes
    from NumPy's global random state.

    Deadline rule: for ``deadline_param > 1`` the deadline is
    ``hops * (ERROR + size * 8) + deadline_spec(deadline_param)`` where
    ``hops`` is the number of links on the path returned by `bfs_paths`;
    otherwise the deadline equals the period.

    NOTE(review): there is no cap on the number of rejected draws, so a
    parameter combination that can never satisfy ``deadline <= period``
    would loop forever.

    Args:
        graph: networkx graph used to look up the source-to-destination path.
        size_param: option code forwarded to `data_spec`.
        period_param: option code forwarded to `period_spec`.
        deadline_param: option code; values > 1 are forwarded to `deadline_spec`.
        num_thres_param: number of flows to generate.
        num_sw: node-id offset of the first end-station (end-stations are
            nodes ``num_sw .. num_sw + num_es - 1``).
        num_es: number of end-stations.
        header: output path without extension.

    Returns:
        None. The CSV has the columns id, src, dst, size, period, deadline,
        jitter; ``dst`` is a one-element list and ``jitter`` repeats the
        deadline.
    """
    flows = []
    # Accumulated size*8/period of the accepted flows, per source end-station.
    port_load = np.zeros(num_es)

    while len(flows) < num_thres_param:
        # Prefer sources whose accumulated load is at most 0.75; if none
        # qualifies, every end-station becomes eligible again.
        candidates = np.flatnonzero(port_load <= 0.75)
        if candidates.size == 0:
            candidates = np.arange(num_es)

        start = int(np.random.choice(candidates + num_sw))
        other_stations = [node for node in range(num_sw, num_sw + num_es) if node != start]
        end = int(np.random.choice(other_stations))
        path = bfs_paths(graph, start, end)

        period = period_spec(period_param)
        size = data_spec(size_param)
        if deadline_param > 1:
            hops = len(path) - 1
            deadline = hops * (ERROR + size * 8) + deadline_spec(deadline_param)
        else:
            deadline = period

        if deadline > period:
            # Infeasible draw: discard it and sample again.
            continue

        flow_id = len(flows)
        flows.append([flow_id, start, [end], size, period, deadline, deadline])
        port_load[start - num_sw] += size * 8 / period

    table = pd.DataFrame(flows, columns = ['id','src','dst','size','period','deadline','jitter'])
    table.to_csv(header + '.csv', index=False)

In [20]:
from multiprocessing import Pool

In [21]:
def grid_generator(ins, max_ins = 9999999):
    """Generate the problem instances of one grid folder ``grid/{ins}``.

    Walks the full parameter grid (size x period x deadline x stream count x
    switch count x topology) and, for each combination, writes
    ``grid/{ins}/{id}_topo.csv`` and ``grid/{ins}/{id}_task.csv``. When the
    walk ends, ``grid/{ins}/dataset_logs.csv`` lists the parameters of every
    instance produced.

    Ids start at ``ins * total`` (``total`` being the grid size) and increase
    by one per instance.

    NOTE(review): generation stops as soon as the running id reaches
    ``max_ins``. Because ids start at ``ins * total``, for ``ins > 0`` a small
    ``max_ins`` ends the run after a single instance -- confirm that this is
    the intended meaning of ``max_ins``.

    Args:
        ins: index of the output sub-folder; also used as tqdm bar position.
        max_ins: id threshold at which generation stops (checked after each
            instance, so at least one instance is always produced).
    """
    if os.path.exists(f'grid/{ins}'):
        print(f"Subfolder {ins} already exists.")
    else:
        os.makedirs(f'grid/{ins}')
        print(f"Subfolder {ins} created successfully.")

    def _parameter_grid():
        # Yields the combinations in the fixed nesting order that defines ids.
        for size in SIZE_SPEC:
            for period in PERIOD_SPEC:
                for deadline in DEADLINE_SPEC:
                    for num_thres in NUM_STREAM_SPEC:
                        for num_sw in range(8, 88, 10):
                            for topo in range(4):
                                yield size, period, deadline, num_thres, num_sw, topo

    # 8 switch counts (range(8, 88, 10)) times 4 topologies per spec combination.
    total = len(SIZE_SPEC) * len(PERIOD_SPEC) * len(DEADLINE_SPEC) * len(NUM_STREAM_SPEC) * 8 * 4
    count = ins * total
    dataset_logs = []
    with tqdm(total=total, desc=f"Process {ins}", position=ins) as pbar:
        for size, period, deadline, num_thres, num_sw, topo in _parameter_grid():
            header = f'grid/{ins}/' + str(count)
            net = topo_func[topo](num_sw, num_queue = 8, data_rate=1, header=header + '_topo')
            # One end-station per switch, hence num_es == num_sw.
            generate_flowset(nx.DiGraph(net), size, period, deadline, num_thres, num_sw, num_sw, header + '_task')

            dataset_logs.append([count, ins, size, period, deadline, topo, num_thres, num_sw])

            count += 1
            pbar.update(1)

            if count >= max_ins:
                break

    exp_logs = pd.DataFrame(dataset_logs, columns = ['id','ins','size','period','deadline','topo','num_thres','num_sw'])
    exp_logs.to_csv(f'grid/{ins}/dataset_logs.csv', index=False)

In [22]:
if __name__ == '__main__':
    ## Small test dataset (disabled)
    # header = "test"
    # num_bridge = 10
    # num_stream = 10
    # net = topo_func[0](num_bridge, num_queue = 8, data_rate=1, header=header + '_topo')
    # generate_flowset(nx.DiGraph(net), 2, 1, 1, num_stream, num_bridge, num_bridge, header + '_task')

    # Generate the dataset used in paper: make sure the output root exists.
    if os.path.exists('grid'):
        print("Folder already exists.")
    else:
        os.makedirs('grid')
        print("Folder created successfully.")

    # Single-process run for sub-folder 0; stops once the running id reaches 1000.
    grid_generator(0, 1000)
    ## Parallel alternative (disabled): one sub-folder per worker
    # with Pool(4) as p:
    #     p.map(grid_generator, range(4))
    # p.close()
    # p.join()
    
    

Folder already exists.
Subfolder 0 already exists.


Process 0:   0%|          | 0/38400 [00:00<?, ?it/s]

## 2. Verification:

In [13]:
# Index of the instances generated into grid/0: one row per problem instance
# with its id and the parameter codes it was generated from.
ins0_directory = pd.read_csv('grid/0/dataset_logs.csv')

In [14]:
# Load the flow table and the link table of every instance listed in the
# directory, keyed by instance id.
ins0_task, ins0_topo = {}, {}

for problem_id in tqdm(ins0_directory['id'].values, total=len(ins0_directory), desc="Loading data"):
    ins0_task[problem_id] = pd.read_csv(f"grid/0/{problem_id}_task.csv")
    ins0_topo[problem_id] = pd.read_csv(f"grid/0/{problem_id}_topo.csv")

Loading data:   0%|          | 0/38400 [00:00<?, ?it/s]

### 2.1 Check size

In [17]:
# Pool the flow sizes of all instances generated with each size option and
# report their count, mean and standard deviation.
for size in SIZE_SPEC:
    spec_id = ins0_directory.loc[ins0_directory['size'] == size, 'id'].values
    if len(spec_id) == 0:
        # np.concatenate raises ValueError on an empty list; this happens when
        # generation was cut short (max_ins) before this option was reached.
        print(f"Size {size} has no generated instances, skipped")
        continue
    # `idd` rather than `id`, so the builtin id() is not shadowed.
    all_size = np.concatenate([ins0_task[idd]['size'].values for idd in spec_id])
    print(f"Size {size} has {len(all_size)} flows, with average size {np.mean(all_size)} and std {np.std(all_size)}")

Size 1 has 883200 flows, with average size 50.0 and std 0.0
Size 2 has 883200 flows, with average size 298.64639945652175 and std 141.3152014230948
Size 3 has 883200 flows, with average size 835.5451766304348 and std 403.07598215838766
Size 4 has 883200 flows, with average size 2360.315330615942 and std 1181.5430307983695
Size 5 has 883200 flows, with average size 2919.305366847826 and std 893.6494976446756


### 2.2 Check period

In [19]:
# Pool the flow periods of all instances generated with each period option
# and report their count, mean and standard deviation.
for period in PERIOD_SPEC:
    spec_id = ins0_directory.loc[ins0_directory['period'] == period, 'id'].values
    if len(spec_id) == 0:
        # np.concatenate raises ValueError on an empty list; this happens when
        # generation was cut short (max_ins) before this option was reached.
        print(f"Period {period} has no generated instances, skipped")
        continue
    # `idd` rather than `id`, so the builtin id() is not shadowed.
    all_period = np.concatenate([ins0_task[idd]['period'].values for idd in spec_id])
    print(f"Period {period} has {len(all_period)} flows, with average period {np.mean(all_period)} and std {np.std(all_period)}")

Period 1 has 736000 flows, with average period 2000000.0 and std 0.0
Period 2 has 736000 flows, with average period 400000.0 and std 0.0
Period 3 has 736000 flows, with average period 1958218.0706521738 and std 1344960.346210985
Period 4 has 736000 flows, with average period 476963.5869565217 and std 273931.4274320488
Period 5 has 736000 flows, with average period 1887141.3043478262 and std 1391150.8863013366
Period 6 has 736000 flows, with average period 473892.6630434783 and std 270613.2265246511


### 2.3 Check deadline

In [16]:
# Pool the flow deadlines of all instances generated with each deadline option
# and report their count, mean and standard deviation.
for deadline in DEADLINE_SPEC:
    spec_id = ins0_directory.loc[ins0_directory['deadline'] == deadline, 'id'].values
    if len(spec_id) == 0:
        # np.concatenate raises ValueError on an empty list; this happens when
        # generation was cut short (max_ins) before this option was reached.
        print(f"Deadline {deadline} has no generated instances, skipped")
        continue
    # `idd` rather than `id`, so the builtin id() is not shadowed.
    all_deadline = np.concatenate([ins0_task[idd]['deadline'].values for idd in spec_id])
    print(f"Deadline {deadline} has {len(all_deadline)} flows, with average deadline {np.mean(all_deadline)} and std {np.std(all_deadline)}")

Deadline 1 has 883200 flows, with average deadline 1115102.5249094204 and std 1099454.418758253
Deadline 2 has 883200 flows, with average deadline 476389.58423913043 and std 430343.0203285485
Deadline 3 has 883200 flows, with average deadline 232136.19701086957 and std 196622.23397128755
Deadline 4 has 883200 flows, with average deadline 149805.76811594202 and std 159165.13861016821
Deadline 5 has 883200 flows, with average deadline 129566.6875 and std 158966.88125838185


### 2.4 Check num-stream

In [24]:
# For each requested stream count, confirm how many flows the generated task
# tables actually contain (mean and spread over all matching instances).
for num_stream in NUM_STREAM_SPEC:
    matches = ins0_directory['num_thres'] == num_stream
    spec_id = ins0_directory[matches]['id'].values
    all_num_stream = [len(ins0_task[idd]) for idd in spec_id]
    print(f"Num stream {num_stream} has {len(all_num_stream)} problem instances, with average num stream {np.mean(all_num_stream)} and std {np.std(all_num_stream)}")

Num stream 10 has 4800 problem instances, with average num stream 10.0 and std 0.0
Num stream 40 has 4800 problem instances, with average num stream 40.0 and std 0.0
Num stream 70 has 4800 problem instances, with average num stream 70.0 and std 0.0
Num stream 100 has 4800 problem instances, with average num stream 100.0 and std 0.0
Num stream 130 has 4800 problem instances, with average num stream 130.0 and std 0.0
Num stream 160 has 4800 problem instances, with average num stream 160.0 and std 0.0
Num stream 190 has 4800 problem instances, with average num stream 190.0 and std 0.0
Num stream 220 has 4800 problem instances, with average num stream 220.0 and std 0.0


### 2.5 Check number of links for each network scale

In [25]:
# For each network scale used by the generator (same range as in
# grid_generator), report the number of directed links in the topology tables.
for num_sw in range(8, 88, 10):
    matches = ins0_directory['num_sw'] == num_sw
    spec_id = ins0_directory[matches]['id'].values
    all_num_sw = [len(ins0_topo[idd]) for idd in spec_id]
    print(f"Num sw {num_sw} has {len(all_num_sw)} problem instances, with average links {np.mean(all_num_sw)} and std {np.std(all_num_sw)}")

Num sw 8 has 4800 problem instances, with average num sw 32.5 and std 2.179449471770337
Num sw 18 has 4800 problem instances, with average num sw 75.0 and std 6.4031242374328485
Num sw 28 has 4800 problem instances, with average num sw 117.5 and std 10.712142642814275
Num sw 38 has 4800 problem instances, with average num sw 160.0 and std 15.033296378372908
Num sw 48 has 4800 problem instances, with average num sw 202.5 and std 19.35846068260594
Num sw 58 has 4800 problem instances, with average num sw 245.0 and std 23.68543856465402
Num sw 68 has 4800 problem instances, with average num sw 287.5 and std 28.01338965566288
Num sw 78 has 4800 problem instances, with average num sw 330.0 and std 32.341923257592455


### 2.6 Check number of links for each topology

In [27]:
# For each topology code (index into topo_func), report the number of directed
# links in the topology tables across all network scales.
for topo in range(4):
    matches = ins0_directory['topo'] == topo
    spec_id = ins0_directory[matches]['id'].values
    all_topo = [len(ins0_topo[idd]) for idd in spec_id]
    print(f"Topo {topo} has {len(all_topo)} problem instances, with average links {np.mean(all_topo)} and std {np.std(all_topo)}")

Topo 0 has 9600 problem instances, with average links 170.0 and std 91.6515138991168
Topo 1 has 9600 problem instances, with average links 172.0 and std 91.6515138991168
Topo 2 has 9600 problem instances, with average links 172.0 and std 91.6515138991168
Topo 3 has 9600 problem instances, with average links 211.0 and std 114.564392373896
