# Data preprocessing for WDP<sub>bal</sub> model
[(original data)](https://bitbucket.org/m_lombardi/eml-aij-2015-resources/src/5c9a3b409ed057a4b86ad34605786fb476190b32/results/scc_ann1_logs/wld_warm_div20-0-101-ls.txt?at=master&fileviewer=file-view-default)

## 1. Neighbour cores
1) parse **neigh.txt** to a list of lists of neighbour core numbers

In [1]:
with open("neigh.txt", 'r') as f:
    lines = f.readlines()

neighs = [line.split(',')[0].split(': ')[1].split() for line in lines]

# "neighbors of 0: 1 6, number of others: 45\n" --- line[i]
# "neighbors of 0: 1 6"                                         --- .split(',')[0]
# "1 6"                                                                  --- .split(':')[1]
# ['1', '6']                                                              --- neighs[i]

2) construct neighbour lookup function `N(k)` in **neigh.mzn**

In [2]:
ans = "set of int: N(int: k) = \n"

for i in range(len(neighs)):
    ns = ""  # neighbour core numbers
    for n in neighs[i]:
        ns += str(int(n) + 1) + ', '
    ns = ns[:-2]  # trim ',' and ' '
    
    if i == len(neighs) - 1:  # else clause
        ans += "else {{ {} }}\n".format(ns)
        continue
    
    if i != 0:  # add else to elseif cluse
        ans += "else"
    
    ans += "if k == {0} then {{ {1} }}\n".format(i + 1, ns)

ans += "endif;"

with open("neigh.mzn", 'w') as f:
    f.write(ans)

## 2. Solution
1) parse **sol.txt** to a list of integers representing which core to map to

In [3]:
n = 288
m = 48

with open("sol.txt", 'r') as f:
    line = f.readlines() # ["0->16, 1->38, 2->6, ..."]  (only one line))
    
lst = line[0].split(", ") # ["0->16", "1->38", "2->6", ...]

sol_lst = list(map(lambda x : int(x.split("->")[1]) , lst))  # [16, 38, 6, ...]

2) construct data file **data.dzn** and write 2d mapping array `x` to the file

In [4]:
# sol[i][j] is true iif job i is mapped to core j
sol = [["true" if sol_lst[i] == j else "false" for j in range(m)] for i in range(n)]

ans = "x = [|"

for i in range(n):
    for j in range(m):
        ans += sol[i][j].replace("'", '') + ', '  # remove string single quote
    ans += "\n|"

ans = ans[:-1]  # remove the last '|'
ans += "|];\n\n"

with open("data.dzn", 'w') as f:
    f.write(ans)

## 3. CPI values
read cpi values from **cpi.txt** and append the 1d array `cpi` to **data.dzn**

In [5]:
with open("cpi.txt", 'r') as f:
    line = f.readlines() # (only one line)

cpi = line[0].rstrip() # remove '\n'

ans = "cpi = [ {} ];\n\n".format(cpi)

with open("data.dzn", 'a') as f:
    f.write(ans)

## 3. Neural network
<img src="ann1_layout.png" alt="ann1_layout" style="width:300px;" align="left"/>

1) read from **ann1.txt** and split `theta1` and `theat2` as two Pandas DataFrames

In [6]:
import pandas as pd

In [7]:
ann = pd.read_csv("ann1.txt", sep=",", header=None)

ann.columns = ["# of inputs", "# of hidden neurons", "# of output neurons",
                          "x1-a1", "x2-a1", "x3-a1", "x4-a1", "bias-a1", 
                          "x1-a2", "x2-a2", "x3-a2", "x4-a2", "bias-a2", 
                          "a1-h", "a2-h", "bias-h"]

ann[:1]

Unnamed: 0,# of inputs,# of hidden neurons,# of output neurons,x1-a1,x2-a1,x3-a1,x4-a1,bias-a1,x1-a2,x2-a2,x3-a2,x4-a2,bias-a2,a1-h,a2-h,bias-h
0,4,2,1,6.45294,0.017452,1.58234,2.29305,2.12057,3.95113,12.7206,-19.3278,5.58789,9.38298,0.003064,0.003539,0.795949


In [8]:
# remove the first three columns
ann = ann.iloc[:,3:]

# split theta1 and theta2
theta1 = ann.iloc[:,:10]
theta2 = ann.iloc[:,10:]

In [9]:
theta1[47:]

Unnamed: 0,x1-a1,x2-a1,x3-a1,x4-a1,bias-a1,x1-a2,x2-a2,x3-a2,x4-a2,bias-a2
47,1.76683,-0.336959,-0.116305,-0.069643,-1.62311,2.51027,5.65249,0.180271,0.065705,7.97995


In [10]:
theta2[47:]

Unnamed: 0,a1-h,a2-h,bias-h
47,-0.050054,0.457869,0.308072


2) write `theta1` and `theta2` to **data.dzn** file

In [11]:
# flat theta1 for minizinc array3d constructor
theta1_1d = theta1.values.flatten().tolist()  # (a list of float)

ans = "theta1 = array3d(1..m,aRange,xRangeExd, [\n"
for val in theta1_1d:
    ans += "{:8.6f}".format(val) + ', '
ans = ans[:-2] # remove the last ',' and ' '
ans += "]);\n\n"

with open("data.dzn", 'a') as f:
    f.write(ans)

In [12]:
# convert from df to python 2d list
theta2_2d = theta2.values.tolist()

ans = "theta2 = \n["
for lst in theta2_2d:
    ans += "| "
    for val in lst:
        ans += "{:8.6f}".format(val) + ', '
    ans = ans[:-2] # remove the last ',' and ' '
    ans += '\n'
ans = ans[:-1] # remove the last '\n'
ans += "|];\n\n"

with open("data.dzn", 'a') as f:
    f.write(ans)

write constants to **data.dzn**

In [13]:
constants = "n = 288; \nm = 48; \nh = 8; \ninput_layer_size = 4; \nhidden_layer_size = 2; \n\n"

with open("data.dzn", 'a') as f:
    f.write(constants)

write `avgcpi` to **data.dzn** (to verify the solution)

In [14]:
with open("avgcpi.txt", 'r') as f:
    lines = f.readlines()

ans = "avgcpi = ["
for line in lines:
    vals = line.rstrip().split('\t')
    for val in vals:
        ans += val + ', '
ans = ans[:-2]
ans += "];"

with open("data.dzn", 'a') as f:
    f.write(ans)

calculate ann to verify the solution

In [56]:
neigh_idxs  = [list(map(int, i)) for i in neighs]
for idxs in neigh_idxs:
    print(idxs)

[1, 6]
[0, 2, 6, 7]
[1, 3, 7, 8]
[2, 4, 8, 9]
[3, 5, 9, 10]
[4, 10, 11]
[0, 1, 7, 12]
[1, 2, 6, 8, 12, 13]
[2, 3, 7, 9, 13, 14]
[3, 4, 8, 10, 14, 15]
[4, 5, 9, 11, 15, 16]
[5, 10, 16, 17]
[6, 7, 13, 18]
[7, 8, 12, 14, 18, 19]
[8, 9, 13, 15, 19, 20]
[9, 10, 14, 16, 20, 21]
[10, 11, 15, 17, 21, 22]
[11, 16, 22, 23]
[12, 13, 19, 24]
[13, 14, 18, 20, 24, 25]
[14, 15, 19, 21, 25, 26]
[15, 16, 20, 22, 26, 27]
[16, 17, 21, 23, 27, 28]
[17, 22, 28, 29]
[18, 19, 25, 30]
[19, 20, 24, 26, 30, 31]
[20, 21, 25, 27, 31, 32]
[21, 22, 26, 28, 32, 33]
[22, 23, 27, 29, 33, 34]
[23, 28, 34, 35]
[24, 25, 31, 36]
[25, 26, 30, 32, 36, 37]
[26, 27, 31, 33, 37, 38]
[27, 28, 32, 34, 38, 39]
[28, 29, 33, 35, 39, 40]
[29, 34, 40, 41]
[30, 31, 37, 42]
[31, 32, 36, 38, 42, 43]
[32, 33, 37, 39, 43, 44]
[33, 34, 38, 40, 44, 45]
[34, 35, 39, 41, 45, 46]
[35, 40, 46, 47]
[36, 37, 43]
[37, 38, 42, 44]
[38, 39, 43, 45]
[39, 40, 44, 46]
[40, 41, 45, 47]
[41, 46]


In [55]:
cpi_lst = cpi.split(", ")

# for each core, the numbers of jobs mapped on this core
core_jobs = [[] for _ in range(m)] 
for i in range(n):
    core_jobs[sol_lst[i]].append(i)

for i in range(m): # for each core
    print(('0' if i < 10 else '')+str(i) + ": ", end="") # print core number
    
    cpi_idxs = core_jobs[i] # list of job idxs on this core
    assert len(cpi_idxs) == n/m # 6
    
    total = 0
    for idx in neigh_idxs[i]:
        total += float(cpi_lst[idx])
    neigh_avg = total / (float)(len(neigh_idxs[i]))
        
    total = 0
    min_cpi = float(cpi_lst[0])
    for idx in cpi_idxs: # for each job idx on this core
        cur_cpi = float(cpi_lst[idx])
        min_cpi = min(min_cpi, cur_cpi)
        total += cur_cpi
        print(cpi_lst[idx] + ('0' * (8 - len(cpi_lst[idx]))), end=", ")
        
    print("(avg={:8.6f}, min={:8.6f}, neigh={:8.6f})".format(total / 6.0, min_cpi, neigh_avg))

00: 0.500000, 0.896481, 0.500000, 2.142660, 0.779448, 0.653012, (avg=0.911934, min=0.500000, neigh=2.420590)
01: 3.005530, 1.451600, 2.101520, 0.500000, 1.444350, 0.917113, (avg=1.570019, min=0.500000, neigh=2.225650)
02: 1.178100, 0.911030, 2.270200, 1.501400, 2.214300, 2.205120, (avg=1.713358, min=0.911030, neigh=2.357438)
03: 1.537860, 1.799310, 0.899128, 0.817779, 0.732482, 0.617474, (avg=1.067339, min=0.617474, neigh=1.788155)
04: 1.323270, 1.835150, 2.334760, 1.054050, 3.224610, 0.509979, (avg=1.713637, min=0.509979, neigh=1.660322)
05: 2.619680, 1.254840, 2.690010, 5.499210, 0.500000, 1.331590, (avg=2.315888, min=0.500000, neigh=1.908073)
06: 2.334890, 0.964015, 2.483130, 0.713665, 1.633940, 1.659520, (avg=1.631527, min=0.713665, neigh=2.316640)
07: 1.980320, 5.512690, 2.138800, 2.161600, 2.306450, 2.092480, (avg=2.698723, min=1.980320, neigh=2.808528)
08: 1.878710, 4.039730, 4.526440, 1.062100, 4.196510, 1.216800, (avg=2.820048, min=1.062100, neigh=2.414002)
09: 1.953950, 10.04

In [59]:
res = """
min cpi = 0.5
max cpi = 21.1907

avg cpi map:
	0.91193, 1.57002, 1.71336, 1.06734, 1.71364, 2.31589
	1.63153, 2.69872, 2.82005, 3.63384, 4.09801, 1.46403
	0.90800, 4.06325, 4.20127, 4.62407, 2.75180, 1.25637
	0.86878, 4.70876, 3.95169, 5.77777, 4.48564, 1.87435
	1.85356, 5.46415, 4.20439, 4.50503, 3.60117, 1.34399
	1.67057, 3.60128, 7.94994, 4.18464, 4.02550, 0.80922
	1.96845, 6.57037, 2.75949, 3.84514, 3.64512, 1.07450
	1.07511, 4.53770, 3.32360, 2.54902, 3.41280, 0.91516
	
min cpi:
	0.50000, 0.50000, 0.91103, 0.61747, 0.50998, 0.50000
	0.71366, 1.98032, 1.06210, 1.95395, 2.09786, 1.21575
	0.50000, 2.11795, 2.19001, 1.88191, 2.31026, 0.50000
	0.50000, 1.70830, 1.84052, 1.62044, 1.89826, 0.63135
	0.50000, 1.39503, 1.98405, 1.66125, 2.57248, 0.50000
	0.50000, 2.35171, 2.49331, 2.17840, 1.81811, 0.50895
	1.40994, 0.50000, 2.21138, 2.56038, 1.96670, 0.55368
	0.50000, 1.10261, 1.59411, 2.29096, 1.20222, 0.50000
	
neighbor avg cpi:
	1.60078, 1.73889, 2.03903, 2.47022, 2.77877, 2.42523
	1.52217, 2.11770, 2.89630, 3.08740, 2.75054, 2.60552
	2.31557, 2.70093, 3.96694, 4.06906, 3.61765, 2.64396
	2.88339, 3.40045, 4.83007, 4.08710, 3.29441, 2.67179
	3.17806, 3.33171, 5.20831, 5.03392, 3.40319, 2.57756
	3.22186, 4.63798, 4.30420, 4.44771, 2.90488, 2.52228
	3.22933, 3.64866, 5.06857, 3.41456, 2.61936, 2.19558
	4.35884, 3.43214, 3.42284, 3.55667, 2.04595, 2.24365
	
other avg cpi:
	3.10859, 3.15057, 3.11931, 3.09423, 3.05050, 3.05474
	3.16930, 3.13647, 3.01957, 2.97175, 3.00973, 3.07242
	3.11232, 3.01783, 2.82920, 2.80394, 2.91567, 3.07367
	3.06041, 2.89972, 2.70897, 2.77316, 2.92068, 3.05671
	3.01010, 2.89136, 2.64746, 2.66564, 2.92633, 3.07781
	3.01028, 2.74563, 2.68841, 2.75925, 2.98891, 3.09539
	3.00266, 2.81799, 2.70315, 2.91872, 3.03997, 3.11961
	2.95110, 2.92404, 2.95314, 2.95871, 3.07915, 3.07995
	***********************
efficiency:
	0.66546, 0.66606, 0.66979, 0.66599, 0.29493, 0.68024
	0.66401, 0.66818, 0.66814, 0.67021, 0.66506, 0.66727
	-0.04900, 0.66852, 0.66947, -0.99980, 0.66660, 0.66765
	0.69087, 0.66327, 0.67032, 0.66495, 0.67048, 0.66726
	0.66590, 0.66927, 0.63690, 0.80415, 0.67002, 0.66619
	0.66941, 0.66928, 0.66785, 0.66425, 0.67060, 0.66642
	0.66702, 0.67027, -0.99779, 0.67304, 0.66845, 0.66708
	0.97427, 0.66263, 0.67009, 0.66475, 0.66771, 0.66098
	
worst-case core efficiency: -0.999800686676253
"""

todo:
 - fix neigh (not avg of cpi, but avg of avgcpi, ,so use dt to store them, instead of just print)
 - add other
 - add forward propagation