# Data preprocessing for WDP<sub>bal</sub> model
[(original data)](https://bitbucket.org/m_lombardi/eml-aij-2015-resources/src/5c9a3b409ed057a4b86ad34605786fb476190b32/results/scc_ann1_logs/wld_warm_div20-0-101-ls.txt?at=master&fileviewer=file-view-default)

## 1. Neighbour cores
1) parse **neigh.txt** to a list of lists of neighbour core numbers

In [1]:
# Read neigh.txt and collect, for every line, the list of neighbour core
# numbers (kept as strings) into `neighs`.
#
# Worked example for one line:
#   "neighbors of 0: 1 6, number of others: 45\n"   --- the raw line
#   "neighbors of 0: 1 6"                            --- .split(',')[0]
#   "1 6"                                            --- .split(': ')[1]
#   ['1', '6']                                       --- .split()  -> one entry of neighs
with open("neigh.txt", 'r') as f:
    lines = f.readlines()

neighs = []
for raw in lines:
    before_comma = raw.split(',')[0]          # drop ", number of others: ..."
    core_numbers = before_comma.split(': ')[1]  # drop the "neighbors of k: " label
    neighs.append(core_numbers.split())       # whitespace-separated core numbers

2) construct neighbour lookup function `N(k)` in **neigh.mzn**

In [2]:
# Generate neigh.mzn: a MiniZinc lookup `N(k)` returning the neighbour set of
# core k as an if / elseif / else chain, one branch per entry of `neighs`.
#
# Core numbers are shifted by one: both the branch index (position in `neighs`
# + 1) and every neighbour number read from the file (+ 1).
#
# The loop variables are deliberately NOT called `n`: the notebook uses the
# global `n` for the job count, and re-running this cell must not clobber it.

# One comma-separated member list per core, e.g. ['1', '6'] -> "2, 7".
neigh_sets = [
    ", ".join(str(int(core) + 1) for core in cores)
    for cores in neighs
]

if len(neigh_sets) < 2:
    # With zero or one core there is nothing to branch on; an if-chain would
    # degenerate to a dangling "else ... endif;", so emit the set directly.
    body = "{{ {} }};".format(neigh_sets[0] if neigh_sets else "")
else:
    branches = []
    for k, members in enumerate(neigh_sets[:-1], start=1):
        keyword = "if" if k == 1 else "elseif"
        branches.append("{} k == {} then {{ {} }}\n".format(keyword, k, members))
    # The last core is the catch-all branch.
    branches.append("else {{ {} }}\n".format(neigh_sets[-1]))
    body = "".join(branches) + "endif;"

ans = "set of int: N(int: k) = \n" + body

with open("neigh.mzn", 'w') as f:
    f.write(ans)

## 2. Solution
1) parse **sol.txt** into a list of integers, where entry *i* is the core that job *i* is mapped to

In [3]:
# Problem dimensions used by the cells below: the mapping matrix built from
# this solution has n rows (jobs) and m columns (cores).
n = 288
m = 48

# sol.txt holds a single line of the form "0->16, 1->38, 2->6, ..."
with open("sol.txt", 'r') as f:
    line = f.readlines()

# ["0->16", "1->38", "2->6", ...]
lst = line[0].split(", ")

# Keep only the right-hand side of every "job->core" pair: [16, 38, 6, ...]
sol_lst = [int(pair.split("->")[1]) for pair in lst]

2) construct data file **data.dzn** and write 2d mapping array `x` to the file

In [4]:
# sol[i][j] is the MiniZinc literal "true" iff job i is mapped to core j,
# and "false" otherwise.
sol = []
for job in range(n):
    target_core = sol_lst[job]
    sol.append(["true" if core == target_core else "false" for core in range(m)])

# Serialise as a MiniZinc 2d array literal: every value is followed by ', '
# and every row by a newline plus the '|' that opens the next row.
rows_text = "".join(
    "".join(cell + ', ' for cell in row) + "\n|"
    for row in sol
)

ans = ("x = [|" + rows_text)[:-1]  # drop the '|' emitted after the final row
ans += "|];\n\n"

# 'w' mode: this cell starts data.dzn from scratch.
with open("data.dzn", 'w') as f:
    f.write(ans)

## 3. CPI values
read cpi values from **cpi.txt** and append the 1d array `cpi` to **data.dzn**

In [5]:
# cpi.txt holds all values on a single line; its content is copied verbatim
# between the brackets of the MiniZinc array literal.
with open("cpi.txt", 'r') as f:
    line = f.readlines()

cpi = line[0].rstrip()  # strip the trailing newline / whitespace
ans = "cpi = [ " + cpi + " ];\n\n"

# 'a' mode: appended after what is already in data.dzn.
with open("data.dzn", 'a') as f:
    f.write(ans)

## 4. Neural network
<img src="ann1_layout.png" alt="ann1_layout" style="width:300px;" align="left"/>

1) read from **ann1.txt** and split `theta1` and `theta2` into two Pandas DataFrames

In [6]:
import pandas as pd

In [7]:
# Load the header-less, comma-separated network description.
ann = pd.read_csv("ann1.txt", sep=",", header=None)

# Column layout: three size fields, then for each hidden neuron a1, a2 its
# four input weights plus a bias, then the output neuron's two weights plus bias.
ann_columns = ["# of inputs", "# of hidden neurons", "# of output neurons"]
for neuron in (1, 2):
    ann_columns += ["x{}-a{}".format(inp, neuron) for inp in range(1, 5)]
    ann_columns.append("bias-a{}".format(neuron))
ann_columns += ["a1-h", "a2-h", "bias-h"]

ann.columns = ann_columns

# Show the first row as a sanity check.
ann.head(1)

Unnamed: 0,# of inputs,# of hidden neurons,# of output neurons,x1-a1,x2-a1,x3-a1,x4-a1,bias-a1,x1-a2,x2-a2,x3-a2,x4-a2,bias-a2,a1-h,a2-h,bias-h
0,4,2,1,6.45294,0.017452,1.58234,2.29305,2.12057,3.95113,12.7206,-19.3278,5.58789,9.38298,0.003064,0.003539,0.795949


In [8]:
# Drop the three leading size columns and split the remaining weights.
#
# Slices are label-based (.loc, inclusive at both ends) rather than positional:
# `ann = ann.iloc[:, 3:]` would strip three MORE columns every time this cell
# is re-run, silently corrupting theta1/theta2. Selecting by column label gives
# the same result on the first run and on every re-run.
ann = ann.loc[:, "x1-a1":]

# theta1: input -> hidden weights (x1..x4 and bias, for a1 then a2; 10 columns)
theta1 = ann.loc[:, "x1-a1":"bias-a2"]
# theta2: hidden -> output weights (a1, a2 and bias; 3 columns)
theta2 = ann.loc[:, "a1-h":"bias-h"]

In [9]:
theta1.iloc[47:]  # rows from position 47 onwards

Unnamed: 0,x1-a1,x2-a1,x3-a1,x4-a1,bias-a1,x1-a2,x2-a2,x3-a2,x4-a2,bias-a2
47,1.76683,-0.336959,-0.116305,-0.069643,-1.62311,2.51027,5.65249,0.180271,0.065705,7.97995


In [10]:
theta2.iloc[47:]  # rows from position 47 onwards

Unnamed: 0,a1-h,a2-h,bias-h
47,-0.050054,0.457869,0.308072


2) write `theta1` and `theta2` to **data.dzn** file

In [11]:
# MiniZinc's array3d constructor takes a flat list of values, so theta1 is
# flattened row by row into a plain list of floats.
# NOTE(review): aRange / xRangeExd are defined in the model, not here —
# confirm their sizes match the 10 columns per row of theta1.
theta1_1d = theta1.values.ravel().tolist()

theta1_header = "theta1 = array3d(1..m,aRange,xRangeExd, [\n"
theta1_values = "".join("{:8.6f}, ".format(val) for val in theta1_1d)

ans = (theta1_header + theta1_values)[:-2]  # drop the ', ' after the last value
ans += "]);\n\n"

with open("data.dzn", 'a') as f:
    f.write(ans)

In [12]:
# theta2 as a plain list of rows (each row a list of floats).
theta2_2d = theta2.values.tolist()

# Every row becomes "| v1, v2, v3\n": values are written with a trailing ', '
# which is then trimmed from the end of the row before the newline is added.
theta2_rows = [
    ("| " + "".join("{:8.6f}, ".format(val) for val in row))[:-2] + '\n'
    for row in theta2_2d
]

ans = ("theta2 = \n[" + "".join(theta2_rows))[:-1]  # drop the final '\n'
ans += "|];\n\n"

with open("data.dzn", 'a') as f:
    f.write(ans)

write constants to **data.dzn**

In [13]:
# Scalar parameters of the model, written one "name = value; " per line and
# followed by a blank line.
constant_values = [
    ("n", 288),
    ("m", 48),
    ("h", 8),
    ("input_layer_size", 4),
    ("hidden_layer_size", 2),
]
constants = "".join(
    "{} = {}; \n".format(name, value) for name, value in constant_values
) + "\n"

with open("data.dzn", 'a') as f:
    f.write(constants)

write `avgcpi` to **data.dzn**

In [14]:
# Flatten the tab-separated values of avgcpi.txt (all lines, in file order)
# into a single MiniZinc 1d array literal and append it to data.dzn.
with open("avgcpi.txt", 'r') as f:
    lines = f.readlines()

# Skip empty tokens: a blank line or a doubled tab would otherwise put an
# empty element (", ,") into the array, which is not valid MiniZinc data.
# Tokens themselves are written unchanged.
avgcpi_vals = [
    token
    for row in lines
    for token in row.rstrip().split('\t')
    if token.strip()
]

# join() also keeps the opening '[' intact when the file has no values,
# yielding "avgcpi = [];" instead of a truncated header.
ans = "avgcpi = [" + ", ".join(avgcpi_vals) + "];"

with open("data.dzn", 'a') as f:
    f.write(ans)

In [24]:
# Build (and display, without writing to any file) a `cpi` array literal from
# only the first 168 space-separated tokens of cpi.txt.
# NOTE(review): 168 is hard-coded and its meaning is not visible here — confirm.
with open("cpi.txt", 'r') as f:
    line = f.readlines()

cpi = line[0].rstrip()  # single-line file; strip the trailing newline

lst = cpi.split(' ')

# Tokens are concatenated exactly as they appear in the file; no separator is
# inserted between them.
ans = "cpi = [ " + "".join(lst[i] for i in range(168)) + "];"

ans

'cpi = [ 2.32528,2.69661,2.33489,2.61968,1.39219,0.631348,2.14457,2.09786,2.0156,1.40994,1.98032,2.35171,2.14681,5.51269,0.508952,0.535891,0.5,2.79566,3.22107,2.76887,0.692256,1.46167,0.850599,3.00553,2.23892,1.87871,1.2304,2.62408,1.53786,2.49003,2.48813,1.34152,1.20766,2.56005,2.63035,1.77229,2.66158,1.90349,1.20222,0.5,2.9669,2.76581,1.98381,1.59411,2.21138,1.7083,2.1388,2.3636,2.19001,0.5,4.03973,1.23369,3.32551,0.711065,3.24643,1.1781,4.52644,1.21575,0.964015,0.869259,0.938447,3.922,0.843693,1.07256,1.25476,3.96854,1.39503,3.56897,0.5,2.38786,2.90278,1.24536,2.93462,0.896481,1.06807,2.45856,1.87207,2.7702,1.69628,2.78384,1.12446,0.5,4.4353,1.46012,2.38702,1.72511,2.41288,0.580633,2.60767,2.28668,3.58483,1.62044,2.45795,1.4516,1.25484,1.63034,0.91103,2.63715,3.19185,2.24391,1.0621,1.95395,1.32327,1.66125,1.74398,2.48313,2.38041,2.40796,2.4193,1.97648,3.89831,1.45677,1.43626,0.812887,1.06923,2.69108,2.69001,2.40738,0.5,2.6423,2.42072,2.2702,2.33269,1.11317,2.14266,1.72055,1.81811,2.