In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import random

from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Normalization
from tensorflow.keras.models import Model
from tensorflow.keras import utils, callbacks
from tensorflow.keras import regularizers

import sys
!pip install pyyaml h5py  # Required to save models in HDF5 format

# Set random seeds for reproducibility.
# Python's `random` module is imported alongside NumPy, so both generators
# are seeded with the same value.
rs = 42
random.seed(rs)
np.random.seed(rs)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Mount Google Drive into the Colab filesystem. The data, checkpoint and
# output paths used by the other cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# CSV files to process, one per instance, each named "<instance>_data.csv".
# The order matters: the other cells pair `input[i]` / `output[i]` with
# `files[i]` by position.
files = [
  stem + "_data.csv"
  for stem in (
    "kroB150",
    "si175",
    "rat195",
    "gr229",
    "pr299",
    "gr431",
    "d493",
    "att532",
    "si535",
    "rat575",
    "gr666",
    "pr1002",
    "u1060",
    "vm1084",
    "pcb1173",
    "rl1304",
    "rl1323",
    "nrw1379",
    "vm1748",
    "u1817",
    "rl1889",
    #"d2103",
    "u2152",
    "pcb3038",
    "fnl4461",
  )
]

In [None]:
def preproces(data):
  """Convert a candidate table into fixed-width network inputs and targets.

  `data` is a DataFrame with numeric 'Cost', 'Alpha' and 'Q-Value' columns.
  Consecutive rows form one group of candidates; a row whose 'Alpha' is NaN
  terminates the group. Every group becomes one sample:

    * xs[k] holds [cost, alpha, cost, alpha, ...] for the group's candidates,
    * ys[k] holds the matching Q-values.

  Only the first 5 candidates of a group are copied; every remaining slot
  (there are 8 slots per sample) is zero.

  Prints the number of groups, followed by the number of groups that had
  more than 8 candidates.

  Returns:
    (xs, ys): float64 arrays of shape (n_groups, 16) and (n_groups, 8).

  Raises:
    IndexError: if rows follow the last terminator row, i.e. the final
      group is not closed by a NaN 'Alpha' row.
  """
  n_slots = 8  # candidate slots per sample -> 16 input and 8 target columns
  # NOTE(review): only 5 of the 8 slots are ever filled, exactly as before.
  # Confirm whether this cap is intentional before changing it.
  n_used = 5

  # Pull the columns out once; per-row DataFrame lookups are very slow.
  cost = data['Cost'].to_numpy(dtype=np.float64)
  alpha = data['Alpha'].to_numpy(dtype=np.float64)
  q_value = data['Q-Value'].to_numpy(dtype=np.float64)
  size = len(alpha)

  # Each NaN in 'Alpha' closes one group, so their count is the sample count.
  group_ends = np.flatnonzero(np.isnan(alpha))
  am = len(group_ends)
  print(am)

  if size > 0 and (am == 0 or group_ends[-1] != size - 1):
    raise IndexError(
        "the last candidate group is not terminated by a row with NaN 'Alpha'")

  # Pad with zeros directly. Filling with -1 and replacing every -1 by 0
  # afterwards would also wipe out genuine values equal to -1 (Q-values can
  # be negative).
  xs = np.zeros((am, 2 * n_slots), dtype=np.float64)
  ys = np.zeros((am, n_slots), dtype=np.float64)

  too_much_candidates = 0
  start = 0

  for k, end in enumerate(group_ends):
    n_candidates = end - start
    if n_candidates > n_slots:
      too_much_candidates += 1

    used = min(n_used, n_candidates)
    # Interleave as [cost, alpha, cost, alpha, ...]
    xs[k, 0:2 * used:2] = cost[start:start + used]
    xs[k, 1:2 * used:2] = alpha[start:start + used]
    ys[k, :used] = q_value[start:start + used]

    # Skip the terminator row itself
    start = end + 1

  print("   ", too_much_candidates)
  return xs, ys

In [None]:
# Load every input file and turn it into an (x, y) pair of network inputs
# and targets.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the other cells refer to it.
path = "/content/drive/MyDrive/Unibo/comb_opt_project/input_nn/"

input = []

for filename in files:
  df = pd.read_csv(path + filename)
  x, y = preproces(df)
  input.append((x, y))

# Keep an independent snapshot of the freshly loaded data. The arrays
# themselves are copied: a plain list copy would share them with `input`,
# so any in-place change to `input` would silently alter the snapshot too.
input_copy = [(x.copy(), y.copy()) for x, y in input]

150
    0
175
    0
195
    0
229
    0
299
    0
431
    0
493
    0
532
    0
535
    0
575
    0
666
    0
1002
    0
1060
    0
1084
    0
1173
    0
1304
    0
1323
    0
1379
    0
1748
    0
1817
    0
1889
    0
2152
    0
3038
    0
4461
    0


In [None]:
# Restore the freshly loaded data from the snapshot. The arrays are copied
# (not just the list) so that in-place changes made to `input` afterwards can
# never leak back into `input_copy`.
input = [(x.copy(), y.copy()) for x, y in input_copy]

In [None]:
# Some Q-values in the input are so large that they are stored as inf.
# Swap every infinite entry for a huge finite number (about 1.7e308, a little
# below the float64 maximum) so that the arithmetic in later cells stays finite.
# NOTE(review): np.isinf matches -inf as well, so a negative infinity would
# also be replaced by this large positive value.
for i, (x, y) in enumerate(input):
  y = y.copy()
  y[np.isinf(y)] = 1.6976931348623157e+308
  input[i] = (x, y)

In [None]:
# Min-max scale every sample (row) to the range [0, 1], separately for its
# cost slots (even columns of x), its alpha slots (odd columns of x) and its
# Q-values (y).

def _minmax_rows(values, constant_fill):
  """Min-max scale each row of a 2-D array to [0, 1].

  Every row has its minimum subtracted and is then divided by its new
  maximum. A row whose scaled result is NaN in every position (this is what
  happens when all its entries are equal, because the division is 0/0) is
  set to `constant_fill` instead.

  Returns a new array; `values` itself is not modified.
  """
  shifted = values - values.min(axis=1, keepdims=True)
  span = shifted.max(axis=1, keepdims=True)
  # The 0/0 for constant rows is expected and patched right below, so the
  # RuntimeWarning it would raise is only noise.
  with np.errstate(divide='ignore', invalid='ignore'):
    scaled = shifted / span
  scaled[np.isnan(scaled).all(axis=1)] = constant_fill
  return scaled


print(len(input))

for i, (x_raw, y_raw) in enumerate(input):

  # Build new arrays instead of overwriting the loaded ones in place, so that
  # other references to the unscaled data stay intact.
  x = np.empty_like(x_raw)

  # If all costs (or all alphas) of a sample are equal, they become 0.
  x[:, 0::2] = _minmax_rows(x_raw[:, 0::2], 0)
  x[:, 1::2] = _minmax_rows(x_raw[:, 1::2], 0)

  # Q-values can be negative; subtracting the row minimum first makes every
  # value >= 0 before dividing by the maximum.
  # If all Q-values of a sample are equal, they become 1.
  y = _minmax_rows(y_raw, 1)

  print(i)
  input[i] = (x, y)

24
0
1
2
3
4
5
6
7
8
9
10
11
12


  x[:,[1,3,5,7,9,11,13,15]] = (x[:,[1,3,5,7,9,11,13,15]] - mins.reshape((length,1))) / maxs.reshape((length,1))


13
14
15
16
17
18
19
20
21
22
23


In [None]:
# Sanity check: no network input or target may be NaN or infinite.
# For the first offending sample, print its inputs, its targets, the largest
# target and its row index, then abort.
for file_idx, (x, y) in enumerate(input):

  rows_ok = np.isfinite(x).all(axis=1) & np.isfinite(y).all(axis=1)
  bad_rows = np.flatnonzero(~rows_ok)

  if bad_rows.size > 0:
    row = bad_rows[0]
    print(x[row, :])
    print(y[row, :])
    print(max(y[row, :]))
    print(row)
    raise AssertionError(
        "non-finite value in dataset {} at sample {}".format(file_idx, row))

In [None]:
# Fully connected network: 16 inputs, nine ReLU hidden layers whose width
# grows from 100 to 600 and shrinks back to 100, and 8 sigmoid outputs.
# Every hidden layer gets its own L2 kernel regularizer with the same factor.
xin = Input(shape=(16,))
x = xin
for units in (100, 200, 300, 400, 600, 400, 300, 200, 100):
  x = Dense(units, activation='relu', kernel_regularizer=regularizers.L2(0.000001))(x)
out = Dense(8, activation='sigmoid')(x)

# Experiment notes:
# with high alpha and cost, low q: train 0.1, test 0.2, 5 files, 100 epochs
# with low alpha, cost and q: 0.0778 train, 0.21 test, 5 files, 100 epochs
# bigger model? :) nope

model = Model(inputs=xin, outputs=out)

In [None]:
# Print the layer overview, then configure training: Adam optimizer with
# mean squared error as both the loss and the reported metric.
model.summary()
model.compile(optimizer='adam',loss='mse',metrics=['mse'])

# Checkpoint callback: stores only the weights (not the full model) at
# `checkpoint_filepath`, and only when the monitored 'mse' improves.
# NOTE(review): the callback is not passed to any fit() call in the cells
# visible here; only `checkpoint_filepath` is reused, to load weights.
checkpoint_filepath = '/content/drive/MyDrive/Unibo/comb_opt_project/Checkpoint/'
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='mse',
    save_best_only=True)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 16)]              0         
                                                                 
 dense_8 (Dense)             (None, 100)               1700      
                                                                 
 dense_9 (Dense)             (None, 200)               20200     
                                                                 
 dense_10 (Dense)            (None, 300)               60300     
                                                                 
 dense_11 (Dense)            (None, 400)               120400    
                                                                 
 dense_12 (Dense)            (None, 600)               240600    
                                                                 
 dense_13 (Dense)            (None, 400)               2404

In [None]:
# Restore the network weights from the checkpoint stored at
# `checkpoint_filepath`.
model.load_weights(checkpoint_filepath)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f414005ba00>

In [None]:
# Run the network on the inputs of every file, in file order:
# output[i] holds the predictions for input[i].
output = [model.predict(sample[0]) for sample in input]



In [None]:
# Show a compact summary instead of dumping every prediction array:
# one line per file with the shape of its predictions, then a short preview.
for name, preds in zip(files, output):
  print(name, preds.shape)

if output:
  print(output[0][:3])

[array([[0.4308196 , 0.3149786 , 0.36475253, ..., 0.5597905 , 0.5615306 ,
        0.6089149 ],
       [0.6201098 , 0.32097182, 0.35392857, ..., 0.41941312, 0.569819  ,
        0.5460937 ],
       [0.6785147 , 0.34389517, 0.37503335, ..., 0.39673948, 0.59588075,
        0.51632136],
       ...,
       [0.61399126, 0.32120666, 0.35236895, ..., 0.44078308, 0.5666465 ,
        0.53625643],
       [0.37891418, 0.36308673, 0.39700013, ..., 0.59815574, 0.5999956 ,
        0.6632221 ],
       [0.38985616, 0.7195515 , 0.40791005, ..., 0.41304457, 0.48540768,
        0.59850997]], dtype=float32), array([[0.37634695, 0.36178115, 0.39548725, ..., 0.5901088 , 0.6073873 ,
        0.6638131 ],
       [0.41745085, 0.32905266, 0.37013394, ..., 0.5722209 , 0.56023306,
        0.637476  ],
       [0.37139505, 0.35998538, 0.3914508 , ..., 0.5649713 , 0.62255275,
        0.6727863 ],
       ...,
       [0.37468246, 0.36152557, 0.39359987, ..., 0.5772541 , 0.6129784 ,
        0.67057294],
       [0.36754778

In [None]:
# We will have to glue input together with output
# We have to handle > 1 situations

# Write the predictions of every file as CSV: one line per sample holding its
# 8 predicted values, comma-separated. The result is stored under the same
# file name as the input, in the out_nn folder.
path = "/content/drive/MyDrive/Unibo/comb_opt_project/out_nn/"
for i in range(len(files)):
  with open(path + files[i], 'w') as writefile:
    for row in output[i]:
      writefile.write(','.join(str(row[col]) for col in range(8)) + '\n')
