In [1]:
# Import
from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)
# Eager execution has to be switched on explicitly in TF 1.x. TF 2.x is already
# eager and no longer exposes tf.enable_eager_execution, so only call it when
# eager mode is not active yet (this also makes re-running the cell harmless).
if not tf.executing_eagerly():
    tf.enable_eager_execution()

1.13.1


In [2]:
# Load the measurement data: a comma-separated text file parsed into a NumPy array
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere
filename = 'C:\\Users\\deniz\\Desktop\\Thesis of ML for AutoPas\\Data\\Batch3\\b3e1-6.txt'
arr = np.genfromtxt(filename, delimiter=',')
# (rows, columns) of the loaded table
print(arr.shape)

(878, 30)


In [3]:
# Number of leading columns in every row that hold input parameters.
# All remaining columns are treated as one class each: the cells below take the
# argmin over them to obtain the label of a row.
parameter_count = 7

In [4]:
# Work on a real copy: a plain `data = arr` would only alias the loaded array,
# so the shuffle (and the in-place normalization further down) would silently
# modify `arr` as well
data = arr.copy()

# Shuffle the rows, then split them: the first 80% of the shuffled rows are held
# out as test data and only the remaining 20% are used for training
np.random.shuffle(data)
test_size = int(np.ceil(data.shape[0] * 4 / 5))

# Parameters are the first `parameter_count` columns; the label of a row is the
# index of the smallest value among the remaining (class) columns
train_params = data[test_size:, 0:parameter_count]
train_labels = np.argmin(data[test_size:, parameter_count:], 1).astype(int)
test_params = data[:test_size, 0:parameter_count]
test_labels = np.argmin(data[:test_size, parameter_count:], 1).astype(int)


# Possible normalization functions
def normalize01(array):
    """Scale a 1-D array in place so that its largest element becomes 1.

    Every element is divided by the maximum of ``array``. The result is written
    back into ``array`` and nothing is returned, so passing a view of a larger
    array (e.g. one row) updates that larger array too.

    Args:
        array: non-empty one-dimensional NumPy array; modified in place.

    Raises:
        ValueError: if ``array`` is empty (raised by ``np.max``).
    """
    divisor = np.max(array)
    if divisor == 0:
        # Dividing by zero would fill the array with NaN/inf; leave it untouched
        return
    # Vectorized replacement for the element-by-element Python loop
    array[:] = array / divisor
        
def normalize02(array):
    """Min-max scale a 1-D array in place onto the range [0, 1].

    Each element becomes ``(x - min) / (max - min)``, so the smallest element
    maps to 0 and the largest to 1. The result is written back into ``array``
    and nothing is returned, so passing a view of a larger array (e.g. one row)
    updates that larger array too.

    Args:
        array: non-empty one-dimensional NumPy array; modified in place.

    Raises:
        ValueError: if ``array`` is empty (raised by ``np.min``).
    """
    sub = np.min(array)
    # Divide by the value range, not by the maximum: only then does the largest
    # element end up at exactly 1 when the minimum is not 0
    span = np.max(array) - sub
    if span == 0:
        # Constant input: every element equals the minimum, so all map to 0
        array[:] = 0
        return
    array[:] = (array - sub) / span
        
# Rescale both splits in place, one sample (row) at a time, with normalize02
# NOTE(review): each row is scaled by its own min/max taken across its different
# parameters, not per parameter column -- confirm this is the intended scheme
for split in (train_params, test_params):
    for sample in split:
        normalize02(sample)
print(train_params[0])

[2.44140625e-05 7.32421875e-04 0.00000000e+00 0.00000000e+00
 2.44140625e-04 1.00000000e+00 3.70371094e-02]


In [5]:
# How often is each class the label, counted over train and test together?
all_labels = np.concatenate((train_labels, test_labels))
dist = np.bincount(all_labels)
print(dist)
# Index of the most frequent class, followed by its share of all samples
print(dist.argmax())
print(dist.max() / data.shape[0])

[281   9  54   0  33   0   0  43   0   0   0   0   5   3   0   0   0   1
   0   0 409   0  40]
20
0.46583143507972663


In [6]:
# Model: one ReLU hidden layer with `parameter_count` units, followed by a
# softmax layer with one unit per class column
model = keras.Sequential()
model.add(keras.layers.Dense(parameter_count, activation=tf.nn.relu))
model.add(keras.layers.Dense(arr.shape[1] - parameter_count, activation=tf.nn.softmax))

# NOTE(review): 0.25 is far above the Keras default Adam learning rate of
# 0.001 -- confirm this value is intended
opt = keras.optimizers.Adam(lr=0.25)

# Labels are integer class indices, hence the sparse cross-entropy loss
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [7]:
# Number of class columns, i.e. the width of the model's softmax output layer
print(arr.shape[1] - parameter_count)

23


In [8]:
# Train the network on the training split for 50 epochs
model.fit(x=train_params, y=train_labels, epochs=50)

Instructions for updating:
Colocations handled automatically by placer.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x20ead6fd400>

In [9]:
# Class probabilities for every test sample
predictions = model.predict(x=test_params)
# evaluate() yields the loss followed by the compiled metric (accuracy)
test_loss, test_acc = model.evaluate(x=test_params, y=test_labels)
print('Test accuracy:', test_acc)

Test accuracy: 0.5078236


In [10]:
# For every test sample print the true best class, the two classes the model
# ranks highest, and the probability it assigns to each of those two
best = np.argsort(predictions)  # per row: class indices by ascending probability
print(' Best  Guess1  Guess2  Certainty1      Certainty2')
for truth, ranking, probs in zip(test_labels, best, predictions):
    first, second = ranking[-1], ranking[-2]
    print(' ', truth, '\t', first, '\t', second, '\t',
          "{:.2%}".format(probs[first]), '\t', "{:.2%}".format(probs[second]))

 Best  Guess1  Guess2  Certainty1      Certainty2
  0 	 20 	 0 	 48.67% 	 47.93%
  20 	 20 	 2 	 44.23% 	 18.07%
  20 	 20 	 0 	 49.35% 	 15.56%
  20 	 0 	 20 	 48.34% 	 47.94%
  20 	 20 	 0 	 48.48% 	 48.13%
  0 	 0 	 20 	 48.21% 	 46.82%
  20 	 0 	 20 	 48.33% 	 48.22%
  0 	 0 	 20 	 48.24% 	 46.96%
  20 	 20 	 2 	 44.52% 	 17.82%
  2 	 20 	 2 	 43.78% 	 18.44%
  20 	 20 	 0 	 53.60% 	 19.41%
  20 	 0 	 20 	 48.24% 	 46.96%
  20 	 20 	 0 	 48.70% 	 47.90%
  0 	 20 	 0 	 48.74% 	 47.86%
  20 	 20 	 2 	 47.34% 	 15.51%
  20 	 20 	 2 	 44.54% 	 17.80%
  0 	 20 	 0 	 55.25% 	 21.73%
  0 	 0 	 20 	 48.22% 	 46.90%
  7 	 22 	 13 	 84.80% 	 8.95%
  20 	 20 	 0 	 49.86% 	 15.98%
  0 	 0 	 20 	 48.12% 	 46.51%
  1 	 20 	 2 	 43.46% 	 18.69%
  20 	 20 	 2 	 44.66% 	 17.70%
  0 	 0 	 20 	 48.22% 	 46.88%
  20 	 20 	 0 	 48.57% 	 48.03%
  22 	 22 	 13 	 84.80% 	 8.95%
  0 	 20 	 0 	 48.59% 	 48.03%
  20 	 20 	 0 	 53.11% 	 18.83%
  0 	 20 	 0 	 53.25% 	 19.02%
  0 	 20 	 0 	 48.65% 	 47.96%
  20

  0 	 20 	 0 	 48.37% 	 48.26%
  20 	 0 	 20 	 48.10% 	 46.44%
  20 	 0 	 20 	 48.33% 	 48.15%
  20 	 0 	 20 	 48.34% 	 47.96%
  20 	 0 	 20 	 48.33% 	 47.60%
  20 	 0 	 20 	 48.34% 	 48.07%
  4 	 20 	 2 	 46.85% 	 15.90%
  20 	 0 	 20 	 48.24% 	 46.94%
  0 	 20 	 0 	 57.52% 	 26.67%
  20 	 20 	 0 	 55.10% 	 21.80%
  20 	 20 	 0 	 53.18% 	 18.92%
  0 	 0 	 20 	 48.23% 	 46.91%
  22 	 22 	 13 	 84.80% 	 8.95%
  12 	 20 	 2 	 43.07% 	 19.01%
  0 	 20 	 2 	 45.81% 	 16.71%
  4 	 20 	 2 	 46.53% 	 16.17%
  0 	 0 	 20 	 48.24% 	 46.95%
  20 	 20 	 2 	 45.11% 	 17.31%
  0 	 20 	 0 	 48.66% 	 47.96%
  20 	 20 	 0 	 53.77% 	 19.65%
  0 	 20 	 0 	 48.75% 	 47.85%
  20 	 20 	 0 	 53.20% 	 18.93%
  2 	 20 	 2 	 44.19% 	 18.10%
  20 	 20 	 0 	 53.17% 	 18.91%
  20 	 0 	 20 	 48.22% 	 46.88%
  20 	 20 	 2 	 44.47% 	 17.86%
  20 	 20 	 0 	 53.09% 	 18.81%
  22 	 22 	 13 	 84.80% 	 8.95%
  7 	 20 	 2 	 44.09% 	 18.18%
  0 	 20 	 2 	 46.24% 	 16.34%
  2 	 20 	 2 	 44.34% 	 17.98%
  0 	 0 	 20 	 48.23%

  20 	 20 	 0 	 54.06% 	 19.98%
  22 	 22 	 13 	 84.80% 	 8.95%
  20 	 20 	 0 	 53.17% 	 18.91%
  20 	 0 	 20 	 48.20% 	 46.80%
  0 	 0 	 20 	 48.32% 	 48.28%
  2 	 20 	 2 	 44.13% 	 18.15%
  4 	 20 	 2 	 45.08% 	 17.37%
  20 	 20 	 0 	 54.28% 	 20.28%
  2 	 20 	 2 	 44.18% 	 18.11%
  20 	 20 	 0 	 48.67% 	 47.94%
  20 	 20 	 2 	 44.30% 	 18.00%
  0 	 20 	 0 	 57.52% 	 26.62%
  2 	 20 	 2 	 44.01% 	 18.26%
  0 	 20 	 0 	 48.33% 	 48.32%
  0 	 0 	 20 	 48.32% 	 48.31%
  20 	 20 	 0 	 53.12% 	 18.85%
  0 	 20 	 0 	 48.40% 	 48.25%
  0 	 20 	 0 	 48.76% 	 47.85%
  20 	 20 	 0 	 49.95% 	 16.06%
  0 	 20 	 0 	 53.18% 	 18.93%
  0 	 20 	 0 	 48.76% 	 47.84%
  7 	 20 	 2 	 45.92% 	 16.69%
  20 	 20 	 0 	 54.93% 	 21.22%
  4 	 20 	 2 	 44.63% 	 17.75%
  22 	 22 	 13 	 84.80% 	 8.95%
  20 	 20 	 2 	 44.63% 	 17.73%
  20 	 0 	 20 	 48.22% 	 46.87%
  20 	 20 	 2 	 44.34% 	 17.97%
  22 	 22 	 13 	 84.80% 	 8.95%
  0 	 0 	 20 	 48.24% 	 46.95%
  20 	 20 	 2 	 46.97% 	 15.83%
  4 	 20 	 2 	 44.84% 	

In [11]:
# Statistics on how many guesses the model needs before it names the true class
ranked = best[:, ::-1]  # classes per test sample, most probable first
hits = ranked == test_labels[:, np.newaxis]
found = hits.any(axis=1)
# correct[k] = number of test samples whose true class is the (k+1)-th guess
correct = np.bincount(hits.argmax(axis=1)[found], minlength=best.shape[1]).astype(float)
# Baseline: how often each class occurs in the test labels, most frequent first
most_occuring = np.sort(np.bincount(test_labels))[::-1]

np.set_printoptions(precision=3)
print('The count of guesses until correct choice:', correct.astype(int))
print('Cumilative chance that the choice was correct by:',
      (np.cumsum(correct) / test_size)[0:5])
print('The count of most occuring tests:', most_occuring)
print('Cumilative chance that the choice was correct by:',
      (np.cumsum(most_occuring) / test_size)[0:5])

The count of guesses until correct choice: [357 256  40   8  29   4   8   0   0   0   0   1   0   0   0   0   0   0
   0   0   0   0   0]
Cumilative chance that the choice was correct by: [0.508 0.872 0.929 0.94  0.982]
The count of most occuring tests: [333 220  42  33  32  28   8   4   2   1   0   0   0   0   0   0   0   0
   0   0   0   0   0]
Cumilative chance that the choice was correct by: [0.474 0.787 0.846 0.893 0.939]


In [66]:
# Relative timings of all experiments, plus counts of runner-up results that are
# almost as fast as the best one
# Sort each row's timings ascending, then express them as multiples of the
# row's fastest timing (so column 0 is always 1)
timings = np.sort(arr[:, parameter_count:], axis=1)
timings = timings / timings[:, :1]

for row_index in range(3):
    print(timings[row_index])

print('The average:', np.average(timings, 0))

# Column 1 holds the second-fastest result of every row
count = np.count_nonzero(timings[:, 1] < 1.01)
print(count, 'second best results from', timings.shape[0], 'are within 1 percent speed difference')

# Every result except the fastest one
count = np.count_nonzero(timings[:, 1:] < 1.01)
print(count, 'non best results from', timings.shape[0], 'are within 1 percent speed difference')


count = np.count_nonzero(timings[:, 1] < 1.05)
print(count, 'second best results from', timings.shape[0], 'are within 5 percent speed difference')

[ 1.     2.578  4.406  5.26   5.638  5.916  7.789  8.758 11.277 15.075
 15.372 15.449 20.166 20.265 20.404 20.747 26.301 29.122 30.071 38.377
 39.047 41.726 41.981]
[ 1.     1.414  2.818  3.115  3.554  5.004  5.472  5.536  5.59   6.23
  6.912  8.48   8.691  9.654  9.97  12.202 12.32  14.212 15.444 21.156
 22.543 24.691 38.048]
[ 1.     1.101  1.83   1.873  2.461  2.908  3.708  3.71   3.739  4.221
  4.82   4.83   7.33   7.486  8.574  9.997 10.109 11.325 15.863 19.491
 20.405 24.777 41.972]
The average: [ 1.     1.435  2.41   3.08   3.687  4.098  4.695  5.136  5.71   6.347
  7.047  7.625  8.293  9.144 10.107 11.193 13.002 15.33  17.725 21.235
 25.066 30.43  41.326]
30 second best results from 878 are within 1 percent speed difference
34 non best results from 878 are within 1 percent speed difference
100 second best results from 878 are within 5 percent speed difference


In [49]:
# Count the first guesses that were relatively quick
test_timings = data[:test_size,parameter_count:]
for i in range(test_timings.shape[0]):
    fastest = np.min(test_timings[i])
    for j in range(test_timings.shape[1]):
        test_timings[i][j] = test_timings[i][j] / fastest
        
count = 0
for i in range(test_size):
    if test_timings[i][best[i][-1]] < 1.01:
        count = count + 1
print(count, 'first guesses from', test_size, 'are within 1 percent speed difference')

count = 0
for i in range(test_size):
    if test_timings[i][best[i][-1]] < 1.05:
        count = count + 1
print(count, 'first guesses from', test_size, 'are within 5 percent speed difference')

count = 0
for i in range(test_size):
    if test_timings[i][best[i][-1]] < 1.01 or test_timings[i][best[i][-2]] < 1.01:
        count = count + 1
print(count, 'of first two guesses from', test_size, 'are within 1 percent speed difference')

for i in r

365 first guesses from 703 are within 1 percent speed difference
381 first guesses from 703 are within 5 percent speed difference
615 of first two guesses from 703 are within 1 percent speed difference


In [14]:
# pydot is not referenced directly in this cell; plot_model needs pydot and
# graphviz to be installed and raises ImportError when they are unavailable
import pydot
# Write a diagram of the model's layers to model.png: layer names are shown,
# tensor shapes are hidden, rankdir='TB' (presumably a top-to-bottom layout)
tf.keras.utils.plot_model(
    model,
    to_file='model.png',
    show_shapes=False,
    show_layer_names=True,
    rankdir='TB'
)

ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.