In [1]:
# Import
from __future__ import absolute_import, division, print_function

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

# Eager execution has to be switched on explicitly in TensorFlow 1.x, and this
# must happen at program startup. From TensorFlow 2.x on eager mode is the
# default and the top-level tf.enable_eager_execution symbol no longer exists,
# so the call is only made when the installed version provides it.
if hasattr(tf, 'enable_eager_execution'):
    tf.enable_eager_execution()

1.13.1


In [2]:
# Load Data
# The measurements are read from a comma-separated text file into a float
# array; np.genfromtxt stores nan for any field it cannot parse as a number.
# NOTE(review): the path is an absolute, machine-specific Windows path - point
# it at your own copy of the data before running.
filename = 'C:\\Users\\deniz\\Desktop\\Thesis of ML for AutoPas\\Data\\Batch5\\half5.txt'
arr = np.genfromtxt(filename, delimiter=',')
# Sanity check: (number of rows, number of columns)
print(arr.shape)

(12487, 540)


In [3]:
# Define how many parameters you have (it is assumed that the rest are classes)
# The trailing 23 columns are treated as the per-class columns, so everything
# in front of them counts as a parameter. 23 is also the width of the model's
# softmax output layer.
parameter_count = arr.shape[1] - 23
print(parameter_count)

517


In [408]:
# Copy the array so that the in-place shuffle below does not reorder `arr`
data = np.copy(arr)

# Shuffle the rows in place, then split by position: the first
# ceil(99% of the rows) rows become the test set, the remaining rows the training set.
# NOTE(review): this cell was previously described as taking 25% as test data;
# the factor below is 99 / 100 - confirm which split is intended.
# NOTE(review): no random seed is set, so the split differs on every run.
np.random.shuffle(data)
test_size = (np.ceil(data.shape[0] * 99 / 100)).astype(int)
# Inputs are columns 4..515 of a row. The label of a row is the position of the
# smallest value among columns -23..-4, i.e. an index in 0..19.
# NOTE(review): the last three columns are left out of the argmin although the
# model has 23 outputs, and up to 512 input columns are selected although the
# model is declared with input_shape=(4,) - confirm the intended column ranges.
# The *_params arrays are basic slices, i.e. views into `data`: writing to them
# also writes to `data`.
train_params = data[test_size:,4:516]
train_labels = np.argmin(data[test_size:,-23:-3], 1).astype(int)
test_params = data[:test_size,4:516]
test_labels = np.argmin(data[:test_size,-23:-3], 1).astype(int)

# Possible normalization functions
def normalize02(array):
    """Min-max scale ``array`` to the range [0, 1], in place.

    Every element is replaced by ``(x - min) / (max - min)``, where min and
    max are taken over the whole array before anything is modified.

    Args:
        array: numpy array that is overwritten with the scaled values.

    Returns:
        None. The result is written into ``array`` itself, so passing a view
        also updates the array the view was taken from.

    Note:
        An empty array is left untouched, and an array whose values are all
        equal (max == min) is set to 0 instead of being divided by zero.
    """
    if array.size == 0:
        return
    low = np.min(array)
    span = np.max(array) - low
    if span == 0:
        # Constant input: (x - min) / 0 would fill the array with nan.
        array[:] = 0
        return
    array[:] = (array - low) / span
        
# Normalize the data along the other axis, because now the parameters of a picture are related to each other
# Concretely: every parameter column (axis 0, i.e. across samples) is scaled on its own.
# normalize02 works in place and returns nothing, so it is called directly on
# each column view instead of through np.apply_along_axis, which would only
# collect the None return values into a throw-away array.
# NOTE(review): train and test columns are each scaled with their own min/max -
# confirm that this is intended rather than reusing the training statistics.
for params in (train_params, test_params):
    for column in range(params.shape[1]):
        normalize02(params[:, column])
print(train_params[0])

[0.    0.25  0.    0.028]


In [25]:
# Peek at the first parameter column of (up to) the first 25 training samples
print(train_params[:25,0])

[1.         0.33333333 0.5       ]


In [33]:
# How often does each class occur as the label, over train and test together?
dist = np.bincount(np.concatenate([train_labels, test_labels]))
print(dist)
# Index of the most frequent class ...
print(dist.argmax())
# ... and the share of all samples that carry it
print(dist.max() / len(data))

[ 605   44  135    0  276    0    0  112    0   23    1  255   84  411
    0   34    0   63    0    0 9576    0  868]
20
0.766877552654761


In [8]:
# Model
# A single hidden ReLU layer followed by a softmax over the 23 classes.
# The input width is taken from the training data rather than hard-coded, so
# the network always matches the parameter columns selected for train_params.
model = keras.Sequential([
    #keras.layers.Dense(parameter_count, activation=tf.nn.relu),
    keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(train_params.shape[1],)),
    keras.layers.Dense(23, activation=tf.nn.softmax)
])

# The learning rate is passed positionally because its keyword was renamed
# from `lr` to `learning_rate` in later TensorFlow releases.
opt = keras.optimizers.Adam(0.001)

# The labels are plain integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# Train the model on the training split for 50 epochs
model.fit(train_params, train_labels , epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x2accef388d0>

In [10]:
# Model output for every test sample (one row per sample, one softmax score per class)
predictions = model.predict(test_params)
# Loss and accuracy of the model on the test split
test_loss, test_acc = model.evaluate(test_params, test_labels)
print('Test accuracy:', test_acc)

Test accuracy: 0.7946829


In [11]:
# Print prediction, result, and how certain the result is
# argsort orders the class indices of every row by ascending score, so the last
# entry of a row is the model's first guess and the one before it the second guess.
best = np.argsort(predictions)
# "Best" is the true label, "Guess1"/"Guess2" the two highest-scored classes and
# "Certainty1"/"Certainty2" the scores the model assigned to them.
print(' Best  Guess1  Guess2  Certainty1      Certainty2')
# Show at most 20 samples; fewer when the test split is smaller than that.
for i in range(min(20, len(test_labels))):
    first, second = best[i][-1], best[i][-2]
    print(' ', test_labels[i], '\t', first, '\t', second, '\t',
          "{:.2%}".format(predictions[i][first]), '\t', "{:.2%}".format(predictions[i][second]))

 Best  Guess1  Guess2  Certainty1      Certainty2
  20 	 20 	 0 	 79.52% 	 7.94%
  20 	 20 	 22 	 78.13% 	 14.58%
  20 	 20 	 0 	 79.11% 	 13.01%
  20 	 20 	 0 	 85.90% 	 8.29%
  20 	 20 	 22 	 69.64% 	 22.33%
  20 	 20 	 22 	 75.66% 	 16.07%
  4 	 4 	 20 	 64.41% 	 16.79%
  20 	 20 	 22 	 93.24% 	 3.27%
  22 	 20 	 0 	 83.42% 	 9.78%
  20 	 20 	 22 	 74.11% 	 17.94%
  22 	 22 	 20 	 27.31% 	 26.57%
  22 	 20 	 22 	 83.46% 	 9.39%
  20 	 20 	 0 	 88.94% 	 7.61%
  20 	 20 	 7 	 90.59% 	 3.27%
  20 	 20 	 22 	 85.62% 	 8.08%
  20 	 20 	 22 	 78.98% 	 13.79%
  0 	 20 	 22 	 80.89% 	 10.16%
  0 	 4 	 20 	 62.80% 	 18.86%
  20 	 20 	 22 	 79.34% 	 13.44%
  22 	 13 	 20 	 32.47% 	 28.26%


In [12]:
# Print general statistics about in how many guesses the AI would be correct
# correct[k] counts the test samples whose true label was the (k+1)-th most
# likely class according to the model.
correct = np.zeros(best.shape[1])
# Baseline for comparison: label counts of the test split, most frequent first.
most_occuring = np.sort(np.bincount(test_labels))[::-1]
for i in range(test_size):
    # Guesses of this sample from most to least likely
    ranked = best[i][::-1]
    hits = np.flatnonzero(ranked == test_labels[i])
    if hits.size:
        correct[hits[0]] += 1
np.set_printoptions(precision=3)
print('The count of guesses until correct choice:', correct.astype(int))
print('Cumilative chance that the choice was correct by:',
      (np.cumsum(correct) / test_size)[0:5])
print('The count of most occuring tests:', most_occuring)
print('Cumilative chance that the choice was correct by:',
      (np.cumsum(most_occuring) / test_size)[0:5])

The count of guesses until correct choice: [2481  277  221   79   22   19   12    2    9    0    0    0    0    0
    0    0    0    0    0    0    0    0    0]
Cumilative chance that the choice was correct by: [0.795 0.883 0.954 0.98  0.987]
The count of most occuring tests: [2400  220  156  102   74   71   24   22   17   11   10   10    5    0
    0    0    0    0    0    0    0    0    0]
Cumilative chance that the choice was correct by: [0.769 0.839 0.889 0.922 0.946]


In [17]:
# Display relative timing of all experiments, and print the ones which are relatively close
# The timing columns of every row are sorted in ascending order and then expressed
# as a multiple of that row's fastest (first) entry, so 1.0 marks the best result.
# A row whose fastest entry is 0 or nan yields inf/nan ratios, which never count
# as "within x percent" in the comparisons below.
timings = np.sort(arr[:, parameter_count:], axis=1)
timings = timings / timings[:, [0]]

for i in range(3):
    print(timings[i])

print('The average:', np.average(timings, 0))

# timings[:, 1] is the runner-up of every row.
count = np.count_nonzero(timings[:, 1] < 1.01)
print(count, 'second best results from', timings.shape[0], 'are within 1 percent speed difference')

# Every non-best entry is counted, so a single row can contribute several times.
count = np.count_nonzero(timings[:, 1:] < 1.01)
print(count, 'non best results from', timings.shape[0], 'are within 1 percent speed difference')


count = np.count_nonzero(timings[:, 1] < 1.05)
print(count, 'second best results from', timings.shape[0], 'are within 5 percent speed difference')

  
  


[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan na

0 non best results from 12487 are within 1 percent speed difference
0 second best results from 12487 are within 5 percent speed difference


In [370]:
# Count the first guesses that were relatively quick
# Timing columns of the test rows, expressed as a multiple of the fastest entry
# of the same row. The division creates a new array, so `data` is left untouched
# (the slice itself is only a view into it).
test_raw = data[:test_size, parameter_count:]
test_timings = test_raw / np.min(test_raw, axis=1, keepdims=True)

# Relative timing of the class the model ranked first / second for every test row.
rows = np.arange(test_size)
first_guess = test_timings[rows, best[:test_size, -1]]
second_guess = test_timings[rows, best[:test_size, -2]]

count = np.count_nonzero(first_guess < 1.01)
print(count, 'first guesses from', test_size, 'are within 1 percent speed difference')

count = np.count_nonzero(first_guess < 1.05)
print(count, 'first guesses from', test_size, 'are within 5 percent speed difference')

# A row counts when at least one of its two top guesses is within 1 percent.
count = np.count_nonzero((first_guess < 1.01) | (second_guess < 1.01))
print(count, 'of first two guesses from', test_size, 'are within 1 percent speed difference')

10266 first guesses from 12363 are within 1 percent speed difference
10652 first guesses from 12363 are within 5 percent speed difference
10520 of first two guesses from 12363 are within 1 percent speed difference


In [13]:
# Save the trained model to an .h5 file; include_optimizer=False leaves the optimizer out of the file.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
model.save('C:/Users/deniz/Desktop/Thesis of ML for AutoPas/keras_model.h5', include_optimizer=False)