In [1]:
# Change directory to the upper directory.
import os

analysis_dir_name = "analysis"

current_dir = os.path.split(os.getcwd())[1]
if current_dir == analysis_dir_name:
    os.chdir("..")

In [2]:
import ai.nn.data_set as ds
from ai.nn.neural_network import create_model_1, format_data_set, get_inputs_real_outputs
from ai.nn.evaluation import errors
from ai.helpers import data_set_file_path

%matplotlib notebook
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import random

Using TensorFlow backend.
  return f(*args, **kwds)


In [4]:
seed = 42
batch_size_list = [10, 20, 50, 100, 500, 1000, 2000, 5000, 10000]

subgrid_radius = 2
num_tiles_subgrids = ((subgrid_radius * 2) + 1) ** 2
num_rows_grid = 10
num_columns_grid = 10
num_bombs_grid = 10
num_no_bm_subgrids = 500000
num_bm_subgrids = 500000
# 'bm' means that the tile in the middle of the subgrids contains a bomb.
num_masked_subgrids = 10

ds_no_bm_file_name = data_set_file_path(num_rows_grid, num_columns_grid, num_bombs_grid, subgrid_radius, False)
ds_bm_file_name = data_set_file_path(num_rows_grid, num_columns_grid, num_bombs_grid, subgrid_radius, True)
# 'bm' means that the tile in the middle of the subgrids contains a bomb.

random.seed(seed)
np.random.seed(int(seed)) # Makes Keras deterministic.
tf.set_random_seed(seed) # Makes TensorFlow deterministic.

# Training set.
# Load the training set.
data_set_gen = ds.read_data_set(ds_no_bm_file_name)
training_set = [next(data_set_gen) for i in range(num_no_bm_subgrids)]

data_set_gen = ds.read_data_set(ds_bm_file_name)
training_set.extend([next(data_set_gen) for i in range(num_bm_subgrids)])
print("Training set loaded.")
    
# Format the training set.
training_set = format_data_set(training_set, num_masked_subgrids)
print("Training set formatted.")
    
# Get the 'x' and 'y_true' vectors.
x_training, y_true_training = get_inputs_real_outputs(training_set)
print("Inputs and real outputs extracted.")

# Test set.
# Load the test set.
data_set_gen = ds.read_data_set(ds_no_bm_file_name)
for i in range(int(ds.SIZE / 2)): next(data_set_gen) # Skip the half of the data set.
test_set = [next(data_set_gen) for i in range(5000)]

data_set_gen = ds.read_data_set(ds_bm_file_name)
for i in range(int(ds.SIZE / 2)): next(data_set_gen) # Skip the half of the data set.
test_set.extend([next(data_set_gen) for i in range(5000)])
print("Test set loaded.")

# Format the test set.
test_set = format_data_set(test_set, num_masked_subgrids)
print("Test set formatted.")

# Get the 'x' and 'y_true' vectors.
x_test, y_true_test = get_inputs_real_outputs(test_set)
print("Inputs and real outputs extracted.\n")

err_list = []
for batch_size in batch_size_list:
    print("Batch size: {}".format(batch_size))
    
    # Create the model.
    model = create_model_1(num_tiles_subgrids)
    
    # Train the model.
    model.fit(x_training, y_true_training, epochs=1, batch_size=batch_size)
    print("Neural network trained.")
    
    # Prediction and errors.
    y_pred = model.predict(x_test)
    y_pred = [y_p[0] for y_p in y_pred]

    # Errors
    error_func = lambda y_t, y_p: abs(y_t - y_p)
    err = errors(y_true_test, y_pred, error_func)
    print("Errors computed.")

    err_list.append(err)
    
    print('')

Training set loaded.
Training set formatted.
Inputs and real outputs extracted.
Test set loaded.
Test set formatted.
Inputs and real outputs extracted.

Batch size: 10
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 20
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 50
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 100
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 500
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 1000
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 2000
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 5000
Epoch 1/1
Neural network trained.
Errors computed.

Batch size: 10000
Epoch 1/1
Neural network trained.
Errors computed.



In [67]:
opacity = 1
color = '#FED487'
edge_color = 'black'
mean_color = 'orange'
percentile_25_color = 'limegreen'
median_color = 'red'
percentile_75_color = 'limegreen'
dot_color = 'black'

save_fig = True
plot_path = analysis_dir_name + "/plots/"
fig_format = 'png'
fig_dpi = 500

In [93]:
batch_sizes = []
errs = []
i = 0
for bs, err in zip(batch_size_list, err_list):
    batch_sizes.extend([i] * len(err))
    errs.extend(err)
    i += 1

#random.seed(42)

# Add a jitter.
jitter = 0.5
batch_sizes = [(bs + random.uniform(-jitter, jitter)) for bs in batch_sizes]

"""
# Add a second jitter.
jitter /= 2
batch_sizes = [(bs + random.uniform(-jitter, jitter)) for bs in batch_sizes]
"""

'\n# Add a second jitter.\njitter /= 2\nbatch_sizes = [(bs + random.uniform(-jitter, jitter)) for bs in batch_sizes]\n'

In [94]:
# Scatter plot of errors depending on the batch size.

fig = plt.figure(figsize=(8, 8))
plt.scatter(batch_sizes, errs, s=np.arange(0.01, 0.41, 0.01), linewidths=0.01, c=dot_color)
plt.xticks(range(len(batch_size_list)), batch_size_list)
plt.yticks(np.arange(0.0, 1.05, 0.05))
plt.title('Errors depending on the batch size')
plt.xlabel('Batch size')
plt.ylabel('Error')
plt.tight_layout()
plt.show()
if save_fig:
    plt.savefig((plot_path + 'errors_depending_on_batch_size_scatter_plot.' + fig_format), format=fig_format, dpi=fig_dpi)

<IPython.core.display.Javascript object>

In [95]:
# Box plot of errors depending on the batch size.

fig = plt.figure(figsize=(8, 8))
bplot = plt.boxplot(err_list, showmeans=True, meanline=True, sym='.')
plt.xticks(range(1, len(batch_size_list) + 1), batch_size_list)
plt.yticks(np.arange(0.0, 1.05, 0.05))
plt.title('Errors depending on the batch size')
plt.xlabel('Batch size')
plt.ylabel('Error')
plt.setp(bplot['means'], color=mean_color)
plt.setp(bplot['medians'], color=median_color)
plt.tight_layout()
plt.show()
if save_fig:
    plt.savefig((plot_path + 'errors_depending_on_batch_size_box_plot.' + fig_format), format=fig_format, dpi=fig_dpi)

<IPython.core.display.Javascript object>

In [91]:
means = []
percentiles_25 = []
percentiles_50 = []
percentiles_75 = []
for err in err_list:
    means.append(np.mean(err))
    percentiles_25.append(np.percentile(err, 25))
    percentiles_50.append(np.percentile(err, 50))
    percentiles_75.append(np.percentile(err, 75))

In [87]:
# Line plot of errors depending on the batch size.

fig = plt.figure(figsize=(8, 8))
plt.plot(batch_size_list, means, color=mean_color, marker='o', linestyle='dashed', markersize=4)
plt.plot(batch_size_list, percentiles_25, color=percentile_25_color)
plt.plot(batch_size_list, percentiles_50, color=median_color, marker='o', markersize=4)
plt.plot(batch_size_list, percentiles_75, color=percentile_75_color)
plt.fill_between(batch_size_list, percentiles_25, percentiles_75, color='palegreen')
plt.xticks(range(0, 11000, 1000))
plt.yticks(np.arange(0.0, 0.45, 0.05))
plt.title('Errors depending on the batch size')
plt.xlabel('Batch size')
plt.ylabel('Error')
plt.legend(['Mean', '25th percentile (1st quartile)', '50th percentile (2nd quartile, median)', '75th percentile (3rd quartile)'], loc='upper right')
plt.tight_layout()
plt.show()
if save_fig:
    plt.savefig((plot_path + 'errors_depending_on_batch_size_line_plot.' + fig_format), format=fig_format, dpi=fig_dpi)

<IPython.core.display.Javascript object>

In [92]:
# Line plot of errors depending on the batch size.

fig = plt.figure(figsize=(8, 8))
plt.plot(batch_size_list, means, color=mean_color, marker='o', linestyle='dashed', markersize=4)
plt.plot(batch_size_list, percentiles_25, color=percentile_25_color)
plt.plot(batch_size_list, percentiles_50, color=median_color, marker='o', markersize=4)
plt.plot(batch_size_list, percentiles_75, color=percentile_75_color)
plt.fill_between(batch_size_list, percentiles_25, percentiles_75, color='palegreen')
fig.axes[0].set_xscale('symlog') # Logarithmic scale.
plt.xticks(batch_size_list, batch_size_list, rotation=45)
plt.yticks(np.arange(0.0, 0.45, 0.05))
plt.title('Errors depending on the batch size')
plt.xlabel('Batch size')
plt.ylabel('Error')
plt.legend(['Mean', '25th percentile (1st quartile)', '50th percentile (2nd quartile, median)', '75th percentile (3rd quartile)'], loc='upper right')
plt.tight_layout()
plt.show()
if save_fig:
    plt.savefig((plot_path + 'errors_depending_on_batch_size_line_plot_log_scale.' + fig_format), format=fig_format, dpi=fig_dpi)

<IPython.core.display.Javascript object>