In [20]:
# Run the notebook from the project root: when the kernel was started inside
# the 'analysis' directory, step up to its parent directory.
import os

analysis_dir_name = "analysis"

# Last path component of the current working directory.
current_dir = os.path.basename(os.getcwd())
if current_dir == analysis_dir_name:
    os.chdir(os.pardir)

In [21]:
import ai.nn.data_set as ds
from ai.nn.neural_network import create_model_1, format_data_set, get_inputs_real_outputs
from ai.nn.evaluation import print_loss_metric_functions, pivot_value, confusion_matrix, accuracy_recall_specificity, errors
from ai.helpers import data_set_file_path

%matplotlib notebook
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import random

In [30]:
# Seed shared by every random number generator seeded below.
seed = 42

# Range of 'bm' ratios swept by the experiment (both bounds included, see the
# construction of the ratio list further down in this cell).
min_ratio, max_ratio, step = 0.25, 0.75, 0.05

# Subgrid geometry: a subgrid is a square of side (2 * radius + 1).
subgrid_radius = 2
num_tiles_subgrids = (2 * subgrid_radius + 1) ** 2

# Parameters identifying the data set files.
num_rows_grid = 10
num_columns_grid = 10
num_bombs_grid = 10

# Number of subgrids read from the data set files for each training run.
max_num_subgrids = 100000
# Passed to 'format_data_set' (presumably the number of masked variants
# generated per subgrid -- TODO confirm against 'ai.nn.neural_network').
num_masked_subgrids = 10

# 'bm' means that the tile in the middle of the subgrids contains a bomb.
ds_no_bm_file_name, ds_bm_file_name = (
    data_set_file_path(num_rows_grid, num_columns_grid, num_bombs_grid, subgrid_radius, with_bm)
    for with_bm in (False, True)
)

# Seed every random number generator involved.
random.seed(seed)
np.random.seed(seed)  # Intended to make Keras deterministic.
tf.set_random_seed(seed)  # Intended to make TensorFlow deterministic.

# Test set: from each data set file (first without, then with a bomb in the
# middle tile), skip the first half and keep the 5000 subgrids that follow.
print("Test set.")
test_set = []
for ds_file_name in (ds_no_bm_file_name, ds_bm_file_name):
    data_set_gen = ds.read_data_set(ds_file_name)
    # Skip the half of the data set.
    for i in range(int(ds.SIZE / 2)):
        next(data_set_gen)
    test_set.extend([next(data_set_gen) for i in range(5000)])
print("Test set loaded.")

# Format the test set.
test_set = format_data_set(test_set, num_masked_subgrids)
print("Test set formatted.")

# Split the formatted test set into the 'x' and 'y_true' vectors.
x_test, y_true_test = get_inputs_real_outputs(test_set)
print("Inputs and real outputs extracted.\n")

# Sweep over the ratio of 'bm' subgrids in the training set. For each ratio a
# fresh model is trained for one epoch and evaluated on the shared test set;
# the per-sample errors of every run are collected in 'err_list' (same order
# as 'ratio_list').
ratio_list = [round(ratio, 3) for ratio in np.arange(min_ratio, (max_ratio + step), step)]

# Loop invariants: labels matching the order in which the entries of the
# confusion matrix are printed, and the per-sample error function.
conf_mat_names = ["True negatives", "False positives", "False negatives", "True positives"]


def error_func(y_t, y_p):
    """Absolute difference between a real output and its prediction."""
    return abs(y_t - y_p)


err_list = []
for ratio in ratio_list:
    print("Ratio: {}".format(ratio))

    # Round instead of truncating: a float product such as
    # 100000 * (1 - 0.55) evaluates to 44999.99999999999, which 'int' alone
    # would turn into 44999. Deriving the second count by subtraction keeps the
    # total equal to 'max_num_subgrids'.
    num_bm_subgrids = int(round(max_num_subgrids * ratio))
    num_no_bm_subgrids = max_num_subgrids - num_bm_subgrids
    # So, 'num_bm_subgrids' / ('num_no_bm_subgrids' + 'num_bm_subgrids') = 'ratio'.

    # Load the data set.
    # NOTE(review): the training subgrids are read from the start of each file
    # while the test set starts after 'ds.SIZE / 2' entries; if one class needs
    # more than 'ds.SIZE / 2' subgrids, training and test data overlap --
    # confirm against the value of 'ds.SIZE'.
    data_set_gen = ds.read_data_set(ds_no_bm_file_name)
    data_set = [next(data_set_gen) for i in range(num_no_bm_subgrids)]

    data_set_gen = ds.read_data_set(ds_bm_file_name)
    data_set.extend([next(data_set_gen) for i in range(num_bm_subgrids)])

    # Format the data set.
    data_set = format_data_set(data_set, num_masked_subgrids)

    # Get the 'x' and 'y_true' vectors.
    x_training, y_true_training = get_inputs_real_outputs(data_set)

    # Create the model.
    model = create_model_1(num_tiles_subgrids)

    # Train the model.
    model.fit(x_training, y_true_training, epochs=1, batch_size=10)

    # Prediction: keep the first component of each predicted row.
    y_pred = model.predict(x_test)
    y_pred = [y_p[0] for y_p in y_pred]

    # Print the results of loss and metric functions.
    print_loss_metric_functions(model, x_test, y_true_test)

    # Confusion matrix.
    pivot = pivot_value(y_pred, (1 - ratio))
    conf_mat = confusion_matrix(y_true_test, y_pred, pivot)
    accuracy, recall, specificity = accuracy_recall_specificity(conf_mat)

    print("Confusion matrix, accuracy, recall and specificity:")
    print("\tPivot: {}".format(pivot))
    for cf_name, cf_tile in zip(conf_mat_names, conf_mat):
        print("\t{}: {}".format(cf_name, cf_tile))
    print("\tAccuracy: {}\n\tRecall: {}\n\tSpecificity: {}".format(accuracy, recall, specificity))

    # Errors of this run, kept for the plots below.
    err = errors(y_true_test, y_pred, error_func)
    err_list.append(err)

    print('')

Test set.
Data set loaded.
Test set formatted.
Inputs and real outputs extracted.

Ratio: 0.25
Epoch 1/1
Results of loss and metric functions:
	loss: 0.133
	mean_squared_error: 0.133
	mean_absolute_error: 0.219
	acc: 0.826
Confusion matrix, accuracy, recall and specificity:
	Pivot: 0.7189352512359619
	True negatives: 49411
	False positives: 589
	False negatives: 25589
	True positives: 24411
	Accuracy: 0.73822
	Recall: 0.48822
	Specificity: 0.98822

Ratio: 0.3
Epoch 1/1
Results of loss and metric functions:
	loss: 0.109
	mean_squared_error: 0.109
	mean_absolute_error: 0.190
	acc: 0.847
Confusion matrix, accuracy, recall and specificity:
	Pivot: 0.8030697166919706
	True negatives: 48784
	False positives: 1216
	False negatives: 21216
	True positives: 28784
	Accuracy: 0.77568
	Recall: 0.57568
	Specificity: 0.97568

Ratio: 0.35
Epoch 1/1
Results of loss and metric functions:
	loss: 0.113
	mean_squared_error: 0.113
	mean_absolute_error: 0.195
	acc: 0.850
Confusion matrix, accuracy, recall an

In [31]:
# Appearance of the plots.
opacity = 1
color = '#FED487'
edge_color = 'black'
mean_color = 'orange'
median_color = 'red'
dot_color = 'black'

# Saving of the figures: location (relative to the project root), file format
# and resolution.
save_fig = True
plot_path = "{}/plots/".format(analysis_dir_name)
fig_format = 'png'
fig_dpi = 500

In [37]:
# Flatten the per-ratio error lists into two parallel lists: every error value
# in 'errs' is paired with the ratio of the run that produced it in 'ratios'.
ratios = []
errs = []
for ratio, err in zip(ratio_list, err_list):
    ratios.extend([ratio] * len(err))
    errs.extend(err)

# Add a horizontal jitter of at most half a step, so that the points of one
# ratio spread out without crossing into the band of the neighbouring ratio.
# A dedicated generator seeded with 'seed' is used so that re-running this
# cell always yields the same jitter, whatever the state of the global
# generator.
jitter = step / 2
jitter_rng = random.Random(seed)
ratios = [(ratio + jitter_rng.uniform(-jitter, jitter)) for ratio in ratios]

'\n# Add a second jitter.\njitter /= 2\nratios = [(ratio + random.uniform(-jitter, jitter)) for ratio in ratios]\n'

In [38]:
# Scatter plot of the errors depending on the ratio of subgrids whose middle tile contains a bomb.

fig = plt.figure(figsize=(8, 8))

# 's' must be a scalar or have one entry per point: repeat the five marker
# sizes cyclically over all the points instead of passing a 5-element array,
# which recent matplotlib versions reject with a ValueError.
marker_sizes = np.resize(np.arange(0.1, 0.6, 0.1), len(ratios))
plt.scatter(ratios, errs, s=marker_sizes, linewidths=0.01, c=dot_color)

plt.xticks(ratio_list)
plt.yticks(np.arange(0.0, 1.05, 0.05))
plt.title('Errors depending on the ratio of subgrids whose the tile in the middle contains a bomb')
plt.xlabel('Ratio of subgrid whose the tile in the middle contains a bomb')
plt.ylabel('Error')
plt.tight_layout()

# Save before showing: with non-interactive backends the figure may already be
# released once 'plt.show()' returns, which would produce an empty file.
if save_fig:
    os.makedirs(plot_path, exist_ok=True)  # The output directory may not exist yet.
    fig.savefig((plot_path + 'errors_depending_on_bm_subgrid_ratio.' + fig_format), format=fig_format, dpi=fig_dpi)
plt.show()

<IPython.core.display.Javascript object>