In [1]:
#https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/udacity/2_fullyconnected.ipynb

import timeit
# Reference point for the total script execution time logged at the end of the script.
script_start_time = timeit.default_timer()


import numpy as np
import os
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# ********** Logging settings

import logging

# Named logger used by every log helper in this script.
logger = logging.getLogger('notMNIST.TF.MLR.training.GD')

# logging.getLogger() hands back the same object for the same name, so running
# this setup again in the same interpreter (e.g. re-executing a notebook cell)
# would stack a second pair of handlers and emit every record twice.
# Detach and close anything attached by a previous run before adding new ones.
for _stale_handler in list(logger.handlers):
  logger.removeHandler(_stale_handler)
  _stale_handler.close()

# Every record is written both to 'logfile.log' and to the stderr stream.
file_log_handler = logging.FileHandler('logfile.log')
logger.addHandler(file_log_handler)

stderr_log_handler = logging.StreamHandler()
logger.addHandler(stderr_log_handler)

# Same layout for both destinations: timestamp - logger name - level - message.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_log_handler.setFormatter(formatter)
stderr_log_handler.setFormatter(formatter)

# Let every record from DEBUG upwards through.
logger.setLevel('DEBUG')

def logInfo(*args):
  """Emit one INFO record whose message is built from all arguments by concatenate()."""
  message = concatenate(args)
  logger.info(message)

def logDebug(*args):
  """Emit one DEBUG record whose message is built from all arguments by concatenate()."""
  message = concatenate(args)
  logger.debug(message)
  
def logError(*args):
  """Emit one ERROR record whose message is built from all arguments by concatenate()."""
  message = concatenate(args)
  logger.error(message)

def concatenate(args):
  """Return the str() form of every item in `args`, joined by single spaces."""
  return ' '.join(map(str, args))

# ********** End of Logging settings



data_root = '.' # Change me to store data elsewhere


# ******* load  file

logInfo('Loading pickle file...')
start_time = timeit.default_timer()

# The pickle is read as a dict holding one dataset/labels pair per split.
# NOTE: unpickling can run arbitrary code, so only point this at a trusted file.
pickle_file = os.path.join(data_root, 'notMNIST.pickle')
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

# Bind each split to its own module-level name, then drop the container dict.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

# Shapes and dtype as loaded, before any reformatting.
logDebug('Training set', train_dataset.shape, train_dataset.dtype, train_labels.shape)
logDebug('Validation set', valid_dataset.shape, valid_dataset.dtype, valid_labels.shape)
logDebug('Test set', test_dataset.shape, test_dataset.dtype, test_labels.shape)

logInfo('Pickle file loaded ({:f} sec).'.format(timeit.default_timer() - start_time))

 
# ******* reformat

logInfo('Reformating data...')
start_time = timeit.default_timer()

# Side length of each square input image; reformat() flattens every image
# into image_size * image_size values.
image_size = 28
# Number of classes; also the width of the one-hot label rows built by reformat().
num_labels = 10

def reformat(dataset, labels):
  """Flatten the images and one-hot encode the labels.

  dataset is reshaped to (-1, image_size * image_size) and cast to float32.
  labels (a 1-D array of class indices) becomes a float32 matrix with
  num_labels columns: 0 maps to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...].

  Returns the (dataset, labels) pair in the new layout.
  """
  pixels_per_image = image_size * image_size
  flattened = dataset.reshape((-1, pixels_per_image)).astype(np.float32)
  # class_ids is [0, 1, ..., num_labels - 1]; labels[:, None] is a column
  # vector, so the comparison broadcasts to one boolean row per example with
  # True only at that example's class index.
  class_ids = np.arange(num_labels)
  one_hot = (class_ids == labels[:, None]).astype(np.float32)
  return flattened, one_hot
# Flatten every split and one-hot encode its labels; the original names are
# rebound to the reformatted arrays.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
# Shapes after reformatting.
logDebug('Training set', train_dataset.shape, train_labels.shape)
logDebug('Validation set', valid_dataset.shape, valid_labels.shape)
logDebug('Test set', test_dataset.shape, test_labels.shape)

logInfo('data reformatted ({:f} sec).'.format(timeit.default_timer() - start_time))


# ******* build computation graph


logInfo('Building computation graph...')
start_time = timeit.default_timer()

# With gradient descent training, much data is prohibitive.
# Subset the training data for faster turnaround.
# This is only an upper bound: the slices below yield fewer rows when the
# training set is smaller than train_subset.
train_subset = 10000

graph = tf.Graph()
with graph.as_default():

  # Input data.
  # Load the training, validation and test data into constants that are
  # attached to the graph.
  tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
  tf_train_labels = tf.constant(train_labels[:train_subset])
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  # These are the parameters that we are going to be training. The weight
  # matrix will be initialized using random values following a (truncated)
  # normal distribution. The biases get initialized to zero.
  # weights and bias type:  <class 'tensorflow.python.ops.variables.Variable'>
  weights = tf.Variable(
    tf.truncated_normal([image_size * image_size, num_labels]))
  biases = tf.Variable(tf.zeros([num_labels]))
  # Training computation.
  # We multiply the inputs with the weight matrix, and add biases. We compute
  # the softmax and cross-entropy (it's one operation in TensorFlow, because
  # it's very common, and it can be optimized). We take the average of this
  # cross-entropy across all training examples: that's our loss.
  # logits = Y = WX+b
  # logits type:  <class 'tensorflow.python.framework.ops.Tensor'>
  logits = tf.matmul(tf_train_dataset, weights) + biases
  # tf.nn.softmax_cross_entropy_with_logits = D(S(Y),L)
  # It measures the cross-entropy, i.e. the error of every computed logit Y
  # with respect to what we expect according to the labels L.
  # L are the one-hot labels, i.e. in the format [[0.0 1.0 0.0 ...][1.0 0.0 0.0 ...]...]
  # Y are the logits.
  # loss = mean(D(S(Y),L)) = 1/N * Sum(D(S(Y),L)) is the mean of all the errors.
  # loss type:  <class 'tensorflow.python.framework.ops.Tensor'>
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
  
  # Optimizer.
  # We are going to find the minimum of this loss using gradient descent.
  # At every step W is changed by -α∇loss.
  # 0.1 is α, the learning rate.
  # optimizer type:  <class 'tensorflow.python.framework.ops.Operation'>
  optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
  # Predictions for the training, validation, and test data.
  # These are not part of training, but merely here so that we can report
  # accuracy figures as we train.
  # S(Y) for the training, validation and test sets: one row of per-class
  # softmax values per example, laid out like the one-hot labels
  # [[0.0 1.0 0.0 ...][1.0 0.0 0.0 ...]...]
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(
    tf.matmul(tf_valid_dataset, weights) + biases)
  test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)


logInfo('Computation graph built ({:f} sec).'.format(timeit.default_timer() - start_time)) 

# ******* training


logInfo('Training...')
start_time = timeit.default_timer()

# Number of gradient-descent steps run by the training loop.
num_steps = 20000

def accuracy(predictions, labels):
  """Return the percentage of rows whose predicted class matches the label.

  predictions and labels are 2-D arrays with one row per example (train,
  valid or test set). For each row the class is the column index holding the
  largest value, so one-hot labels and softmax outputs such as
  [4.26e-14 1.03e-15 9.33e-01 ... 6.25e-02] are handled the same way.
  """
  # np.argmax(x, axis=1) gives, for each row, the index of the column with the
  # maximum value: [[0.0 0.0 1.0 ...][1.0 0.0 0.0 ...]...] -> [2 0 ...]
  predicted_classes = np.argmax(predictions, axis=1)
  expected_classes = np.argmax(labels, axis=1)
  # The comparison is a boolean array; np.sum() counts its True entries.
  hits = np.sum(predicted_classes == expected_classes)
  return 100.0 * hits / predictions.shape[0]

with tf.Session(graph=graph) as session:
  # One-time initialization so the variables get the starting values declared
  # in the graph: random weights for the matrix, zeros for the biases.
  tf.global_variables_initializer().run()
  logInfo('Initialized')
  for step in range(num_steps):
    # A single run() call applies one optimizer step and returns the loss and
    # the training predictions as numpy values.
    _, step_loss, step_predictions = session.run([optimizer, loss, train_prediction])
    # Progress is only reported every 100th step.
    if step % 100 != 0:
      continue
    logInfo('Loss at step %d: %f' % (step, step_loss))
    train_accuracy = accuracy(step_predictions, train_labels[:train_subset, :])
    logInfo('Training accuracy: %.1f%%' % train_accuracy)
    # .eval() on valid_prediction is basically run() for that single tensor;
    # note that it recomputes all of its graph dependencies.
    valid_accuracy = accuracy(valid_prediction.eval(), valid_labels)
    logInfo('Validation accuracy: %.1f%%' % valid_accuracy)
  # The test set is evaluated once, after the last training step.
  test_accuracy = accuracy(test_prediction.eval(), test_labels)
  logInfo('Test accuracy: %.1f%%' % test_accuracy)


logInfo('Trained ({:f} sec).'.format(timeit.default_timer() - start_time))


# Plot outputs

# A plot like the one in the example below, where each x is a scalar, cannot be drawn here:
# http://scikit-learn.org/stable/auto_examples/linear_model/plot_ols.html#sphx-glr-auto-examples-linear-model-plot-ols-py
# here d2_test_x_dataset has shape e.g. (100, 784), i.e. 100 (or however many were configured) vectors of 784=28x28 values

# plt.scatter(d2_test_x_dataset, test_y_labels,  color='black')
# plt.plot(d2_test_x_dataset, pred_y_dataset, color='blue', linewidth=3)

# plt.xticks(())
# plt.yticks(())

# What can be plotted, though, is the predicted value for each label:
# the closer the points are to the line of slope 1, the more correct they are.

# plt.scatter(test_y_labels, pred_y_dataset, color='blue')
# plt.plot([0,10], [0,10], color='black', linewidth=2)

# # plt.grid(True)
# # plt.legend()

# # plt.xticks(())
# # plt.yticks(())

# plt.show()
 

# Total wall-clock time measured from script_start_time.
logInfo('Script execution time: {:f} sec.'.format(timeit.default_timer() - script_start_time))

2018-03-04 00:05:10,526 - notMNIST.TF.MLR.training.GD - INFO - Loading pickle file...
2018-03-04 00:05:10,575 - notMNIST.TF.MLR.training.GD - DEBUG - Training set (5000, 28, 28) float32 (5000,)
2018-03-04 00:05:10,577 - notMNIST.TF.MLR.training.GD - DEBUG - Validation set (1000, 28, 28) float32 (1000,)
2018-03-04 00:05:10,578 - notMNIST.TF.MLR.training.GD - DEBUG - Test set (1000, 28, 28) float32 (1000,)
2018-03-04 00:05:10,579 - notMNIST.TF.MLR.training.GD - INFO - Pickle file loaded (0.051460 sec).
2018-03-04 00:05:10,581 - notMNIST.TF.MLR.training.GD - INFO - Reformating data...
2018-03-04 00:05:10,605 - notMNIST.TF.MLR.training.GD - DEBUG - Training set (5000, 784) (5000, 10)
2018-03-04 00:05:10,607 - notMNIST.TF.MLR.training.GD - DEBUG - Validation set (1000, 784) (1000, 10)
2018-03-04 00:05:10,609 - notMNIST.TF.MLR.training.GD - DEBUG - Test set (1000, 784) (1000, 10)
2018-03-04 00:05:10,611 - notMNIST.TF.MLR.training.GD - INFO - data reformatted (0.028655 sec).
2018-03-04 00:05: