# Inicialización de parámetros

In [None]:
import tensorflow as tf
import numpy as np
%matplotlib inline
import pylab as plt
import time
from IPython import display
from sklearn.metrics import roc_curve, auc, confusion_matrix

# Digit to identify. Set it to the check digit of your RUT (K=0).
# Used as the column index into the one-hot MNIST labels, so the task becomes
# "is this image the chosen digit or not".
RUT_veri_number = 1

# Training parameters
minibatch_size = 32
n_epochs = 100  # upper bound; the training loop may stop earlier
n_training_samples = 55000 # Max: 55000
target_cost = 0.01  # training stops once the training cost is <= this value

# Gradient Descent parameters (passed to tf.train.MomentumOptimizer)
learning_rate = 0.1
momentum = 0.0

# Number of neurons
n_inputs = 28*28
n_hidden = 25
n_classes = 2
# Layer sizes in order (input, hidden, output); the network-building loop
# creates one weight matrix per consecutive pair of entries.
n_neurons = np.hstack((n_inputs,n_hidden,n_classes)).astype(int)



# Preparación de base de datos MNIST


In [None]:
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST with one-hot encoded labels (cached under /tmp/data/).
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Selecting column RUT_veri_number of the one-hot labels yields a binary
# target: 1 where the image is the chosen digit, 0 otherwise.

# Training split, truncated to the first n_training_samples rows.
training_images = mnist.train.images[:n_training_samples]
training_labels = mnist.train.labels[:n_training_samples, RUT_veri_number]

# Validation split (used in full).
validation_images = mnist.validation.images
validation_labels = mnist.validation.labels[:, RUT_veri_number]

# Test split (used in full).
testing_images = mnist.test.images
testing_labels = mnist.test.labels[:, RUT_veri_number]




# Construcción de MLP

In [None]:
# Network input: a batch of flattened images, one row per image.
MLP_input = tf.placeholder(tf.float32, shape=[None, n_inputs])
previous_layer = MLP_input

# Running sum of the L2 penalties of every weight matrix and bias vector;
# it can be added to the cost function to regularize the network.
regularizers = 0

# Build the network one layer at a time: sigmoid hidden layers followed by a
# softmax output layer.
last_level = len(n_neurons) - 2
for level in range(len(n_neurons)-1):
    fan_in, fan_out = int(n_neurons[level]), int(n_neurons[level+1])

    # Uniform initialization in [-sqrt(3/fan_in), +sqrt(3/fan_in)].
    # The float literal is essential: n_neurons holds integers, so under
    # Python 2 `3/fan_in` is an integer division that yields 0, which would
    # start every weight at exactly 0 and keep all hidden units identical.
    init_bound = np.sqrt(3.0 / fan_in)
    weights = tf.Variable(tf.random_uniform([fan_in, fan_out], minval=-init_bound, maxval=init_bound))
    # Alternative initializer:
    #weights = tf.Variable(tf.truncated_normal([n_neurons[level],n_neurons[level+1]], stddev=0.1))
    biases = tf.Variable(tf.constant(0., shape=[fan_out]))
    applied_weights = tf.matmul(previous_layer, weights) + biases

    regularizers += tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases)

    if level < last_level:
        # Hidden layer: its activation feeds the next layer.
        layer = tf.sigmoid(applied_weights)
        previous_layer = layer
    else:
        # Output layer: class probabilities, one column per class.
        MLP = tf.nn.softmax(applied_weights)

# Función de costo

In [None]:
# Accommodate target: binary labels (0/1) are fed as floats and expanded to a
# two-column one-hot encoding so they line up with the softmax output.
target = tf.placeholder(tf.float32, shape=[None])
one_hot_target = tf.one_hot(tf.cast(target, dtype=tf.int32), 2)

# Cross entropy, averaged over the batch. The softmax output is clipped to
# [1e-10, 1.0] before the log so that a zero probability cannot produce -inf.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(one_hot_target * tf.log(tf.clip_by_value(MLP,1e-10,1.0)), reduction_indices=[1]))

# Root of the mean squared error between the label and the output for class 1.
# NOTE: despite the variable name this is the RMSE (note the tf.sqrt), not the
# MSE, so target_cost is compared against an RMSE value.
mean_squared_error = tf.sqrt(tf.reduce_mean(tf.square(target - MLP[:,1])))


# Cost actually minimized; swap the two lines below to train with cross entropy.
cost_function = mean_squared_error
#cost_function = cross_entropy

# Regularización 

In [None]:
# Optional L2 penalty on weights and biases; uncomment to enable it.
#cost_function += 0.0005 * regularizers

# Método de optimización

In [None]:
# Two candidate optimizers are instantiated; only one is wired to the cost.

# Plain gradient descent with a momentum term.
GDM_optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum)

# Adam with its default hyper-parameters.
Adam_optimizer = tf.train.AdamOptimizer()

# Training op executed once per minibatch; swap the lines to train with Adam.
training_algorithm = GDM_optimizer.minimize(cost_function)
#training_algorithm = Adam_optimizer.minimize(cost_function)

# Entrenamiento del método

In [None]:
# Open a session and initialize every variable of the graph.
# NOTE(review): tf.initialize_all_variables is deprecated in later TF 1.x
# releases in favor of tf.global_variables_initializer -- confirm the
# TensorFlow version before switching.
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())

# Ceiling division: when the number of samples is not a multiple of the
# minibatch size, the last (smaller) minibatch is still used for training
# instead of being silently dropped.
n_training_rows = np.shape(training_images)[0]
n_minibatches = int((n_training_rows + minibatch_size - 1) // minibatch_size)

# Per-epoch cost values, plotted after training.
cost_valid_history = []
cost_train_history = []

# Early-stopping state: best validation cost seen so far and the number of
# consecutive epochs in which the validation cost was worse than that best.
prev_cost_valid = float('inf')
validation_checks = 0
max_validation_checks = 15

for epoch in range(n_epochs):
    # One pass over the training set, in order, one minibatch at a time.
    for i_mb in range(n_minibatches):
        a,b = i_mb*minibatch_size, (i_mb+1)*minibatch_size
        # Slicing past the end is clamped, so the last minibatch may be short.
        images_minibatch = training_images[a:b,:]
        labels_minibatch = training_labels[a:b]
        sess.run(training_algorithm,feed_dict={MLP_input: images_minibatch, target: labels_minibatch})

    # Evaluate the cost on the full validation and training sets.
    cost_valid = sess.run(cost_function,feed_dict={MLP_input: validation_images, target: validation_labels})
    cost_valid_history.append(cost_valid)

    cost_train = sess.run(cost_function,feed_dict={MLP_input: training_images, target: training_labels})
    cost_train_history.append(cost_train)

    if prev_cost_valid < cost_valid:
        # Validation cost got worse than the best one seen so far.
        validation_checks += 1
    else:
        validation_checks = 0
        prev_cost_valid = cost_valid

    print("Epoch: %d/%d, Training cost: %f, Validation cost: %f, Validation checks: %d/%d" %(epoch+1, n_epochs,cost_train,cost_valid,validation_checks,max_validation_checks))

    # Stopping criteria. Single-argument print(...) calls behave the same on
    # Python 2 and Python 3.
    if cost_train <= target_cost:
        print('Target cost reached')
        break
    if validation_checks >= max_validation_checks:
        print('Early stopping')
        break


print('')
    
# Learning curves: validation and training cost per epoch on a single axes.
fig = plt.figure(figsize=(6, 4))
ax = plt.gca()
for history, curve_name in ((cost_valid_history, "Validation"), (cost_train_history, "Training")):
    ax.plot(history, label=curve_name)
ax.set_xlabel("Epoch")
ax.set_ylabel("Cost history")
ax.grid()
ax.legend()


# Test MLP

def _safe_percentage(numerator, denominator):
    """Return 100 * numerator / denominator, or NaN when denominator is 0."""
    if denominator == 0:
        return float('nan')
    return 100.0 * numerator / denominator


def report_classification_results(title, images, labels, threshold=0.5):
    """Print the confusion counts, accuracy, precision and recall for one split.

    Args:
        title: name of the split, used in the printed header (e.g. 'Test').
        images: array of inputs fed to the network through MLP_input.
        labels: array of binary (0/1) ground-truth labels, one per input.
        threshold: an input is predicted positive when the network output for
            class 1 is strictly greater than this value.

    Returns:
        The network output for class 1 for every input (the raw scores).
    """
    scores = sess.run(MLP[:,1],feed_dict={MLP_input: images})
    # labels=[False, True] forces a 2x2 matrix laid out as [[TN, FP], [FN, TP]]
    # even when one of the classes never appears in this split.
    [[VN,FP],[FN,VP]] = confusion_matrix(labels.astype(bool), (scores>threshold).astype(bool), labels=[False, True]).astype(float)

    # VP/VN/FP/FN: true positives, true negatives, false positives, false negatives.
    print('%s results:' % title)
    print('VP: %d, VN: %d, FP: %d, FN: %d' %(VP,VN,FP,FN))
    print('Porcentaje de clasificaciones correctas: %%%f' % _safe_percentage(VP+VN, VP+VN+FP+FN))
    # Precision and recall are undefined (NaN) when nothing is predicted
    # positive or when the split holds no positives, respectively.
    print('Precision: %%%f' % _safe_percentage(VP, VP+FP))
    print('Recall: %%%f' % _safe_percentage(VP, VP+FN))
    print('')
    return scores


predicted_training_labels = report_classification_results('Training', training_images, training_labels)
predicted_validation_labels = report_classification_results('Validation', validation_images, validation_labels)
predicted_test_labels = report_classification_results('Test', testing_images, testing_labels)

# ROC operating points on the test set, computed from the raw class-1 scores.
fpr, tpr, th = roc_curve(testing_labels, predicted_test_labels)
fnr = 1.0 - tpr  # false negative rate, the vertical axis of the DET curve

plt.figure(figsize=(10, 4))

# Left panel: ROC curve.
roc_ax = plt.subplot2grid((1,2),(0,0))
roc_ax.plot(fpr, tpr, linewidth=2, alpha=0.5)
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.grid()
roc_ax.set_title('ROC Curve')

# Right panel: DET curve (both error rates on logarithmic axes).
det_ax = plt.subplot2grid((1,2),(0,1))
det_ax.plot(fpr, fnr, linewidth=2, alpha=0.5)
det_ax.set_xlabel("False Positive Rate")
det_ax.set_ylabel("False Negative Rate")
det_ax.set_yscale('log')
det_ax.set_xscale('log')
det_ax.grid()
det_ax.set_title('DET Curve')

