## Logistic Regression Example

* Author: Yingying ZHONG
* data: http://yann.lecun.com/exdb/mnist/

In [1]:
import tensorflow as tf

# Load the MNIST data set from /tmp/data/ (the cell output below shows the
# four archives being downloaded and extracted there).
# one_hot=True requests the labels in one-hot form -- presumably one
# 10-element vector per image, matching the 10-class label placeholder.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [4]:
def create_placeholder():
    """
    Create the input placeholders of the softmax classifier.

    Size of an MNIST image: 28 * 28 = 784 pixels;
    0-9 digit recognition, hence 10 classes.

    Returns:
    X -- placeholder for the data input, shape [784, None] and dtype tf.float32
    Y -- placeholder for the input labels, shape [10, None] and dtype tf.float32

    Notice:
    - None is used for the second dimension to allow flexibility on the
      number of examples (the training and testing sets differ in size).
    - Examples are laid out column-wise (one column per example), so a batch
      shaped [num_examples, 784] has to be transposed before it is fed.
    """

    # Name the graph nodes after what they hold: these are the inputs,
    # not the model's weights and bias.
    X = tf.placeholder(tf.float32, [784, None], name="X")
    Y = tf.placeholder(tf.float32, [10, None], name="Y")

    return X, Y

In [5]:
def initialize_parameters(X, Y):
    """
    Create the trainable parameters of the softmax classifier.

    Arguments:
    X -- data placeholder whose first dimension is the number of input features n
    Y -- label placeholder whose first dimension is the number of classes C

    Returns:
    W -- float32 variable of shape [C, n], initialised uniformly in [0, 1)
    b -- float32 variable of shape [C, 1], initialised to zeros

    Notice:
    - the first dimension of X and Y must be statically known (not None).
    """
    # Static dimensions are converted to plain ints so they can be used
    # inside shape lists.
    n = int(X.shape[0])  # number of input features
    C = int(Y.shape[0])  # number of classes

    # W must be float32: tf.matmul requires both operands to share a dtype,
    # and the data placeholder is float32.
    W = tf.Variable(tf.random_uniform([C, n], dtype=tf.float32))
    # Shape [C, 1] (a column) so the bias broadcasts over every example
    # when added to logits of shape [C, m].
    b = tf.Variable(tf.zeros([C, 1], dtype=tf.float32))

    return W, b

In [6]:
def softmax_classifier(X, W, b):
    """
    Compute the class probabilities predicted by the softmax classifier.

    Arguments:
    X -- input data, shape [n, m] (one column per example)
    W -- weights, shape [C, n]
    b -- bias, shape [C, 1]

    Returns:
    pred -- predicted probabilities, shape [C, m]; each column sums to 1
    """
    logits = tf.matmul(W, X) + b
    # Classes run along axis 0 here. tf.nn.softmax normalizes over the last
    # axis by default, which would normalize across examples instead of
    # across classes, so the axis is given explicitly.
    pred = tf.nn.softmax(logits, axis=0)

    return pred # shape [C, m]

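#### Notice the difference between

* `tf.nn.softmax`, which turns logits into normalized probabilities, and

* `tf.nn.softmax_cross_entropy_with_logits`, which takes the unnormalized logits and computes the softmax and the cross-entropy loss in a single op.

(https://stackoverflow.com/questions/34240703/difference-between-tensorflow-tf-nn-softmax-and-tf-nn-softmax-cross-entropy-with)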

#### The loss function of a single example:
$$ L(\hat{y}, y) = - \sum\limits_{j=1}^{C} y_j \log(\hat{y}_j) $$
where C is the number of classes.

#### The cost over the entire training set:
$$ \text{cost} = \frac{1}{m} \sum\limits_{i=1}^{m} L(\hat{y}^{(i)}, y^{(i)}) $$
where m is the number of data examples.

In [8]:
def compute_cost(pred, Y):
    """
    Compute the mean cross-entropy cost over a batch of examples.

    Arguments:
    - pred: Y hat computed by the softmax classifier, size C * m
    - Y: the true labels, size C * m

    Returns:
    - cost: scalar tensor, the cross-entropy loss averaged over the m examples

    Notice:
    the number of classes C is 10;
    the loss of one example sums over the 10 classes, i.e. down each
    column (axis 0), and the cost averages those losses over the examples.
    """
    # A saturated softmax can output exactly 0; log(0) is -inf and
    # 0 * -inf is NaN, which would poison the whole cost. Clip first.
    safe_pred = tf.clip_by_value(pred, 1e-10, 1.0)

    per_example_loss = -tf.reduce_sum(Y * tf.log(safe_pred), axis=0)
    cost = tf.reduce_mean(per_example_loss)

    return cost

In [None]:
def gradient_descent(learning_rate, cost):
    """
    Build the training operation that minimizes the cost by gradient descent.

    Arguments:
    - learning_rate: step size handed to the gradient descent optimizer
    - cost: the tensor to minimize

    Returns:
    - the operation returned by the optimizer's minimize(cost)
    """
    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = sgd.minimize(cost)
    return train_step

In [None]:
def logistic_regression_model(learning_rate = 0.01, num_epochs = 25, 
                              display_step = 1, minibatch_size = 64):
    X, Y = create_placeholder()
    W, b = initialize_parameters(X, Y)
    pred = softmax_classifier(X, W, b)
    cost = compute_cost(pred, Y)
    optimizer = gradient_descent(learning_rate, cost)
    
    # Initialize all the variables
    init = tf.global_variables_initializer()
    
    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        for epoch in range(num_epochs):
            epoch_cost = 0   # cost relate to an epoch
            num_minibatches = int(mnist.train.num_examples/minibatch_size) # number of minibatches of size minibatch_size in the train set
            for i in range(batches):
                minibatch_x, minibatch_y = mnist.train.next_batch(batch_size)
                minibatch_cost, _ = sess.run([cost, optimizer], feed_dict={X: minibatch_x, Y: minibatch_y})
                
                epoch_cost += minibatch_cost / num_minibatches