## Linear Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import xlrd

In [None]:
DATA_FILE = "data/fire_theft.xls"

# Step 1: read in data from the .xls file
book = xlrd.open_workbook(DATA_FILE, encoding_override="utf-8")
sheet = book.sheet_by_index(0)
data = np.asarray([sheet.row_values(i) for i in range(1, sheet.nrows)])
n_sample = sheet.nrows - 1

# Step 2: create placeholders for input x and label y
X = tf.placeholder(tf.float32, name="X")
Y = tf.placeholder(tf.float32, name="Y")

# Step 3: create weight and bias, initialized to 0
w = tf.Variable(0.0, name="weights")
b = tf.Variable(0.0, name="bias")

# Step 4: construct model to predict Y from X
Y_predicted = X * w + b

# Step 5: use the square error as the loss function
loss = tf.square(Y - Y_predicted, name="loss")

# Step 6: using gradient descent with learning rate of 0.01 to minimize loss
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

with tf.Session() as sess:
    # Step 7: Initialize the necessary variables, in this case, w and b
    sess.run(tf.global_variables_initializer())
    
    # Step 8: train the model
    for i in range(100): # run 100 epochs
        for x, y in data:
            sess.run(optimizer, feed_dict={X: x, Y:y})
            
    # Step 9: output the values of w and b

In [None]:
# By default, the optimizer trains all the trainable variables whose obj function depend on
# If there are variables that do not want to train, set the keyword trainable=Flase
global_step = tf.Variable(0, trainable=False, dtype=tf.int32)
learning_rate = 0.01 * 0.99 ** tf.cast(global_step, tf.float32)
increment_step = global_step.assign_add(1)
optimizer = tf.train.GradientDescentOptimizer(learning_rate) # learning rate can be a tensor

In [None]:
# Let's look at the full definition of the class tf.Variable
tf.Variable(initial_value=None, trainable=True, collections=None, validate_shape=True,
            caching_device=None, name=None, variable_def=None, dtype=None, expected_shape=None,
            import_scope=None)

# can also ask your optimizer to take gradients of specific variables. can also modify the gradients calculated by your optimizer.
# Create an optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
# compute the gradients for a list of vaiables
grads_and_vars = optimizer.compute_gradients(loss, <list of variables>)

# grads_and_vars is a list of tuples (gradient, variable)
# do whatever need to the "gradient" part
substracted_grads_and_vars = [(gv[0] - 1.0, gv[1]) for gv in grads_and_vars]

# ask the optimizer to apply the substracted gradients
optimizer.apply_gradients(substracted_grads_and_vars)

# the optimizer classes automatically compute derivatives on our graph, 
# but creator of new optimizer can call the lower-level functions below
tf.gradients(ys, xs, grad_ys=None, name='gradients', colocate_gradients_with_ops=False, 
             gate_gradients=False, aggregation_method=None)
# This method constructs symbolic partial derivatives of sum of ys w.r.t. x in xs. 
# ys and xs are each a Tensor or a list of tensors. grad_ys is a list of Tensor, 
# holding the gradients received by the ys. The list must be the same length as ys.

# Technical detail : This is especially useful when training only parts of a model. 
# For example, we can use tf.gradients() for to take the derivative G of the loss w.r.t. to the middle layer. 
# Then we use an optimizer to minimize the difference between the middle layer output M and M + G. 
# This only updates the lower half of the network.

## Logistic Regression

In [1]:
from tensorflow.examples.tutorials.mnist import input_data
MNIST = input_data.read_data_sets("/data/mnist", one_hot=True)

# MNIST is a TensorFlow’s Datasets object. It has 55,000 data points of training data (MNIST.train), 
# 10,000 points of test data (MNIST.test), and 5,000 points of validation data (MNIST.validation).

# Instead of using SGD, we use miniBatch SGD here.
X = tf.placeholder(tf.float32, [batch_size, 784], name="img")
Y = tf.placeholder(tf.float32, [batch_size, 10], name="label")

# instead of feeding each data point, we can feed in the batch_size number of data points.
X_batch, Y_batch = mnist.test.next_batch(batch_size)
sess.run(train_op, feed_dict={X: X_batch, Y: Y_batch})

import time
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Step 1: Read in data
# using TF Learn's built in function to load MNIST data to the folder data/mnist MNIST = input_data.read_data_sets("/data/mnist", one_hot=True)
# Step 2: Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 25
# Step 3: create placeholders for features and labels
# each image in the MNIST data is of shape 28*28 = 784
# therefore, each image is represented with a 1x784 tensor
# there are 10 classes for each image, corresponding to digits 0 - 9. # each label is one hot vector.
X = tf.placeholder(tf.float32, [batch_size, 784])
Y = tf.placeholder(tf.float32, [batch_size, 10])
# Step 4: create weights and bias
# w is initialized to random variables with mean of 0, stddev of 0.01
# b is initialized to 0
# shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
# shape of b depends on Y
w = tf.Variable(tf.random_normal(shape=[784, 10], stddev=0.01), name="weights")
b = tf.Variable(tf.zeros([1, 10]), name="bias")
# Step 5: predict Y from X and w, b
# the model that returns probability distribution of possible label of the image # through the softmax layer
# a batch_size x 10 tensor that represents the possibility of the digits
logits = tf.matmul(X, w) + b
# Step 6: define loss function
# use softmax cross entropy with logits as the loss function
# compute mean cross entropy, softmax is applied internally
entropy = tf.nn.softmax_cross_entropy_with_logits(logits, Y)
loss = tf.reduce_mean(entropy) # computes the mean over examples in the batch
# Step 7: define training op
# using gradient descent with learning rate of 0.01 to minimize cost optimizer  = tf . train . GradientDescentOptimizer ( learning_rate = learning_rate ). minimize ( loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    n_batches = int(MNIST.train.num_examples/batch_size)
    for i in range(n_epochs):  # train the model n_epochs times
        for _ in range(n_batches):
            X_batch, Y_batch = MNIST.train.next_batch(batch_size)
            sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
# average loss should be around 0.35 after 25 epochs

PermissionDeniedError: /data

In [None]:
    # test the model
    n_batches = int(MNIST.test.num_examples/batch_size)
    total_correct_preds = 0
    for i in range(n_batches):
        X_batch,Y_batch = MNIST.test.next_batch(batch_size)
        _, loss_batch, logits_batch = sess.run([optimizer, loss, logits], feed_dict={X: X_batch, Y: Y_batch})
        preds = tf.nn.softmax(logits_batch)
        correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y_batch, 1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))   # similar
        total_correct_preds += sess.run(accuracy)
    print "Accuracy {0}".format(total_correct_preds/MNIST.test.num_examples)