## Tensorflow

### Install env: 
- conda create -n tensorflow python=3.5
- source activate tensorflow
- conda install pandas matplotlib jupyter notebook scipy scikit-learn
- pip install tensorflow

In [1]:
# Session / constant: the smallest possible TensorFlow program

import tensorflow as tf

# tf.constant adds an op to the graph; nothing is evaluated yet
hello_contant = tf.constant('Hello world~')

# Evaluating the op requires a session; the context manager closes it afterwards
with tf.Session() as sess:
    output = sess.run(hello_contant)
    print(output)

b'Hello world~'


###
1. In TensorFlow, data isn’t stored as integers, floats, or strings. These values are encapsulated in an object called a tensor.

2. TensorFlow’s API is built around the idea of a computational graph, a way of visualizing a mathematical process.

#### Input:
1. tf.placeholder() - returns a tensor that gets its value from data
2. feed_dict - use this parameter in tf.Session.run() to set the placeholder tensor
> output = sess.run(x, feed_dict={x: 'Test String', y: 123, z: 45.67})

#### Math:
- tf.add() / tf.subtract()
- tf.multiply() / tf.divide()
- tf.matmul() # matrix multiply
- Converting types:
> tf.cast(tf.constant(2.0), tf.int32)

#### Modifiable tensors:
- tf.Variable() # an initial value needs to be chosen

- tf.zeros
- tf.global_variables_initializer() # call returns an operation that will initialize all TensorFlow variables from the graph

In [6]:
# tf.add needs both operands to share a dtype, so the float constant is cast to int32 first
tf.add(
    tf.cast(tf.constant(2.0), tf.int32),
    tf.constant(1)
)

<tf.Tensor 'Add_3:0' shape=() dtype=int32>

In [9]:
# Initialise a weight matrix with values drawn from a truncated normal distribution

n_features, n_labels = 20, 5
weights = tf.Variable(tf.truncated_normal(shape=(n_features, n_labels)))
weights

<tensorflow.python.ops.variables.Variable at 0x11a5c2e80>

In [10]:
# One bias per label, all starting at zero
bias = tf.Variable(tf.zeros(shape=n_labels))

### Example: training on MNIST

In [12]:
def get_weights(n_features, n_labels):
    """
    Build the weight variable for a linear layer
    :param n_features: Number of features (first dimension of the variable)
    :param n_labels: Number of labels (second dimension of the variable)
    :return: tf.Variable initialised from a truncated normal distribution
    """
    shape = (n_features, n_labels)
    initial_values = tf.truncated_normal(shape)
    return tf.Variable(initial_values)

def get_biases(n_labels):
    """
    Build the bias variable for a linear layer
    :param n_labels: Number of labels (length of the bias vector)
    :return: tf.Variable initialised to zeros
    """
    initial_values = tf.zeros(n_labels)
    return tf.Variable(initial_values)

def linear(input, w, b):
    """
    Apply the linear function xW + b in TensorFlow
    :param input: TensorFlow input (x)
    :param w: TensorFlow weights (W)
    :param b: TensorFlow biases (b)
    :return: TensorFlow op computing xW + b
    """
    # Matrix product first, then the bias is added to the result
    weighted_input = tf.matmul(input, w)
    return tf.add(weighted_input, b)

In [11]:
from tensorflow.examples.tutorials.mnist import input_data

In [15]:
input.read_data_sets?

Object `input.read_data_sets` not found.


In [36]:
# sandbox

def mnist_features_labels(n_labels):
    """
    Collect MNIST samples whose label falls within the first <n> classes
    :param n_labels: Number of labels to use
    :return: Tuple of feature list and label list (labels truncated to n_labels entries)
    """
    # one_hot=True asks the loader for one-hot label vectors, which is what
    # makes the slice-and-any() test below meaningful
    mnist = input_data.read_data_sets('./MNIST_data', one_hot=True)

    # In order to make quizzes run faster, we're only looking at 10000 images.
    # next_batch returns a (features, labels) pair; the * unpacks that pair so
    # zip can walk both sequences together, one sample at a time.
    samples = zip(*mnist.train.next_batch(10000))

    # Keep a sample only when its label has a nonzero entry among the first <n> positions
    selected = [
        (image, one_hot_label[:n_labels])
        for image, one_hot_label in samples
        if one_hot_label[:n_labels].any()
    ]

    mnist_features = [image for image, _ in selected]
    mnist_labels = [label for _, label in selected]
    return mnist_features, mnist_labels


# A 28*28 image flattens to 784 features
n_features = 28 * 28
# Only this many label classes are used in the sandbox
n_labels = 3

# Placeholders that receive the features and labels when the graph is run
features = tf.placeholder(dtype=tf.float32)
labels = tf.placeholder(dtype=tf.float32)

# Trainable parameters of the linear model
w = get_weights(n_features, n_labels)
b = get_biases(n_labels)

# Linear Function xW + b
logits = linear(features, w, b)

# Training data restricted to the first n_labels classes
train_features, train_labels = mnist_features_labels(n_labels)

# Build the complete graph before opening the session: the session block then
# only executes ops, and every variable created while building the graph
# already exists when the initializer op is created and run.

# Softmax turns the logits into class probabilities
prediction = tf.nn.softmax(logits)

# Cross entropy
# This quantifies how far off the predictions were.
# axis=1 sums over the label entries of each sample, giving one value per
# sample (`axis` replaces the deprecated `reduction_indices` keyword).
cross_entropy = -tf.reduce_sum(labels * tf.log(prediction), axis=1)

# Training loss: the mean cross entropy over all samples fed in
loss = tf.reduce_mean(cross_entropy)

# Rate at which the weights are changed
learning_rate = 0.08

# Gradient Descent
# This is the method used to train the model
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

with tf.Session() as session:
    # Initialize session variables
    session.run(tf.global_variables_initializer())

    # Run one optimizer step over the whole training set and fetch the loss
    _, l = session.run(
        [optimizer, loss],
        feed_dict={features: train_features, labels: train_labels})

# Print loss
print('Loss: {}'.format(l))

KeyboardInterrupt: 

#### 
Softmax: tf.nn.softmax()

#### Cross entropy (熵) - used as the loss function


#### 
One-hot encoding with Scikit-learn

In [23]:
import numpy as np
from sklearn import preprocessing

labels = np.array([1, 5, 3, 2, 1, 4, 2, 1, 3])
# fit() returns the binarizer itself, so it can be chained onto the constructor
lb = preprocessing.LabelBinarizer().fit(labels)
lb.transform(labels)

array([[1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0]])

In [31]:
# Cross entropy of one softmax output against its one-hot target
softmax_data = [0.7, 0.2, 0.1]
one_hot_data = [1.0, 0.0, 0.0]

softmax = tf.placeholder(dtype=tf.float32)
one_hot = tf.placeholder(dtype=tf.float32)

# D(S, L) = -sum_i L_i * log(S_i)
cross_entropy = -tf.reduce_sum(one_hot * tf.log(softmax))

with tf.Session() as sess:
    print(sess.run(
        cross_entropy,
        feed_dict={softmax: softmax_data, one_hot: one_hot_data}))

0.356675


In [None]:
# SGD: Stochastic Gradient Descent


## Mini-batching
Mini-batching is a technique for training on subsets of the dataset instead of all the data at one time. This provides the ability to train a model even if a computer lacks the memory to store the entire dataset.

Mini-batching is often combined with SGD: the idea is to randomly shuffle the data at the start of each epoch, then create the mini-batches.

In [44]:
# create batch
import math
def batches(batch_size, features, labels):
    """
    Create batches of features and labels
    : param batch_size: the batch size; must be a positive integer
    : param features: List of features
    : param labels: List of labels
    : return: Batches of (Features, Labels); the last batch may be smaller
    : raises ValueError: if batch_size is zero or negative
    """
    assert len(features) == len(labels)

    # range() already raises ValueError for a zero step, but a negative step
    # would just produce an empty range and silently return no batches.
    if batch_size <= 0:
        raise ValueError(
            'batch_size must be a positive integer, got {!r}'.format(batch_size))

    sample_size = len(features)
    # Slicing past the end is safe, so the final batch simply holds the remainder
    return [
        [features[start_i:start_i + batch_size],
         labels[start_i:start_i + batch_size]]
        for start_i in range(0, sample_size, batch_size)
    ]

In [58]:
from tensorflow.examples.tutorials.mnist import input_data

n_input = 28 * 28  # MNIST data input: a 28*28 image flattened to 784 values
n_classes = 10  # MNIST total classes (0-9 digits), i.e. the output size

def print_epoch_stats(epoch_i, sess, last_features, last_labels):
    """
    Print the cost on the given batch and the accuracy on the validation set
    : param epoch_i: epoch number shown in the printed line
    : param sess: session used to evaluate the `cost` and `accuracy` ops
    : param last_features: features fed in when evaluating the cost
    : param last_labels: labels fed in when evaluating the cost
    """
    # Cost is measured on the batch passed in, accuracy on the module-level validation split
    batch_feed = {features: last_features, labels: last_labels}
    validation_feed = {features: valid_features, labels: valid_labels}

    current_cost = sess.run(cost, feed_dict=batch_feed)
    valid_accuracy = sess.run(accuracy, feed_dict=validation_feed)

    template = 'Epoch: {:<4} - Cost: {:<8.3} Valid Accuracy: {:<5.3}'
    print(template.format(epoch_i, current_cost, valid_accuracy))

# Import MNIST data with one-hot labels
#mnist = input_data.read_data_sets('MNIST', one_hot=True) # download
mnist = input_data.read_data_sets('./MNIST', one_hot=True) # use local data

# The features are already scaled and the data is shuffled
train_features = mnist.train.images
valid_features = mnist.validation.images
test_features = mnist.test.images

# Labels are converted to float32 to match the dtype of the `labels` placeholder
train_labels = mnist.train.labels.astype(np.float32)
valid_labels = mnist.validation.labels.astype(np.float32)
test_labels = mnist.test.labels.astype(np.float32)

# Weights & bias, both randomly initialised
weights = tf.Variable(tf.random_normal(shape=[n_input, n_classes]))
bias = tf.Variable(tf.random_normal(shape=[n_classes]))

# Mini-batching: the leading None leaves the batch dimension unspecified,
# so batches of any size can be fed into the same placeholders
features = tf.placeholder(tf.float32, shape=[None, n_input])
labels = tf.placeholder(tf.float32, shape=[None, n_classes])

# Logits - xW + b
logits = tf.add(tf.matmul(features, weights), bias)

# Loss and optimizer; the learning rate is a placeholder so it is supplied per run
learning_rate = tf.placeholder(tf.float32)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of samples whose highest logit matches the one-hot label
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Training parameters
learn_rate = 0.01
epochs = 50 # initial = 10
batch_size = 128

init = tf.global_variables_initializer()

# The batches are cut once here and reused unchanged in every epoch
train_batches = batches(batch_size, train_features, train_labels)

with tf.Session() as sess:
    sess.run(init)

    # One pass over every mini-batch per epoch
    for epoch_i in range(epochs):

        for batch_features, batch_labels in train_batches:
            # The learning rate is a placeholder, so it is fed alongside the batch
            train_feed_dict = {
                features: batch_features,
                labels: batch_labels,
                learning_rate: learn_rate,
            }
            sess.run(optimizer, feed_dict=train_feed_dict)

        # Report cost on the last batch of the epoch plus validation accuracy
        print_epoch_stats(epoch_i, sess, batch_features, batch_labels)

    # Final evaluation on the held-out test set
    test_feed_dict = {features: test_features, labels: test_labels}
    test_accuracy = sess.run(accuracy, feed_dict=test_feed_dict)

print('Test Accuracy: {}'.format(test_accuracy))

Extracting ./MNIST/train-images-idx3-ubyte.gz
Extracting ./MNIST/train-labels-idx1-ubyte.gz
Extracting ./MNIST/t10k-images-idx3-ubyte.gz
Extracting ./MNIST/t10k-labels-idx1-ubyte.gz
Epoch: 0    - Cost: 5.94     Valid Accuracy: 0.172
Epoch: 1    - Cost: 4.04     Valid Accuracy: 0.349
Epoch: 2    - Cost: 3.12     Valid Accuracy: 0.469
Epoch: 3    - Cost: 2.61     Valid Accuracy: 0.542
Epoch: 4    - Cost: 2.3      Valid Accuracy: 0.6  
Epoch: 5    - Cost: 2.08     Valid Accuracy: 0.639
Epoch: 6    - Cost: 1.92     Valid Accuracy: 0.667
Epoch: 7    - Cost: 1.8      Valid Accuracy: 0.688
Epoch: 8    - Cost: 1.7      Valid Accuracy: 0.703
Epoch: 9    - Cost: 1.62     Valid Accuracy: 0.721
Epoch: 10   - Cost: 1.54     Valid Accuracy: 0.73 
Epoch: 11   - Cost: 1.48     Valid Accuracy: 0.74 
Epoch: 12   - Cost: 1.43     Valid Accuracy: 0.749
Epoch: 13   - Cost: 1.38     Valid Accuracy: 0.761
Epoch: 14   - Cost: 1.33     Valid Accuracy: 0.768
Epoch: 15   - Cost: 1.29     Valid Accuracy: 0.774
Ep

## How to improve its accuracy
Reference: [THE MNIST DATABASE of handwritten digits](https://web.archive.org/web/20160117040036/http://yann.lecun.com/exdb/mnist/)