### Linear Regression 

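- Batch Gradient Descent: every update computes the gradient over the entire dataset.
- Stochastic Gradient Descent (SGD): every update computes the gradient from a small random subset of the data (a mini-batch).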
- In TensorFlow:

<pre>optimizer = tf.train.GradientDescentOptimizer(learning_rate)</pre>
<pre>train = optimizer.minimize(loss)</pre>


In [12]:
import numpy as np

# Synthetic linear-regression data: y = w_real . x + b_real + Gaussian noise.
np.random.seed(0)  # fixed seed so a fresh kernel regenerates exactly the same dataset

num_samples = 2000  # single source of truth: x_data and noise must agree on this
x_data = np.random.randn(num_samples, 3)
w_real = [0.4, 0.6, 0.2]
b_real = -0.3
noise = np.random.randn(1, num_samples) * 0.1  # standard deviation 0.1, one value per sample
# matmul gives a length-num_samples vector; adding the (1, num_samples) noise
# broadcasts the result to shape (1, num_samples).
y_data = np.matmul(w_real, x_data.T) + b_real + noise

In [13]:
# Fit y = w . x + b to (x_data, y_data) by gradient descent on the mean squared
# error, feeding the whole dataset at every step.
num_iters = 10
g = tf.Graph()
wb = []  # snapshots of [w, b] collected at every logged step

with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 3])
    y_true = tf.placeholder(tf.float32, shape=None)

    with tf.name_scope('inference') as scope:
        w = tf.Variable([[0, 0, 0]], dtype=tf.float32, name='W')
        b = tf.Variable(0, dtype=tf.float32, name='b')
        # w is a 1x3 row, so x is transposed to yield one row of predictions.
        y_pred = tf.matmul(w, tf.transpose(x)) + b

    with tf.name_scope('loss') as scope:
        loss = tf.reduce_mean(tf.square(y_true - y_pred))

    with tf.name_scope('training') as scope:
        lr = 0.5
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for step in range(num_iters):
            sess.run(train, {x: x_data, y_true: y_data})
            if step % 5 == 0:
                # Fetch once and reuse, so the printed and the stored values
                # come from the same evaluation.
                snapshot = sess.run([w, b])
                print(step, snapshot)
                wb.append(snapshot)

        # Label the final report with the real iteration count rather than a
        # literal that silently goes stale when num_iters is changed.
        print(num_iters, sess.run([w, b]))

0 [array([[ 0.4324733 ,  0.62102544,  0.21289678]], dtype=float32), -0.26101521]
5 [array([[ 0.39815262,  0.59838271,  0.20108758]], dtype=float32), -0.30086589]
10 [array([[ 0.39815292,  0.59838313,  0.20108768]], dtype=float32), -0.30086562]


### Logistic Regression 

In [61]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), evaluated with NumPy.

    Accepts anything that supports unary minus and `np.exp` (a number or an
    ndarray) and returns a result of the matching shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Synthetic logistic-regression data: each label is a Bernoulli draw whose
# success probability is sigmoid(w_real . x + b_real).
np.random.seed(0)  # fixed seed so a fresh kernel regenerates exactly the same dataset

x_data = np.random.randn(20000, 3)
w_real = [0.4, 0.6, 0.2]
b_real = -0.3
# Kept under its own name instead of `wb`: the training cells use `wb` for their
# list of [w, b] snapshots, and reusing it here would replace that list with an array.
logits_real = np.matmul(w_real, x_data.T) + b_real

y_data_bef_noise = sigmoid(logits_real)           # per-sample probability of label 1
y_data = np.random.binomial(1, y_data_bef_noise)  # one 0/1 draw per sample

In [62]:
# Fit a logistic-regression model to (x_data, y_data) by gradient descent on the
# mean sigmoid cross-entropy, feeding the whole dataset at every step.
num_iters = 50
g = tf.Graph()
wb = []  # snapshots of [w, b] collected at every logged step

with g.as_default():
    x = tf.placeholder(tf.float32, shape=[None, 3])
    y_true = tf.placeholder(tf.float32, shape=None)

    with tf.name_scope('inference') as scope:
        w = tf.Variable([[0, 0, 0]], dtype=tf.float32, name='W')
        b = tf.Variable(0, dtype=tf.float32, name='b')
        # y_pred holds raw logits; the sigmoid is applied inside the loss op.
        y_pred = tf.matmul(w, tf.transpose(x)) + b

    with tf.name_scope('loss') as scope:
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred)
        loss = tf.reduce_mean(loss)

    with tf.name_scope('training') as scope:
        lr = 0.5
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        for step in range(num_iters):
            sess.run(train, {x: x_data, y_true: y_data})
            if step % 5 == 0:
                # Fetch once and reuse, so the printed and the stored values
                # come from the same evaluation.
                snapshot = sess.run([w, b])
                print(step, snapshot)
                wb.append(snapshot)

        # Label the final report with the real iteration count rather than a
        # literal that silently goes stale when num_iters is changed.
        print(num_iters, sess.run([w, b]))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
0 [array([[0.0450779 , 0.06196544, 0.02131424]], dtype=float32), -0.032474972]
5 [array([[0.20027295, 0.27623105, 0.09478565]], dtype=float32), -0.14451593]
10 [array([[0.28493407, 0.3939746 , 0.13491543]], dtype=float32), -0.20562238]
15 [array([[0.33391127, 0.46251875, 0.15813442]], dtype=float32), -0.24080153]
20 [array([[0.36336213, 0.50395566, 0.1720913 ]], dtype=float32), -0.26180792]
25 [array([[0.38149446, 0.52958345, 0.18067934]], dtype=float32), -0.2746434]
30 [array([[0.392821  , 0.54565424, 0.18604046]], dtype=float32), -0.28260195]
35 [array([[0.39996022, 0.5558173 , 0.18941744]], dtype=float32), -0.28758383]
40 [array([[0.4044857 , 0.56227785, 0.19155672]], dtype=float32), -0.29072213]
45 [array([[0.4073647 , 0.56639796, 0.19291689]], dtype=float32), -0.29270762]
50 [array([[0.40889612, 0.56859374, 0.19364004]], dtype=float32), -0.29375905]


## Softmax Regression with MNIST



![Sample MNIST digits](mnist.png)
<p style='text-align:right'><em>By Josef Steppan (Own work), via Wikimedia Commons</em></p>

In [16]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [17]:
# Run configuration for the MNIST softmax-regression example.
minibatch_size = 100  # examples requested from the training set per step
num_iters = 1000      # number of training steps
datadir = '/data'     # directory handed to the MNIST reader

In [18]:
# Softmax regression on MNIST: class scores are x @ W (no bias term), trained
# with gradient descent on mini-batches and scored on the test split.
data = input_data.read_data_sets(datadir, one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
y_true = tf.placeholder(tf.float32, [None, 10])
y_pred = tf.matmul(x, W)  # raw scores; they are passed to the loss as logits

per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true)
loss = tf.reduce_mean(per_example_loss)
# Despite its name, `optimizer` is the op returned by minimize(), i.e. the
# operation that performs one training update when run.
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

# A prediction counts as correct when the arg-max score matches the arg-max label.
predicted_class = tf.argmax(y_pred, 1)
labelled_class = tf.argmax(y_true, 1)
correct_pred = tf.equal(predicted_class, labelled_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(num_iters):
        batch_xs, batch_ys = data.train.next_batch(minibatch_size)
        train_feed = {x: batch_xs, y_true: batch_ys}
        sess.run(optimizer, feed_dict=train_feed)

    test_feed = {x: data.test.images, y_true: data.test.labels}
    testing = sess.run(accuracy, feed_dict=test_feed)

print('Accuracy: {:.4}%'.format(testing*100))

Extracting /data\train-images-idx3-ubyte.gz
Extracting /data\train-labels-idx1-ubyte.gz
Extracting /data\t10k-images-idx3-ubyte.gz
Extracting /data\t10k-labels-idx1-ubyte.gz
Accuracy: 91.66%


### Helper Functions

def weight_variable(shape):
    """Create a `tf.Variable` of the given shape whose initial values are drawn
    from a truncated normal distribution with standard deviation 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a `tf.Variable` of the given shape with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Apply `tf.nn.conv2d` to `x` with filter `W`, using a stride of 1 along
    every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Apply `tf.nn.max_pool` to `x` with a [1, 2, 2, 1] window, matching
    [1, 2, 2, 1] strides and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def conv_layer(input, shape):
    """Build a convolution layer followed by ReLU.

    Creates a fresh filter variable of the given `shape` and a bias variable
    with `shape[3]` entries, then returns relu(conv2d(input, filter) + bias).
    """
    kernel = weight_variable(shape)
    offset = bias_variable([shape[3]])
    pre_activation = conv2d(input, kernel) + offset
    return tf.nn.relu(pre_activation)

def full_layer(input, size):
    """Build a fully connected layer without an activation.

    The input width is read from dimension 1 of `input`'s static shape; the
    layer creates a [width, size] weight variable and a [size] bias variable
    and returns matmul(input, weights) + bias.
    """
    n_inputs = int(input.get_shape()[1])
    weights = weight_variable([n_inputs, size])
    biases = bias_variable([size])
    return tf.matmul(input, weights) + biases

### The Model (A CNN Approach)

# Inputs: flattened 784-pixel images and one-hot labels over 10 classes.
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])  # restore the 28x28 single-channel layout

# Stage 1: 5x5 convolution to 32 feature maps, then 2x2 max pooling.
conv1 = conv_layer(x_image, shape=[5, 5, 1, 32])
conv1_pool = max_pool_2x2(conv1)

# Stage 2: 5x5 convolution from 32 to 64 feature maps, then 2x2 max pooling.
conv2 = conv_layer(conv1_pool, shape=[5, 5, 32, 64])
conv2_pool = max_pool_2x2(conv2)
# Two stride-2 poolings take 28 -> 14 -> 7, leaving 7*7 positions x 64 maps.
flat_width = 7 * 7 * 64
conv2_flat = tf.reshape(conv2_pool, [-1, flat_width])

# Fully connected head with dropout; keep_prob is supplied at run time.
full_1 = tf.nn.relu(full_layer(conv2_flat, 1024))
keep_prob = tf.placeholder(tf.float32)
full1_drop = tf.nn.dropout(full_1, keep_prob=keep_prob)
y_conv = full_layer(full1_drop, 10)  # class logits

mnist = input_data.read_data_sets(datadir, one_hot=True)
per_example_xent = tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_)
cross_entropy = tf.reduce_mean(per_example_xent)
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)
# A prediction counts as correct when the arg-max logit matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

[[ 0.85517861 -0.21548274  0.6444358  -0.10079988 -0.78606453  0.18190946
  -0.76554039 -0.41945907  1.67525144]
 [-0.1896261   0.28087765  1.35951643 -0.96064056  0.72667259  0.92944932
   0.12309516  1.96124824 -0.05697205]
 [ 0.42001756  1.72626952  2.88989338  0.14850762 -0.30657997 -2.40677232
  -0.59435295  2.58680778  0.04350803]
 [-1.66112881 -0.85911901  1.59722591 -0.19435795 -0.62307969 -0.97914755
   1.89224924 -0.99496918 -0.33955149]]
[[-0.52530402]
 [ 0.73122642]
 [-0.70642375]
 [-0.94313753]
 [ 0.51149802]
 [-0.6594694 ]
 [-1.35141269]
 [-0.86929466]
 [-0.20274482]]
Output: -1.4294602870941162


In [28]:
# Train the CNN for a fixed number of mini-batch steps, reporting accuracy on
# the current batch every 10 steps, then score the test set in 10 chunks.
steps = 180

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(steps):
        batch_images, batch_labels = mnist.train.next_batch(50)

        if i % 10 == 0:
            # Dropout is switched off (keep_prob 1.0) while measuring accuracy.
            eval_feed = {x: batch_images, y_: batch_labels, keep_prob: 1.0}
            train_accuracy = sess.run(accuracy, feed_dict=eval_feed)
            print("step {}, training accuracy {}".format(i, train_accuracy))

        train_feed = {x: batch_images, y_: batch_labels, keep_prob: 0.5}
        sess.run(train_step, feed_dict=train_feed)

    # Split the test set into 10 chunks of 1000 and evaluate one chunk per run
    # instead of feeding all of it at once.
    X = mnist.test.images.reshape(10, 1000, 784)
    Y = mnist.test.labels.reshape(10, 1000, 10)
    chunk_accuracies = []
    for chunk_images, chunk_labels in zip(X, Y):
        chunk_feed = {x: chunk_images, y_: chunk_labels, keep_prob: 1.0}
        chunk_accuracies.append(sess.run(accuracy, feed_dict=chunk_feed))
    test_accuracy = np.mean(chunk_accuracies)

print("test accuracy: {}".format(test_accuracy))

step 0, training accuracy 0.03999999910593033
step 10, training accuracy 0.18000000715255737
step 20, training accuracy 0.5600000023841858
step 30, training accuracy 0.47999998927116394
step 40, training accuracy 0.7400000095367432
step 50, training accuracy 0.6800000071525574
step 60, training accuracy 0.8600000143051147
step 70, training accuracy 0.8399999737739563
step 80, training accuracy 0.7599999904632568
step 90, training accuracy 0.7799999713897705
step 100, training accuracy 0.8799999952316284
step 110, training accuracy 0.800000011920929
step 120, training accuracy 0.8799999952316284
step 130, training accuracy 0.8999999761581421
step 140, training accuracy 0.8999999761581421
step 150, training accuracy 0.9200000166893005
step 160, training accuracy 0.8799999952316284
step 170, training accuracy 0.9399999976158142
test accuracy: 0.9029000401496887
