In [0]:
import pathlib

import matplotlib.pyplot as plt
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Eager execution has to be switched on explicitly in TensorFlow 1.x. From
# TensorFlow 2.0 on it is the default and this top-level function no longer
# exists, so only call it when the installed version still provides it.
if hasattr(tf, 'enable_eager_execution'):
  tf.enable_eager_execution()

from sklearn import datasets, model_selection

import numpy as np

print(tf.__version__)

# Logistic Regression, manual in python

In [0]:
# Load the digits data and keep only the samples labelled 0 or 1, which turns
# the task into a binary classification problem.
X, y = datasets.load_digits(return_X_y=True)
zero_or_one = (y == 0) | (y == 1)
X, y = X[zero_or_one], y[zero_or_one]
print(X.shape)
print(y.shape)

In [0]:
# Pick one sample at random and show it as an 8x8 grayscale image together
# with its label.
idx = np.random.randint(0, X.shape[0])
# Assigning to `_` keeps the matplotlib return values out of the cell output.
_ = plt.imshow(X[idx, :].reshape(8, 8), cmap=plt.cm.gray)
_ = plt.axis('off')
print(f'The label is {y[idx]}')

Reminder: Sigmoid function is 

```
f(x) = 1/(1 + exp(-(w_0 + x_1*w_1 + x_2*w_2 + ...)))
```



In [0]:
def setup_training_data(X, y, random_state=None):
  '''Split into train/test sets, standardise the features and add a bias column.

  Args:
    X: 2-D array with one sample per row and one feature per column.
    y: Array of labels, one per row of X.
    random_state: Optional seed forwarded to train_test_split so the shuffled
      80/20 split can be reproduced. None keeps the unseeded behaviour.

  Returns:
    Tuple (X_train, X_test, y_train, y_test). Both X arrays are float, scaled
    with the training-set mean and standard deviation, and carry a leading
    column of ones. Both y arrays are flattened to 1-D.
  '''
  X_train, X_test, y_train, y_test = model_selection.train_test_split(
      X, y, shuffle=True, train_size=0.8, test_size=0.2,
      random_state=random_state)

  # Work in floating point so that integer-typed input is not silently
  # truncated when the standardised values are stored.
  X_train = np.asarray(X_train, dtype=float)
  X_test = np.asarray(X_test, dtype=float)

  # Statistics come from the training split only and are reused for the test
  # split, so no information from the test data leaks into the scaling.
  mean = X_train.mean(axis=0)
  std = X_train.std(axis=0)

  # Columns with zero spread are left exactly as they are (no shift, no
  # scaling), which also avoids dividing by zero.
  varies = std > 0
  shift = np.where(varies, mean, 0.0)
  scale = np.where(varies, std, 1.0)
  X_train = (X_train - shift) / scale
  X_test = (X_test - shift) / scale

  # Prepend a column of ones so the first weight acts as the intercept.
  X_train = np.concatenate((np.ones((X_train.shape[0], 1)), X_train), 1)
  X_test = np.concatenate((np.ones((X_test.shape[0], 1)), X_test), 1)

  return X_train, X_test, y_train.ravel(), y_test.ravel()

X_train, X_test, y_train, y_test = setup_training_data(X, y)
# Sanity check: column 0 is the constant column of ones added by
# setup_training_data, so its mean is 1 and its standard deviation is 0.
print(X_train[:, 0].mean())
print(X_train[:, 0].std())
# Column 2 is one of the original features. If it was standardised (non-zero
# spread in the training split) these print roughly 0 and 1.
print(X_train[:, 2].mean())
print(X_train[:, 2].std())

In [0]:
def setup_logistic(X):
  '''Draw initial weights for a logistic model from a standard normal.

  One weight is created per column of X; the result is a column vector of
  shape (X.shape[1], 1).
  '''
  n_features = X.shape[1]
  return np.random.standard_normal(size=(n_features, 1))

# Random starting weights, one per column of X_train.
w = setup_logistic(X_train)

def sigmoid(X, w):
  '''Return the logistic sigmoid of the linear scores np.dot(X, w).

  Args:
    X: Input matrix with one sample per row.
    w: Weights with one entry per column of X.

  Returns:
    Flat 1-D array holding 1 / (1 + exp(-score)) for every score.
  '''
  z = np.dot(X, w)
  # exp() is only ever applied to a non-positive number, so it cannot
  # overflow for strongly negative scores (exp(-z) would).
  e = np.exp(-np.abs(z))
  # For z >= 0 this is 1 / (1 + exp(-z)); for z < 0 the algebraically
  # identical exp(z) / (1 + exp(z)) is used instead.
  res = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
  return res.ravel()

def accuracy(y, y_hat):
  '''Fraction of samples whose thresholded prediction equals the true label.

  Values in y_hat strictly above 0.5 count as class 1, everything else as
  class 0. Both inputs are flattened before they are compared.
  '''
  predicted = (y_hat > 0.5).ravel().astype('int')
  actual = y.ravel().astype('int')
  hits = np.sum(actual == predicted)
  return np.float64(hits) / len(actual)

# Baseline: training accuracy of the randomly initialised (untrained) weights.
y_hat = sigmoid(X_train, w)
print(accuracy(y_train, y_hat))
  

In [0]:
# Fit the logistic model with full-batch gradient steps, reporting train and
# test accuracy after every epoch.
epochs = 50
learning_rate = 0.0001

# Start again from fresh random weights.
w = setup_logistic(X_train)

for epoch in range(epochs):
  y_hat = sigmoid(X_train, w)
  # Difference between the labels and the predicted probabilities;
  # X_train.T . err is the gradient of the log-likelihood with respect to w.
  err = y_train - y_hat
  # err is 1-D, so the product is 1-D too; add an axis so it lines up with
  # the column-vector shape of w.
  update_mat = np.expand_dims(np.dot(X_train.T, err), 1)
  # Step along the gradient (ascent on the log-likelihood).
  w = w + learning_rate * update_mat

  # Re-score with the updated weights to report this epoch's accuracy.
  y_hat = sigmoid(X_train, w)
  acc = accuracy(y_train, y_hat)
  print(f'On epoch {epoch} the training accuracy of the model is {acc:0.3f}')
  
  y_hat_test = sigmoid(X_test, w)
  test_acc = accuracy(y_test, y_hat_test)
  print(f'On epoch {epoch} the test accuracy of the model is {test_acc:0.3f}')
  

# Linear Regression in Tensorflow

In [0]:
# Fetch the Auto MPG data file; the value returned by get_file is used below
# as the path to read. The bare last line displays it.
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path

In [0]:
# Column names are supplied here because they are passed to read_csv via
# `names=` rather than read from the file.
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']
# "?" is read as a missing value, fields are separated by spaces (extra
# leading spaces skipped), and everything from a tab onwards on a line is
# treated as a comment and ignored.
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values = "?", comment='\t',
                      sep=" ", skipinitialspace=True)

# Work on a copy so raw_dataset stays untouched, then drop every row that
# contains a missing value.
dataset = raw_dataset.copy()
dataset = dataset.dropna()
dataset.tail()

In [0]:
# One-hot encode the numeric Origin code: remove the column and add one
# 0.0/1.0 indicator column per code (1 -> USA, 2 -> Europe, 3 -> Japan).
# NOTE: pop() removes 'Origin' from `dataset`, so this cell cannot be re-run
# without first re-running the cell that builds `dataset`.
origin = dataset.pop('Origin')

dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()

In [0]:
# Randomly sample 80% of the rows for training (fixed seed so the split is
# repeatable); the rows that were not sampled form the test set.
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [0]:
# Summary statistics of the training features. The MPG column is removed
# because it is the label, and the table is transposed so each feature is a
# row and 'mean' / 'std' are columns (this is how norm() looks them up).
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats

In [0]:
# Separate the target from the features. pop() removes 'MPG' from both frames
# in place, so this cell can only be run once per split.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

In [0]:
def norm(x):
  '''Standardise x with the per-feature mean and std stored in train_stats.'''
  centered = x - train_stats['mean']
  return centered / train_stats['std']
# Both splits are scaled with the statistics of the training split.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [0]:
def build_model():
  '''Create and compile a linear regression model as a one-unit Dense layer.

  The input width is the number of columns in train_dataset. The model is
  trained with SGD (learning rate 0.01) on mean squared error and also
  reports mean absolute error and mean squared error as metrics.

  Returns:
    The compiled keras.Sequential model.
  '''
  n_features = len(train_dataset.keys())
  linear_layer = layers.Dense(1, activation='linear', input_shape=[n_features])
  model = keras.Sequential([linear_layer])

  model.compile(loss='mean_squared_error',
                optimizer=tf.keras.optimizers.SGD(0.01),
                metrics=['mean_absolute_error', 'mean_squared_error'])
  return model

In [0]:
# Build a fresh, untrained model.
model = build_model()

In [0]:
# Print the layer structure and parameter counts.
model.summary()

In [0]:
EPOCHS = 1000

# Train silently (verbose=0) with 20% of the training data held out for
# validation. The returned History object keeps the per-epoch metrics that
# are inspected and plotted below.
history = model.fit(
  normed_train_data, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[])

In [0]:
# Per-epoch metrics as a DataFrame, with the epoch number added as a column;
# show the last few epochs.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [0]:
def plot_history(history):
  '''Plot training and validation error per epoch from a fit() history.

  Draws two figures, one for mean absolute error and one for mean squared
  error, each with a training curve and a validation curve, then shows them.

  Args:
    history: Object with a `history` mapping of per-epoch metric values and
      an `epoch` sequence, as returned by model.fit().
  '''
  hist = pd.DataFrame(history.history)
  hist['epoch'] = history.epoch

  # (metric key, y-axis label, upper y-axis limit) for each figure.
  panels = (
      ('mean_absolute_error', 'Mean Abs Error [MPG]', 5),
      ('mean_squared_error', 'Mean Square Error [$MPG^2$]', 20),
  )
  for metric, y_label, y_max in panels:
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.plot(hist['epoch'], hist[metric], label='Train Error')
    # Validation metrics are stored under the same key with a 'val_' prefix.
    plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
    plt.ylim([0, y_max])
    plt.legend()
  plt.show()


# Show the error curves of the training run above.
plot_history(history)

# Automatic Differentiation in Tensorflow

## Gradient tapes

TensorFlow provides the [tf.GradientTape](https://www.tensorflow.org/api_docs/python/tf/GradientTape) API for automatic differentiation - computing the gradient of a computation with respect to its input variables. Tensorflow "records" all operations executed inside the context of a `tf.GradientTape` onto a "tape". Tensorflow then uses that tape and the gradients associated with each recorded operation to compute the gradients of a "recorded" computation using [reverse mode differentiation](https://en.wikipedia.org/wiki/Automatic_differentiation).

For example:

In [0]:
x = tf.ones((2, 2))

with tf.GradientTape() as t:
  # x is a plain tensor rather than a tf.Variable, so the tape is told
  # explicitly to track it.
  t.watch(x)
  # y is the sum of the four ones (4) and z = y * y (16).
  y = tf.reduce_sum(x)
  z = tf.multiply(y, y)

# Derivative of z with respect to the original input tensor x
# (dz/dx = 2 * y = 8 for every element of x).
dz_dx = t.gradient(z, x)
for i in [0, 1]:
  for j in [0, 1]:
    assert dz_dx[i][j].numpy() == 8.0

You can also request gradients of the output with respect to intermediate values computed during a "recorded" `tf.GradientTape` context.

In [0]:
# Same computation as before: y = sum(x) = 4 and z = y * y.
x = tf.ones((2, 2))

with tf.GradientTape() as t:
  t.watch(x)
  y = tf.reduce_sum(x)
  z = tf.multiply(y, y)

# Use the tape to compute the derivative of z with respect to the
# intermediate value y (dz/dy = 2 * y = 8).
dz_dy = t.gradient(z, y)
assert dz_dy.numpy() == 8.0

By default, the resources held by a `GradientTape` are released as soon as the `GradientTape.gradient()` method is called. To compute multiple gradients over the same computation, create a `persistent` gradient tape. This allows multiple calls to the `gradient()` method, because the resources are only released when the tape object is garbage collected. For example:

In [0]:
x = tf.constant(3.0)
# persistent=True allows gradient() to be called more than once on this tape.
with tf.GradientTape(persistent=True) as t:
  t.watch(x)
  # y = x^2 and z = y^2 = x^4.
  y = x * x
  z = y * y
dz_dx = t.gradient(z, x)  # 108.0 (4*x^3 at x = 3)
dy_dx = t.gradient(y, x)  # 6.0
del t  # Drop the reference to the tape

### Recording control flow

Because tapes record operations as they are executed, Python control flow (using `if`s and `while`s for example) is naturally handled:

In [0]:
def f(x, y):
  '''Multiply 1.0 by x once for every i in range(y) with 1 < i < 5.

  The number of multiplications (at most three, for i = 2, 3, 4) depends on
  y through ordinary Python control flow. When no iteration qualifies the
  plain float 1.0 is returned unchanged.
  '''
  output = 1.0
  for i in range(y):
    if not 1 < i < 5:
      continue
    output = tf.multiply(output, x)
  return output

def grad(x, y):
  '''Return the derivative of f(x, y) with respect to x.

  x is watched explicitly on the tape before f is evaluated.
  '''
  with tf.GradientTape() as tape:
    tape.watch(x)
    result = f(x, y)
  df_dx = tape.gradient(result, x)
  return df_dx

x = tf.convert_to_tensor(2.0)

# y = 6 and y = 5 both multiply for i = 2, 3, 4: f = x^3, gradient 3*x^2 = 12.
assert grad(x, 6).numpy() == 12.0
assert grad(x, 5).numpy() == 12.0
# y = 4 multiplies only for i = 2, 3: f = x^2, gradient 2*x = 4.
assert grad(x, 4).numpy() == 4.0


### Higher-order gradients

Operations inside of the `GradientTape` context manager are recorded for automatic differentiation. If gradients are computed in that context, then the gradient computation is recorded as well. As a result, the exact same API works for higher-order gradients as well. For example:

In [0]:
x = tf.Variable(1.0)  # Create a Tensorflow variable initialized to 1.0

# The outer tape t records the gradient computation done by the inner tape t2.
with tf.GradientTape() as t:
  with tf.GradientTape() as t2:
    y = x * x * x
  # Compute the gradient inside the 't' context manager
  # which means the gradient computation is differentiable as well.
  dy_dx = t2.gradient(y, x)
d2y_dx2 = t.gradient(dy_dx, x)

# With y = x^3: dy/dx = 3*x^2 = 3 and d2y/dx2 = 6*x = 6 at x = 1.
assert dy_dx.numpy() == 3.0
assert d2y_dx2.numpy() == 6.0

# Tensorflow Neural Network Example

In [0]:
# NOTE: tensorflow is already imported in the first cell; this repeats it.
import tensorflow as tf
mnist = tf.keras.datasets.mnist

# NOTE: y_train and y_test replace the variables of the same name created in
# the logistic regression section.
(x_train, y_train),(x_test, y_test) = mnist.load_data()
# Rescale the images by 1/255 (presumably mapping 0-255 pixel values to
# [0, 1] - confirm against the dataset).
x_train, x_test = x_train / 255.0, x_test / 255.0

In [0]:
# Small fully connected classifier: flatten each 28x28 input, one hidden
# layer of 128 ReLU units, and a 10-way softmax output.
# NOTE: this rebinds `model`, replacing the linear regression model above.
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation=tf.nn.relu),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
# The sparse categorical cross-entropy loss is used with the labels exactly
# as returned by load_data().
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)

In [0]:
# Loss and accuracy on the held-out test images.
model.evaluate(x_test, y_test)