In [None]:
import os.path
import re

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression

import helpers

%matplotlib inline

We're going to toy around with the [UCI Wine Quality data set](https://archive.ics.uci.edu/ml/datasets/Wine+Quality). The code below downloads the data and info files to the correct directory (`data/02/`).

In [None]:
# Fetch the white-wine CSV and the dataset description file into data/02.
# NOTE(review): helpers.mkdir / helpers.download live in the local `helpers`
# module; the value returned by download is later handed to pd.read_csv, so it
# is presumably the local file path -- confirm against helpers.
data_dir = 'data/02'
helpers.mkdir(data_dir)

csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'
csv_dest = os.path.join(data_dir, 'winequality-white.csv')
data_path = helpers.download(csv_url, csv_dest)

names_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality.names'
names_dest = os.path.join(data_dir, 'winequality.names')
names_path = helpers.download(names_url, names_dest)

The main data file is a semicolon-delimited CSV. We load it into a pandas DataFrame and shuffle its rows.

In [None]:
# Load the ';'-delimited CSV into pandas, shuffling the rows along the way.
# A dedicated, seeded RandomState makes the shuffle (and therefore the
# train/validation/test split derived from it) reproducible across
# Restart & Run All, without touching NumPy's global random state.
shuffle_rng = np.random.RandomState(42)
data = pd.read_csv(data_path, sep=';')
data = data.reindex(shuffle_rng.permutation(data.index))
data.head()

In [None]:
# Split data into x inputs (first 11 columns) and y labels (last column),
# both converted to float32 to match the float32 placeholders fed later.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0.
x = data.iloc[:, :11].values.astype(np.float32)
y = data.iloc[:, -1].values.astype(np.float32)

## 1. Split data into training, validation, and test sets

* Use a 60/20/20 percentage split between the datasets

In [None]:
num_rows = len(data)
train_split = 0.6
valid_split = 0.2

# Example counts for the first two partitions, rounded down. The test set
# takes whatever remains: num_rows - num_train - num_valid.
num_train = int(num_rows * train_split)
num_valid = int(num_rows * valid_split)

# Row offsets at which the arrays are cut: [0, num_train) is training,
# [num_train, num_train + num_valid) is validation, the rest is test.
split_points = [num_train, num_train + num_valid]

# Inputs for training / validation / test
train_data, valid_data, test_data = np.split(x, split_points)
# Labels, cut at the same offsets so each row stays aligned with its input
train_labels, valid_labels, test_labels = np.split(y, split_points)

In [None]:
# Each split must have exactly one label per input row
size_checks = (
    (train_data, train_labels, 'Train data input/label size mismatch'),
    (valid_data, valid_labels, 'Validation data input/label size mismatch'),
    (test_data, test_labels, 'Test data input/label size mismatch'),
)
for split_inputs, split_labels, mismatch_message in size_checks:
    assert len(split_inputs) == len(split_labels), mismatch_message

# Report how many examples ended up in each split
size_reports = (
    ('Number of training examples: \t{}', train_data),
    ('Number of validation examples: \t{}', valid_data),
    ('Number of test examples: \t{}', test_data),
)
for report_template, split_inputs in size_reports:
    print(report_template.format(len(split_inputs)))

## 2. Create a basic linear model

* Your weights should be initialized with `tf.truncated_normal()`
* Your bias should be initialized to zero

In [None]:
# Simple graph, nothing fancy: a linear model y_hat = x.w + b trained with
# plain gradient descent on mean squared error.
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope('inputs'):
        # 11 input features per example; one scalar label per example
        inputs = tf.placeholder(tf.float32, [None, 11], name='inputs')
        labels = tf.placeholder(tf.float32, [None], name='labels')
        # Fed at run time so different rates can be tried without rebuilding
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    with tf.name_scope('model'):
        w = tf.Variable(tf.truncated_normal([11, 1], stddev=0.01), name='w')
        b = tf.Variable(tf.zeros([]), name='b')
        # matmul yields shape [batch, 1] while labels is [batch]. Subtracting
        # those directly broadcasts to [batch, batch] (every prediction
        # against every label), so flatten the predictions to [batch] first.
        y_hat = tf.reshape(tf.matmul(inputs, w), [-1]) + b
    with tf.name_scope('loss'):
        # Per-example squared error, averaged over the batch
        error = tf.reduce_mean(tf.square(y_hat - labels), name='MSE')
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
    with tf.name_scope('global_step'):
        # Non-trainable counter, bumped once per training run via inc_step
        global_step = tf.Variable(0, trainable=False, name='global_step')
        inc_step = tf.assign_add(global_step, 1, name='increment_step')
    init = tf.global_variables_initializer()

## 3. Train your model

* You'll have to experiment with different learning rates
* Print out your loss information

In [None]:
# Create Session and initialize Variables
# The session is bound to `graph`; running `init` executes the
# global-variables initializer op that was built inside that graph.
sess = tf.Session(graph=graph)
sess.run(init)

In [None]:
# Feed dictionaries for each split; only the training feed carries a
# learning rate because only the `train` op consumes it.
train_dict = {inputs: train_data, labels: train_labels, learning_rate: 0.000005}
valid_dict = {inputs: valid_data, labels: valid_labels}
test_dict = {inputs: test_data, labels: test_labels}

num_steps = 500
report_every = 50
report_template = 'Step: {}\n\ttrain error:\t\t{}\n\tvalidation error:\t{}'

for iteration in range(num_steps):
    # One gradient step on the whole training set; the same run also fetches
    # the training error and increments the step counter.
    err, step, _ = sess.run([error, inc_step, train], train_dict)
    if step % report_every != 0:
        continue
    # Get validation data loss
    v_err = sess.run(error, valid_dict)
    print(report_template.format(step, err, v_err))

In [None]:
# When satisfied with model, close Session
# (`sess` is rebound to a new Session for the TensorBoard graph further down,
# so this one is closed here before the name is reused.)
sess.close()

## 4. Soup it up

Let's add some TensorBoard features:

* Add a scalar summary for your loss
* Open up a FileWriter to save summaries to disk
* Periodically write summary data to disk
* Add some name scopes if you haven't already!

In [None]:
# Fancier graph: the same linear model, plus TensorBoard summaries
fancy_graph = tf.Graph()
with fancy_graph.as_default():
    with tf.name_scope('inputs'):
        # 11 input features per example; one scalar label per example
        inputs = tf.placeholder(tf.float32, [None, 11], name='inputs')
        labels = tf.placeholder(tf.float32, [None], name='labels')
        # Fed at run time so different rates can be tried without rebuilding
        learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    with tf.name_scope('model'):
        w = tf.Variable(tf.truncated_normal([11, 1], stddev=0.01), name='w')
        b = tf.Variable(tf.zeros([]), name='b')
        # matmul yields shape [batch, 1] while labels is [batch]. Subtracting
        # those directly broadcasts to [batch, batch] (every prediction
        # against every label), so flatten the predictions to [batch] first.
        y_hat = tf.reshape(tf.matmul(inputs, w), [-1]) + b
    with tf.name_scope('loss'):
        # Per-example squared error, averaged over the batch
        error = tf.reduce_mean(tf.square(y_hat - labels), name='MSE')
        train = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)
    with tf.name_scope('global_step'):
        # Non-trainable counter, bumped once per training run via inc_step
        global_step = tf.Variable(0, trainable=False, name='global_step')
        inc_step = tf.assign_add(global_step, 1, name='increment_step')

    # Summaries: loss and bias as scalars, weights as a histogram.
    # error_summ is kept separately so the validation pass can log only the loss.
    error_summ = tf.summary.scalar('error_summary', error)
    w_summ = tf.summary.histogram('w_summary', w)
    b_summ = tf.summary.scalar('b_summary', b)
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

In [None]:
# Create Session and initialize Variables
# The session is bound to `fancy_graph`; `init` here is the initializer op
# most recently built inside that graph.
sess = tf.Session(graph=fancy_graph)
sess.run(init)

In [None]:
# Pick the first numbered run directory under tb_base_path that does not
# exist yet, so each run of this cell logs to a fresh location.
tb_base_path = 'tbout/02_lab_solution'
run_index = 0
while os.path.isdir(os.path.join(tb_base_path, str(run_index))):
    run_index += 1
tb_path = os.path.join(tb_base_path, str(run_index))

# Open FileWriters to create TensorBoard summaries: one per split. Only the
# training writer is handed the graph definition.
train_log_dir = os.path.join(tb_path, 'training')
valid_log_dir = os.path.join(tb_path, 'validation')
train_writer = tf.summary.FileWriter(train_log_dir, graph=fancy_graph)
valid_writer = tf.summary.FileWriter(valid_log_dir)

# Feed dictionaries for each split; only training needs a learning rate
train_dict = {inputs: train_data, labels: train_labels, learning_rate: 0.000005}
valid_dict = {inputs: valid_data, labels: valid_labels}
test_dict = {inputs: test_data, labels: test_labels}

for iteration in range(500):
    # One training step; also fetches the merged summaries for this step
    err, step, summaries, _ = sess.run([error, inc_step, summary_op, train], train_dict)
    # Log on the very first step and then on every 50th step
    if step != 1 and step % 50 != 0:
        continue
    # Validation pass records only the loss summary
    v_summary = sess.run(error_summ, valid_dict)
    train_writer.add_summary(summaries, step)
    valid_writer.add_summary(v_summary, step)

In [None]:
# Tear down: close the session first, then both summary writers
sess.close()
for writer in (train_writer, valid_writer):
    writer.close()

Start tensorboard by navigating to the directory holding this notebook and running this command:

```
tensorboard --logdir=tbout/02_lab_solution/
```