# Tensorflow multiple linear regression

This sample uses TensorFlow's high-level API ```tf.keras``` together with ```tensorboard``` to implement multiple-feature linear regression. A single-node neural network is used to represent $f(x) = Wx + b$. Here $W$ denotes the weight matrix (one weight per input feature) and $x$ represents the input data, which has many features.

![Single node neural network](./img/single_node_nn.jpeg)

## Reference 
https://www.tensorflow.org/tutorials/keras/regression

https://donaldpinckney.com/books/tensorflow/book/ch2-linreg/2018-03-21-multi-variable.html

https://www.cs.cmu.edu/afs/cs.cmu.edu/academic/class/15381-s06/www/nn.pdf

https://medium.com/@rajatgupta310198/getting-started-with-neural-network-for-regression-and-tensorflow-58ad3bd75223

https://keras.io/layers/core/



In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

# load tensorboard visualization tool
%load_ext tensorboard

import datetime, os
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers

from tensorflow import keras
from tensorflow.keras import layers

In [16]:
'''
  Download Dataset (to '/Users/10yung/.keras/datasets/auto-mpg.data' ) and set the column name
'''

# Column labels are supplied explicitly and passed to read_csv via `names`.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Fetch the Auto MPG data file through the Keras download helper.
dataset_path = keras.utils.get_file(
    "auto-mpg.data",
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)

# Fields are space separated (runs of spaces collapsed), "?" is read as NaN,
# and the rest of a line after a tab character is ignored.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    sep=" ",
    skipinitialspace=True,
    comment='\t',
    na_values="?",
)

# Later cells work on this copy so the parsed frame itself stays untouched.
dataset = raw_dataset.copy()

In [17]:
# Clean NA data.
#
# Show how many values are missing per column, then drop every incomplete row.
# The count is printed explicitly: a bare expression in the middle of a cell
# is evaluated and thrown away, so it would never be displayed.
print(raw_dataset.isna().sum())

# Rebuild the cleaned frame from `raw_dataset` rather than from `dataset`
# itself, so re-running this cell does not fail on an 'Origin' column that a
# previous run has already popped.
dataset = raw_dataset.dropna()

# Convert category to one-hot.
#
# 'Origin' holds a numeric code (1, 2 or 3). It is removed from the frame and
# replaced by one 0.0/1.0 indicator column per code.
origin = dataset.pop('Origin')
dataset['USA'] = (origin == 1) * 1.0
dataset['Europe'] = (origin == 2) * 1.0
dataset['Japan'] = (origin == 3) * 1.0

In [18]:
'''
  Split dataset into training and testing
'''

# 80% of the rows, drawn with a fixed seed, form the training set;
# every row that was not drawn goes to the test set.
train_dataset = dataset.sample(random_state=0, frac=0.8)
test_dataset = dataset.drop(index=train_dataset.index)

'''
  Overall statistic
'''

# Summary statistics of the training columns without the 'MPG' label,
# transposed so that each feature is a row and each statistic a column.
train_stats = train_dataset.describe().drop(columns="MPG").T

'''
  Split features from labels
'''

# pop() removes 'MPG' from each frame in place and returns it as the label series.
train_labels, test_labels = train_dataset.pop('MPG'), test_dataset.pop('MPG')

In [19]:
'''
  Normalize data
'''

def norm(x):
    """Standardize ``x`` with the training-set statistics.

    Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``.
    Both are read from the module-level ``train_stats`` frame, so the test
    data is scaled with the training statistics as well.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']


normed_train_data, normed_test_data = norm(train_dataset), norm(test_dataset)

In [20]:
'''
  Initialize layers parameter
'''

def build_model():
    """Build and compile the single-unit linear regression network.

    The network consists of one Dense unit with a linear activation and a
    bias term, taking one input per column of the module-level
    ``train_dataset``. It is compiled with an SGD optimizer constructed with
    0.001, 'mse' as the loss, and 'mae' and 'mse' as reported metrics.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    n_features = train_dataset.shape[1]
    regression_layer = layers.Dense(
        1,
        activation='linear',
        input_dim=n_features,
        use_bias=True,
    )
    model = keras.Sequential([regression_layer])

    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.SGD(0.001),
        metrics=['mae', 'mse'],
    )
    return model

model = build_model()

# summary() prints the layer table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the cell output.
model.summary()

# Create tensorboard meta data: every run logs into its own timestamped
# directory below logs/fit.
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 1)                 10        
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________
None


In [21]:

'''
  Train the model
'''

class PrintDot(keras.callbacks.Callback):
    """Progress indicator: writes one dot per finished epoch.

    A line break is written before the dot of every 100th epoch
    (epoch 0, 100, 200, ...), so each output row holds 100 dots.
    """

    def on_epoch_end(self, epoch, logs):
        row_break = '\n' if epoch % 100 == 0 else ''
        print(row_break + '.', end='')

EPOCHS = 1000

# Fit silently (verbose=0); progress comes from PrintDot, and the TensorBoard
# callback records the run. 20% of the training data is passed as the
# validation split.
training_callbacks = [PrintDot(), tensorboard_callback]
history = model.fit(
    normed_train_data,
    train_labels,
    epochs=EPOCHS,
    validation_split=0.2,
    verbose=0,
    callbacks=training_callbacks,
)



....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
..........................................................................................

In [22]:
# Per-epoch metrics as a table, with the epoch number appended as a column.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
print(hist.tail())


# evaluate() returns the loss followed by the compiled metrics ('mae', 'mse').
test_metrics = model.evaluate(normed_test_data, test_labels, verbose=2)
loss, mae, mse = test_metrics
print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

          loss       mae        mse   val_loss   val_mae    val_mse  epoch
995  11.423806  2.552771  11.423806  10.086657  2.421389  10.086658    995
996  11.418942  2.551073  11.418943  10.085598  2.421243  10.085598    996
997  11.420496  2.551867  11.420497  10.084535  2.421192  10.084535    997
998  11.422082  2.551832  11.422081  10.085114  2.421384  10.085114    998
999  11.418926  2.551052  11.418927  10.084929  2.421315  10.084929    999
78/78 - 0s - loss: 10.7831 - mae: 2.4015 - mse: 10.7831
Testing set Mean Abs Error:  2.40 MPG


In [3]:
# Open the TensorBoard UI on logs/fit, the parent directory of the per-run log_dir used by the training callback.
%tensorboard --logdir logs/fit