# Example 2.1: SGD of a linear neuron

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D

import os
# Create the output folder for the saved figures. exist_ok=True makes the
# call a no-op when the folder is already there, which removes the separate
# (race-prone) existence check.
os.makedirs('figures', exist_ok=True)

In [None]:
# seed NumPy's global random generator so that every later np.random call
# in this notebook draws a repeatable sequence
SEED = 10
np.random.seed(SEED)

# training schedule: step size of each update and number of passes over the data
lr = 0.01
no_epochs = 200

In [None]:
# Build a small synthetic regression set:
#   X - 6 two-dimensional inputs, uniform in [-1, 1)
#   Y - targets from the linear map (2.53, -0.47) plus uniform noise in [-0.5, 0.5)
X = np.random.rand(6, 2)*2 - 1
Y = X.dot([2.53, -0.47]) + np.random.rand(6) - 0.5

print('X: {}'.format(X))
print('Y: {}'.format(Y))

In [None]:
# class for a linear neuron
class Linear:
    """Single linear neuron computing ``x . w + b``.

    The parameters are TensorFlow variables so they can be reassigned in
    place: ``w`` holds two float64 weights initialised from
    ``np.random.rand`` and ``b`` is a float64 scalar bias starting at zero.
    """

    def __init__(self):
        initial_weights = np.random.rand(2)
        self.w = tf.Variable(initial_weights, dtype=tf.float64)
        self.b = tf.Variable(0., dtype=tf.float64)

    def __call__(self, x):
        """Return the neuron's output for the input ``x``."""
        weighted_sum = tf.tensordot(x, self.w, axes=1)
        return weighted_sum + self.b

In [None]:
# squared error as the loss function
def loss(predicted_y, target_y):
    """Return the element-wise squared error ``(predicted_y - target_y)**2``."""
    residual = predicted_y - target_y
    return tf.square(residual)

In [None]:
# function executing a training step
def train_step(model, x, d, learning_rate):
    """Apply one gradient-descent update to ``model`` in place.

    Args:
        model: callable on ``x`` that exposes the variables ``w`` and ``b``.
        x: input pattern.
        d: desired (target) output for ``x``.
        learning_rate: step size of the update.

    The update uses ``-(d - y) * x`` for the weights and ``-(d - y)`` for the
    bias, where ``y = model(x)``; for a linear neuron these are the gradients
    of ``0.5 * (d - y)**2``.
    """
    # negated output error, shared by the weight and the bias gradient
    delta = -(d - model(x))

    # compute both new values from the current parameters, then write them back
    updated_w = model.w - learning_rate * (delta*x)
    updated_b = model.b - learning_rate * delta
    model.w.assign(updated_w)
    model.b.assign(updated_b)

The main function to train the neuron starts here

In [None]:
# instantiate the neuron and show the parameters it starts from
model = Linear()
print('w: {}, b: {}'.format(*(param.numpy() for param in (model.w, model.b))))

In [None]:
# Stochastic gradient descent: in every epoch the patterns are visited one at
# a time in a reshuffled order and the neuron is updated after each pattern.
idx = np.arange(len(X))

err = []  # mean squared error of each epoch
for epoch in range(no_epochs):
    # permute inputs and targets with the same index so the pairs stay aligned
    np.random.shuffle(idx)
    X, Y = X[idx], Y[idx]

    # every single update is traced during the first epoch only
    verbose = epoch == 0

    epoch_sq_errs = []
    for p, (x_p, d_p) in enumerate(zip(X, Y)):
        # output and squared error are taken BEFORE the parameters are updated
        y_p = model(x_p)
        se_p = loss(y_p, d_p)
        epoch_sq_errs.append(se_p)

        train_step(model, x_p, d_p, learning_rate=lr)

        if verbose:
            print('iter: {}'.format(epoch+1))
            print('p: {}'.format(p+1))
            print('x:{}, d:{}'.format(x_p, d_p))
            print('y: {}'.format(y_p))
            print('se: {}'.format(se_p))
            # the parameters shown here are the ones AFTER the update
            print('w: {}, b: {}'.format(model.w.numpy(), model.b.numpy()))

    err.append(np.mean(epoch_sq_errs))
    # progress report every 10th epoch
    if epoch % 10 == 0:
        print('iter: %3d, mse: %1.4f'%(epoch, err[epoch]))

In [None]:
# report the weights and bias reached after training
print('w: %s, b: %s'%tuple(param.numpy() for param in (model.w, model.b)))

In [None]:
# plot the learning curve (mean squared error of each epoch) and save it
epoch_axis = range(no_epochs)
plt.figure(1)
plt.plot(epoch_axis, err)
plt.ylabel('mse')
plt.xlabel('epochs')
plt.savefig('./figures/2.1a_1.png')

In [None]:
# outputs of the trained neuron for every training pattern, as NumPy values
pred = [model(x_p).numpy() for x_p in X]

print(pred)

In [None]:
# plot targets, predictions and the learned regression plane in 3-D
fig = plt.figure(2)
# Figure.gca() no longer accepts keyword arguments (deprecated in
# Matplotlib 3.4, removed in 3.6), so the 3-D axes are created explicitly.
ax = fig.add_subplot(111, projection='3d')
plot_original = ax.scatter(X[:,0], X[:,1], Y, c='blue', label='targets')
plot_pred = ax.scatter(X[:,0], X[:,1], pred, c='orange', label='predicted')

# evaluate the learned plane w[0]*x1 + w[1]*x2 + b on a regular grid
# (step 0.1 from -1 up to, but excluding, 1 on both axes)
w_learned, b_learned = model.w.numpy(), model.b.numpy()
X1 = np.arange(-1, 1, 0.1)
X2 = np.arange(-1, 1, 0.1)
X1, X2 = np.meshgrid(X1, X2)
Z = w_learned[0]*X1 + w_learned[1]*X2 + b_learned
regression_plane = ax.plot_surface(X1, X2, Z)

# fixed ticks and axis labels
ax.set_zticks([ -2, -1, 0, 1])
ax.set_xticks([-0.5, 0, 0.5])
ax.set_yticks([-0.5, 0, 0.5])
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$y$')
plt.legend()
plt.savefig('./figures/2.1a_2.png')