# Exercise 1. 

Write a Python script to implement the backpropagation algorithm for a $1-S^{1}-1$ network. 

Write the program using matrix operations. Choose the initial weights and biases to be random numbers *uniformly* distributed between -0.5 and 0.5, and train the network to approximate the function:

$g(p) = e^{-\vert p \vert}\sin(\pi p)$ for $-2 \leq p \leq 2$

Use $S^{1}=2$ and $S^{1}=10$. Experiment with several different values for the learning rate, $\alpha$, and use several different initial conditions. Discuss the convergence properties of the algorithm as the learning rate changes.

Plot the outputs of the trained networks together with the target function and compare them. Check the squared error at each epoch.

In [None]:
# import required packages
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [None]:
# define the transfer functions we need for our network

# logsig transfer function
def logsig(n):
    """Log-sigmoid transfer function, a = 1 / (1 + exp(-n)).

    Parameters
    ----------
    n : scalar or numpy.ndarray
        Net input; the function is applied element-wise.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``n``.
    """
    # log(1 + exp(-n)) is evaluated with logaddexp so that strongly negative
    # net inputs (e.g. when training diverges for a large learning rate) do
    # not overflow inside exp() and trigger a RuntimeWarning.
    return np.exp(-np.logaddexp(0, -n))

# purelin (linear) transfer function
def purelin(n):
    """Linear transfer function: the output is the net input, unchanged."""
    a = n
    return a

In [None]:
# define the gradients of the transfer functions

# derivative of the logsig transfer function
def logsig_grad(a):
    """Derivative of logsig, written in terms of its output.

    Note that the argument is the layer output ``a = logsig(n)``, not the
    net input ``n``; the result is ``(1 - a) * a``, element-wise.
    """
    complement = 1 - a
    return complement * a

# derivative of the purelin transfer function
def purelin_grad(a):
    """Derivative of purelin: the constant 1, whatever ``a`` is."""
    slope = 1
    return slope

In [None]:
# generate 100 linearly spaced inputs in the range [-2, 2]
p = 
# make the inputs 2D so we can specify input dimensions:
# reshape(-1, 1) turns the array into a single column, one input per row
p = p.reshape(-1,1)

In [None]:
# compute targets by using function to approximate, g(p)
# g(p) = exp(-|p|) * sin(pi * p), evaluated for every input in p
g = 

In [None]:
# show the target function g(p) over the training inputs

_, ax = plt.subplots(figsize=(8, 6))
ax.plot(p, g, label='$g(p)$')
ax.set_xlabel('p')
ax.set_ylabel('$g(p)$')
ax.set_title('Function to approximate')
plt.show()

**Recall.** A 3-layer perceptron is described as an $R-S^{1}-S^{2}-S^{3}$ network, where $R$ is the number of inputs and $S^{k}$ is the number of neurons in layer $k$. Our network is a two-layer $1-S^{1}-1$ network, so $R=1$ and $S^{2}=1$. We vary $S^{1}$.

In [None]:
# specify number of features: R
# (the first entry of the 1-S1-1 network description)
R = 

## Part 1. $S^{1} = 2$

In [None]:
# specify number of neurons in the hidden layer: S1
# (this part of the exercise uses the smaller hidden layer)
S1 = 
# specify number of neurons in the output layer: S2
# (the last entry of the 1-S1-1 network description)
S2 = 

In [None]:
# set the seed so the random initial conditions are reproducible
np.random.seed(0)

# randomly initialize weights, uniformly distributed between -0.5 and 0.5
# NOTE(review): presumably W1 is (S1, R) and W2 is (S2, S1) -- confirm against the forward pass
W1 = 
W2 = 
# randomly initialize bias, uniformly distributed between -0.5 and 0.5
# NOTE(review): presumably b1 is (S1, 1) and b2 is (S2, 1) -- confirm against the forward pass
b1 = 
b2 = 

In [None]:
# number of passes over the full set of training inputs
epochs = 1_000
# learning rate, alpha
alpha = 2.5e-2

In [None]:
# zero-filled storage for the training errors

# one error slot per training input (filled inside the training loop)
e = np.zeros((len(g),))
# one error value per epoch, kept as a single row of length `epochs`
MSE = np.zeros((1, epochs))

### Train the network

In [None]:
# ----------- Train the network -----------

# for each epoch 
for epoch in range(epochs):
    # for each input
    for i in range(p.shape[0]):
        # 1) propagate input forward
        n1 = 
        a1 = 
        n2 = 
        a2 = 
        # 2) calculate the error
        # difference between g(p_{i}) and a2 (network output)
        e[i] = g[i] - a2
        # 3) Backprop sensitivities
        g_logsig = 
        f1_dot = np.diagflat(g_logsig)
        f2_dot = 
        s2 = 
        s1 = 
        # 4) Update weight and bias
        W2 = 
        W1 = 
        b2 = 
        b1 = 
    MSE[0, epoch] = e.transpose().dot(e)

In [None]:
# learning curve: per-epoch error of the S = 2 network

epoch_axis = np.arange(1, epochs + 1)
_, ax = plt.subplots(figsize=(8, 6))
ax.plot(epoch_axis, MSE[0, :], ls='-')
ax.set_title('Mean Square Error by Epoch, S = 2')
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Square Error')
plt.show()

In [None]:
# placeholder array for the trained network output: same shape as p,
# initialised to ones; the loop below assigns a_S2[i] for every input
a_S2 = np.ones(shape=(p.shape))

# calculate predicted outputs from the trained network
# (forward pass only -- the weights and biases are not updated here)
for i in range(p.shape[0]):
    n1 = 
    a1 = 
    n2 = 
    a_S2[i] = 

In [None]:
# compare the trained S = 2 network with the target function
_, ax = plt.subplots(figsize=(8, 6))
ax.plot(p, g, label='Actual', color='green')
ax.plot(p, a_S2, label='Predicted', color='blue')
ax.set_title("Network Approximation, S = 2")
ax.set_xlabel("p")
ax.set_ylabel("g(p)")
ax.set_xlim((-2, 2))
ax.set_ylim((-0.67, 0.67))
ax.legend(loc='best')
plt.show()

## Part 2. $S^{1} = 10$

In [None]:
# number of neurons in the hidden layer
# (this part of the exercise uses the larger hidden layer)
S1 = 
# number of neurons in the output layer
# (the last entry of the 1-S1-1 network description)
S2 = 

In [None]:
# set the seed so the random initial conditions are reproducible
np.random.seed(0)

# randomly initialize weights, uniformly distributed between -0.5 and 0.5
# NOTE(review): presumably W1 is (S1, R) -- confirm against the forward pass
W1 = 
# randomly initialize bias, uniformly distributed between -0.5 and 0.5
# NOTE(review): presumably b1 is (S1, 1) -- confirm against the forward pass
b1 = 
# output-layer weight and bias, initialized the same way
W2 = 
b2 = 

In [None]:
# reset the zero-filled error storage before training the second network

# one error slot per training input
e = np.zeros((len(g),))
# one error value per epoch, kept as a single row of length `epochs`
MSE = np.zeros((1, epochs))

In [None]:
# ----------- Train the network -----------
# Same four steps for every input of every epoch: forward pass, error,
# sensitivities, parameter update. The epoch's error measure is stored
# once the inner loop over all inputs has finished.

# for each epoch 
for epoch in range(epochs):
    # for each input
    for i in range(p.shape[0]):
        # 1) propagate input forward
        n1 = 
        a1 = 
        n2 = 
        a2 = 
        # 2) calculate the error
        # difference between g(p_{i}) and a2 (network output)
        e[i] = 
        # 3) Backprop sensitivities
        g_logsig = 
        f1_dot = 
        f2_dot = 
        # the output-layer sensitivity s2 is computed first, then s1
        s2 = 
        s1 = 
        # 4) Update weight and bias
        W2 = 
        W1 = 
        b2 = 
        b1 = 
    # store this epoch's error measure, computed from the per-input errors in e
    MSE[0, epoch] = 

In [None]:
# learning curve: per-epoch error of the S = 10 network
epoch_axis = np.arange(1, epochs + 1)
_, ax = plt.subplots(figsize=(8, 6))
ax.plot(epoch_axis, MSE[0, :], ls='-')
ax.set_title('Mean Square Error by Epoch, S = 10')
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Square Error')
plt.show()

In [None]:
# placeholder array for the trained network output: same shape as p,
# initialised to ones; the loop below assigns a_S10[i] for every input
a_S10 = np.ones(shape=(p.shape))

# calculate network output
# (forward pass only -- the weights and biases are not updated here)
for i in range(p.shape[0]):
    n1 = 
    a1 = 
    n2 = 
    a_S10[i] = 

In [None]:
# overlay both trained networks on the target function for comparison
_, ax = plt.subplots(figsize=(8, 6))
ax.plot(p, g, label='Actual', color='green')
ax.plot(p, a_S2, label='1-S2-1 Network', color='blue')
ax.plot(p, a_S10, label='1-S10-1 Network', color='red')
ax.set_title("Network Approximation, S = 10")
ax.set_xlabel("p")
ax.set_ylabel("g(p)")
ax.set_xlim((-2, 2))
ax.set_ylim((-0.67, 0.67))
ax.legend(loc='best')
plt.show()