### Step 1: Import the required dependencies
We are going to use PyTorch.

In [None]:
import torch
import torch.nn.functional as F

In [None]:
torch.manual_seed(0)  # fix torch's global RNG so the noise below (and any later random draws) is reproducible
x = torch.linspace(-1, 1, 100).unsqueeze(dim=1)  # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2 * torch.rand(x.size())         # noisy y data (tensor), shape=(100, 1)

In [None]:
# Sanity check: smallest value, largest value and shape of the input tensor.
print(x.min(), x.max(), x.shape)

### Create a network class

We would like to test whether a simple multi-layer perceptron (MLP) can approximate the y data.

For that we need an input neuron, some hidden neurons (how many?), and an output neuron.

`torch.nn` provides ready-made layers (connections) and activation functions for us.

In [None]:
# create a neural network
class Net(torch.nn.Module):
    """Single-hidden-layer MLP for regression.

    Args:
        n_feature: size of each input sample.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # input -> hidden (standard linear layer)
        self.predict = torch.nn.Linear(n_hidden, n_output)   # hidden -> output (standard linear layer)

    def forward(self, x):
        """Return the network output for input ``x`` (last dimension must be ``n_feature``)."""
        x = F.relu(self.hidden(x))  # hidden layer followed by a ReLU activation
        x = self.predict(x)         # linear read-out; no activation because this is regression
        return x

In [None]:
# Build the MLP with one input feature, ten hidden units and one output,
# then print the module to list its layers.
net = Net(
    n_feature=1,
    n_hidden=10,
    n_output=1,
)
print(net)

In [None]:
## we then need to define our loss 
## and our optimizer

In [None]:
# Mean-squared-error criterion (regression) and plain SGD over all of net's parameters.
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.2)

### How does the randomly initialised network predict the data we have created?

In [None]:
prediction = net(x)     # forward pass: feed x through net; no training step has run in the cells above

In [None]:
import matplotlib.pyplot as plt # make sure it's installed!

In [None]:
plt.cla()  # start from clean axes
# .detach() returns a tensor without autograd history, which .numpy() requires;
# it replaces the legacy .data attribute.
plt.scatter(x.detach().numpy(), y.detach().numpy(), label='data')
plt.plot(x.detach().numpy(), prediction.detach().numpy(), 'r-', lw=5, label='prediction')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

### Train the network with SGD

In [None]:
NUM_STEPS = 100  # number of full-batch SGD steps; increase it if the fit is still poor

for t in range(NUM_STEPS):
    # forward pass
    prediction = net(x)     # input x and predict based on x

    # loss
    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

    optimizer.zero_grad()   # clear gradients left over from the previous step

    # back prop
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

# loss of the last forward pass (computed before the final parameter update)
print(loss)

In [None]:
# plt.scatter(x.data.numpy(), y.data.numpy())
# plt.show()


In [None]:
# Plot the data against `prediction` and annotate with `loss`; both are whatever
# the training cell assigned last.
plt.cla()
plt.scatter(x.detach().numpy(), y.detach().numpy())
plt.plot(x.detach().numpy(), prediction.detach().numpy(), 'r-', lw=5)
plt.text(0.5, 0, 'Loss=%.4f' % loss.item(), fontdict={'size': 20, 'color':  'red'})
plt.show()  # render the figure and suppress the Text(...) repr, like the earlier plot cell

### What's the network size?
Can you find out?

In [None]:
# Evaluate index 3 (first dimension) of the hidden layer's weight plus 0.2;
# the resulting tensor is this cell's output.
net.hidden.weight[3].add(0.2)

In [None]:
# does this change anything?
    

## How do different networks approximate it?

In [None]:
# A 1 -> 10 -> 1 ReLU network assembled directly from torch.nn modules.
net2 = torch.nn.Sequential(
    torch.nn.Linear(in_features=1, out_features=10),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=10, out_features=1),
)

In [None]:
def train(network, iterations=100, lr=0.2, inputs=None, targets=None):
    """Fit ``network`` with full-batch SGD on a mean-squared-error loss.

    Args:
        network: a ``torch.nn.Module``; its parameters are updated in place.
        iterations: number of gradient steps to take.
        lr: SGD learning rate.
        inputs: tensor fed to ``network``; defaults to the notebook-level ``x``.
        targets: tensor the output is compared against; defaults to the
            notebook-level ``y``.
    """
    # Fall back to the notebook's data so that train(network) keeps working as before.
    inputs = x if inputs is None else inputs
    targets = y if targets is None else targets

    optimizer = torch.optim.SGD(network.parameters(), lr=lr)
    loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss
    for _ in range(iterations):
        prediction = network(inputs)              # forward pass
        loss = loss_func(prediction, targets)     # must be (1. nn output, 2. target)

        optimizer.zero_grad()   # clear gradients left over from the previous step
        loss.backward()         # backpropagation, compute gradients
        optimizer.step()        # apply gradients

In [None]:
train(net2)  # fit the Sequential network defined above

In [None]:
# Plot the data together with net2's current prediction on x.
plt.cla()
plt.scatter(x.detach().numpy(), y.detach().numpy())
plt.plot(x.detach().numpy(), net2(x).detach().numpy(), 'r-', lw=5)
plt.show()