Importing

In [None]:
import torch
from torch.optim import Adamax
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')


Function to Model

In [None]:
# f(x) = -5x^3 + x^2 + x + 2
def f(x):
    """Evaluate the cubic that the network is trained to reproduce.

    Only ``*``, ``**`` and ``+`` are applied to ``x``, so scalars and NumPy
    arrays (evaluated elementwise) are both accepted.

    Args:
        x: point(s) at which to evaluate the polynomial.

    Returns:
        ``-5*x**3 + x**2 + x + 2``, in the same form as ``x``.
    """
    cubic_term = -5*x**3
    square_term = x**2
    return cubic_term + square_term + x + 2

# f'(x) = -15x^2 + 2x + 1
def f_1(x):
    """Evaluate the analytic first derivative of ``f``.

    Only ``*``, ``**`` and ``+`` are applied to ``x``, so scalars and NumPy
    arrays (evaluated elementwise) are both accepted.

    Args:
        x: point(s) at which to evaluate the derivative.

    Returns:
        ``-15*x**2 + 2*x + 1``, in the same form as ``x``.
    """
    quadratic_term = -15*x**2
    linear_term = 2*x
    return quadratic_term + linear_term + 1


Sampling from defined function

In [None]:
# seeded generator so that every "Restart & Run All" draws the same training windows
SEED = 0
rng = np.random.default_rng(SEED)

# number of samples
num = 3
# size of sample vector and input layer
# NOTE: this name hides the builtin len() for the rest of the notebook; it is
# kept because the later cells read it under this name.
len = 10
# using a linear offset to obtain the upper bound allows for the same step size for all sample vectors
offset = 10
step = offset/(len-1)

def sample():
    """Draw one evenly spaced sample vector.

    The lower bound is drawn uniformly from (-5, 0] using the seeded ``rng``,
    the upper bound lies ``offset`` above it, and ``len`` points are laid out
    between the two, so neighbouring points are ``step`` apart.

    Returns:
        1-D NumPy array with ``len`` entries.
    """
    lower = rng.random() * -5
    upper = lower + offset
    return np.linspace(lower, upper, len)

# num counts the sample vectors to draw; stacking nothing would fail with a less helpful message
if num < 1:
    raise ValueError(f'num must be at least 1, got {num}')

# each row is a sample; all rows are drawn first and stacked once, instead of
# re-allocating the whole array with np.append on every iteration
X_np = np.stack([sample() for _ in range(num)])

# targets: f applied elementwise, so Y_np has the same shape as X_np
Y_np = f(X_np)

# float32 tensors for the network (the NumPy arrays are float64)
X_sample = torch.from_numpy(X_np).float()
Y_sample = torch.from_numpy(Y_np).float()

Defining the Model, Loss and Optimizer

In [None]:
# seed PyTorch before the layers are created so that the randomly initialised
# weights, and with them the training curve, are the same on every run
torch.manual_seed(0)

# len i/o nodes, i_len nodes per layer and ELU activation fn
i_len = 30

# maps a vector of len x-values to a vector of len predicted f(x) values
model = nn.Sequential(
    nn.Linear(len, i_len),
    nn.ELU(),
    nn.Linear(i_len, i_len),
    nn.ELU(),
    nn.Linear(i_len, i_len),
    nn.ELU(),
    nn.Linear(i_len, len)
    )

# mean squared error as loss fn
loss_fn = nn.MSELoss()

# choose optimizer (Adamax with its default hyper-parameters)
optim = Adamax(model.parameters())


Graphing model accuracy

In [None]:
def plot(l,u,t):
    """Plot f against the network's prediction on [l, u]; the title shows epoch and MSE.

    The grid uses the training spacing ``step``. The network only accepts
    vectors as wide as its first ``nn.Linear`` layer, so the grid is fed to it
    as overlapping windows of that width (moving one grid point at a time) and
    the predictions that fall on the same x are averaged. When the grid is
    exactly one window wide this is a single forward pass over the whole grid.

    Reads the notebook globals ``model``, ``loss_fn``, ``step``, ``f`` and
    ``epoch`` (the training-loop counter, used only in the title).

    Args:
        l: lower end of the plotting range.
        u: upper end of the plotting range.
        t: text placed in front of the "Epoch ..., Loss ..." part of the title.

    Raises:
        ValueError: if the range yields fewer grid points than the network's
            input width, or the network's output width differs from its input width.
    """
    # steps required to maintain same step value as samples used for training
    n_steps = ( (u-l)/step ) + 1

    # step is a float, so a whole number of steps can come out as e.g. 18.999999;
    # snap to the nearest integer in that case instead of truncating a point away
    nearest = round(n_steps)
    if np.isclose(n_steps, nearest):
        n_steps = int(nearest)
    else:
        # inform user if steps required is not a whole number
        print(f'Number of steps required is not an integer: {n_steps}')
        n_steps = int(n_steps)

    # input width expected by the network = in_features of its first Linear layer
    width = next(m for m in model.modules() if isinstance(m, nn.Linear)).in_features
    if n_steps < width:
        raise ValueError(
            f'Range [{l}, {u}] gives {n_steps} points at the training step size, '
            f'but the model needs at least {width}'
        )

    # create array with plotting range
    X_plot = np.linspace(l,u,n_steps)
    Y_plot = f(X_plot)

    # create arrays to test model and compare outputs
    X_temp = torch.from_numpy(X_plot).float()
    Y_temp = torch.from_numpy(Y_plot).float()

    # inference only, so no autograd graph is needed
    with torch.no_grad():
        # row k holds grid points k .. k+width-1
        windows = X_temp.unfold(0, width, 1)
        window_preds = model(windows)
        if window_preds.shape[-1] != width:
            raise ValueError(
                f'Model output width {window_preds.shape[-1]} does not match its input width {width}'
            )

        # average the predictions of all windows that cover the same grid point
        pred_sum = torch.zeros(n_steps)
        hits = torch.zeros(n_steps)
        for first, window_pred in enumerate(window_preds):
            pred_sum[first:first + width] += window_pred
            hits[first:first + width] += 1
        Y_preds = pred_sum / hits

        loss_plot = loss_fn(Y_preds, Y_temp)

    Y_pred_plot = Y_preds.numpy()

    plt.plot(X_plot, Y_plot, X_plot, Y_pred_plot)
    plt.legend(['Function to Model','MLP Approximation'])

    title = t + f"Epoch {epoch}, Loss {loss_plot:.2E}"
    plt.title(title)
    plt.show()
    plt.clf()

Training the Model

In [None]:
n_epochs = 2501

# full-batch training: every epoch runs all rows of X_sample through the model at once
for epoch in range(n_epochs):
    # clear the gradients of the previous update before this epoch's backward pass
    optim.zero_grad()
    y_pred = model(X_sample)
    loss = loss_fn(y_pred, Y_sample)
    loss.backward()
    optim.step()

    # every 500th epoch (epoch 0 included) show the fit on [-5, -5+offset]
    if epoch % 500 == 0:
        l = -5
        plot(l, l+offset, '')

# after training: same plot on a range wider than the one sampled for training
plot(-10,10,'Plotting beyond training sample range, ')

Plotting first derivative using autograd

In [None]:
def plot_f1(l,u,t):
    """Plot f' against the network's derivative on [l, u]; the title shows epoch and MSE.

    The network only accepts vectors as wide as its first ``nn.Linear`` layer,
    with neighbouring entries ``step`` apart. For every plotted x one window is
    built per slot, placing x in that slot and filling the other slots at the
    training spacing. The prediction for x is the mean over slots of the output
    read from the slot x occupies, and autograd differentiates that mean with
    respect to x. Moving x moves the whole window with it, so the result is
    the derivative of the prediction along the sliding direction. For a
    network with one input and one output this is exactly d(model(x))/dx.

    Windows reach up to ``(width-1)*step`` beyond each side of x, so the
    network is also evaluated on inputs outside [l, u].

    Reads the notebook globals ``model``, ``loss_fn``, ``step``, ``f_1`` and
    ``epoch`` (the training-loop counter, used only in the title).

    Args:
        l: lower end of the plotting range.
        u: upper end of the plotting range.
        t: text placed in front of the "Epoch ..., Loss ..." part of the title.

    Raises:
        ValueError: if the network's output width differs from its input width.
    """
    n_step = 1000

    X_plot = np.linspace(l,u,n_step)
    Y_plot = f_1(X_plot)

    # input width expected by the network = in_features of its first Linear layer
    width = next(m for m in model.modules() if isinstance(m, nn.Linear)).in_features

    #to obtain grads, requires_grad must be set to true
    X_temp = torch.linspace(l, u, steps=n_step, requires_grad=True)
    Y_temp = torch.from_numpy(Y_plot).float().view(-1,1)

    # shifts[j, i] = (i - j)*step, so windows[k, j, :] is the evenly spaced
    # window in which X_temp[k] sits in slot j
    slot_offsets = step * torch.arange(width, dtype=X_temp.dtype)
    shifts = slot_offsets.view(1, -1) - slot_offsets.view(-1, 1)
    windows = X_temp.view(-1, 1, 1) + shifts

    window_preds = model(windows)
    if window_preds.shape[-1] != width:
        raise ValueError(
            f'Model output width {window_preds.shape[-1]} does not match its input width {width}'
        )

    # entry [k, j] of the diagonal is the output in slot j of the window that
    # holds X_temp[k] in slot j; average these per x
    Y_preds = torch.diagonal(window_preds, dim1=-2, dim2=-1).mean(dim=-1, keepdim=True)

    # Differentiate the predictions w.r.t. the plotted x values. Each prediction
    # depends only on its own x, so weighting every output with 1 (grad_outputs)
    # returns the per-point derivative. The graph is not reused afterwards.
    grads = torch.autograd.grad(outputs=Y_preds, inputs=X_temp, grad_outputs=torch.ones_like(Y_preds))[0].view(-1,1)
    grad_plot = torch.reshape(grads, (-1,)).detach().numpy()

    loss_plot = loss_fn(grads, Y_temp)

    plt.plot(X_plot, Y_plot, X_plot, grad_plot)
    title = t + f"Epoch {epoch}, Loss {loss_plot:.2E}"
    plt.title(title)
    plt.legend(['Function to Model','MLP Approximation'])
    plt.show()
    plt.clf()

In [None]:
# derivative comparison on [0, 10]
plot_f1(l=0, u=10, t='First Derivatives Compared, ')
# same comparison on the wider range [-15, 15]
plot_f1(l=-15, u=15, t='First Derivatives outside training range, ')