## 1 With SymPy

In [428]:
from sympy import *
import numpy as np

# training instances, one row per sample: [x_1, x_2, observed target]
instances = [
    [0.5, 1, 1],
    [1, 0, 0],
    [2, 0.5, 1]
]

# define symbols (latex notation)
theta_0, theta_1, theta_2 = symbols('\\theta_0 \\theta_1 \\theta_2')
x_1, x_2 = symbols('x_1 x_2')

# define functions
H = theta_0 +  theta_1 * x_1 + theta_2 * x_2  # hypothesis: the model's prediction
H_0 = symbols('H_0')  # observed target value of an instance
SE = (H - H_0) ** 2  # squared error of a single instance
# mean squared error: average SE over all instances (divisor follows the data
# instead of being hard-coded); substitute via the Symbol objects themselves
MSE = sum(
    SE.subs({x_1: a, x_2: b, H_0: target}) / len(instances)
    for a, b, target in instances
)

# calculate partial derivatives of the MSE, one per parameter
diffs = [diff(MSE, theta) for theta in (theta_0, theta_1, theta_2)]

# initial assignment
thetas = np.array([0.5, 0.5, 0.5])
eta = 1  # learning rate (step size) used by gd()

# gradient descent
def gd():
    """Perform a single gradient-descent step on the global ``thetas``.

    Reads the module-level ``MSE``, ``diffs`` and ``eta``. Prints the current
    parameters, the loss at those parameters and the gradient, then rebinds
    ``thetas`` to ``thetas - eta * gradient``.
    """
    global thetas

    # current value of every parameter, keyed by the symbol's LaTeX name
    names = ('\\theta_0', '\\theta_1', '\\theta_2')
    assignment = {name: thetas[i] for i, name in enumerate(names)}

    print(f"thetas   : {thetas!s}")
    print(f"loss     : {MSE.subs(assignment)!s}")

    # evaluate each partial derivative at the current parameters
    grads = np.array([partial.subs(assignment) for partial in diffs])
    print(f"gradients: {grads!s}")

    # step against the gradient
    thetas = thetas - eta * grads


In [432]:
from sympy.interactive import printing
from IPython.display import Math

# Render the hypothesis, the cost J and its three partial derivatives as LaTeX.
# The f-string is not raw, so every LaTeX backslash is written as "\\"
# (including "\\partial": a bare "\p" is an invalid escape sequence) and
# literal braces are doubled.
Math(
    f"""
    H = {printing.default_latex(H)}         (1)\\\\
    J = {printing.default_latex(MSE)}       (2)\\\\
    \\frac{{\\partial J}}{{\\partial \\theta_0}} = {printing.default_latex(diffs[0])}    (3)\\\\
    \\frac{{\\partial J}}{{\\partial \\theta_1}} = {printing.default_latex(diffs[1])}    (4)\\\\
    \\frac{{\\partial J}}{{\\partial \\theta_2}} = {printing.default_latex(diffs[2])}    (5)\\\\
    """
)


<IPython.core.display.Math object>

In [429]:
# This cell can be run multiple times; each run performs one gradient-descent step.
# NOTE: the step size eta=1 is too high and the iteration will diverge.

gd()

thetas   : [0.5 0.5 0.5]
loss     : 0.541666666666667
gradients: [1.33333333333333 1.75000000000000 0.416666666666667]


## 2 With PyTorch Autograd

In [1]:
import torch

In [87]:
# initial weights, tracked by autograd so loss.backward() fills W.grad
W = torch.tensor([0.5] * 3, requires_grad=True)

# one row per instance: a constant 1 (multiplies W[0]) followed by two inputs
X = torch.tensor([
    [1, 0.5, 1],
    [1, 1, 0],
    [1, 2, 0.5],
])

# observed target of each instance
Y = torch.tensor([1, 0, 1])

def gd_autograd(eta = 0.1):
    """Perform one gradient-descent step on the global weights ``W``.

    Computes the mean squared error of ``X @ W`` against ``Y``, lets autograd
    produce the gradient, prints the weights, loss and update, and applies
    the update to ``W`` in place. The function can be called repeatedly.

    Args:
        eta: learning rate (step size) that scales the gradient.
    """
    global W, X, Y

    print(f"W: {W}")

    # calculate loss (mean over however many instances Y holds)
    Y_hat = torch.matmul(X, W)
    loss = torch.square(Y - Y_hat).sum() / Y.shape[0]
    print(f"Loss: {loss}")

    # gradient descent
    loss.backward()
    update = - eta * W.grad
    print(f"Update: {update}")

    # Update in place without recording the operation, so W stays a leaf
    # tensor with requires_grad=True. Rebinding W to `W + update` would make
    # it a non-leaf tensor whose .grad is None on the next call.
    with torch.no_grad():
        W += update

    # backward() accumulates into .grad, so clear it for the next step
    W.grad.zero_()


In [88]:
gd_autograd(eta=1)  # TODO: this call can only be run once (a repeat needs W to still be a leaf tensor)

W: tensor([0.5000, 0.5000, 0.5000], requires_grad=True)
Loss: 0.5416666865348816
Update: tensor([-1.3333, -1.7500, -0.4167])
