In [None]:
import numpy as np
import torch

In [188]:
# Design matrices for the gradient checks: 16 samples with 1 feature and with 4 features.
x_1dim = np.arange(16).reshape(-1, 1)
x_4dim = np.arange(64).reshape(16, 4)

# Float32 torch copies of the NumPy inputs, created with autograd tracking enabled.
x_1dim_torch, x_4dim_torch = (
    torch.tensor(features, dtype=torch.float32, requires_grad=True)
    for features in (x_1dim, x_4dim)
)

In [189]:
# Display the shapes of both torch input tensors as a sanity check.
tuple(tensor.shape for tensor in (x_1dim_torch, x_4dim_torch))

(torch.Size([16, 1]), torch.Size([16, 4]))

In [190]:
# Regression targets: a flat vector of 16 values and a 16x3 matrix.
y_1dim = np.arange(16)
y_3dim = np.arange(48).reshape(-1, 3)

# Float32 torch copies of the targets; requires_grad is left at its default (off).
y_1dim_torch, y_3dim_torch = (
    torch.tensor(targets, dtype=torch.float32) for targets in (y_1dim, y_3dim)
)

In [191]:
# Display the shapes of both torch target tensors as a sanity check.
tuple(tensor.shape for tensor in (y_1dim_torch, y_3dim_torch))

(torch.Size([16]), torch.Size([16, 3]))

In [192]:
# Weights named w_<inputs>_<outputs>_dim, one per input/target shape pairing.
w_1_1_dim = np.array([1.3])
w_4_1_dim = np.array([1.3, 1.6, 1.8, 2.0])
w_1_3_dim = np.full((1, 3), 1.3)
# Three copies of the 4-vector stacked as rows, then transposed -> shape (4, 3).
w_4_3_dim = np.vstack([w_4_1_dim] * 3).T

# Float32 torch copies of every weight array, created with autograd tracking enabled.
w_1_1_dim_torch, w_4_1_dim_torch, w_1_3_dim_torch, w_4_3_dim_torch = (
    torch.tensor(weights, dtype=torch.float32, requires_grad=True)
    for weights in (w_1_1_dim, w_4_1_dim, w_1_3_dim, w_4_3_dim)
)

In [193]:
# Display the shapes of all four torch weight tensors as a sanity check.
tuple(
    tensor.shape
    for tensor in (w_1_1_dim_torch, w_4_1_dim_torch, w_1_3_dim_torch, w_4_3_dim_torch)
)

(torch.Size([1]), torch.Size([4]), torch.Size([1, 3]), torch.Size([4, 3]))

In [221]:
from torch import nn
def mse(X, Y, w):
    """Reference MSE loss and its gradient w.r.t. ``w``, computed by PyTorch autograd.

    Args:
        X: torch tensor of inputs; the prediction is ``X @ w``.
        Y: torch tensor of targets, compared with the prediction by ``nn.MSELoss``.
        w: torch tensor of weights; must have ``requires_grad=True``.

    Returns:
        Tuple ``(dev, loss)``: ``dev`` is a NumPy array with d(loss)/d(w) in the
        shape of ``w``; ``loss`` is the loss value as a 0-d NumPy array.

    Note:
        The ``.grad`` attributes of ``X`` and ``w`` are neither read nor modified.
    """
    output = nn.MSELoss()(X @ w, Y)
    # torch.autograd.grad returns the gradient directly rather than accumulating
    # it into w.grad. A gradient left in w.grad by an earlier backward() therefore
    # cannot leak into the result, and X does not need requires_grad=True.
    (grad_w,) = torch.autograd.grad(output, w)
    return grad_w.detach().cpu().numpy(), output.detach().cpu().numpy()

def my_mse(X, Y, w):
    """Hand-derived MSE loss and its gradient w.r.t. ``w`` using NumPy only.

    Args:
        X: NumPy array of inputs; the prediction is ``X.dot(w)``.
        Y: NumPy array of targets; ``Y.size`` (its total element count) is the
            normalizer of the mean.
        w: NumPy array of weights.

    Returns:
        Tuple ``(dev, loss)``: the analytic gradient ``2 / Y.size * X.T @ (Xw - Y)``
        and the mean of the squared residuals.
    """
    residual = X.dot(w) - Y
    mean_squared = np.mean(residual ** 2)
    # Scale X.T first, then multiply by the residual (same operation order as before).
    gradient = ((2 / Y.size) * X.T) @ residual
    return gradient, mean_squared

In [245]:
# Each entry is an (X, Y, w) triple. The four cases pair 1 or 4 input features
# with a flat target vector or a 3-column target matrix. Both lists hold the same
# cases in the same order, so they can be zipped together.
setup_np = list(zip(
    (x_1dim, x_1dim, x_4dim, x_4dim),
    (y_1dim, y_3dim, y_1dim, y_3dim),
    (w_1_1_dim, w_1_3_dim, w_4_1_dim, w_4_3_dim),
))
setup_torch = list(zip(
    (x_1dim_torch, x_1dim_torch, x_4dim_torch, x_4dim_torch),
    (y_1dim_torch, y_3dim_torch, y_1dim_torch, y_3dim_torch),
    (w_1_1_dim_torch, w_1_3_dim_torch, w_4_1_dim_torch, w_4_3_dim_torch),
))
# Shape walk-through for the last case: X is 16x4, Y is 16x3, w is 4x3.
#   loss     = sum((X @ w - Y) ** 2) / (16 * 3)
#   gradient = 2 / (16 * 3) * X.T @ (X @ w - Y), i.e. 4x16 @ 16x3 -> 4x3

In [223]:
# Compare the hand-derived MSE (gradient, loss) with the autograd reference for every case.
for np_setup, torch_setup in zip(setup_np, setup_torch):
    expected, actual = mse(*torch_setup), my_mse(*np_setup)
    # Index 0 is the gradient, index 1 the loss; both must agree.
    for reference, candidate in zip(expected, actual):
        assert np.allclose(reference, candidate)

In [234]:
def mae(X, Y, w):
    """Reference MAE (L1) loss and its gradient w.r.t. ``w``, computed by PyTorch autograd.

    Args:
        X: torch tensor of inputs; the prediction is ``X @ w``.
        Y: torch tensor of targets, compared with the prediction by ``nn.L1Loss``.
        w: torch tensor of weights; must have ``requires_grad=True``.

    Returns:
        Tuple ``(dev, loss)``: ``dev`` is a NumPy array with d(loss)/d(w) in the
        shape of ``w``; ``loss`` is the loss value as a 0-d NumPy array.

    Note:
        The ``.grad`` attributes of ``X`` and ``w`` are neither read nor modified.
    """
    output = nn.L1Loss()(X @ w, Y)
    # torch.autograd.grad returns the gradient directly rather than accumulating
    # it into w.grad. A gradient left in w.grad by an earlier backward() therefore
    # cannot leak into the result, and X does not need requires_grad=True.
    (grad_w,) = torch.autograd.grad(output, w)
    return grad_w.detach().cpu().numpy(), output.detach().cpu().numpy()

def my_mae(X, Y, w):
    """Hand-derived MAE loss and its (sub)gradient w.r.t. ``w`` using NumPy only.

    Args:
        X: NumPy array of inputs; the prediction is ``X.dot(w)``.
        Y: NumPy array of targets; ``Y.size`` (its total element count) is the
            normalizer of the mean.
        w: NumPy array of weights.

    Returns:
        Tuple ``(dev, loss)``: ``X.T @ sign(Xw - Y) / Y.size`` and the mean of the
        absolute residuals.
    """
    residual = X.dot(w) - Y
    mean_absolute = np.abs(residual).mean()
    # np.sign is 0 where the residual is exactly 0, so those entries contribute nothing.
    gradient = (X.T @ np.sign(residual)) / Y.size
    return gradient, mean_absolute

In [236]:
# Compare the hand-derived MAE (gradient, loss) with the autograd reference for every case.
for np_setup, torch_setup in zip(setup_np, setup_torch):
    expected, actual = mae(*torch_setup), my_mae(*np_setup)
    # Index 0 is the gradient, index 1 the loss; both must agree.
    for reference, candidate in zip(expected, actual):
        assert np.allclose(reference, candidate)

In [238]:
def L2_reg(w):
    """Reference gradient of the L2 penalty ``sum(w ** 2)`` w.r.t. ``w`` via autograd.

    Args:
        w: torch tensor of weights; must have ``requires_grad=True``.

    Returns:
        NumPy array with the gradient, in the shape of ``w``.

    Note:
        ``w.grad`` is neither read nor modified.
    """
    penalty = torch.sum(w ** 2)
    # torch.autograd.grad returns the gradient directly, so whatever an earlier
    # backward() left in w.grad cannot be added to the result.
    (grad_w,) = torch.autograd.grad(penalty, w)
    return grad_w.detach().cpu().numpy()

def my_L2_reg(w):
    """Hand-derived gradient of the L2 penalty ``sum(w ** 2)``: twice ``w``.

    Args:
        w: weights; the notebook passes NumPy arrays.

    Returns:
        ``w`` multiplied by 2.
    """
    doubled = w * 2
    return doubled

In [242]:
# Compare the hand-derived L2 penalty gradient with autograd for every weight shape.
for np_setup, torch_setup in zip(setup_np, setup_torch):
    # The weights are the last element of each (X, Y, w) triple.
    w_np, w_torch = np_setup[-1], torch_setup[-1]
    expected, actual = L2_reg(w_torch), my_L2_reg(w_np)
    assert np.allclose(expected, actual)

In [243]:
def L1_reg(w):
    """Reference gradient of the L1 penalty ``sum(abs(w))`` w.r.t. ``w`` via autograd.

    Args:
        w: torch tensor of weights; must have ``requires_grad=True``.

    Returns:
        NumPy array with the gradient, in the shape of ``w``.

    Note:
        ``w.grad`` is neither read nor modified.
    """
    penalty = torch.sum(torch.abs(w))
    # torch.autograd.grad returns the gradient directly, so whatever an earlier
    # backward() left in w.grad cannot be added to the result.
    (grad_w,) = torch.autograd.grad(penalty, w)
    return grad_w.detach().cpu().numpy()

def my_L1_reg(w):
    """Hand-derived (sub)gradient of the L1 penalty ``sum(abs(w))``: the sign of ``w``.

    Args:
        w: weights; the notebook passes NumPy arrays.

    Returns:
        ``np.sign(w)``: -1, 0 or +1 per element (0 where the entry is exactly 0).
    """
    signs = np.sign(w)
    return signs

In [244]:
# Compare the hand-derived L1 penalty gradient with autograd for every weight shape.
for np_setup, torch_setup in zip(setup_np, setup_torch):
    # The weights are the last element of each (X, Y, w) triple.
    w_np, w_torch = np_setup[-1], torch_setup[-1]
    expected, actual = L1_reg(w_torch), my_L1_reg(w_np)
    assert np.allclose(expected, actual)