In [2]:
import torch
import torch.nn as nn

# Seed the global RNG once so that every random tensor below is reproducible.
torch.manual_seed(0)

# The constructors are kept in this exact order on purpose: each random call
# advances the global RNG, so reordering them would change the samples.
vec_example_1 = torch.randn((4,))           # standard-normal entries
vec_example_2 = torch.rand((4,))            # uniform entries; torch.rand samples [0, 1)
vec_example_3 = torch.zeros((4,))
vec_example_4 = torch.ones((4,))
vec_example_5 = torch.randint(0, 8, (4,))   # integers 0..7 (upper bound 8 is exclusive)
mat_example = torch.randn(4, 4)             # 4x4 standard-normal matrix

# Show every example next to its caption.
for _caption, _example in (
    ('Gaussian random vector:', vec_example_1),
    ('Unifom random vector (in [0,1]):', vec_example_2),
    ('All zero vector:', vec_example_3),
    ('All one vector:', vec_example_4),
    ('Uniform random integer vector from 0-7:', vec_example_5),
    ('Gaussian random matrix:', mat_example),
):
    print(_caption, _example)

# Single-sample linear prediction: pred = <x, w>, with the bias folded into w.
torch.manual_seed(0)
feat_dim = 4

# One weight per feature, plus one extra weight that multiplies the constant 1.
w = torch.randn(feat_dim + 1)

# Prepend the constant 1 to the random features, i.e. x = [1, x1, x2, x3, x4].
x = torch.cat([torch.ones(1), torch.randn(feat_dim)])

pred = x.dot(w)

for _caption, _value in (('Weight:', w), ('Input:', x), ('Prediction:', pred)):
    print(_caption, _value)

# Batch version: stack num_train samples as the rows of a matrix X.
feat_dim, num_train = 4, 8

# A column of ones (the bias input) followed by feat_dim random feature columns.
X = torch.cat([torch.ones(num_train, 1), torch.randn(num_train, feat_dim)], dim=1)
print(X.shape)

# One matrix-vector product ("mv") gives the prediction for every row of X at once.
# `w` is the weight vector created earlier in this cell.
pred = X.mv(w)
print(pred, pred.shape)


# Re-seeding with 0 restarts the generator, so `target` replays numbers that
# were already drawn after the previous seed(0) call (in the recorded output
# the printed Target starts with the same values as the printed Weight).
torch.manual_seed(0)
target = torch.randn(num_train)

# Compare the batch predictions with the targets, element by element.
for _caption, _value in (
    ('Prediction:', pred),
    ('Target:', target),
    ('Difference:', pred - target),
):
    print(_caption, _value)


# Rebuild the single-sample setup from a fresh seed: weights, input (with a
# leading constant 1), prediction, and a one-element regression target.
torch.manual_seed(0)
feat_dim = 4
w = torch.randn(feat_dim + 1)
x = torch.cat([torch.ones(1), torch.randn(feat_dim)])
pred = x.dot(w)
target = torch.randn(1)

# With pred = x . w, the derivative of 0.5 * (target - pred)^2 with respect
# to w is -(target - pred) * x; the one-element factor broadcasts over x.
grad_w = -(target - pred) * x

for _caption, _value in (
    ('Weight:', w),
    ('Gradient:', grad_w),
    ('Weight shape:', w.shape),
    ('Gradient shape:', grad_w.shape),
):
    print(_caption, _value)

# Settings for the gradient-descent run.
num_iter = 1000   # number of update steps
alpha = 1e-2      # step size

# Error of the starting weights (`x`, `w`, `target` come from the setup above).
pred = x.dot(w)
print('Prediction:', pred)
print('Target:', target)
print('Difference before training:', pred - target)

# Each step recomputes the prediction and gradient with the current weights,
# then moves w a small distance against the gradient.
for _ in range(num_iter):
    pred = x.dot(w)
    grad_w = -(target - pred) * x
    w = w - alpha * grad_w

# Error of the updated weights.
pred = x.dot(w)
print('Prediction:', pred)
print('Target:', target)
print('Difference after training:', pred - target)

Gaussian random vector: tensor([ 1.5410, -0.2934, -2.1788,  0.5684])
Unifom random vector (in [0,1]): tensor([0.4556, 0.6323, 0.3489, 0.4017])
All zero vector: tensor([0., 0., 0., 0.])
All one vector: tensor([1., 1., 1., 1.])
Uniform random integer vector from 0-7: tensor([4, 7, 6, 0])
Gaussian random matrix: tensor([[-1.3527, -1.6959,  0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530],
        [ 0.7502, -0.5855, -0.1734,  0.1835],
        [ 1.3894,  1.5863,  0.9463, -0.8437]])
Weight: tensor([ 1.5410, -0.2934, -2.1788,  0.5684, -1.0845])
Input: tensor([ 1.0000, -1.3986,  0.4033,  0.8380, -0.7193])
Prediction: tensor(2.3290)
torch.Size([8, 5])
tensor([ 2.5498,  2.2794, -0.8701, -0.3941,  1.0529,  3.2605, -2.7426,  0.3267]) torch.Size([8])
Prediction: tensor([ 2.5498,  2.2794, -0.8701, -0.3941,  1.0529,  3.2605, -2.7426,  0.3267])
Target: tensor([ 1.5410, -0.2934, -2.1788,  0.5684, -1.0845, -1.3986,  0.4033,  0.8380])
Difference: tensor([ 1.0088,  2.5728,  1.3087, -0.9626, 

In [3]:
# Batch gradient of the squared-error loss 0.5 * sum_i (target_i - pred_i)^2
# with respect to w, computed in two equivalent ways.
torch.manual_seed(0)
feat_dim = 4
num_train = 8

# Seed once, then draw w, X and target one after another from the same stream.
w = torch.randn(feat_dim+1)
X = torch.cat((torch.ones(num_train, 1), torch.randn(num_train, feat_dim)), dim=1) # one sample per row; first column is the constant 1
pred = torch.mv(X, w)
target = torch.randn(num_train)

# (1) Direct implementation: add up the per-sample gradients row by row.
# The accumulator is sized from feat_dim (not a literal 5) so it always
# matches w when feat_dim is changed.
grad_w_1 = torch.zeros(feat_dim+1)
for i in range(num_train):
    grad_w_1 = grad_w_1+(target[i]-pred[i])*(-X[i])

# (2) Matrix form of the same sum: -X^T (target - pred).
grad_w_2 = -torch.mv(X.t(), target-pred)

print('Weight:', w)
print('Gradient from direct implementation:', grad_w_1)
print('Gradient from matrix multiplication:', grad_w_2)
print('Weight shape:', w.shape)
print('Gradient shape:', grad_w_1.shape, grad_w_2.shape)

Weight: tensor([ 1.5410, -0.2934, -2.1788,  0.5684, -1.0845])
Gradient from direct implementation: tensor([ 14.9825,  -3.1721, -12.5810,  -2.1126,  -9.2198])
Gradient from matrix multiplication: tensor([ 14.9825,  -3.1721, -12.5810,  -2.1126,  -9.2198])
Weight shape: torch.Size([5])
Gradient shape: torch.Size([5]) torch.Size([5])
