In [1]:
# Importing the modules
%matplotlib inline
import random

import torch
from torch import nn
from torch.utils import data

In [11]:
def synthetic_data(w, b, num_examples): #@save
  """Sample a synthetic linear-regression dataset: y = Xw + b + noise.

  Args:
    w: weight tensor; `len(w)` fixes the number of feature columns.
    b: bias added to every label.
    num_examples: number of rows to draw.

  Returns:
    A pair `(X, y)`: `X` has shape `(num_examples, len(w))` and is drawn
    from a normal distribution with mean 0 and std 1; `y` is reshaped to a
    single column `(-1, 1)` and carries additive normal noise with std 0.01.
  """
  num_features = len(w)
  features = torch.normal(0, 1, (num_examples, num_features))
  # Everything here is a torch tensor, so the product stays in torch.
  noiseless = features @ w + b
  noise = torch.normal(0, 0.01, noiseless.shape)
  labels = noiseless + noise
  return features, labels.reshape((-1, 1))

In [3]:
# data preparation
def data_iter(batch_size, features, labels):
  """Yield shuffled minibatches of `(features, labels)`.

  Args:
    batch_size: maximum number of examples per minibatch.
    features: indexable tensor whose first dimension enumerates examples.
    labels: tensor indexed with the same example indices as `features`.

  Yields:
    `(features[batch], labels[batch])` pairs that together cover every
    example exactly once, in random order. The final batch is shorter when
    `len(features)` is not a multiple of `batch_size`. Nothing is yielded
    for empty input.
  """
  num_examples = len(features)
  indices = list(range(num_examples))
  # The examples are read at random, in no particular order
  random.shuffle(indices)
  for start in range(0, num_examples, batch_size):
    # List slicing clamps at the end, so no explicit min() is needed.
    batch_indices = torch.tensor(indices[start:start + batch_size])
    # The yield must live inside the loop; otherwise only the last batch
    # would ever be produced.
    yield features[batch_indices], labels[batch_indices]


In [4]:
def load_array(data_arrays, batch_size, is_train=True): #@save
  """Wrap a tuple of tensors in a PyTorch `DataLoader`.

  Args:
    data_arrays: tensors unpacked into a `TensorDataset`.
    batch_size: number of examples per minibatch.
    is_train: when true the loader shuffles the examples.

  Returns:
    A `DataLoader` over the combined dataset.
  """
  tensor_dataset = data.TensorDataset(*data_arrays)
  loader = data.DataLoader(
      tensor_dataset, batch_size=batch_size, shuffle=is_train)
  return loader

In [5]:
## Prepare the dataset: 20 training examples against 200 input features.
n_train = 20
n_test = 100
num_inputs = 200
batch_size = 5

# Ground-truth parameters: every weight is 0.01, the bias is 0.05.
true_w = 0.01 * torch.ones((num_inputs, 1))
true_b = 0.05

# Train split is shuffled by the loader (is_train defaults to True);
# the test split is iterated in order.
train_data = synthetic_data(true_w, true_b, n_train)
train_iter = load_array(train_data, batch_size)
test_data = synthetic_data(true_w, true_b, n_test)
test_iter = load_array(test_data, batch_size, is_train=False)

In [6]:
def init_params():
  """Create fresh trainable parameters for the linear model.

  Returns:
    `[w, b]`, where `w` has shape `(num_inputs, 1)` and is drawn from a
    normal distribution with mean 0 and std 1, and `b` is a single zero.
    Both tensors track gradients. `num_inputs` is read from the
    notebook's global scope.
  """
  weight_shape = (num_inputs, 1)
  weights = torch.normal(0, 1, size=weight_shape, requires_grad=True)
  bias = torch.zeros(1, requires_grad=True)
  return [weights, bias]

In [7]:
def l2_penalty(w):
  """Return half the sum of squared entries of `w` (a scalar tensor)."""
  squared = w ** 2
  return squared.sum() / 2


# defining the learning algorithm
def sgd(params, lr, batch_size):
  """Apply one minibatch SGD step to `params` in place.

  Each parameter moves by `lr * grad / batch_size` against its gradient,
  and the gradient is then reset to zero ready for the next backward pass.

  Args:
    params: iterable of tensors whose `.grad` has already been populated.
    lr: learning rate.
    batch_size: divisor that turns a summed-loss gradient into a mean.
  """
  # Parameter updates must not be recorded by autograd.
  with torch.no_grad():
    for tensor in params:
      gradient = tensor.grad
      step = lr * gradient / batch_size
      tensor -= step
      gradient.zero_()

# forward
def linreg(X, theta_1, theta_0):
  """Linear regression forward pass: `X @ theta_1 + theta_0`."""
  projection = X @ theta_1
  return projection + theta_0


# Computing the MSE
def MSE_loss(y_out, y_true):
  """Element-wise halved squared error between `y_out` and `y_true`.

  `y_true` is reshaped to `y_out.shape` before subtracting; the result is
  not reduced, so it has the same shape as `y_out`.
  """
  residual = y_out - y_true.reshape(y_out.shape)
  return residual ** 2 / 2

In [8]:
def train(lambd):
  """Train the linear model with minibatch SGD and an L2 penalty.

  Runs 100 epochs over the notebook-level `train_iter` with a fixed
  learning rate of 0.01. After every minibatch update it prints the
  summed minibatch loss and the L2 norm of `w`.

  Args:
    lambd: multiplier on `l2_penalty(w)`; 0 disables the penalty term.
  """
  w, b = init_params()
  num_epochs = 100
  lr = 0.01
  for _ in range(num_epochs):
    for features, targets in train_iter:
      # The scalar penalty broadcasts onto every per-example loss, so it
      # is counted once per example when the losses are summed.
      per_example = (MSE_loss(linreg(features, w, b), targets)
                     + lambd * l2_penalty(w))
      total = per_example.sum()
      total.backward()
      sgd([w, b], lr, batch_size)
      print("Loss", total)
      print('L2 norm of w:', torch.norm(w).item())

In [9]:
# Train with lambd=0, so the `lambd * l2_penalty(w)` term adds nothing to the loss.
train(lambd=0)

Loss tensor(423.4085, grad_fn=<SumBackward0>)
L2 norm of w: 15.637124061584473
Loss tensor(720.3859, grad_fn=<SumBackward0>)
L2 norm of w: 15.48731803894043
Loss tensor(951.5809, grad_fn=<SumBackward0>)
L2 norm of w: 15.294082641601562
Loss tensor(93.4011, grad_fn=<SumBackward0>)
L2 norm of w: 15.274580955505371
Loss tensor(248.9888, grad_fn=<SumBackward0>)
L2 norm of w: 15.221720695495605
Loss tensor(131.9106, grad_fn=<SumBackward0>)
L2 norm of w: 15.193304061889648
Loss tensor(119.6603, grad_fn=<SumBackward0>)
L2 norm of w: 15.16749095916748
Loss tensor(86.2134, grad_fn=<SumBackward0>)
L2 norm of w: 15.14908504486084
Loss tensor(48.2394, grad_fn=<SumBackward0>)
L2 norm of w: 15.13801097869873
Loss tensor(81.6414, grad_fn=<SumBackward0>)
L2 norm of w: 15.120823860168457
Loss tensor(19.3650, grad_fn=<SumBackward0>)
L2 norm of w: 15.116085052490234
Loss tensor(37.6804, grad_fn=<SumBackward0>)
L2 norm of w: 15.108207702636719
Loss tensor(27.1181, grad_fn=<SumBackward0>)
L2 norm of w: 15.

In [9]:
def train(lambd):
  """Train the linear model with minibatch SGD, L2 penalty and lr decay.

  Runs 100 epochs over the notebook-level `train_iter`, starting from a
  learning rate of 0.01. At the start of each epoch the current learning
  rate is multiplied by `1000 / (1000 + epoch)` (so the decay compounds
  from one epoch to the next) and printed. After every minibatch update
  the summed minibatch loss and the L2 norm of `w` are printed.

  Args:
    lambd: multiplier on `l2_penalty(w)`; 0 disables the penalty term.
  """
  w, b = init_params()
  num_epochs = 100
  lr = 0.01
  for epoch in range(num_epochs):
    # Shrink the running learning rate; the factor is 1 for epoch 0.
    lr *= 1000 / (1000 + epoch)
    print(lr)
    for features, targets in train_iter:
      # The scalar penalty broadcasts onto every per-example loss, so it
      # is counted once per example when the losses are summed.
      per_example = (MSE_loss(linreg(features, w, b), targets)
                     + lambd * l2_penalty(w))
      total = per_example.sum()
      total.backward()

      sgd([w, b], lr, batch_size)
      print("Loss", total)
      print('L2 norm of w:', torch.norm(w).item())

In [10]:
# Train with lambd=3, i.e. the L2 penalty on w is weighted by 3 in the loss.
train(lambd=3)

0.01
Loss tensor(1781.7705, grad_fn=<SumBackward0>)
L2 norm of w: 13.727041244506836
Loss tensor(2139.0605, grad_fn=<SumBackward0>)
L2 norm of w: 13.146439552307129
Loss tensor(1794.7024, grad_fn=<SumBackward0>)
L2 norm of w: 12.634703636169434
Loss tensor(1482.0585, grad_fn=<SumBackward0>)
L2 norm of w: 12.184324264526367
0.00999000999000999
Loss tensor(1252.6794, grad_fn=<SumBackward0>)
L2 norm of w: 11.782763481140137
Loss tensor(1091.4893, grad_fn=<SumBackward0>)
L2 norm of w: 11.416189193725586
Loss tensor(1114.7555, grad_fn=<SumBackward0>)
L2 norm of w: 11.035908699035645
Loss tensor(1006.6398, grad_fn=<SumBackward0>)
L2 norm of w: 10.677200317382812
0.009970069850309371
Loss tensor(891.8154, grad_fn=<SumBackward0>)
L2 norm of w: 10.346963882446289
Loss tensor(837.6575, grad_fn=<SumBackward0>)
L2 norm of w: 10.026973724365234
Loss tensor(776.9424, grad_fn=<SumBackward0>)
L2 norm of w: 9.719977378845215
Loss tensor(738.3115, grad_fn=<SumBackward0>)
L2 norm of w: 9.419658660888672
