In [23]:
import torch
import numpy as np

## Tensors and parameters in PyTorch

In [5]:
# Constant tensors: a (2, 1) tensor filled with ones

torch.ones(2, 1)

tensor([[1.],
        [1.]])

In [6]:
# A (2, 1) tensor filled with zeros
torch.zeros(2, 1)

tensor([[0.],
        [0.]])

In [7]:
# Build a float32 tensor from a Python list of ints
torch.tensor([1, 2, 3], dtype=torch.float)

tensor([1., 2., 3.])

In [8]:
# Random tensors: one normal draw per element, each with mean 0 and std 1
torch.normal(mean=torch.zeros(3, 1), std=torch.ones(3, 1))

tensor([[-0.3854],
        [ 0.5359],
        [ 0.6536]])

In [9]:
# Uniform random values in [0, 1), shape (3, 1)
torch.rand((3, 1))

tensor([[0.2521],
        [0.4132],
        [0.0782]])

In [11]:
# Tensor assignment and the Parameter class
# Unlike TensorFlow tensors, PyTorch tensors support in-place item assignment.

x = torch.zeros(2, 1)
x[0, 0] = 1.0  # overwrite the first entry in place
x

tensor([[1.],
        [0.]])

In [12]:
# Creating a PyTorch parameter: wrap an all-zeros (2, 1) tensor in a Parameter
x = torch.zeros(2, 1)
p = torch.nn.Parameter(x)

In [13]:
# Tensor operations: Doing math in PyTorch
a = torch.ones(2, 2)
b = a.square()                # element-wise square
c = a.sqrt()                  # element-wise square root
d = torch.add(b, c)           # element-wise sum
e = a @ b                     # matrix product
f = torch.cat([a, b], dim=0)  # concatenate along the first axis

In [14]:
def dense(inputs, W, b):
  """Apply a dense layer: ``relu(inputs @ W + b)``.

  Args:
    inputs: Tensor that is matrix-multiplied by ``W``.
    W: Weight tensor.
    b: Bias tensor, added to the matrix product.

  Returns:
    The affine transform of ``inputs`` with negative entries clamped to 0.
  """
  # torch.nn has no `relu` attribute; the functional form lives in
  # torch.nn.functional.
  return torch.nn.functional.relu(torch.matmul(inputs, W) + b)

In [15]:
# Computing gradients with PyTorch
# Differentiate result = input_var ** 2 at input_var = 3 using .backward()

input_var = torch.tensor(3.0, requires_grad=True)
result = input_var.square()
result.backward()  # fills input_var.grad with d(result)/d(input_var)
gradient = input_var.grad
gradient

tensor(6.)

In [16]:
# Calling backward() again without clearing .grad:
# the new gradient is added to the value already stored in .grad
result = input_var.square()
result.backward()
gradient = input_var.grad
gradient

tensor(12.)

In [17]:
# Reset the stored gradient so the next backward() starts from scratch
# instead of adding to the accumulated value.
input_var.grad = None

## An end-to-end example: A linear classifier in pure PyTorch

In [24]:
# Generating two classes of random points in a 2D plane
num_samples_per_class = 1000

# Seeded generator so that "Restart Kernel -> Run All" reproduces the same points.
rng = np.random.default_rng(42)

# Both classes share the same covariance and differ only in their mean.
class_cov = [[1, 0.5], [0.5, 1]]

negative_samples = rng.multivariate_normal(
    mean=[0, 3], cov=class_cov, size=num_samples_per_class
)
positive_samples = rng.multivariate_normal(
    mean=[3, 0], cov=class_cov, size=num_samples_per_class
)

In [25]:
# Stacking the two classes into a single float32 array with shape (2000, 2)
inputs = np.concatenate([negative_samples, positive_samples], axis=0).astype(np.float32)

In [26]:
# Generating the corresponding targets: zeros for the first block of rows,
# ones for the second block
targets = np.concatenate(
    [
        np.zeros((num_samples_per_class, 1), dtype=np.float32),
        np.ones((num_samples_per_class, 1), dtype=np.float32),
    ],
    axis=0,
)

In [18]:
input_dim = 2
output_dim = 1

# Trainable weights (uniform random in [0, 1)) and bias (zeros) for the linear model
W = torch.rand((input_dim, output_dim), requires_grad=True)
b = torch.zeros((output_dim,), requires_grad=True)

In [19]:
def model(inputs, W, b):
  """Linear model: matrix-multiply ``inputs`` by ``W`` and add ``b``."""
  projected = torch.matmul(inputs, W)
  return projected + b

In [20]:
def mean_squared_error(targets, predictions):
  """Return the mean of the element-wise squared differences."""
  residuals = targets - predictions
  return torch.square(residuals).mean()

In [27]:
learning_rate = 0.1

def training_step(inputs, targets, W, b):
  """Run one gradient-descent update of ``W`` and ``b`` and return the loss.

  Args:
    inputs: Tensor passed to ``model``.
    targets: Tensor the predictions are compared against.
    W: Weight tensor with ``requires_grad=True``; updated in place.
    b: Bias tensor with ``requires_grad=True``; updated in place.

  Returns:
    The mean-squared-error loss computed before the update.
  """
  # `model` is the plain function model(inputs, W, b): the parameters must be
  # passed explicitly.
  predictions = model(inputs, W, b)
  loss = mean_squared_error(targets, predictions)
  loss.backward()
  # Apply the update outside of autograd tracking.
  with torch.no_grad():
    W -= learning_rate * W.grad
    b -= learning_rate * b.grad
  # Clear the gradients; otherwise the next backward() would add to them.
  W.grad = None
  b.grad = None
  return loss

In [29]:
# The training code above can be written more simply
# Packaging state and computation with the Module class

class LinearModel(torch.nn.Module):
  """Linear layer computing ``matmul(inputs, W) + b`` with trainable W and b.

  The shapes come from the module-level ``input_dim`` and ``output_dim``.
  """

  def __init__(self):
    super().__init__()
    initial_weights = torch.rand(input_dim, output_dim)
    initial_bias = torch.zeros(output_dim)
    # Wrapping in Parameter makes both tensors show up in self.parameters().
    self.W = torch.nn.Parameter(initial_weights)
    self.b = torch.nn.Parameter(initial_bias)

  def forward(self, inputs):
    """Return the affine transform of ``inputs``."""
    projected = torch.matmul(inputs, self.W)
    return projected + self.b

In [30]:
# Instantiate the module. Note: this rebinds the name `model`, which earlier
# referred to the plain function model(inputs, W, b).
model = LinearModel()

In [31]:
# Convert the NumPy inputs to a tensor and call the module on them.
torch_inputs = torch.tensor(inputs)
output = model(torch_inputs)

In [32]:
# SGD optimizer over the module's parameters, reusing the learning_rate defined earlier.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [33]:
def training_step(inputs, targets):
  """Run one optimizer update on the global ``model`` and return the loss."""
  loss = mean_squared_error(targets, model(inputs))
  loss.backward()    # populate .grad on the model's parameters
  optimizer.step()   # apply the update
  model.zero_grad()  # clear gradients before the next step
  return loss

In [34]:
# Making PyTorch modules fast using compilation:
# pass the module to torch.compile and keep the returned object.
compiled_model = torch.compile(model)

In [35]:
# Another way of making PyTorch code fast using compilation: the @torch.compile decorator
@torch.compile
def dense(inputs, W, b):
    """Compiled dense layer: ``relu(inputs @ W + b)``.

    Args:
        inputs: Tensor that is matrix-multiplied by ``W``.
        W: Weight tensor.
        b: Bias tensor, added to the matrix product.

    Returns:
        The affine transform of ``inputs`` with negative entries clamped to 0.
    """
    # torch.nn has no `relu` attribute; the functional form lives in
    # torch.nn.functional.
    return torch.nn.functional.relu(torch.matmul(inputs, W) + b)