# Introduction to PyTorch

In [1]:
import torch

### Tensor Properties

In [2]:
# Build a rank-3 example tensor (three 2x2 blocks) with the torch.tensor
# factory, the documented way to create a tensor from existing data, instead
# of the legacy torch.Tensor constructor.
# dtype is pinned to float32 so the integer literals are stored as floats,
# matching what torch.Tensor produced under the default dtype.
example_tensor = torch.tensor(
    [[[1, 2], [3, 4]],
     [[5, 6], [7, 8]],
     [[9, 0], [1, 2]]],
    dtype=torch.float32,
)

In [3]:
example_tensor

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]],

        [[9., 0.],
         [1., 2.]]])

### Tensor Properties: Device

In [4]:
example_tensor.device

device(type='cpu')

### Tensor Properties: Shape

In [5]:
example_tensor.shape

torch.Size([3, 2, 2])

In [6]:
# A single dimension can be read either by indexing .shape
# or by calling .size() with the dimension index.
print("shape[0] =", example_tensor.shape[0])
print("size(1) =", example_tensor.size(1))

shape[0] = 3
size(1) = 2


In [7]:
# The rank is the number of dimensions, i.e. the length of the shape;
# numel() counts every element stored in the tensor.
print("Rank =", len(example_tensor.shape))
print("Number of elements =", example_tensor.numel())

Rank = 3
Number of elements = 12


### Indexing Tensors

In [8]:
example_tensor[1]

tensor([[5., 6.],
        [7., 8.]])

In [9]:
example_tensor[1, 1, 0]

tensor(7.)

In [10]:
# Indexing down to a single element still yields a tensor;
# .item() unwraps it into a plain Python number.
example_scalar = example_tensor[1, 1, 0]
example_scalar.item()

7.0

In [11]:
example_tensor[:, 0, 0]

tensor([1., 5., 9.])

### Initializing Tensors

In [12]:
torch.ones_like(example_tensor)

tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])

In [13]:
torch.zeros_like(example_tensor)

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [14]:
torch.randn_like(example_tensor)

tensor([[[-0.6921,  2.1000],
         [ 1.0576,  0.7950]],

        [[ 1.8831, -0.1730],
         [-2.1867, -1.5215]],

        [[ 0.5721,  0.0280],
         [-0.9377,  0.7413]]])

In [15]:
# Random normal 2x2 tensor created directly on a chosen device.
# Alternatively, for a GPU tensor, you'd use device='cuda'
torch.randn((2, 2), device=torch.device('cpu'))

tensor([[ 1.0049,  1.2235],
        [-0.9058, -0.1610]])

### Basic Functions

In [16]:
(example_tensor - 5) * 2

tensor([[[ -8.,  -6.],
         [ -4.,  -2.]],

        [[  0.,   2.],
         [  4.,   6.]],

        [[  8., -10.],
         [ -8.,  -6.]]])

In [17]:
# With no arguments, both reductions run over every element of the tensor.
# std() is the sample standard deviation (dividing by N - 1), which is why
# the result is 2.9848 rather than the population value of about 2.8577.
print("Mean:", example_tensor.mean())
print("Stdev:", example_tensor.std())

Mean: tensor(4.)
Stdev: tensor(2.9848)


In [18]:
# Average over dimension 0 only: the three 2x2 blocks are averaged
# element-wise, reducing the shape from (3, 2, 2) to (2, 2).
example_tensor.mean(0)

# Equivalently, you could also write:
# example_tensor.mean(dim=0)
# example_tensor.mean(axis=0)
# torch.mean(example_tensor, 0)
# torch.mean(example_tensor, dim=0)
# torch.mean(example_tensor, axis=0)

tensor([[5.0000, 2.6667],
        [3.6667, 4.6667]])

### PyTorch Neural Network Module (`torch.nn`)

In [19]:
import torch.nn as nn

In [20]:
# A linear layer mapping 10 input features to 2 output features.
linear = nn.Linear(in_features=10, out_features=2)

# A batch of 3 random samples, each with 10 features.
example_input = torch.randn(3, 10)

# Calling the layer applies it to every row, giving a (3, 2) result.
example_output = linear(example_input)
example_output

tensor([[ 0.0146, -0.0827],
        [-0.4191, -0.3443],
        [ 0.3170,  0.9119]], grad_fn=<AddmmBackward>)

In [21]:
# ReLU leaves positive entries unchanged and replaces negative ones with 0.
relu = nn.ReLU()
relu_output = relu(example_output)
relu_output

tensor([[0.0146, 0.0000],
        [0.0000, 0.0000],
        [0.3170, 0.9119]], grad_fn=<ReluBackward0>)

In [22]:
# BatchNorm1d(2) treats the 2 columns as features and standardises each one
# across the batch (rows), so every output column has roughly zero mean.
batchnorm = nn.BatchNorm1d(2)
batchnorm_output = batchnorm(relu_output)
batchnorm_output

tensor([[-0.6565, -0.7071],
        [-0.7562, -0.7071],
        [ 1.4127,  1.4142]], grad_fn=<NativeBatchNormBackward>)

In [23]:
# nn.Sequential chains modules: each one receives the previous one's output.
mlp_layer = nn.Sequential(
    nn.Linear(in_features=5, out_features=2),
    nn.BatchNorm1d(num_features=2),
    nn.ReLU(),
)

# Five random samples with five features each, shifted up by 1.
test_example = torch.randn(5, 5) + 1

# Print each label followed by its tensor on the next line.
print("input: ", test_example, sep="\n")
print("output: ", mlp_layer(test_example), sep="\n")

input: 
tensor([[ 0.7572, -0.1835,  0.4758,  0.7426,  1.7239],
        [ 1.4397,  4.2980,  0.8439,  1.2631,  3.2627],
        [ 1.0299, -0.3506, -0.4288, -1.0835,  1.6909],
        [ 1.0859, -0.3313, -0.1023,  1.0248, -0.2427],
        [ 0.7540, -0.1721,  1.9425,  0.7108,  1.7780]])
output: 
tensor([[1.1351, 0.3075],
        [0.0000, 1.6112],
        [0.4121, 0.0000],
        [0.0000, 0.0000],
        [0.6718, 0.0727]], grad_fn=<ReluBackward0>)


### Optimization

In [24]:
import torch.optim as optim
# Adam will update every parameter exposed by mlp_layer.parameters(),
# using a learning rate of 0.1.
adam_opt = optim.Adam(mlp_layer.parameters(), lr=1e-1)

### Training Loop


In [25]:
# A batch of 100 random samples with 5 features each, shifted up by 1.
train_example = torch.randn(100, 5) + 1

# Clear gradients accumulated by any earlier backward pass.
adam_opt.zero_grad()

# We'll use a simple loss function of mean distance from 1:
# run the forward pass, then average the absolute difference from 1.
predictions = mlp_layer(train_example)
cur_loss = (1 - predictions).abs().mean()

# Backpropagate the loss, then let the optimizer take one update step.
cur_loss.backward()
adam_opt.step()
print(cur_loss)

tensor(0.7629, grad_fn=<MeanBackward0>)


### New `nn` Classes

In [26]:
class ExampleModule(nn.Module):
    """Linear layer followed by an element-wise power with a learnable exponent.

    Args:
        input_dims: number of input features passed to ``nn.Linear``.
        output_dims: number of output features produced by ``nn.Linear``.
    """

    def __init__(self, input_dims, output_dims):
        super().__init__()
        self.linear = nn.Linear(input_dims, output_dims)
        # Wrapping the tensor in nn.Parameter registers it with the module,
        # so it is returned by parameters() alongside the linear layer's own.
        self.exponent = nn.Parameter(torch.tensor(1.))

    def forward(self, x):
        """Apply the linear layer, then raise each element to ``self.exponent``."""
        projected = self.linear(x)

        # torch.pow is the functional spelling of the element-wise ``**``
        # operator, which matches Python's own exponentiation notation.
        return torch.pow(projected, self.exponent)

In [27]:
# parameters() yields the module's own nn.Parameter attributes (exponent)
# as well as those of its submodules (the Linear layer's weight and bias).
example_model = ExampleModule(10, 2)
list(example_model.parameters())

[Parameter containing:
 tensor(1., requires_grad=True),
 Parameter containing:
 tensor([[ 0.0935, -0.3137, -0.0414,  0.2845, -0.1326,  0.0514, -0.2260, -0.2278,
           0.0587, -0.0966],
         [ 0.2028,  0.1049, -0.2562,  0.2714,  0.0884, -0.2642,  0.0997,  0.2343,
           0.2574, -0.1054]], requires_grad=True),
 Parameter containing:
 tensor([-0.1119,  0.0506], requires_grad=True)]

In [28]:
list(example_model.named_parameters())

[('exponent',
  Parameter containing:
  tensor(1., requires_grad=True)),
 ('linear.weight',
  Parameter containing:
  tensor([[ 0.0935, -0.3137, -0.0414,  0.2845, -0.1326,  0.0514, -0.2260, -0.2278,
            0.0587, -0.0966],
          [ 0.2028,  0.1049, -0.2562,  0.2714,  0.0884, -0.2642,  0.0997,  0.2343,
            0.2574, -0.1054]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([-0.1119,  0.0506], requires_grad=True))]

In [29]:
# Use a descriptive name rather than `input`, which would shadow Python's
# built-in input() for the rest of the notebook session.
# The batch holds 2 random samples with 10 features each, matching the
# input size example_model was constructed with.
model_input = torch.randn(2, 10)
example_model(model_input)

tensor([[ 0.1876, -0.6051],
        [ 1.3073, -0.7475]], grad_fn=<PowBackward1>)