Preliminary: Check for PyTorch Version



In [1]:
import torch
print(torch.__version__) 

1.9.0+cu102


In [2]:
! nvidia-smi

Sat Aug 21 04:15:33 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   63C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Tensor

In [3]:
A = torch.tensor([[1., -1.], [1., -1.]])
print(A)
print(A.type)
print(A.dtype)

tensor([[ 1., -1.],
        [ 1., -1.]])
<built-in method type of Tensor object at 0x7fbb831440f0>
torch.float32


In [None]:
# PyTorch works with numpy smoothly
import numpy as np
A = torch.tensor(np.array([[1, 2, 3], [4, 5, 6]]))
print(A)
print(A.dtype)

A = torch.tensor(np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)) # PyTorch inherits dtype from numpy
print(A)
print(A.dtype)

tensor([[1, 2, 3],
        [4, 5, 6]])
torch.int64
tensor([[1., 2., 3.],
        [4., 5., 6.]])
torch.float32


In [6]:
A = torch.ones([2, 2])
print(A)

A = A.cuda()
print(A)

# create tensor directly on the GPU
cuda0 = torch.device('cuda:0')
A = torch.ones([2, 4], dtype=torch.float32, device=cuda0)
print(A)


tensor([[1., 1.],
        [1., 1.]])
tensor([[1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]], device='cuda:0')


**Performance Tip**: Whenever possible, create tensors on GPUs directly, instead of transferring from CPU to GPU after creation. 

#Tensor Operations



In [10]:
# tensor operations
A = torch.ones([2, 2])
B = torch.tensor([[1, 3], [2, 4]])
# component-wise operations
print('A+B')
print(A+B)
print('A*B')
print(A*B)

# matrix multiplilcation
print('matrix multiplication A*B')

#print(torch.matmul(A, B)) # this line fails because matmul only supports 32-bit floats
print(torch.matmul(A.float(), B.float())) # this works because of the conversion to float


A+B
tensor([[2., 4.],
        [3., 5.]])
A*B
tensor([[1., 3.],
        [2., 4.]])
matrix multiplication A*B
tensor([[3., 7.],
        [3., 7.]])


In [12]:
# concatenation
print(A)
print(B)
print('along the row dimension')
C = torch.cat([A, B], dim=0)
print(C)

print('along the column dimension')
C = torch.cat([A, B], dim=1)
print(C)

print('create a new batch dimension and concatenate')
As = A.unsqueeze(dim=0)
Bs = B.unsqueeze(dim=0)
print('As shape: ' + str(As.shape))
print('Bs shape: ' + str(Bs.shape))
C = torch.cat([As, Bs], dim=0)
print(C)
print(C.shape)

tensor([[1., 1.],
        [1., 1.]])
tensor([[1, 3],
        [2, 4]])
along the row dimension
tensor([[1., 1.],
        [1., 1.],
        [1., 3.],
        [2., 4.]])
along the column dimension
tensor([[1., 1., 1., 3.],
        [1., 1., 2., 4.]])
create a new batch dimension and concatenate
As shape: torch.Size([1, 2, 2])
Bs shape: torch.Size([1, 2, 2])
tensor([[[1., 1.],
         [1., 1.]],

        [[1., 3.],
         [2., 4.]]])
torch.Size([2, 2, 2])


In [None]:
# Batch multiplication -- optimized for mini-batch operations
A = torch.ones([2, 2, 3])
A[1, :, :] = A[1, :, :] * 0.5
print(A)
print('A shape: ' + str(A.shape))

B1 = torch.tensor([[1, 3], [2, 4], [4, 6]], dtype=torch.float32).unsqueeze(0)
B2 = torch.tensor([[4, 3], [22, 4], [-4, 60]], dtype=torch.float32).unsqueeze(0)
B = torch.cat([B1, B2], dim=0)
print(B)
print('B shape: ' + str(B.shape))
D = torch.bmm(A, B)
print(D)
print(D.shape)

In [None]:
#Eigendecomposition
# create orthgonal matrix
U = torch.zeros([5, 5])
torch.nn.init.orthogonal_(U) # any function ending in _ performs in-place modification
print(U)

# eigenvalues
S = torch.diag(torch.tensor([2, 1.8, 0.9, 0.55, 0.3]))
print(S)

M = torch.matmul(torch.matmul(U,S), U.T)
print(M)

#verify positive definiteness
for i in range(10):
  v = torch.ones([5, 1]) # it is necessary to specify one more dimension in PyTorch
  torch.nn.init.normal_(v)
  ss = torch.matmul(torch.matmul(torch.transpose(v, 0, 1), M), v)
  print(ss)

L_complex, V_complex = torch.linalg.eig(M) # this by default returns complex numbers
print(L_complex) # eigenvalues
print(V_complex) # eigenvectors
print(torch.view_as_real(L_complex)[:,0]) # real parts
print(torch.view_as_real(V_complex)[:, :, 0])

### Tensor Expand

Returns a new view of the self tensor with singleton dimensions expanded to a larger size. This does not allocate new memory. 

Any dimension of size 1 can be expanded to an arbitrary value without allocating new memory.

In [25]:
x = torch.tensor([[1], [2], [3]])
x.size()

torch.Size([3, 1])

In [16]:
x.expand(-1, 4)   # -1 means not changing the size of that dimension

tensor([[1, 1, 1, 1],
        [2, 2, 2, 2],
        [3, 3, 3, 3]])

We cannot expand any dimension whose size is not 1!

In [17]:
x.expand(4, 4) # Uh-oh!


RuntimeError: ignored

In [18]:
x.expand(6, 4)

RuntimeError: ignored

What happens when you write to a tensor resulting from expansion?

In [29]:
y = x.repeat(4, 3) # repeat the first dimension 4 times, yielding 12x1. This actually takes new memory
print(y)
y[1, 1] = 100
print(y)
x

tensor([[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3],
        [1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]])
tensor([[  1,   1,   1],
        [  2, 100,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3]])


tensor([[1],
        [2],
        [3]])

In [34]:
y = x.expand(3, 4)
y[1,1]=100 # modifying the expanded tensor. This will allocate new memory. 
print(y)
print(x)
# However, PyTorch documentation explicitly warns against modifying the tensor
# resulted from the expansion operation. So we probably shouldn't do this. 

tensor([[  1,   1,   1],
        [  2, 100,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3],
        [  1,   1,   1],
        [  2,   2,   2],
        [  3,   3,   3]])
tensor([[1],
        [2],
        [3]])


## Broadcasting

Two tensors are “broadcastable” if the following rules hold:

Each tensor has at least one dimension.

When iterating over the dimension sizes, starting at the trailing dimension, the dimension sizes must either be equal, one of them is 1, or one of them does not exist.


In [23]:
x=torch.empty(5,7,3)
y=torch.empty(5,7,3)
# same shapes are always broadcastable (i.e. the above rules always hold)

x=torch.empty((0,))
y=torch.empty(2,2)
# x and y are not broadcastable, because x does not have at least 1 dimension

# can line up trailing dimensions
x=torch.empty(5,3,4,1)
y=torch.empty(  3,1,1)
# x and y are broadcastable.
# 1st trailing dimension: both have size 1
# 2nd trailing dimension: y has size 1
# 3rd trailing dimension: x size == y size
# 4th trailing dimension: y dimension doesn't exist
print(x+y)

# but:
x=torch.empty(5,2,4,1)
y=torch.empty(  3,1,1)
# x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3
x+y

tensor([[[[-1.5153e+34],
          [-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34]],

         [[ 3.0775e-41],
          [ 3.0775e-41],
          [ 3.0775e-41],
          [ 3.0775e-41]],

         [[ 1.5975e-43],
          [ 1.4595e-39],
          [ 1.8788e+31],
          [ 1.7220e+22]]],


        [[[-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34]],

         [[ 3.3611e+21],
          [ 2.6826e+23],
          [ 2.1271e-07],
          [ 1.3661e-05]],

         [[ 5.2902e-08],
          [ 1.4580e-19],
          [ 1.1495e+24],
          [ 3.0881e+29]]],


        [[[-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34]],

         [[ 2.3089e-12],
          [ 1.9421e+31],
          [ 2.7491e+20],
          [ 6.1949e-04]],

         [[ 1.9421e+31],
          [ 2.7491e+20],
          [ 2.3078e-12],
          [ 7.1760e+22]]],


        [[[-1.5144e+34],
          [-1.5144e+34],
          [-1.5144e+34],
 

RuntimeError: ignored

If two tensors x, y are “broadcastable”, the resulting tensor size is calculated as follows:

If the number of dimensions of x and y are not equal, prepend 1 to the dimensions of the tensor with fewer dimensions to make them equal length.

Then, for each dimension size, the resulting dimension size is the max of the sizes of x and y along that dimension.


In [None]:
# can line up trailing dimensions to make reading easier
x=torch.empty(5,1,4,1) # Returns a tensor filled with uninitialized data.
y=torch.empty(  3,1,1)
(x+y).size()


torch.Size([5, 3, 4, 1])

In [None]:
# but not necessary:
x=torch.empty(1)
y=torch.empty(3,1,7)
(x+y).size()


torch.Size([3, 1, 7])

In [None]:
x=torch.empty(5,2,4,1)
y=torch.empty(3,1,1)
(x+y).size()

RuntimeError: ignored

### In-place semantics

One complication is that in-place operations do not allow the in-place tensor to change shape as a result of the broadcast.

In [None]:
x=torch.empty(5,3,4,1)
y=torch.empty(3,1,1)
(x.add_(y)).size() # this replaces x with the result of the addition operation

torch.Size([5, 3, 4, 1])

In [None]:
x=torch.empty(1,3,1)
y=torch.empty(3,1,7)
(x.add_(y)).size()


RuntimeError: ignored