### PyTorch Tensors Introduction

**Date:** 28/10/2021  
**Author:** Murad Popattia

In [1]:
import torch

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [5]:
# Sample a 3x3 tensor of uniform random values, allocated directly on `device`.
my_tensor = torch.rand((3, 3), device=device)

In [6]:
# Bare last expression: the notebook displays the tensor's repr as the cell output.
my_tensor

tensor([[0.2149, 0.4197, 0.3122],
        [0.7149, 0.4330, 0.4272],
        [0.4679, 0.9652, 0.5048]], device='cuda:0')

In [7]:
# Show which device holds this tensor's data.
my_tensor.device

device(type='cuda', index=0)

### Checking CUDA Version

In [8]:
# Print the version of the locally installed CUDA compiler (needs `nvcc` on the PATH).
# NOTE(review): this reports the system toolkit; the CUDA version PyTorch itself was
# built with is exposed as `torch.version.cuda` - confirm they match if it matters.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Tue_Sep_15_19:12:04_Pacific_Daylight_Time_2020
Cuda compilation tools, release 11.1, V11.1.74
Build cuda_11.1.relgpu_drvr455TC455_06.29069683_0


### Initializing Tensors

In [9]:
# Build a 2x3 float32 tensor from nested lists. No `device` is given, and the
# printed device confirms that it was created on the CPU.
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
)
print(my_tensor, my_tensor.device, sep="\n")

tensor([[1., 2., 3.],
        [4., 5., 6.]])
cpu


In [10]:
# Same data, but allocated on `device` (the GPU when CUDA is available).
my_tensor = torch.tensor(
    [[1, 2, 3], [4, 5, 6]],
    dtype=torch.float32,
    device=device,
)
print(my_tensor, my_tensor.device, sep="\n")

tensor([[1., 2., 3.],
        [4., 5., 6.]], device='cuda:0')
cuda:0


In [11]:
# Turning on requires_grad makes autograd track operations on this tensor,
# which is what backpropagation needs in order to compute its gradient.
my_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32).requires_grad_()
print(my_tensor)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)


In [3]:
# .item() extracts the value of a one-element tensor as a plain Python number.
# A separate name is used on purpose: reusing `my_tensor` here would overwrite
# the 2x3 tensor created above, and the attribute inspection in the next
# section would then report a different shape and requires_grad value when
# the notebook is run top to bottom.
scalar_tensor = torch.rand(1)
print(scalar_tensor, scalar_tensor.item())

tensor([0.8722]) 0.8721868395805359


### Attributes of tensor

In [12]:
# Print the tensor's metadata: element type, device, shape, and whether
# autograd is tracking it.
for attribute in ("dtype", "device", "shape", "requires_grad"):
    print(getattr(my_tensor, attribute))

torch.float32
cpu
torch.Size([2, 3])
True


### Other common initializations

In [13]:
# Factory functions, printed in order.
print(torch.empty((3, 3)))  # allocated but not initialized: the contents are arbitrary, not guaranteed zeros
print(torch.zeros((3, 3)))  # all zeros
print(torch.rand((3, 3)))  # samples from the uniform distribution on [0, 1)
print(torch.ones((3, 3)))  # all ones
print(torch.eye(5))  # 5x5 identity matrix: ones on the diagonal, zeros elsewhere
print(torch.arange(5))  # 0, 1, ..., 4: `end` is exclusive and `step` is the spacing between values (default 1)
print(torch.linspace(0, 5, 10))  # unlike arange, `end` is included and the third argument is the number of values

# In-place samplers fill an existing tensor and let us choose the distribution parameters.
print(torch.empty(1, 5).normal_(mean=0, std=1))  # normal distribution with the given mean and std
print(torch.empty(3, 3).uniform_(0, 1))  # like torch.rand(), but with explicit lower and upper bounds

# torch.diag places a 1-D tensor on the diagonal of a square matrix.
print(torch.diag(torch.arange(3)))
print(torch.diag(torch.ones(3)))

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0.2213, 0.5621, 0.8036],
        [0.7032, 0.0961, 0.7915],
        [0.2348, 0.6403, 0.3937]])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
tensor([0, 1, 2, 3, 4])
tensor([0.0000, 0.5556, 1.1111, 1.6667, 2.2222, 2.7778, 3.3333, 3.8889, 4.4444,
        5.0000])
tensor([[ 0.4401, -0.2121, -1.3968,  1.1034,  0.3741]])
tensor([[0.3818, 0.4807, 0.8207],
        [0.4032, 0.1376, 0.3378],
        [0.0946, 0.5839, 0.7232]])
tensor([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 2]])
tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])


### Converting between tensor types

In [14]:
# An integer arange produces an int64 tensor, as the printed dtype shows.
x = torch.arange(0, 4)
print(x, x.dtype)

tensor([0, 1, 2, 3]) torch.int64


In [15]:
# Cast x to a series of dtypes. `.to(dtype)` is the general form of the
# .bool() / .short() / .long() / .half() / .float() / .double() shortcuts.
for target_dtype in (
    torch.bool,     # .bool()
    torch.int16,    # .short()
    torch.int64,    # .long()   (important)
    torch.float16,  # .half()
    torch.float32,  # .float()  (important)
    torch.float64,  # .double()
):
    print(x.to(target_dtype))

tensor([False,  True,  True,  True])
tensor([0, 1, 2, 3], dtype=torch.int16)
tensor([0, 1, 2, 3])
tensor([0., 1., 2., 3.], dtype=torch.float16)
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float64)


### Conversion from numpy to tensor

In [16]:
import numpy as np

# NumPy -> torch: from_numpy keeps the array's dtype (float64 here) and
# shares memory with the source array instead of copying it.
x = np.zeros(shape=(5, 5))
print(x, x.dtype)
y = torch.from_numpy(x)
print(y, y.dtype)

# torch -> NumPy: .numpy() converts back, again as float64.
print(y.numpy(), y.numpy().dtype)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]] float64
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64) torch.float64
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]] float64


### Tensor Math and Comparison Operations

In [17]:
# Two random 1-D operands, reused by the arithmetic cells that follow.
x, y = torch.rand(3), torch.rand(3)

(x, y)

(tensor([0.6660, 0.5217, 0.6264]), tensor([0.0716, 0.1513, 0.9332]))

In [18]:
# Addition: three equivalent spellings that print the same sums.

# 1) write the result into a pre-allocated tensor through `out=`
z = torch.empty_like(x)
torch.add(x, y, out=z)
print(z)

# 2) functional form, returning a new tensor
z = torch.add(x, y)
print(z)

# 3) operator form
z = x + y
print(z)

tensor([0.7376, 0.6730, 1.5596])
tensor([0.7376, 0.6730, 1.5596])
tensor([0.7376, 0.6730, 1.5596])


In [19]:
# Subtraction (element-wise); torch.sub(x, y) is the function form of x - y.

z = torch.sub(x, y)
print(z)

tensor([ 0.5944,  0.3704, -0.3069])


In [20]:
# Division (element-wise). The `/` operator performs true division, the same
# operation as torch.true_divide(x, y). The operands need broadcast-compatible
# shapes; here both have shape (3,).

z = x / y
print(z)

tensor([9.2998, 3.4485, 0.6712])


In [21]:
# In-place operations modify the tensor itself instead of returning a new one.
# Methods with a trailing underscore (add_) and augmented assignments (-=)
# both work this way.
tensor = torch.zeros_like(x)
print(tensor)

tensor.add_(x)  # tensor now holds x's values
print(tensor)

tensor -= x  # subtracting x again brings it back to zeros
print(tensor)

tensor([0., 0., 0.])
tensor([0.6660, 0.5217, 0.6264])
tensor([0., 0., 0.])


In [22]:
# Exponentiation

# Element-wise power: the function form and the operator give the same result.
z = torch.pow(x, 2)
print(z)

z = x ** 2
print(z)

# Matrix power: the matrix multiplied by itself (matrix product), which in
# general differs from raising each element to the power.
matrix_exp = torch.diag(torch.arange(2, 5))
print(matrix_exp)
matrix_exp.matrix_power(3)

tensor([0.4436, 0.2722, 0.3923])
tensor([0.4436, 0.2722, 0.3923])
tensor([[2, 0, 0],
        [0, 3, 0],
        [0, 0, 4]])


tensor([[ 8,  0,  0],
        [ 0, 27,  0],
        [ 0,  0, 64]])

In [23]:
# Simple comparison (element-wise): each result is a bool tensor shaped like x.
# torch.gt / torch.lt are the function forms of the > and < operators.

z = torch.gt(x, 0)
print(z)

z = torch.lt(x, 0)
print(z)

tensor([True, True, True])
tensor([False, False, False])


In [24]:
# Matrix multiplication: (2x3) times (3x2) gives (2x2).

t1 = torch.rand(2, 3)
t2 = torch.rand(3, 2)

print("T1", t1, sep="\n")
print("T2", t2, sep="\n")

print("Result")
# The function form and the method form compute the same product.
print(torch.mm(t1, t2))
print(t1.mm(t2))

T1
tensor([[0.7131, 0.7010, 0.0029],
        [0.6897, 0.1618, 0.1272]])
T2
tensor([[0.6937, 0.1437],
        [0.9482, 0.7711],
        [0.7409, 0.0020]])
Result
tensor([[1.1615, 0.6430],
        [0.7262, 0.2242]])
tensor([[1.1615, 0.6430],
        [0.7262, 0.2242]])


In [25]:
# Element-wise vs. matrix multiplication on two 3x3 integer matrices holding 1..9.
t1 = torch.arange(1, 10).view(3, 3)
t2 = t1.clone()

print(t1)
print(t2)

print("Element wise Multiplication: ")
print(torch.mul(t1, t2))
print("Matrix Multiplication: ")
print(torch.mm(t1, t2))

# Dot product of two 1-D tensors: multiply element-wise, then sum the products.
t1 = torch.arange(1, 10)
t2 = torch.arange(1, 10)

print(t1.dot(t2))

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Element wise Multiplication: 
tensor([[ 1,  4,  9],
        [16, 25, 36],
        [49, 64, 81]])
Matrix Multiplication: 
tensor([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])
tensor(285)


In [26]:
# Batch Matrix Multiplication

# b is the batch size; every batch entry multiplies an (n x m) matrix by an (m x p) matrix.
b, n, m, p = 32, 10, 20, 30

t1 = torch.rand((b, n, m))
t2 = torch.rand((b, m, p))

print(t1.shape, t2.shape)

t3 = torch.bmm(t1, t2)
print(t3.shape)  # the shared dimension m is contracted: (b, n, m) x (b, m, p) -> (b, n, p)

torch.Size([32, 10, 20]) torch.Size([32, 20, 30])
torch.Size([32, 10, 30])


### Broadcasting

In [38]:
x1 = torch.ones((5, 5))
x2 = torch.arange(5)

print(x1)
print(x2)

# Broadcasting: x2 has shape (5,), so it is treated as if it were repeated for
# every row of x1 (shape (5, 5)) before the element-wise operation is applied.
print(torch.add(x1, x2))
print(torch.pow(x1, x2))

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([0, 1, 2, 3, 4])
tensor([[1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.]])
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])


### Some useful Tensor Math Operations

For many of these operations we do not need to call the `torch.*` function explicitly (e.g. `torch.max(x)`); the same operation is available as a tensor method, such as `x.max()` or `x.sort()`.

In [54]:
# Summation: over all elements, then over a single dimension.
print((x1 + x2).sum(), x1.add(x2).shape)

# The tensor is 2-D, so we can sum along either of its two dimensions.
print((x1 + x2).sum(dim=0))  # collapses the rows: one total per column
print((x1 + x2).sum(dim=1))  # collapses the columns: one total per row

tensor(75.) torch.Size([5, 5])
tensor([ 5., 10., 15., 20., 25.])
tensor([15., 15., 15., 15., 15.])


In [89]:
# max / min
z = x1 + x2
print(z)

# Without `dim` the reduction covers the whole tensor and returns one value.
# With `dim` it returns a (values, indices) pair along that dimension.
for label, reducer in (("Max: ", torch.max), ("Min: ", torch.min)):
    print(label, reducer(z))
    print(label, reducer(z, dim = 0))
    print(label, reducer(z, dim = 1))

# The pair can be unpacked directly into two names.
vals, inds = torch.max(z, dim = 0)
print("Vals:", vals)
print("Indices:", inds)

tensor([[1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.]])
Max:  tensor(5.)
Max:  torch.return_types.max(
values=tensor([1., 2., 3., 4., 5.]),
indices=tensor([0, 0, 0, 0, 0]))
Max:  torch.return_types.max(
values=tensor([5., 5., 5., 5., 5.]),
indices=tensor([4, 4, 4, 4, 4]))
Min:  tensor(1.)
Min:  torch.return_types.min(
values=tensor([1., 2., 3., 4., 5.]),
indices=tensor([0, 0, 0, 0, 0]))
Min:  torch.return_types.min(
values=tensor([1., 1., 1., 1., 1.]),
indices=tensor([0, 0, 0, 0, 0]))
Vals: tensor([1., 2., 3., 4., 5.])
Indices: tensor([0, 0, 0, 0, 0])


In [67]:
# abs argmax argmin
z = torch.sub(x1, x2)
print(z)
print(z.abs())

z = torch.add(x1, x2)
# With no `dim`, the returned index refers to the flattened tensor:
# index 4 is row 0, column 4, and index 0 is row 0, column 0.
print("Index of max: ", z.argmax())
print("Index of min: ", z.argmin())

tensor([[ 1.,  0., -1., -2., -3.],
        [ 1.,  0., -1., -2., -3.],
        [ 1.,  0., -1., -2., -3.],
        [ 1.,  0., -1., -2., -3.],
        [ 1.,  0., -1., -2., -3.]])
tensor([[1., 0., 1., 2., 3.],
        [1., 0., 1., 2., 3.],
        [1., 0., 1., 2., 3.],
        [1., 0., 1., 2., 3.],
        [1., 0., 1., 2., 3.]])
Index of max:  tensor(4)
Index of min:  tensor(0)


In [77]:
# mean needs a floating-point input. z is already floating point here (it was
# built from torch.ones), so .float() changes nothing; it is kept to show the
# cast that an integer tensor would need.

print(z.float().mean(dim = 0))  # averages over the rows: every row is identical, so the result equals that row
print(z.float().mean(dim = 1))  # averages over the columns: each row sums to 15, and 15 / 5 = 3

tensor([1., 2., 3., 4., 5.])
tensor([3., 3., 3., 3., 3.])


In [80]:
x = torch.rand(2, 2)

# Compare against an independent copy. Comparing a tensor with an alias of
# itself (y = x) is trivially true and does not demonstrate a comparison of
# values held in two different tensors.
y = x.clone()
print(torch.equal(x, y))  # a single bool: True only when shapes and all values match
print(torch.eq(x, y))  # element-wise: a bool tensor with one entry per position

y = torch.rand(2, 2)

# other element-wise comparisons
print(torch.greater(x, y))
print(torch.less(x, y))

True
tensor([[True, True],
        [True, True]])
tensor([[False, False],
        [False,  True]])
tensor([[ True,  True],
        [ True, False]])


In [86]:
# sorting
z = torch.rand(5)
print(z)
print("After sorting: ")
# torch.sort returns (values, indices); the indices are positions in the original tensor.
for descending in (False, True):
    print(torch.sort(z, descending=descending))

tensor([0.6422, 0.9128, 0.3736, 0.2888, 0.8666])
After sorting: 
torch.return_types.sort(
values=tensor([0.2888, 0.3736, 0.6422, 0.8666, 0.9128]),
indices=tensor([3, 2, 0, 4, 1]))
torch.return_types.sort(
values=tensor([0.9128, 0.8666, 0.6422, 0.3736, 0.2888]),
indices=tensor([1, 4, 0, 2, 3]))


In [92]:
# Clamping limits every value to a bound (the same idea is used for gradient clipping).
z = torch.linspace(0, 6, 10)
print(z)
print("After clamping:")
print(z.clamp(min = 4))  # values below 4 are raised to 4
print(z.clamp(max = 4))  # values above 4 are lowered to 4

tensor([0.0000, 0.6667, 1.3333, 2.0000, 2.6667, 3.3333, 4.0000, 4.6667, 5.3333,
        6.0000])
After clamping:
tensor([4.0000, 4.0000, 4.0000, 4.0000, 4.0000, 4.0000, 4.0000, 4.6667, 5.3333,
        6.0000])
tensor([0.0000, 0.6667, 1.3333, 2.0000, 2.6667, 3.3333, 4.0000, 4.0000, 4.0000,
        4.0000])


In [95]:
# any: at least one element is True; all: every element is True
x = torch.tensor([True, False, True, True, True])

print(x.any())
print(x.all())

tensor(True)
tensor(False)


### Tensor Indexing

In [105]:
batch, features = 10, 25

# Random tensor of shape (batch, features): one row per example, one column per feature.
x = torch.rand((batch, features))

print(x[2, :10].shape)  # the first 10 features of the third example
print(x[0].shape)  # same as x[0, :]: all features of the first example
print(x[:, 0].shape)  # the first feature of every example in the batch

torch.Size([10])
torch.Size([25])
torch.Size([10])


In [110]:
# Fancy indexing: index with a list of positions instead of a single position.
x = torch.arange(10)
ind = [2, 5, 8]

print(x)
print(x[ind])  # the elements at positions 2, 5 and 8

# In 2-D the row and column lists are paired up element by element,
# so this selects x[1, 4] and x[0, 0].
rows, cols = [1, 0], [4, 0]

x = torch.rand((3, 5))
print(x)
print(x[rows, cols])

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor([2, 5, 8])
tensor([[0.2088, 0.8732, 0.5612, 0.0906, 0.5069],
        [0.1223, 0.4928, 0.6797, 0.9107, 0.4841],
        [0.2155, 0.2518, 0.8610, 0.3544, 0.1514]])
tensor([0.4841, 0.2088])


In [114]:
# Advanced indexing: a boolean mask keeps only the elements where the
# condition holds.

x = torch.arange(10)
print(x[(x > 2) & (x < 8)])  # strictly between 2 and 8
print(x[x % 2 == 0])  # even values

tensor([3, 4, 5, 6, 7])
tensor([0, 2, 4, 6, 8])


### Some other useful Tensor operations

In [204]:
# Some useful operations

x = torch.arange(10)
print(torch.where(x > 5, x, x * 2))  # element-wise select: keep x where x > 5, otherwise use x * 2
print(torch.tensor([2, 2, 2, 4, 5, 6]).unique())  # the distinct values

x = torch.ones(5, 5, 5)
print(x.ndimension())  # number of dimensions of x
print(x.numel())  # total number of elements in x

# softmax normalizes along `dim`, so the values along that dimension sum to 1
print(torch.nn.functional.softmax(torch.rand(3, 3), dim = 0))  # each column sums to 1
print(torch.nn.functional.softmax(torch.rand(3, 3), dim = 1))  # each row sums to 1

# clip is another name for clamp
print(torch.arange(10).clamp(2, 8))
print(torch.arange(10).clip(2, 8))

# converting torch tensor to numpy
x = torch.rand(4, 4, device=device)
# Calling x.numpy() directly on a GPU tensor raises an error, so the data is
# moved to the CPU first; detach() returns a tensor that is not tracked by autograd.
x_np = x.cpu().detach().numpy()
# A bare `type(...)` expression in the middle of a cell is evaluated and then
# thrown away (only a cell's last expression is displayed), so the result is
# bound to a name and checked explicitly instead.
assert isinstance(x_np, np.ndarray)

x = torch.rand(4, 4, requires_grad=True)
print(x.requires_grad)
print(x.detach().requires_grad)

tensor([ 0,  2,  4,  6,  8, 10,  6,  7,  8,  9])
tensor([2, 4, 5, 6])
3
125
tensor([[0.2378, 0.4333, 0.3479],
        [0.3445, 0.2932, 0.3265],
        [0.4177, 0.2735, 0.3256]])
tensor([[0.2669, 0.3933, 0.3398],
        [0.3369, 0.3255, 0.3376],
        [0.2281, 0.3196, 0.4523]])
tensor([2, 2, 2, 3, 4, 5, 6, 7, 8, 8])
tensor([2, 2, 2, 3, 4, 5, 6, 7, 8, 8])
True
False


### Tensor Dimensions

In [156]:
# Reshaping

x = torch.arange(9)

# two ways to get a 3x3 layout of the same nine values
print(x.view(3, 3))
print(x.reshape(3, 3))

z = x.view(3, 3)

# Transposing leaves the data in place and only changes how it is indexed, so
# y is no longer contiguous in memory and view() cannot flatten it.
y = z.t()
print(y)

# y.view(9) would raise an error here. reshape works because it may copy.
print(y.reshape(9))

# Alternatively, make a contiguous copy first and then view it.
print(y.contiguous().view(9))

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])
tensor([[0, 3, 6],
        [1, 4, 7],
        [2, 5, 8]])
tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])
tensor([0, 3, 6, 1, 4, 7, 2, 5, 8])


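View and reshape are very similar. The difference is that **view** only works when the requested shape is compatible with the tensor's existing memory layout (for example, **contiguous memory**) and never copies data, while **reshape** returns a view when it can and otherwise **makes a copy** before reshaping. Because view never copies, it is guaranteed to be **cheap**; reshape costs the same when no copy is needed, so any **performance difference** only appears in the cases where reshape has to copy.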

In [None]:
# Tensor Concatenation

x1 = torch.rand((2, 5))
x2 = torch.rand((2, 5))

# Concatenation grows the chosen dimension: dim=0 stacks the rows -> (4, 5),
# dim=1 places the columns side by side -> (2, 10).
for dim in (0, 1):
    print(torch.cat([x1, x2], dim = dim).shape)

In [158]:
# Flatten: -1 tells view() to infer that dimension from the number of elements.

batch = 64
x1 = torch.rand((batch, 5, 4))
print(x1.shape)
print(x1.view(-1).shape)  # everything in one dimension: 64 * 5 * 4 = 1280
print(x1.view(batch, -1).shape)  # keep the batch dimension, flatten the rest: 5 * 4 = 20

torch.Size([64, 5, 4])
torch.Size([1280])
torch.Size([64, 20])


In [161]:
# Reordering dimensions with permute
#
# x1 has shape (64, 5, 4): dimension 0 has size 64, dimension 1 has size 5 and
# dimension 2 has size 4. permute lists the old dimensions in their new order;
# a transpose is the special case that swaps two of them.

print(x1.permute((0, 2, 1)).shape)

torch.Size([64, 4, 5])


In [183]:
# Adding a dimension with unsqueeze, removing one with squeeze

x = torch.arange(10)
print(torch.unsqueeze(x, 0).shape)  # (10,) -> (1, 10)
print(torch.unsqueeze(x, 1).shape)  # (10,) -> (10, 1)
print(torch.unsqueeze(x, 0).unsqueeze(1).shape)  # (10,) -> (1, 10) -> (1, 1, 10)

x = torch.unsqueeze(torch.arange(10), 0).unsqueeze(1)  # shape (1, 1, 10)

print(x)
print(torch.squeeze(x, 1))
print(torch.squeeze(x, 1).shape)  # dimension 1 (size 1) is removed: (1, 10)

torch.Size([1, 10])
torch.Size([10, 1])
torch.Size([1, 1, 10])
tensor([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]])
tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
torch.Size([1, 10])
