In [2]:
import torch

### Initializing tensors

In [3]:
# Build a 2x3 float32 tensor from a nested Python list, placed explicitly on the CPU.
my_tensor = torch.tensor([[1,2,3], [4,5,6]], dtype=torch.float32, device="cpu")
my_tensor

tensor([[1., 2., 3.],
        [4., 5., 6.]])

If a CUDA-enabled GPU is present, a tensor can be placed on it using the "device" parameter. By default, tensors are created on the CPU.

In [4]:
# Choose "cuda" when a CUDA device is available, otherwise fall back to "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

No CUDA GPU is present on this laptop, so the device falls back to the CPU.

In [5]:
# Inspect the basic attributes of the tensor.
print(my_tensor)
print(my_tensor.dtype)
print(my_tensor.device)
print(my_tensor.shape)
print(my_tensor.requires_grad) ## Whether autograd tracks operations on this tensor (needed to compute gradients)

tensor([[1., 2., 3.],
        [4., 5., 6.]])
torch.float32
cpu
torch.Size([2, 3])
False


### Alternate initialisations

In [6]:
x = torch.empty(size = (3,3))
x

## torch.empty allocates a tensor of the given shape WITHOUT initialising its values.
## The contents are whatever already happened to be in that memory (they may look like zeros, as below), so never rely on them.

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [7]:
x = torch.zeros((3,3))
x

## Initialises a tensor of the given shape filled with 0's

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [8]:
x = torch.ones((3,3))
x

## Initialises a tensor of the given shape filled with 1's

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [9]:
x = torch.rand((3,3))
x

## Initialises the tensor with values drawn from a uniform distribution on the interval [0, 1)

tensor([[0.9854, 0.8845, 0.6149],
        [0.2734, 0.2384, 0.0807],
        [0.9024, 0.0319, 0.4593]])

In [10]:
x = torch.eye(5,5)
x

## Initialises a 5x5 identity matrix (ones on the diagonal, zeros elsewhere)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [11]:
x = torch.arange(start=0.1,end=4,step=1)
x

## Similar to Python's range function (start is inclusive, end is exclusive, step is the step size), but non-integer values are accepted too

tensor([0.1000, 1.1000, 2.1000, 3.1000])

In [12]:
x = torch.linspace(start=0.1, end=1, steps=13)
x

## Similar to NumPy's linspace (start and end are both inclusive, steps is the number of values in the inclusive interval)

tensor([0.1000, 0.1750, 0.2500, 0.3250, 0.4000, 0.4750, 0.5500, 0.6250, 0.7000,
        0.7750, 0.8500, 0.9250, 1.0000])

In [13]:
x = torch.empty((3,3)).normal_(mean=0, std=1)
x

## Allocates a 3x3 tensor and fills it in place with samples from a normal distribution with mean 0 and stdev 1

tensor([[ 2.2643,  0.8712,  0.2172],
        [ 0.1021,  1.7420, -1.3092],
        [ 1.4262,  0.8295,  0.4208]])

In [14]:
x = torch.empty((3,3)).uniform_(0,1)
x

## Allocates a 3x3 tensor and fills it in place with samples from a uniform distribution between 0 and 1

tensor([[0.8889, 0.7653, 0.4151],
        [0.6854, 0.5481, 0.9769],
        [0.2556, 0.7450, 0.0264]])

In [15]:
x = torch.diag(torch.rand(4))
x

## Builds a 4x4 diagonal matrix whose diagonal entries are the given 1-D tensor (here, 4 samples from the uniform distribution); all off-diagonal entries are 0

tensor([[0.4043, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.3012, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.5544, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.7695]])

### Converting tensors to other datatypes

In [16]:
# arange with integer arguments yields an integer (int64) tensor, as the printed dtype shows.
t = torch.arange(4)
print(t, t.dtype)

tensor([0, 1, 2, 3]) torch.int64


In [17]:
# Each conversion method returns a tensor of the target dtype; t itself keeps its dtype.
print(t.bool()) # boolean (0 -> False, non-zero -> True)
print(t.short()) # int16
print(t.long()) # int64(important)
print(t.half()) # float16
print(t.float()) # float32(important)
print(t.double()) # float64

tensor([False,  True,  True,  True])
tensor([0, 1, 2, 3], dtype=torch.int16)
tensor([0, 1, 2, 3])
tensor([0., 1., 2., 3.], dtype=torch.float16)
tensor([0., 1., 2., 3.])
tensor([0., 1., 2., 3.], dtype=torch.float64)


### Array to tensor and vice versa

In [18]:
import numpy as np
# A plain 1-D NumPy integer array to convert to a tensor.
array = np.array([1,2,3,4])
array

array([1, 2, 3, 4])

In [19]:
# from_numpy wraps the NumPy array without copying: the tensor shares the array's memory.
tensor = torch.from_numpy(array)
tensor

tensor([1, 2, 3, 4])

In [20]:
# .numpy() likewise returns an array that shares memory with the tensor (no copy).
array_back = tensor.numpy()
array_back

array([1, 2, 3, 4])

In [21]:
# Mutate the original array; the change is visible through all three names.
array[1] = 10
print(array)
print(tensor)
print(array_back)

[ 1 10  3  4]
tensor([ 1, 10,  3,  4])
[ 1 10  3  4]


NOTE : torch.from_numpy and Tensor.numpy do not copy the data. The original array, the tensor and the array converted back all share the same underlying memory, and thus updating one updates the others too.

### Math operations on tensors

In [22]:
# Two 1-D integer tensors of the same length, used as operands in the cells that follow.
x = torch.tensor([1,2,3])
y = torch.tensor([9,8,7])

In [23]:
#Addition
z1 = torch.empty(3) # pre-allocated output buffer (floating-point by default)
torch.add(x,y,out=z1) # writes x+y into z1 instead of allocating a new tensor
z1

tensor([10., 10., 10.])

In [24]:
# Functional form: returns a new tensor holding x+y.
z2 = torch.add(x,y)
z2

tensor([10, 10, 10])

In [25]:
# Operator form: gives the same result as torch.add(x, y).
z3 = x+y
z3

tensor([10, 10, 10])

In [26]:
## Inplace addition
## Methods whose name ends with an underscore modify the tensor in place
t = torch.zeros(3)
t.add_(x)

tensor([1., 2., 3.])

In [27]:
#Subtraction (element wise)
z1 = x-y
z1

tensor([-8, -6, -4])

In [28]:
#Division
z = torch.true_divide(x,y)
z

## Division happens element wise if both tensors have the same shape. If one of them is a plain scalar, it is broadcast across the other tensor.
## true_divide returns a floating-point result even though x and y hold integers.

tensor([0.1111, 0.2500, 0.4286])

In [29]:
## Inplace addition (alternate)
## Note: re-running this cell adds x to t again, so the displayed value depends on how many times it has run
t += x
t

tensor([2., 4., 6.])

NOTE : t = t+x IS NOT inplace (it creates a new tensor and rebinds the name t), while t += x IS inplace (it modifies the existing tensor)

In [30]:
## Exponentiation (element wise): every element of x is squared
z = x.pow(2)
z

tensor([1, 4, 9])

In [32]:
# Operator form of element wise exponentiation.
z = y**2
z

tensor([81, 64, 49])

In [34]:
## Simple comparison (element wise, yields a boolean tensor)
z = x>0
z

tensor([True, True, True])

In [35]:
# Element wise "less than" comparison; no element of x is negative, so every entry is False.
z = x<0
z

tensor([False, False, False])

In [36]:
## Matrix multiplication
x1 = torch.rand((2,5))
x2 = torch.rand((5,3))

x3 = torch.mm(x1,x2) # (2,5) x (5,3) -> (2,3)
x3

tensor([[1.0590, 1.0705, 1.2591],
        [0.6755, 0.7336, 0.8229]])

In [37]:
# Method form of the same matrix product; gives the same result as torch.mm(x1, x2).
x4 = x1.mm(x2)
x4

tensor([[1.0590, 1.0705, 1.2591],
        [0.6755, 0.7336, 0.8229]])

In [47]:
##Matrix exponentiation
## matrix_power(3) is the matrix product of the matrix with itself three times (not an element wise power).
## NOTE: despite the variable name, this is an integer matrix power, not the matrix exponential e^A (that is torch.matrix_exp).

matrix_exp = torch.rand((5,5))
print(matrix_exp.matrix_power(3))

tensor([[2.2277, 1.1587, 2.1961, 2.0758, 2.6785],
        [3.0729, 1.6785, 3.1527, 2.9327, 3.7461],
        [4.6830, 2.7269, 5.0174, 4.7435, 6.1080],
        [5.8301, 3.2135, 6.0120, 5.6378, 7.2492],
        [5.3739, 2.9831, 5.5401, 5.2360, 6.7670]])


In [48]:
##Element wise multiplication (x and y are the 1-D integer tensors defined earlier)

z = x*y
z

tensor([ 9, 16, 21])

In [50]:
##Dot product: sum of the element wise products, returned as a 0-dimensional tensor

z = torch.dot(x,y)
z

tensor(46)

In [51]:
## Batch matrix multiplication
batch, n, m, p = 32, 10, 20, 30

t1 = torch.rand(batch, n, m)    # (batch, n, m)
t2 = torch.rand(batch, m, p)    # (batch, m, p)
# bmm multiplies the i-th matrix of t1 with the i-th matrix of t2
out_bmm = torch.bmm(t1, t2)     # (batch, n, p)
out_bmm

tensor([[[5.3743, 5.9356, 6.2739,  ..., 5.6696, 5.0998, 4.8168],
         [3.7748, 4.1940, 5.4176,  ..., 4.5453, 3.6027, 3.6980],
         [4.2739, 5.1057, 5.6470,  ..., 5.6089, 4.0610, 4.1700],
         ...,
         [5.0254, 5.2975, 6.1697,  ..., 5.5241, 4.3352, 4.9682],
         [4.3430, 5.1002, 6.4227,  ..., 5.6843, 4.5378, 4.2600],
         [3.4315, 4.2589, 4.4090,  ..., 3.6245, 2.7094, 3.0977]],

        [[4.7820, 7.2092, 6.8624,  ..., 5.9440, 5.3648, 6.0159],
         [2.9775, 3.8011, 4.3128,  ..., 4.6675, 3.5387, 4.6087],
         [3.8694, 5.9921, 5.8263,  ..., 5.8248, 4.8793, 5.6697],
         ...,
         [4.1797, 5.9693, 5.8498,  ..., 5.3984, 4.4180, 5.2848],
         [4.1214, 5.7467, 6.1277,  ..., 5.2335, 5.0430, 4.9260],
         [4.4437, 5.2013, 5.5556,  ..., 5.1002, 4.5187, 5.7192]],

        [[6.0705, 4.4346, 4.0716,  ..., 5.6928, 5.0337, 5.8441],
         [7.0880, 5.3396, 4.4342,  ..., 5.4583, 4.8665, 5.6070],
         [6.7446, 5.8855, 5.3710,  ..., 5.1867, 5.2630, 5.

In [52]:
##Broadcasting
x1 = torch.rand((5,5))
x2 = torch.rand((1,5)) # a single row

z = x1-x2 # x2 is broadcast across the 5 rows of x1
z

tensor([[ 0.2298,  0.0115,  0.2627,  0.1130, -0.5022],
        [-0.2346,  0.0816, -0.4280,  0.5190, -0.4093],
        [-0.0256,  0.6345,  0.1667,  0.1398, -0.0944],
        [-0.1024,  0.7587, -0.3716,  0.4270, -0.3659],
        [ 0.3408, -0.0285, -0.2388, -0.0349,  0.1356]])

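The row vector x2 (shape (1,5)) is broadcast along the first dimension: conceptually it is repeated 5 times to match the number of rows of x1, and the subtraction is then done element wise. No actual copy of the data is made.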

In [57]:
z = x1**x2 ## Element wise exponentiation (x2 is broadcast across the rows of x1)
z

tensor([[0.8260, 0.7316, 0.9210, 0.7639, 0.3574],
        [0.4484, 0.7749, 0.3545, 0.9561, 0.4515],
        [0.6772, 0.9646, 0.8578, 0.7792, 0.7344],
        [0.6142, 0.9921, 0.4177, 0.9185, 0.4932],
        [0.8753, 0.7016, 0.5469, 0.6673, 0.9202]])

In [63]:
# Show x1 so the results of the operations that follow can be checked against it.
x1

tensor([[0.5393, 0.2145, 0.8741, 0.4784, 0.2587],
        [0.0749, 0.2846, 0.1834, 0.8844, 0.3516],
        [0.2838, 0.8375, 0.7781, 0.5051, 0.6665],
        [0.2070, 0.9617, 0.2398, 0.7924, 0.3950],
        [0.6503, 0.1745, 0.3726, 0.3305, 0.8965]])

### Other useful tensor operations

In [64]:
# dim=0 sums over the rows, giving one total per column.
sum_x = torch.sum(x1, dim = 0)
sum_x

tensor([1.7553, 2.4729, 2.4480, 2.9909, 2.5683])

In [65]:
# With dim=1, max returns the largest value of each row together with its column index.
values, indices = torch.max(x1, dim=1)
values, indices

(tensor([0.8741, 0.8844, 0.8375, 0.9617, 0.8965]), tensor([2, 3, 1, 1, 4]))

In [66]:
# With dim=1, min returns the smallest value of each row together with its column index.
values, indices = torch.min(x1, dim=1)
values, indices

(tensor([0.2145, 0.0749, 0.2838, 0.2070, 0.1745]), tensor([1, 0, 0, 0, 1]))

In [68]:
# Element wise absolute value; x1 has no negative entries, so the result equals x1.
abs_x = torch.abs(x1)
abs_x

tensor([[0.5393, 0.2145, 0.8741, 0.4784, 0.2587],
        [0.0749, 0.2846, 0.1834, 0.8844, 0.3516],
        [0.2838, 0.8375, 0.7781, 0.5051, 0.6665],
        [0.2070, 0.9617, 0.2398, 0.7924, 0.3950],
        [0.6503, 0.1745, 0.3726, 0.3305, 0.8965]])

In [69]:
# argmax returns only the column index of each row's maximum (the same indices torch.max returned).
z = torch.argmax(x1, dim=1)
z

tensor([2, 3, 1, 1, 4])

In [73]:
# NOTE: x here is the integer tensor [1, 2, 3] defined earlier, not x1.
# torch.mean needs a floating-point input, hence the .float() conversion.
mean = torch.mean(x.float())
mean

tensor(2.)

In [80]:
# Sort each row (dim=1) in ascending order; the result holds both the sorted values and their original indices.
sort = torch.sort(x1, dim=1)
sort

torch.return_types.sort(
values=tensor([[0.2145, 0.2587, 0.4784, 0.5393, 0.8741],
        [0.0749, 0.1834, 0.2846, 0.3516, 0.8844],
        [0.2838, 0.5051, 0.6665, 0.7781, 0.8375],
        [0.2070, 0.2398, 0.3950, 0.7924, 0.9617],
        [0.1745, 0.3305, 0.3726, 0.6503, 0.8965]]),
indices=tensor([[1, 4, 3, 0, 2],
        [0, 2, 1, 4, 3],
        [0, 3, 4, 2, 1],
        [0, 2, 4, 3, 1],
        [1, 3, 2, 0, 4]]))

In [81]:
# x1 itself is unchanged: torch.sort returned new tensors.
x1

tensor([[0.5393, 0.2145, 0.8741, 0.4784, 0.2587],
        [0.0749, 0.2846, 0.1834, 0.8844, 0.3516],
        [0.2838, 0.8375, 0.7781, 0.5051, 0.6665],
        [0.2070, 0.9617, 0.2398, 0.7924, 0.3950],
        [0.6503, 0.1745, 0.3726, 0.3305, 0.8965]])

NOTE : The sort method returns a (values, indices) tuple, one being the sorted tensor and the other being the index mapping of the sorted tensor's elements to the original tensor. That is, the indices tensor consists of the positions that the elements of the sorted tensor had in the original tensor. The index mapping is along the dimension given by the dim parameter of the sort method.

In [82]:
# Values below 0.5 are raised to 0.5; no upper bound is applied because max is not given.
z = torch.clamp(x1, min=0.5)
z

tensor([[0.5393, 0.5000, 0.8741, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000, 0.8844, 0.5000],
        [0.5000, 0.8375, 0.7781, 0.5051, 0.6665],
        [0.5000, 0.9617, 0.5000, 0.7924, 0.5000],
        [0.6503, 0.5000, 0.5000, 0.5000, 0.8965]])

In [88]:
k = torch.randn((3,4)) # samples from a standard normal distribution, so entries may be negative
print(k)
relu = torch.clamp(k, min=0) # negative entries become 0, the rest are unchanged
print(relu)

tensor([[ 0.0074,  0.6284, -0.2850, -1.6842],
        [-0.5916,  1.2173,  1.1139,  0.7538],
        [-0.2074, -0.1813, -0.7383,  2.5281]])
tensor([[0.0074, 0.6284, 0.0000, 0.0000],
        [0.0000, 1.2173, 1.1139, 0.7538],
        [0.0000, 0.0000, 0.0000, 2.5281]])


NOTE : The clamp function clips values: any number less than min is set to min and any number greater than max is set to max. If we set only min, to 0, then it is equivalent to applying the ReLU activation to the input.

### Indexing in tensors

In [90]:
batch_size, features = 10, 25

# One row per training example, one column per feature
x = torch.rand(batch_size, features)

## Indexing the first dimension selects the first training example of the batch
print(x[0].shape)

torch.Size([25])


In [91]:
## Getting the first feature of every example from the batch (":" keeps all rows, 0 picks the first column)
print(x[:,0].shape)

torch.Size([10])


In [96]:
## Fancy indexing
indices = [2,5,8] # row indices
l = torch.arange(3) # column indices: tensor([0, 1, 2])

print(x[indices,l]) # pairs the indices up element by element: x[2,0], x[5,1], x[8,2]

tensor([0.1178, 0.4106, 0.8176])


NOTE : We can specify the row and column indices as lists (or tensors), but both must be of the same size, since the index mapping happens element by element

Example : indices = [2,5,8] and l = [0,1,2]. So for x[indices, l], we get x[2,0], x[5,1], x[8,2].

If one of them is a plain scalar, it is broadcast against the other indices.

Example : x[9,l] gives x[9,0], x[9,1], x[9,2]

In [98]:
x = torch.rand((3,5))
rows = torch.tensor([1,0])
cols = torch.tensor([4,0])
print(x[rows, cols].shape) # picks x[1,4] and x[0,0], giving a 1-D result with 2 elements

torch.Size([2])


In [104]:
## Advanced indexing
x = torch.arange(10)
y = (x<2) | (x>8) # boolean mask; | is the element wise OR (the parentheses are required because | binds tighter than < and >)
x[y] # keeps only the elements where the mask is True

tensor([0, 1, 9])

Tensors support indexing with a boolean mask. We created the mask with the condition x<2 or x>8, which is evaluated for every element of x and yields a boolean tensor of the same shape as x.

In [105]:
# Show the boolean mask itself: one True/False entry per element of x.
y

tensor([ True,  True, False, False, False, False, False, False, False,  True])

In [106]:
# Keep the even elements (those whose remainder is 0 when divided by 2).
x[x.remainder(2) == 0]

tensor([0, 2, 4, 6, 8])

In [115]:
print(torch.where(x>=5, x, x*2)) ## Element wise choice: x where x>=5, otherwise x*2
print(torch.tensor([0,0,1,2,2,3,4,4]).unique()) ## Distinct values of the tensor
print(x.ndimension(), x1.ndimension()) ##Number of dimensions
print(x.numel()) ## Number of elements
print(x1.numel()) ## Number of elements

tensor([0, 2, 4, 6, 8, 5, 6, 7, 8, 9])
tensor([0, 1, 2, 3, 4])
1 2
10
25


### Reshaping tensors

In [118]:
x = torch.arange(9)

x_3x3 = x.view(3,3) # present the 9 elements as a 3x3 tensor
x_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [119]:
# reshape gives the same 3x3 result here.
y_3x3 = x.reshape(3,3)
y_3x3

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

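NOTE : reshape is the safer choice: view requires the tensor's memory layout to be compatible (for example, contiguous) and raises an error otherwise, whereas reshape falls back to copying the data when a view is not possible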

In [122]:
x1 = torch.rand(2, 5)
x2 = torch.rand(2, 5)

# dim=0 stacks the rows: (2,5) and (2,5) -> (4,5)
print(torch.cat([x1, x2], dim=0).shape)
# dim=1 joins the columns: (2,5) and (2,5) -> (2,10)
print(torch.cat([x1, x2], dim=1).shape)

torch.Size([4, 5])
torch.Size([2, 10])


In [124]:
##Unrolling: -1 lets PyTorch infer the size, so x1 is flattened into a 1-D tensor
z = x1.view(-1)
z

tensor([0.9022, 0.8116, 0.2549, 0.4772, 0.8443, 0.8809, 0.7748, 0.2467, 0.4400,
        0.7825])

In [127]:
batch = 64
s = torch.rand(batch, 2, 5)
print(s.shape)
## Keep the batch dimension and unroll each (2,5) sample into a vector of 10 values
z = s.reshape(batch, -1)
print(z.shape)

torch.Size([64, 2, 5])
torch.Size([64, 10])


In [128]:
#Switching axes: permute reorders the dimensions; (0,2,1) keeps dim 0 and swaps the last two
k = s.permute(0,2,1)
print(k.shape)

torch.Size([64, 5, 2])
