In [1]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Build a 1-D float32 tensor holding the values 0 through 11.
x = torch.arange(0, 12, dtype=torch.float32)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [4]:
# numel() reports the total number of elements stored in the tensor.
x.numel()

12

In [5]:
# shape gives the length along each axis; x has a single axis.
x.shape

torch.Size([12])

In [12]:
# View the 12 elements of x as a matrix with 3 rows and 4 columns.
X = torch.reshape(x, (3, 4))
X

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [13]:
# Tensors filled with a constant are a common starting point. Here we build
# an all-zeros tensor of shape (2, 3, 4) with the zeros function.

torch.zeros(2, 3, 4)


tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [14]:
# Same shape as the zeros example, but every entry is 1.
torch.ones(2, 3, 4)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [15]:
# Draw a 3x4 tensor of random values; the entries change on every run.
torch.randn(size=(3, 4))

tensor([[-1.0449,  0.0485,  0.5062, -0.4122],
        [ 0.5164, -1.0510,  1.5647,  0.0457],
        [ 1.8876,  0.5338,  0.5709,  0.7401]])

In [18]:
# A 3x4 integer matrix built from nested Python lists (one inner list per row).
a = torch.tensor(
    [
        [2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1],
    ]
)
a

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

# Indexing and slicing


In [None]:
# A negative index counts from the end, so this selects the last row of a.
a[-1]

tensor([4, 3, 2, 1])

In [27]:
# Write a single element in place: row index 2, column index 3.
a[2,3] = 18
a

tensor([[ 2,  1,  4,  3],
        [ 1,  2,  3,  4],
        [ 4,  3,  2, 18]])

**For instance, [:2, :] accesses the first and second rows, where : takes all the elements along axis 1 (column). While we discussed indexing for matrices, this also works for vectors and for tensors of more than two dimensions.**

In [28]:
# Rows 0 and 1, every column.
a[:2,:]

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4]])

In [30]:
# Assigning a scalar to a slice fills the whole top-left 2x2 block with it.
a[:2,:2] = 12
a

tensor([[12, 12,  4,  3],
        [12, 12,  3,  4],
        [ 4,  3,  2, 18]])

In [32]:
# Random 4x4 input for the elementwise-operation examples.
z = torch.randn(4, 4)
z

tensor([[ 0.3067,  0.6179,  0.0470, -0.5577],
        [ 1.1591,  0.5375, -0.0422, -1.4697],
        [-0.2401, -0.5157, -0.5271, -0.6760],
        [ 1.6264,  0.4561,  0.7921, -0.6918]])

# Operations

Various mathematical operations can be performed on tensors. Among the most useful of these are the elementwise operations. These apply a standard scalar operation to each element of a tensor. For functions that take two tensors as inputs, elementwise operations apply some standard binary operator on each pair of corresponding elements. We can create an elementwise function from any function that maps from a scalar to a scalar.

The mathematical notation for such unary scalar operators (taking one input) is $f: R \rightarrow R$. This means that the function maps from any real number onto some other real number. Unary functions such as $e^x$ can be applied elementwise.

In [34]:
# Apply e**x to every element of z independently.
torch.exp(z)

tensor([[1.3589, 1.8550, 1.0481, 0.5725],
        [3.1871, 1.7118, 0.9587, 0.2300],
        [0.7865, 0.5971, 0.5903, 0.5087],
        [5.0853, 1.5779, 2.2081, 0.5007]])

Likewise, we denote binary scalar operators, which map pairs of real numbers to a (single) real number, via the signature $f: R, R \rightarrow R$. Given any two vectors $\mathbf{u}$ and $\mathbf{v}$ of the same shape, and a binary operator $f$, we can produce a vector $\mathbf{c} = F(\mathbf{u}, \mathbf{v})$ by setting $c_i \leftarrow f(u_i, v_i)$ for all $i$, where $c_i$, $u_i$, and $v_i$ are the $i^\mathrm{th}$ elements of vectors $\mathbf{c}$, $\mathbf{u}$, and $\mathbf{v}$.

Here, we produced the vector-valued $F: R^d, R^d \rightarrow R^d$ by lifting the scalar function to an elementwise vector operation. The common standard arithmetic operators for addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`), and exponentiation (`**`) have all been lifted to elementwise operations for identically-shaped tensors of arbitrary shape.

In [38]:
# Two equal-length integer vectors for the elementwise arithmetic demo.
m = torch.arange(1, 7)
n = torch.arange(11, 17)

# Each operator is applied position by position.
m + n, m - n, m * n, m / n

(tensor([12, 14, 16, 18, 20, 22]),
 tensor([-10, -10, -10, -10, -10, -10]),
 tensor([11, 24, 39, 56, 75, 96]),
 tensor([0.0909, 0.1667, 0.2308, 0.2857, 0.3333, 0.3750]))

##### We can also concatenate multiple tensors, stacking them end-to-end to form a larger one. We just need to provide a list of tensors and tell the system along which axis to concatenate.

In [48]:
# M: float32 matrix counting 0..11 row by row; N: integer matrix of the same shape.
M = torch.arange(0, 12, dtype=torch.float32).reshape(3, 4)
N = torch.tensor([
    [1, 2, 3, 4],
    [45, 96, 13, 24],
    [4, 3, 2, 1],
])
M, N

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([[ 1,  2,  3,  4],
         [45, 96, 13, 24],
         [ 4,  3,  2,  1]]))

In [49]:
# dim=0 stacks N below M (more rows); dim=1 places N to the right of M (more columns).
torch.cat((M,N),dim=0), torch.cat((M,N),dim=1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 1.,  2.,  3.,  4.],
         [45., 96., 13., 24.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  1.,  2.,  3.,  4.],
         [ 4.,  5.,  6.,  7., 45., 96., 13., 24.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

##### Sometimes, we want to construct a binary tensor via logical statements. Take M == N as an example. For each position i, j, if M[i, j] and N[i, j] are equal, then the corresponding entry in the result is True; otherwise it is False.

In [50]:
# Elementwise comparison: the result is a boolean tensor shaped like M and N.
M==N

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

##### Summing all the elements in the tensor yields a tensor with only one element.



In [52]:
# sum() with no arguments reduces over every element of the tensor.
# NOTE(review): `n` here is the lowercase 1-D tensor defined earlier, not the
# matrix `N` created alongside `M` — confirm that this is intended.
M.sum(),n.sum()

(tensor(66.), tensor(81))

# Broadcasting

Even when two tensors have different shapes, we can still perform elementwise binary operations by invoking the broadcasting mechanism. Broadcasting works according to the following two-step procedure: (i) expand one or both tensors by copying elements along axes with length 1 so that after this transformation, the two tensors have the same shape; (ii) perform an elementwise operation on the resulting tensors.

In [64]:
# b is a 3x1 column and c is a 1x2 row; adding them broadcasts both to 3x2.
b = torch.arange(3).reshape(3, 1)
c = torch.arange(2).reshape(1, 2)
d = (b + c).exp()
b, c, d

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]),
 tensor([[ 1.0000,  2.7183],
         [ 2.7183,  7.3891],
         [ 7.3891, 20.0855]]))

# Saving Memory

Running operations can cause new memory to be allocated to hold results. For example, if we write Y = Y + X, we dereference the tensor that Y used to point to and instead point Y at the newly allocated memory. We can demonstrate this issue with Python’s id() function, which gives us the exact address of the referenced object in memory. Note that after we run Y = Y + X, id(Y) points to a different location. That is because Python first evaluates Y + X, allocating new memory for the result, and then points Y to this new location in memory.

This might be undesirable for two reasons. First, we do not want to run around allocating memory unnecessarily all the time. In machine learning, we often have hundreds of megabytes of parameters and update all of them multiple times per second. Whenever possible, we want to perform these updates in place. Second, we might point at the same parameters from multiple variables. If we do not update in place, we must be careful to update all of these references, lest we spring a memory leak or inadvertently refer to stale parameters.

In [65]:
# Record the identity of the object that d currently names.
before = id(d)
# The right-hand side is evaluated into a new tensor, then d is rebound to it.
d = 2*a+b
# Compare identities: False means d now names a different object than before.
id(d) == before 

False

##### Fortunately, performing in-place operations is easy. We can assign the result of an operation to a previously allocated array "e" by using slice notation: e[:] = **expression**. To illustrate this concept, we first use zeros_like to create a tensor e with the same shape as B, and then overwrite its values in place.

In [72]:
# Two 4x4 integer matrices: A counts from 0, B counts from 1.
A = torch.arange(16).reshape(4, 4)
B = torch.arange(1, 17).reshape(4, 4)
A, B

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11],
         [12, 13, 14, 15]]),
 tensor([[ 1,  2,  3,  4],
         [ 5,  6,  7,  8],
         [ 9, 10, 11, 12],
         [13, 14, 15, 16]]))

In [82]:
# Allocate the destination once, shaped like B, and remember its identity.
e = torch.zeros_like(B)
e_before = id(e)

# Slice assignment writes the sum into the existing tensor object instead of
# rebinding the name e to a new one.
e[:] = A + B

e.size(), e_before, e[:], id(e) == e_before


(torch.Size([4, 4]),
 2539006161488,
 tensor([[ 1,  3,  5,  7],
         [ 9, 11, 13, 15],
         [17, 19, 21, 23],
         [25, 27, 29, 31]]),
 True)

##### If the value of A is not reused in subsequent computations, we can also use A[:] = A + B or A += B to reduce the memory overhead of the operation.

In [85]:
# += updates A in place, so the name A keeps referring to the same object.
A_before = id(A)
A += B
A_after = id(A)
A_before, A, A_after, A_before == A_after

(2539006119920,
 tensor([[ 2,  5,  8, 11],
         [14, 17, 20, 23],
         [26, 29, 32, 35],
         [38, 41, 44, 47]]),
 2539006119920,
 True)

# Conversion to Other Python Objects