Run the following command to install PyTorch:<br>
conda install pytorch torchvision -c pytorch -y

In [1]:
import torch

print(torch.__version__)

0.4.0


#### Creating Tensors

#### A Tensor initialized with a specific array

In [2]:
tensor_array = torch.Tensor([[1,2],[4,5]])
tensor_array

tensor([[ 1.,  2.],
        [ 4.,  5.]])

#### An uninitialized Tensor of shape 3x3 (memory is allocated but not filled)

In [3]:
tensor_uninitialized = torch.Tensor(3, 3)   

#### numel() returns the number of elements in a tensor

In [4]:
torch.numel(tensor_uninitialized)    

9

Creating a Tensor without initialization sometimes leads to a "RuntimeError: Overflow when unpacking long" error when it is displayed.<br>
This is because torch.Tensor(3, 3), like torch.empty, returns uninitialized memory, so it may or may not contain very large values.

In [5]:
tensor_uninitialized

RuntimeError: Overflow when unpacking long

#### A tensor of size 2x3 initialized with random values

In [6]:
tensor_initialized = torch.rand(2, 3)    
tensor_initialized

tensor([[ 0.7362,  0.6354,  0.5489],
        [ 0.3056,  0.2553,  0.5708]])

#### Tensors can be set to have specific data types
Here we create one by casting random normal values to 32-bit integers (the fractional part is dropped, so most entries are 0)

In [3]:
tensor_int = torch.randn(5, 3).type(torch.IntTensor) 
tensor_int

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0, -1],
        [ 0,  0,  0],
        [ 0,  0,  1]], dtype=torch.int32)

#### A Tensor of type Long

In [8]:
tensor_long = torch.LongTensor([1.0, 2.0, 3.0])   
tensor_long

tensor([ 1,  2,  3])

#### A tensor of type Byte
This holds unsigned 8-bit integer values from 0 to 255. Values outside that range wrap around modulo 256 (e.g. 261 becomes 5 and -5 becomes 251)

In [9]:
tensor_byte = torch.ByteTensor([0, 261, 1, -5]) 
tensor_byte

tensor([   0,    5,    1,  251], dtype=torch.uint8)

#### A tensor of size 10 containing all ones

In [10]:
tensor_ones = torch.ones(10)              
tensor_ones

tensor([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

#### A tensor of size 10 containing all zeros

In [11]:
tensor_zeroes = torch.zeros(10)             
tensor_zeroes

tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

#### Create an identity 3x3 tensor

In [12]:
tensor_eye = torch.eye(3)              
tensor_eye

tensor([[ 1.,  0.,  0.],
        [ 0.,  1.,  0.],
        [ 0.,  0.,  1.]])

#### Get the list of indices of non-zero elements in a tensor
Each row of the result is the [i, j] index of one non-zero element

In [13]:
non_zero = torch.nonzero(tensor_eye)
non_zero

tensor([[ 0,  0],
        [ 1,  1],
        [ 2,  2]])

#### Use ones_like() with an existing tensor to create a tensor of ones with the same shape as that tensor
A tensor with same shape as eye. Fill it with 1.

In [14]:
tensor_ones_shape_eye = torch.ones_like(tensor_eye)      
tensor_ones_shape_eye

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]])

## Inplace / Out-of-place
ALL operations that modify a tensor in-place have an "\_" suffix. For example, add is the out-of-place version, and add\_ is the in-place version.

#### .fill_ is an in-place operation and it doesn't have any out-of-place equivalent

In [15]:
initial_tensor = torch.rand(3, 3) 

initial_tensor.fill_(3) 

tensor([[ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]])

In [16]:
initial_tensor.fill(3) 

AttributeError: 'Tensor' object has no attribute 'fill'

#### The add() method does an out-of-place add operation and returns a new tensor
This is assigned to the new_tensor variable

In [17]:
new_tensor = initial_tensor.add(4)
new_tensor

tensor([[ 7.,  7.,  7.],
        [ 7.,  7.,  7.],
        [ 7.,  7.,  7.]])

#### The original tensor is unchanged

In [18]:
initial_tensor

tensor([[ 3.,  3.,  3.],
        [ 3.,  3.,  3.],
        [ 3.,  3.,  3.]])

#### The add\_ method does an in-place add, changing the calling tensor

In [19]:
initial_tensor.add_(5)
initial_tensor

tensor([[ 8.,  8.,  8.],
        [ 8.,  8.,  8.],
        [ 8.,  8.,  8.]])

#### The new_tensor was a separate copy and is unaffected

In [20]:
new_tensor

tensor([[ 7.,  7.,  7.],
        [ 7.,  7.,  7.],
        [ 7.,  7.,  7.]])

## Interoperability between NumPy arrays and PyTorch Tensors

In [21]:
import numpy as np

#### Converting a numpy array to a Tensor

In [22]:
numpy_arr = np.array([1, 2, 3])
numpy_arr

array([1, 2, 3])

In [23]:
tensor = torch.from_numpy(numpy_arr)
tensor

tensor([ 1,  2,  3])

#### Converting tensor to numpy arrays

In [24]:
numpy_from_tensor = tensor.numpy()                   
numpy_from_tensor

array([1, 2, 3])

#### The Numpy arrays and Tensor share the same memory
The tensor and numpy_from_tensor are shallow copies and share the same memory as the original numpy array. Modifying the original array affects the values of both tensor and numpy_from_tensor

In [25]:
numpy_arr[1] = 4
numpy_arr

array([1, 4, 3])

In [26]:
tensor

tensor([ 1,  4,  3])

In [27]:
numpy_from_tensor

array([1, 4, 3])

#### Indexing

In [28]:
initial_tensor = torch.rand(2, 3)    
initial_tensor

tensor([[ 0.1648,  0.8958,  0.7553],
        [ 0.2085,  0.6270,  0.3306]])

#### Selecting individual elements from the tensor
Select the 1st row, 3rd column

In [29]:
initial_tensor[0, 2]

tensor(0.7553)

#### Slicing
Select all rows, and the elements from the 2nd column onwards

In [30]:
initial_tensor[:,1:]

tensor([[ 0.8958,  0.7553],
        [ 0.6270,  0.3306]])

#### Resizing 

The .size() method is used to check the shape of the tensor

In [31]:
initial_tensor.size()

torch.Size([2, 3])

#### .shape returns the shape attribute of the tensor

In [32]:
initial_tensor.shape

torch.Size([2, 3])

#### The view() method
This creates a view of the calling tensor in the shape specified in the arguments. Here, we create a 1-D tensor of shape (6,) from our 2D initial_tensor

In [33]:
resized_tensor = initial_tensor.view(6)                   
resized_tensor.shape

torch.Size([6])

In [34]:
resized_tensor

tensor([ 0.1648,  0.8958,  0.7553,  0.2085,  0.6270,  0.3306])

#### view() does not create a deep copy - just a view as the name suggests
Modifying the original tensor affects the resized_tensor as they both point to the same space in memory

In [35]:
initial_tensor[0, 2] = 0.1111
resized_tensor

tensor([ 0.1648,  0.8958,  0.1111,  0.2085,  0.6270,  0.3306])

#### Convert 2x3 tensor to a 3x2 tensor

In [36]:
resized_tensor = initial_tensor.view(3, 2)                
resized_tensor.shape

torch.Size([3, 2])

In [37]:
resized_tensor

tensor([[ 0.1648,  0.8958],
        [ 0.1111,  0.2085],
        [ 0.6270,  0.3306]])

#### Use of -1 as a view argument
In a situation where you would like one of the dimensions of the tensor to be inferred, the argument for that dimension can be given as -1. Its actual size will be computed from the total number of elements and the sizes of the other dimensions.

This also applies to multi-dimensional tensors though we only use 2D tensors here

In [38]:
resized_matrix = initial_tensor.view(-1, 2)  
resized_matrix.shape

torch.Size([3, 2])

#### The reshaping must contain valid arguments
The sizes passed to view() must multiply out to the number of elements in the tensor (when -1 is used, the product of the remaining sizes must divide that number evenly). Specifying sizes which do not satisfy this will result in an error

In [39]:
resized_matrix = initial_tensor.view(-1, 5)  

RuntimeError: invalid argument 2: size '[-1 x 5]' is invalid for input with 6 elements at /Users/soumith/minicondabuild3/conda-bld/pytorch_1524590658547/work/aten/src/TH/THStorage.c:37

## Sorting tensors
Tensors can be sorted along a specified dimension. If no dimension is specified, the last dimension is picked by default

In [40]:
initial_tensor

tensor([[ 0.1648,  0.8958,  0.1111],
        [ 0.2085,  0.6270,  0.3306]])

#### torch.sort() returns two tensors
* one with the elements of the original tensor in sorted order
* one with the original index of each element in the sorted order

Each row of this tensor is sorted independently, because the sort runs along the last dimension

In [41]:
sorted_tensor, sorted_indices = torch.sort(initial_tensor)

In [42]:
sorted_tensor

tensor([[ 0.1111,  0.1648,  0.8958],
        [ 0.2085,  0.3306,  0.6270]])

In [43]:
sorted_indices

tensor([[ 2,  0,  1],
        [ 0,  2,  1]])

#### Specifying a dimension to sort on
Here, sorting along dim=0 orders the values within each column

In [44]:
sorted_tensor, sorted_indices = torch.sort(initial_tensor, dim=0)
sorted_tensor

tensor([[ 0.1648,  0.6270,  0.1111],
        [ 0.2085,  0.8958,  0.3306]])

In [45]:
sorted_indices

tensor([[ 0,  1,  0],
        [ 1,  0,  1]])

#### torch.topk() returns two tensors
* one with the k largest elements along the given dimension (k being the second argument)
* one with the indices of those elements in the original tensor

In [46]:
topk_tensor, topk_indices = torch.topk(initial_tensor, 2)
topk_tensor

tensor([[ 0.8958,  0.1648],
        [ 0.6270,  0.3306]])

In [47]:
topk_indices

tensor([[ 1,  0],
        [ 1,  2]])

## Math Operations
Pytorch supports a number of mathematical operations which can be performed on tensors. We take a look at a few of them here

#### Float tensors

In [48]:
tensor_float= torch.FloatTensor([-1.1, -2.2, 3.3])
tensor_float

tensor([-1.1000, -2.2000,  3.3000])

#### Absolute values

In [49]:
tensor_abs = torch.abs(tensor_float)      
tensor_abs

tensor([ 1.1000,  2.2000,  3.3000])

#### Addition
Addition of single value tensors

In [50]:
float1 = torch.FloatTensor([2])
float2 = torch.FloatTensor([3])
float1 + float2

tensor([ 5.])

#### Adding array-like tensors
Two tensors of the same shape can be added by summing up the values of elements at the same index locations 

In [60]:
rand1 = torch.abs(torch.randn(2, 3))
rand2 = torch.abs(torch.randn(2, 3))

print(rand1, '\n', rand2)

tensor([[ 0.2675,  1.6448,  0.5585],
        [ 0.2205,  0.6304,  0.6868]]) 
 tensor([[ 0.6505,  0.0043,  0.5749],
        [ 1.4979,  1.3191,  1.5164]])


#### Addition using the + operator

In [61]:
add1 = rand1 + rand2
add1

tensor([[ 0.9180,  1.6491,  1.1334],
        [ 1.7184,  1.9495,  2.2032]])

#### Addition using the add() function

In [62]:
add2 = torch.add(rand1, rand2)
add2

tensor([[ 0.9180,  1.6491,  1.1334],
        [ 1.7184,  1.9495,  2.2032]])

#### In-place addition using add_()

In [63]:
rand1.add_(rand2)
rand1

tensor([[ 0.9180,  1.6491,  1.1334],
        [ 1.7184,  1.9495,  2.2032]])

#### Adding a scalar value to all tensor elements
Using the plus sign

In [64]:
add_scalar_plus = rand1 + 10
add_scalar_plus

tensor([[ 10.9180,  11.6491,  11.1334],
        [ 11.7184,  11.9495,  12.2032]])

Using the add() function

In [65]:
add_scalar = torch.add(rand1, 10)
add_scalar

tensor([[ 10.9180,  11.6491,  11.1334],
        [ 11.7184,  11.9495,  12.2032]])

In [82]:
tensor = torch.Tensor([[-1, 0.3, 2],
                      [-4, 5, -0.4] 
                     ])

#### Element-wise division
The div() and mul() functions can be used to divide and multiply the values in a tensor. Here, we do an element-wise division between two tensors

In [83]:
tensor_div = torch.div(tensor, tensor + 0.3)
tensor_div

tensor([[ 1.4286,  0.5000,  0.8696],
        [ 1.0811,  0.9434,  4.0000]])

#### Element-wise multiplication

In [84]:
tensor_mul = torch.mul(tensor, tensor)
tensor_mul

tensor([[  1.0000,   0.0900,   4.0000],
        [ 16.0000,  25.0000,   0.1600]])

#### Multiplying with a scalar
Both mul() and div() also accept a scalar, in which case every element of the tensor is multiplied or divided by that scalar

In [86]:
tensor_mul = torch.mul(tensor, 10)
tensor_mul

tensor([[-10.,   3.,  20.],
        [-40.,  50.,  -4.]])

#### Clamp the value of a Tensor
There will be occasions where you would like to set upper and lower limits for the values in a tensor. This is where the clamp function is used. The value of an element is set to:
* min if x<sub>i</sub> < min
* x<sub>i</sub> if min ≤ x<sub>i</sub> ≤ max
* max if x<sub>i</sub> > max

In [87]:
tensor_clamp = torch.clamp(tensor, min=-0.5, max=0.5)
tensor_clamp

tensor([[-0.5000,  0.3000,  0.5000],
        [-0.5000,  0.5000, -0.4000]])

#### Transpose with t()

In [88]:
transposed_tensor = tensor.t()
transposed_tensor

tensor([[-1.0000, -4.0000],
        [ 0.3000,  5.0000],
        [ 2.0000, -0.4000]])

## Vector Multiplication 

#### Dot product

In [90]:
t1 = torch.Tensor([4, 2])
t2 = torch.Tensor([3, 1])

In [67]:
dot_product = torch.dot(t1, t2) 
dot_product

tensor(14.)

#### Matrix Vector product
If mat is an (n×m) tensor and vec is a 1-D tensor of size m, the result will be a 1-D tensor of size n.

In [103]:
matrix = torch.Tensor([[2, 5, 3],
                   [4, 1, 0]
                  ])

vector = torch.Tensor([3, 5, 0])

In [104]:
matrix_vector = torch.mv(matrix, vector)
matrix_vector

tensor([ 31.,  17.])

#### Matrix multiplication

In [106]:
another_matrix = torch.Tensor([[1, 3],
                               [2, 0],
                               [0, 5]
                              ])

In [107]:
matrix_mul = torch.mm(matrix, another_matrix)
matrix_mul

tensor([[ 12.,  21.],
        [  6.,  12.]])