In [1]:
import os
import numpy as np
import torch

# PyTorch Basics - Tensors in PyTorch

## Creating Tensors

### Manually

In [2]:
# Build a 2x2 integer tensor from nested Python lists. Every keyword below is
# optional; they are spelled out to show what can be chosen at creation time.
a = torch.tensor(
    [[1, 2], [3, 4]],
    dtype=torch.int64,      # element type
    requires_grad=False,    # do not track gradients for this tensor
    device="cpu",           # where the data is stored
    pin_memory=False,       # do not allocate in pinned (page-locked) memory
)
print(a)

tensor([[1, 2],
        [3, 4]])


### From Numpy

In [3]:
# Draw a 5x5 array of random integers in [0, 10) with NumPy first.
# It is kept under its own name so that `a` always refers to a tensor.
np_values = np.random.randint(low=0, high=10, size=(5, 5))

# `torch.from_numpy` wraps the array without copying: the tensor and the
# array share the same memory, so a write to one is visible through the other.
a = torch.from_numpy(np_values)
print(a)

tensor([[0, 1, 4, 5, 0],
        [7, 7, 5, 3, 2],
        [3, 4, 0, 3, 7],
        [0, 8, 7, 3, 4],
        [4, 9, 9, 2, 1]])


### Generate Random 

In [4]:
# Integers sampled uniformly from [0, 10), arranged as a 5x5 matrix
a = torch.randint(0, 10, (5, 5))
print(f"Random Numbers:\n{a}\n\n")

# Floats sampled from the standard normal distribution, also 5x5
a = torch.randn(5, 5)
print(f"Normal distribution:\n{a}\n")

Random Numbers:
tensor([[8, 8, 8, 6, 6],
        [8, 4, 8, 0, 5],
        [6, 0, 2, 0, 2],
        [2, 1, 5, 4, 8],
        [6, 4, 6, 4, 0]])


Normal distribution:
tensor([[ 0.8410, -0.9266,  0.0904, -1.3038, -0.5289],
        [ 0.1274,  0.1591, -0.8699,  0.1360, -0.3747],
        [ 0.1562, -0.0189,  0.4074, -1.1719,  1.0901],
        [-0.8862, -1.5510,  1.1211,  1.0351, -0.4395],
        [ 1.1263, -0.8310,  1.5366, -0.2800,  1.3096]])



### Specifying the Data Type

In [5]:
# Double precision: 64 bits per element
a = torch.randn(5, 5, dtype=torch.float64)
print(f"Random Float64:\n{a}\n")

# Single precision: 32 bits per element
a = torch.randn(5, 5, dtype=torch.float32)
print(f"Random Float32:\n{a}\n")

Random Float64:
tensor([[ 0.9089, -0.1012,  0.6913, -0.6494,  0.0691],
        [-0.0240,  0.2580,  1.9057,  0.5182,  0.8161],
        [ 1.5821, -0.6852, -0.9144, -0.2886,  1.3857],
        [ 0.4388,  1.3956, -0.1918,  0.2068, -0.2406],
        [ 1.6565, -0.5604, -0.1334, -0.6638,  1.9534]], dtype=torch.float64)

Random Float32:
tensor([[-0.1282, -0.2956, -0.0129,  0.3268, -0.4477],
        [-1.0102, -0.1872,  1.0727,  0.2466,  0.0259],
        [ 0.1439, -1.5302,  0.6920,  0.8864, -0.0798],
        [ 0.3686,  0.0130,  1.4660, -0.2808, -0.5227],
        [-0.2389,  1.3530,  0.7793, -1.1110,  1.2176]])



#### Question

1. What's the difference between float64 and float32?


2. Why would you choose one over the other?

### Generate Zeros/Ones

In [6]:
# A 5x5x5 tensor in which every element is 0
zeros = torch.zeros(5, 5, 5)
print(f"Zeros:\n{zeros}\n\n")

# A 5x5x5 tensor in which every element is 1
ones = torch.ones(5, 5, 5)
print(f"Ones:\n{ones}")

Zeros:
tensor([[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]])


Ones:
tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1.

### Specify a Device

A tensor can live either on the `CPU` (stored in RAM) or on a `GPU` device (stored in the GPU's own memory)

`GPU` devices are denoted as `cuda:0`, where the number following `cuda:` is the index of the GPU when multiple GPU devices are available

The following is boilerplate code for determining which device to use


In [7]:
# Report whether PyTorch can see a CUDA-capable GPU on this machine
print("CUDA/GPU available:", torch.cuda.is_available())

CUDA/GPU available: True


In [8]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Notes
1. Specifying the device as `cuda` is the standard when using a single device; multi-GPU training is usually handled by PyTorch or by other supporting frameworks and packages


2. PyTorch isn't limited to `cpu` or `cuda` as the device; other backends exist as well, such as OpenCL, OpenGL, etc. But keep in mind that PyTorch can be slower on these devices, and they're intended for special cases

In [9]:
# Allocate the random tensor directly on the selected device
a = torch.randn(5, 5, 5, device=device)
print(a)

tensor([[[ 2.3697e-02, -2.4632e-03, -6.8423e-01, -2.9210e-01, -1.4122e+00],
         [ 2.1567e-01, -1.0202e+00,  9.1347e-01, -1.4124e-01, -1.4028e-01],
         [-8.6652e-01, -1.4215e+00,  1.2623e+00,  6.8693e-01,  1.6328e+00],
         [-9.8356e-01,  1.1174e+00,  4.2112e-01, -1.0609e+00,  6.0197e-01],
         [-2.1692e+00,  3.4749e-02,  1.3484e-01,  4.0391e-01,  6.6008e-01]],

        [[-2.2851e-02,  1.4867e+00, -4.7597e-01, -3.8129e-01,  2.0704e-01],
         [ 1.3503e+00,  2.1202e-03, -5.7547e-01, -4.5058e-01, -8.7372e-01],
         [-4.6559e-01, -9.0361e-01,  9.0136e-01, -1.9443e-01, -1.6196e+00],
         [-3.0709e-01,  5.6610e-01, -6.0768e-01,  8.1147e-01, -7.9059e-01],
         [ 5.4832e-01, -1.6052e+00, -1.0650e+00,  1.6160e+00, -1.2189e+00]],

        [[ 1.3759e+00,  1.2999e+00, -4.1149e-01,  1.3409e+00, -2.9787e-01],
         [-8.0635e-01, -1.3923e+00,  7.3275e-01,  9.1437e-01,  1.5791e+00],
         [-1.4199e+00, -1.4356e+00, -7.2890e-01,  2.3176e+00, -8.3148e-01],
        

## Tensor Information

### Ndim and Shape

In [10]:
# Sample 3-D tensor whose metadata is inspected in the following cells
a = torch.randn(5, 5, 5, dtype=torch.float32)

In [11]:
# `ndim` is the number of axes; `shape` is the length of each axis
print(f"ndim:{a.ndim}")
print(f"shape:{a.shape}")

ndmin:3
shape:torch.Size([5, 5, 5])


### Single value tensors

In [12]:
# A zero-dimensional tensor holding a single value
a = torch.tensor(7)

# `.item()` extracts that single value as a plain Python number
print(
    f"Tensor:{a}\tType:{type(a)}",
    f"Value:{a.item()}\tType:{type(a.item())}",
    sep="\n",
)

Tensor:7	Type:<class 'torch.Tensor'>
Value:7	Type:<class 'int'>


### Data Type

In [13]:
# Element type of the tensor; the tensor built from the Python int 7 is int64
a.dtype

torch.int64

### Device

In [14]:
# Device the tensor is stored on; no device was requested, so it is on the CPU
a.device

device(type='cpu')

## Tensor Operations

### Element Wise

In [15]:
# Random integer vector of length 3 with values in [0, 10)
a = torch.randint(0, 10, (3,))
a

tensor([3, 5, 9])

In [16]:
# The scalar is added to every element
a + 5

tensor([ 8, 10, 14])

In [17]:
# Every element is multiplied by the scalar
a * 5

tensor([15, 25, 45])

In [18]:
# True division: the result is floating point even though `a` holds integers
a / 5

tensor([0.6000, 1.0000, 1.8000])

In [19]:
# Second random integer vector of length 3, used for tensor-tensor operations
b = torch.randint(0, 10, (3,))
b

tensor([9, 2, 9])

In [20]:
# Element-wise product of two tensors of the same shape (operator form)
a * b

tensor([27, 10, 81])

In [21]:
# Function form of the element-wise product; gives the same result as `a * b`
torch.mul(a, b)

tensor([27, 10, 81])

In [22]:
# Function form of element-wise division; integer inputs give a float result
torch.div(a, b)

tensor([0.3333, 2.5000, 1.0000])

## Operations

### Vector Operations

In [23]:
# Two fresh random integer vectors of length 3 for the vector operations below
a = torch.randint(0, 10, (3,))
b = torch.randint(0, 10, (3,))

In [24]:
# Dot product of two 1-D tensors; `torch.matmul` performs the same dot product
# when both of its arguments are 1-D, so the two results match.
torch.dot(a, b), torch.matmul(a, b)

(tensor(13), tensor(13))

In [25]:
# Element-wise product (not a dot product): the result keeps the vector shape
torch.mul(a, b), a * b

(tensor([6, 7, 0]), tensor([6, 7, 0]))

### Matrix Operations

You can use this link to visualize matrix multiplication

http://matrixmultiplication.xyz/

In [26]:
# A (5, 3) matrix and a (3, 5) matrix: the inner dimensions match, so they
# can be multiplied together.
a = torch.randint(0, 10, (5, 3))
b = torch.randint(0, 10, (3, 5))

In [27]:
# Matrix product: (5, 3) @ (3, 5) -> (5, 5)
ab = torch.matmul(a, b)
ab.shape

torch.Size([5, 5])

In [28]:
# Stacks of matrices: the leading axis indexes the items of the batch
batch_size = 32
a = torch.randint(0, 10, (batch_size, 5, 3))
b = torch.randint(0, 10, (batch_size, 3, 5))

In [29]:
# Batched matrix product: each (5, 3) matrix in `a` is multiplied by the
# matching (3, 5) matrix in `b`, giving one (5, 5) result per batch item.
ab = torch.bmm(a, b)
ab.shape

torch.Size([32, 5, 5])

In [30]:
# For these 3-D inputs `torch.matmul` produces the same values as `torch.bmm`
torch.all(torch.bmm(a, b) == torch.matmul(a, b))

tensor(True)

### Reshaping

In [31]:
# Number of items in the batch used by the reshaping examples
batch_size = 128

In [32]:
# One flat vector of 25 values per batch item
a = torch.randn((batch_size, 25))

In [33]:
# View each row of 25 values as a 5x5 matrix (25 == 5 * 5)
a.view((batch_size, 5, 5)).shape

torch.Size([128, 5, 5])

In [34]:
# Same target shape, this time through `reshape`
a.reshape((batch_size, 5, 5)).shape

torch.Size([128, 5, 5])

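Note that the new shape must be valid: the number of elements it requires must equal the number of elements in the original tensor (here the tensor has 128 × 25 = 3200 elements, while the shape below would need 128 × 8 × 4 = 4096)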

In [35]:
# 8 * 4 = 32 elements per row would be required, but each row only has 25,
# so this reshape is invalid. The error is the point of this cell: catch it
# and print it so that the notebook still runs from top to bottom.
try:
    a.reshape((batch_size, 8, 4))
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: shape '[128, 8, 4]' is invalid for input of size 3200

`view` and `reshape` are both used to change the shape of a tensor. `view` **doesn't copy the data**: it gives us a specific view of the same underlying tensor. `reshape` also returns a view whenever it can, and only copies the data when a view is not possible

The main difference is that:
- `view` generally cannot be used on *non contiguous* tensors (it raises an error when the requested shape is incompatible with the memory layout)
- `reshape` can be used on both *contiguous* and *non contiguous* tensors: if the tensor is *contiguous* it returns a view, otherwise it may copy the data into a new tensor


*contiguous* means that the elements are stored in memory in the same order in which we read them when traversing the tensor.
You can learn more through this [article](https://medium.com/analytics-vidhya/pytorch-contiguous-vs-non-contiguous-tensor-view-understanding-view-reshape-73e10cdfa0dd)

In [36]:
# 3-D tensor of shape (batch_size, 32, 64) used to compare permute and reshape
a = torch.randn((batch_size, 32, 64))

In [37]:
# Swap the last two axes with `permute`, and request the same target shape
# with `reshape`; only the resulting shapes are compared here.
print(f"Permutation: {a.permute((0, 2, 1)).shape}\nReshaping: {a.reshape((batch_size, 64, 32)).shape}")

Permutation: torch.Size([128, 64, 32])
Reshaping: torch.Size([128, 64, 32])


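Permutation is an operation that reorders the axes of a tensor without changing the length of any axis. Although `reshape` produces the same shape here, it only regroups the elements in their existing order, so the two results generally hold the elements in a different arrangement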

### Changing the Device

In [38]:
# Pick the target device: the GPU when CUDA is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [39]:
# `.to(device)` returns the tensor on the requested device; the name is
# rebound to that result, and `.device` confirms where the tensor now is.
a = a.to(device)
a.device

device(type='cuda', index=0)

## Common Operations

In [40]:
# 5x5 matrix of standard-normal samples used by the examples that follow
a = torch.randn(5, 5)
a

tensor([[-1.4805,  1.3048,  0.8085, -0.2649, -0.2437],
        [ 0.5492,  0.2483,  0.1300, -0.8308, -1.8253],
        [-2.3664, -0.3820, -0.6254, -0.1817,  0.0952],
        [ 0.9206, -0.0480,  0.8068,  0.8988,  0.7282],
        [-0.6080, -0.5726, -1.0910,  0.0517, -0.3867]])

In [41]:
# Element-wise absolute value, returned as a new tensor
abs_a = a.abs()
abs_a

tensor([[1.4805, 1.3048, 0.8085, 0.2649, 0.2437],
        [0.5492, 0.2483, 0.1300, 0.8308, 1.8253],
        [2.3664, 0.3820, 0.6254, 0.1817, 0.0952],
        [0.9206, 0.0480, 0.8068, 0.8988, 0.7282],
        [0.6080, 0.5726, 1.0910, 0.0517, 0.3867]])

In [42]:
# Element-wise cosine, returned as a new tensor
cos_a = a.cos()
cos_a

tensor([[ 0.0902,  0.2629,  0.6906,  0.9651,  0.9705],
        [ 0.8529,  0.9693,  0.9916,  0.6743, -0.2518],
        [-0.7143,  0.9279,  0.8107,  0.9835,  0.9955],
        [ 0.6053,  0.9988,  0.6918,  0.6226,  0.7464],
        [ 0.8208,  0.8405,  0.4616,  0.9987,  0.9261]])

### min, max, argmin, argmax

In [43]:
# With no `dim`, both reduce over the whole tensor: the smallest value and
# its position in the flattened tensor.
a.min(), a.argmin()

(tensor(-2.3664), tensor(10))

`a.min()`: returns the minimum value found in all of the matrix

`a.argmin()`: returns the index of the minimum value in the flattened tensor (here index 10 of the 5×5 matrix corresponds to row 2, column 0)

In [44]:
# Reducing along dim 0 gives one minimum per column; `min` returns both the
# values and their row indices, while `argmin` returns only the indices.
print(a.min(dim=0), a.argmin(dim=0), sep='\n\n')

torch.return_types.min(
values=tensor([-2.3664, -0.5726, -1.0910, -0.8308, -1.8253]),
indices=tensor([2, 4, 4, 1, 1]))

tensor([2, 4, 4, 1, 1])


`a.min(dim=0)`: returns two items:
- The first is the minimum value found along the specified dimension. Since we specified `dim=0` (the row axis), the reduction runs down the rows, so we get the minimum value of every column; five columns yield five minimum values
- The second item is the row index at which each of these minimum values is found

`a.argmin(dim=0)`: returns the indices of the minimum values along the specified dimension, identical to the second item returned from `a.min(dim=0)`

#### Note
`argmin` and `argmax` are specifically helpful in functions like cross entropy, or finding the maximum probability from the output of a neural network

You can view a detailed [list of operations](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.mH)

### In-place Operations

Adding an underscore (`_`) after an operation's name makes the operation in-place, meaning it will change the tensor itself instead of returning a new copy as before

In [45]:
# `abs()` returns a new tensor and leaves `a` unchanged, so the two differ
# wherever `a` holds a negative value.
abs_a = a.abs()
(a == abs_a).all()

tensor(False)

In [46]:
# `abs_()` overwrites `a` itself, so it now equals the copy computed earlier
a.abs_()
(a == abs_a).all()

tensor(True)