In [1]:
import torch
import numpy as np

# Tensors

- Tensors are building blocks of any deep learning network.
- They are used to represent all the different types of data be it images, sound, text data, etc.
- Tensors are **order N-matrix**

**For example:**
* If N=1, tensor will basically be a *vector* (1-D matrix)
* If N=2, tensor will basically be a *matrix* (2-D matrix)

**Why Tensors and not Numpy arrays?**
- NumPy only supports CPU computation
- Tensor class supports automatic differentiation


**Let's start by importing PyTorch library and understand some of the basic functions on tensors.**


# Scalar

- rank-0 tensor



In [2]:
t = torch.tensor(1.)
print(t)
print('\n size:',t.shape, sep="\n")
print()
print('\n number of dimensions:', t.dim(), sep='\n')
print('\n Data Type:', t.dtype, sep='\n')

tensor(1.)

 size:
torch.Size([])


 number of dimensions:
0

 Data Type:
torch.float32


# Vector
- rank-1 tensor

In [3]:
t = torch.tensor([1,2])
print(t)
print()
print("\n size", t.shape, sep='\n')
print()
print('\n Number of dimensions:', t.dim(), sep='\n')
print()
print('\n data type:', t.dtype, sep='\n')

tensor([1, 2])


 size
torch.Size([2])


 Number of dimensions:
1


 data type:
torch.int64


## <font color = 'pickle'>**Matrix**
- rank 2 tensor

Matrices are 2-d arrays with size `n x m`. Here, n: number of rows and m: number of columns.

If `m = n`, then the matrix is known as a `square matrix`.

Precisely, matrices can be represented as:
$$\mathbf{X}=\begin{bmatrix} x_{11} & x_{12} & \cdots & x_{1n} \\ x_{21} & x_{22} & \cdots & x_{2n} \\ \vdots & \vdots & \ddots & \vdots \\ x_{m1} & x_{m2} & \cdots & x_{mn} \\ \end{bmatrix}$$
<br>



In [4]:
t = torch.tensor([
    [1., 2, 3],
    [4,5,6],
    [7,8,9]
])

print(t)
print()
print("\n size", t.shape, sep='\n')
print()
print('\n Number of dimensions:', t.dim(), sep='\n')
print()
print('\n data type:', t.dtype, sep='\n')

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


 size
torch.Size([3, 3])


 Number of dimensions:
2


 data type:
torch.float32


# Higher Order Tensors
- rank-3 tensor

In [5]:
t = torch.tensor([
    [[1,2], [3,4]],
    [[5,6],[7,8]],
    [[9,10],[11,12]]
])

print(t)
print()
print("\n size", t.shape, sep='\n')
print()
print('\n Number of dimensions:', t.dim(), sep='\n')
print()
print('\n data type:', t.dtype, sep='\n')

tensor([[[ 1,  2],
         [ 3,  4]],

        [[ 5,  6],
         [ 7,  8]],

        [[ 9, 10],
         [11, 12]]])


 size
torch.Size([3, 2, 2])


 Number of dimensions:
3


 data type:
torch.int64


In [6]:
# Python List
scalar = 4
type(scalar)

int

In [7]:
my_list = [[1,2],[3,4]]
type(my_list)

list

In [8]:
my_tensor = torch.tensor([[1,2],[3,4]])
print(type(my_tensor))
print(t)
print()
print('\n Data type:', my_tensor.dtype, sep='\n')

<class 'torch.Tensor'>
tensor([[[ 1,  2],
         [ 3,  4]],

        [[ 5,  6],
         [ 7,  8]],

        [[ 9, 10],
         [11, 12]]])


 Data type:
torch.int64


In [9]:
my_tensor = torch.tensor([[1.,2],[3,4]])
print(type(my_tensor))
print(t)
print()
print('\n Data type:', my_tensor.dtype, sep='\n')

<class 'torch.Tensor'>
tensor([[[ 1,  2],
         [ 3,  4]],

        [[ 5,  6],
         [ 7,  8]],

        [[ 9, 10],
         [11, 12]]])


 Data type:
torch.float32


# <font color = 'pickle'>**Difference between list and Array/tensor**</font>

| <font size =5> Python List                       | <font size =5>Tensor/Array                     |
|-----------------------------------|----------------------------------|
| <font size =5>Mixed types allowed               | <font size =5>Same type required               |
|<font size =5> Elements can be added or removed  | <font size =5>Elements cannot be added or removed               
| <font size =5>Basic Python operations           | <font size =5>Supports mathematical operations                
|<font size =5>Numerical Computtaions are slow    |<font size =5>Numerical Computtaions are fast



# Conversion to other Python Objects

In [10]:
# Initializing a tensor
t = torch.arange(10)

print("Initial Tensor:",t)
print()

# Converting tensor t to numpy array using numpy() method
arr = t.numpy()
print("Converting Tensor into a Numpy Array:", arr)

# Converting numpy array to tensor T using tensor() method
T = torch.tensor(arr)
print("Converting Numpy array into a Tensor", T)

# Print datatype of arr and T
print()
print(f"Dtype of arr: {arr.dtype}")
print(f"Dtype of T: {T.dtype}")

Initial Tensor: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Converting Tensor into a Numpy Array: [0 1 2 3 4 5 6 7 8 9]
Converting Numpy array into a Tensor tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

Dtype of arr: int64
Dtype of T: torch.int64


**Note:** We can also use `torch.from_numpy()` and `torch.as_tensor()` to convert numpy array to PyTorch tensor. However with these methods, the PyTorch tensor and the source NumPy array share the same memory. This means that changes to one affect the other. But, the `torch.tensor()` function always makes a copy.

In [11]:
my_ndarray = np.arange(10)
t_from_numpy = torch.from_numpy(my_ndarray)
t_as_tensor = torch.as_tensor(my_ndarray)
t_Tensor = torch.tensor(my_ndarray)

print(f"tensor craeted using torch.from_numpy before changing np array: {t_from_numpy}")
print(f"tensor craeted using torch.as_tensor before changing np array : {t_as_tensor}")
print(f"tensor craeted using torch.tensor before changing np array    : {t_Tensor}")

# change numpy array
my_ndarray[2] = 1000

print()
print(f"tensor craeted using torch.from_numpy after changing np array: {t_from_numpy}")
print(f"tensor craeted using torch.as_tensor after changing np array : {t_as_tensor}")
print(f"tensor craeted using torch.tensor after changing np array    : {t_Tensor}")

tensor craeted using torch.from_numpy before changing np array: tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor craeted using torch.as_tensor before changing np array : tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
tensor craeted using torch.tensor before changing np array    : tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

tensor craeted using torch.from_numpy after changing np array: tensor([   0,    1, 1000,    3,    4,    5,    6,    7,    8,    9])
tensor craeted using torch.as_tensor after changing np array : tensor([   0,    1, 1000,    3,    4,    5,    6,    7,    8,    9])
tensor craeted using torch.tensor after changing np array    : tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


In [12]:
# Initializing a size-1 tensor
t = torch.tensor([10.5])

# Printing tensor
print(t)

# Accessing element of tensor using item function
# 'items' returns the value of the tensor as python number
# 'works only for tensors with single element'

print(t.item())

tensor([10.5000])
10.5


In [13]:
# We can also convert the tensor to python list
t = torch.tensor([10,2])
print(t)
print()
print("Tensor converted into Python List..", t.tolist())
print(type(t.tolist()))

tensor([10,  2])

Tensor converted into Python List.. [10, 2]
<class 'list'>


# Changing the shape of Tensors

In [14]:
t = torch.arange(10)
print(t)
print('\n size/shape of tensor:', t.shape, sep='\n')


tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

 size/shape of tensor:
torch.Size([10])


In [15]:
t = t.view(5,2)
print(t)
print()
print('\n size:', t.shape, sep='\n')

tensor([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]])


 size:
torch.Size([5, 2])


In [16]:
t = t.view(-1,5)
print(t)
print()
print('\n size:', t.shape, sep='\n')

tensor([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]])


 size:
torch.Size([2, 5])


# Changing the datatype of Tensors

When creating tensor we can pass the dtype as an argument. We can also change the datatype of tensors using to() and type() methods. For a list of dtypes visit https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.dtype

In [17]:
x = torch.tensor([8,9,-3], dtype = torch.int)
print(f"Current tensor: {x}")
print()

# We can use type() method or to() method to change the datatype
print(f"Old: {x.dtype}")

# change the datatype to int64 using type() method
x = x.type(dtype=torch.int64)
print(f"New (using type() method): {x.dtype}")

# change the datatype to int32 using to() method
x = x.to(dtype = torch.int32)
print(f"Newer (using to()method): {x.dtype}")

Current tensor: tensor([ 8,  9, -3], dtype=torch.int32)

Old: torch.int32
New (using type() method): torch.int64
Newer (using to()method): torch.int32


# Saving Memory - inplace operations

*In-place operator are operations that change the content of a given Tensor without making a copy.*

- Operations that have `a_` suffix are in-place. For example: `.add_()`. Operations like += or *= are also in-place operations.

We can also perform in-place operation using the notation `Z[:] = <expression>`

As in-place operations do not make a copy, they can save memory. However, we need to use them carefully. They can be **problematic when computing derivatives** because of an immediate loss of history.



In [18]:
a = torch.tensor(10)
print(a)
print(id(a))

a+=1
print(a)
print(id(a))
a=a+1
print(a)
print(id(a))

tensor(10)
139629107648112
tensor(11)
139629107648112
tensor(12)
139629107648912


In [19]:
b = torch.tensor(10)
print(b)
print(id(b))
b.add_(1)
print(b)
print(id(b))
b = b.add(1)
print(b)
print(id(b))

tensor(10)
139629107650112
tensor(11)
139629107650112
tensor(12)
139629107650272


# Checking GPU

In [20]:
# Check if GPU is available

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(device)

cuda:0


In [21]:
# create a tensor

X = torch.tensor([1,2,3,4])
X

tensor([1, 2, 3, 4])

In [22]:
# check the device attribute of the tensor
X.device

device(type='cpu')

In [23]:
# move device attribute to GPU
X.to(device=device)

tensor([1, 2, 3, 4], device='cuda:0')

In [24]:
# it is more efficient to create the tensor on GPU directly
Y = torch.tensor([1,2,3,4], device=device)
Y

tensor([1, 2, 3, 4], device='cuda:0')

In [25]:
# check the device attribute of the tensor
Y.device

device(type='cuda', index=0)

## Memory allocation of in-place operations

In [26]:
# create tensor
t1 = torch.randn(10000, 10000, device = "cpu") #10000, 10000: the tensor will have a shape of 10000x10000, meaning it will be a 2D tensor with 10000 rows and 10000 columns
print(f" t1: {t1}")
print(f" t1 length: {len(t1)}")
print(f" t1 shape: {t1.shape}")
print()

# move tensor to GPU
t1 = t1.to(device)
print(t1.device)

# We can use id() function to get memory location of tensor
print(f"initial memory location of tensor t1 is  {id(t1)}")

x=t1
print(f"initial memory location of x is : {id(x)}")

# Waits for everything to finish running
torch.cuda.synchronize()

# initial memory allocated
start_memory = torch.cuda.memory_allocated()

# inplace operation
t1+=0.1
t1.add_(0.1)

# since the operation was inplace when we update t1 it will update x as well
print(x==t1)

print(f" Final memory location of tensor t1 is: {id(t1)}")
print(f" final location of x is :{id(x)}")

# Total memory allocated after function call
end_memory = torch.cuda.memory_allocated()

print(f"Total memory allocated: {end_memory - start_memory} bytes")

# Memory allocated because of function call
memory_allocated = end_memory - start_memory
print(memory_allocated / 1024**2)

 t1: tensor([[ 1.2293,  1.1145, -0.1093,  ...,  1.2478,  2.0459,  0.9633],
        [ 0.4214,  0.4207, -0.5832,  ..., -0.1143, -0.9691,  1.3124],
        [-1.5134,  1.9943, -0.6634,  ..., -0.7071,  1.1182, -0.2489],
        ...,
        [-1.1518, -0.5882, -0.5358,  ...,  0.6244, -0.5343,  1.0199],
        [ 0.6029,  0.3120,  0.1966,  ..., -0.8433,  0.2969,  0.1788],
        [-1.3558, -0.7624, -0.7661,  ...,  0.2257,  0.5125, -0.8336]])
 t1 length: 10000
 t1 shape: torch.Size([10000, 10000])

cuda:0
initial memory location of tensor t1 is  139629107591136
initial memory location of x is : 139629107591136
tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]], device='cuda:0')
 Final memory location of tensor t1 is: 1396291

- From the above example we can see that both x and t1 has same memory location.
- when we use in-place operation on t1,it also updates x

## Memory allocation for out-of-place operations

In [27]:
# create tensor
t2 = torch.randn(10000, 10000, device = "cpu")

# move tensor to gpu
t2 = t2.to(device=device)
print(t2.device)

# We can use id() function to get memory location of tensor
print(f"initial memory location of tensor t2 {id(t2)}")

y=t2
print(f"final memory location of y is: {id(y)}")

# Waits for everything to finish running
torch.cuda.synchronize()

# Initial memory allocated
start_memory = torch.cuda.memory_allocated()

# out-of-place operations
t2= t2+0.1

# since the operations was not inplace when we update t2, it will not update y
print(y==t2)

# We can use id() function to get memory location of tensor
print(f"Final memory location of tensor t2 is: {id(t2)}")
print(f"Final memory location of tensor y is: {id(y)}")

# Total memory allocated after function call
end_memory = torch.cuda.memory_allocated()

# Memory allocated because of function call
memory_allocated = end_memory - start_memory
print(memory_allocated / 1024**2)

cuda:0
initial memory location of tensor t2 139629107652832
final memory location of y is: 139629107652832
tensor([[False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]], device='cuda:0')
Final memory location of tensor t2 is: 139629107652352
Final memory location of tensor y is: 139629107652832
382.0


**Observations from `inplace` and 'out_place' operators**
- From the above example we can see that initially both y and t2 has same memory location. After running t2+=0.2, we found that id(t2) points to a different location. This is because Python first evaluates t2+0.2, then allocates new memory for the results.

- Since, we have not done in-place operations, updating t2 does not affect y. **y** still points to the same memory location.

## Linear Algebra

### Dot Product

- *Dot product of 2 vectors x and y is given by the summation of product of elements at the same position*

For example:
If we have 2 vectors X: [1,2,3,4] and Y:[1,1,2,1]. Then, X.Y will be 1*1 + 2*1 + 3*2 + 4*1 = 13