In [1]:
import torch

In [2]:
print(torch.__version__)

2.9.0+cpu


In [3]:
if torch.cuda.is_available():
  print("GPU is available!")
  print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
  print("GPU is not available. Using CPU!")

GPU is not available. Using CPU!


# **Creating a Tensor**

In [5]:
#using empty
a = torch.empty(2,3)  #This will allocate ((2,3)(matrix)) size memory. Here, the value you can see this is already
a                     # existed value in the allocated memory, empty method doesn't initialize any value.

tensor([[1.5732e-17, 0.0000e+00, 0.0000e+00],
        [1.7111e-05, 0.0000e+00, 1.4301e-02]])

In [6]:
#check type
type(a) # to check data type. Usually torch create tensor type data.

torch.Tensor

In [7]:
#using zeros
torch.zeros(2,3) # This will create a (2,3) size matrix with all elements initialize value zero.

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [8]:
#using ones
torch.ones(2,3) # This will create a (2,3) size matrix with all elements initialize value one.

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [9]:
#using rand
torch.rand(2,3) # This will create a (2,3) size matrix where all elements initialize with random values range between 0 to 1.

tensor([[0.5936, 0.4397, 0.7343],
        [0.4403, 0.0510, 0.7561]])

In [11]:
# manual seed
torch.manual_seed(100) # manual seed allow to reproduce the same random values again again while kept the same seed value, here 100.
torch.rand(2,3)        # or you can say for each seed value there is a fixed random value, so by keeping the same seed value you will get the same rand value .

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [12]:
# using tensor
torch.tensor([[1,2,3],[4,5,6]]) # allow you to create custom tensor that you put.

tensor([[1, 2, 3],
        [4, 5, 6]])

In [14]:
# other ways
# arange
print("using arange ->", torch.arange(0,10,2)) # This will create a tensor from 0 to 9 with step 2. [0,2,4,6,8]

#using linspace -> linearly spaced
print("using linspace ->", torch.linspace(0,10,10)) # This will create a tensor of 10 evenly spaced values from 0 to 10.

#using eye -> identity matrix
print("using eye ->", torch.eye(5)) # this will create 5x5 indentity matrix (diagonal values 1 and other values 0)

#using full
print("using full ->", torch.full((3,3),5)) # this will create a 3x3 matrix with all elements value 5.

using arange -> tensor([0, 2, 4, 6, 8])
using linspace -> tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
using eye -> tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
using full -> tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])


# **Tensor Shapes**

In [3]:
x = torch.tensor([[1,2,3],[4,5,6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [4]:
x.shape

torch.Size([2, 3])

Creating new tensor of the shape of tensor"x"

In [5]:
torch.empty_like(x)

tensor([[    139184676339088,           882007568,                   0],
        [                  0,                   0, 7310593858020254331]])

In [6]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [7]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

In [12]:
torch.rand_like(x, dtype=torch.float32)

tensor([[0.6910, 0.0536, 0.8456],
        [0.4565, 0.2430, 0.8212]])

# **Tensor Data Types**

In [8]:
# find data type
x.dtype

torch.int64

In [9]:
# assign data type
torch.tensor([1.0,2.0,3.0], dtype=torch.int32)

tensor([1, 2, 3], dtype=torch.int32)

In [10]:
torch.tensor([1,2,3], dtype=torch.float32)

tensor([1., 2., 3.])

In [11]:
#using to()
x.to(torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


# **Mathematical Operation**

1. Scalar Operation

In [2]:
x = torch.rand(2,2)
x

tensor([[0.3614, 0.2499],
        [0.0580, 0.3294]])

In [3]:
# like numpy tensor also do broadcasting while a mathematical operation is done with a scalar.
# addition
print("Addition : ", x+2)

# substraction
print("Substraction : ", x-2)

# multiplication
print("Multiplication : ", x*3)

# division
print("Division : ", x/3)

# int division
print("integer division : ", (x*100) //3)

# mod
print("Mod : ", ((x*100) //3)%5)

# power
print("Power : ", x**2)

Addition :  tensor([[2.3614, 2.2499],
        [2.0580, 2.3294]])
Substraction :  tensor([[-1.6386, -1.7501],
        [-1.9420, -1.6706]])
Multiplication :  tensor([[1.0841, 0.7497],
        [0.1739, 0.9883]])
Division :  tensor([[0.1205, 0.0833],
        [0.0193, 0.1098]])
integer division :  tensor([[12.,  8.],
        [ 1., 10.]])
Mod :  tensor([[2., 3.],
        [1., 0.]])
Power :  tensor([[0.1306, 0.0624],
        [0.0034, 0.1085]])


## **Element wise Operation**

In [5]:
a = torch.rand(2,3)
b = torch.rand(2,3)

print(a)
print(b)

tensor([[0.2026, 0.5897, 0.8430],
        [0.9282, 0.2288, 0.6748]])
tensor([[0.2013, 0.8299, 0.6208],
        [0.6216, 0.0426, 0.5848]])


In [6]:
# element wise(element by element) operation occurs
# add
print(a+b)

#substract
print(a-b)

#multiplication
print(a*b)

#division
print(a/b)

#power
print(a ** b)

#mod
print(a%b)

tensor([[0.4039, 1.4196, 1.4638],
        [1.5499, 0.2714, 1.2596]])
tensor([[ 0.0014, -0.2402,  0.2222],
        [ 0.3066,  0.1862,  0.0900]])
tensor([[0.0408, 0.4894, 0.5233],
        [0.5770, 0.0097, 0.3946]])
tensor([[1.0068, 0.7106, 1.3579],
        [1.4932, 5.3684, 1.1540]])
tensor([[0.7252, 0.6451, 0.8994],
        [0.9548, 0.9391, 0.7945]])
tensor([[0.0014, 0.5897, 0.2222],
        [0.3066, 0.0157, 0.0900]])


In [8]:
c = torch.tensor([1, -2, -3, 4])
c

tensor([ 1, -2, -3,  4])

In [9]:
# abs (absolute)
print(torch.abs(c))

tensor([1, 2, 3, 4])


In [10]:
# neg (negative -> opposite the value, means positive becomes negative and negative becomes positive)
print(torch.neg(c))

tensor([-1,  2,  3, -4])


In [12]:
d = torch.tensor([1.9, 2.4, 3.5, 4.7])
d

tensor([1.9000, 2.4000, 3.5000, 4.7000])

In [17]:
# round
torch.round(d)

tensor([2., 2., 4., 5.])

In [18]:
# ceil
torch.ceil(d)

tensor([2., 3., 4., 5.])

In [19]:
# floor
torch.floor(d)

tensor([1., 2., 3., 4.])

In [20]:
# clamp
torch.clamp(d, min=2, max=3) # this will the elements of d in between 2 to 3. (less than 2 becomes 2 and greater than 3 becomes 3)

tensor([2.0000, 2.4000, 3.0000, 3.0000])

# **Reduction Operation**

In [39]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
e

tensor([[1., 6., 0.],
        [8., 5., 4.]])

In [38]:
# sum
print(torch.sum(e)) # will calculate total sum of the matrix e.

# sum along columns
print(torch.sum(e, dim=0)) # will calculate sum of each columns

#  sum along rows
print(torch.sum(e, dim=1)) # will calculate sum of each rows

tensor(15.)
tensor([8., 4., 3.])
tensor([14.,  1.])


In [28]:
# mean
print(torch.mean(e)) # will calculate mean of the matrix e

# mean along with columns
print(torch.mean(e, dim=0)) # will calculate mean of each columns

#mean along with rows
print(torch.mean(e, dim=1)) # will calculate mean of each rows

tensor(4.1667)
tensor([2.0000, 5.0000, 5.5000])
tensor([4.0000, 4.3333])


In [29]:
# max and min
print(torch.max(e))
print(torch.min(e)) # can be done along with columns or rows by using dim=0 or 1.

tensor(9.)
tensor(1.)


In [30]:
# product
torch.prod(e) # calculate product of the matrix (multiplication)

tensor(648.)

In [31]:
# standard deviation
print(torch.std(e))

tensor(3.7639)


In [32]:
# variance
torch.var(e)

tensor(14.1667)

In [40]:
#argmanx
print(torch.argmax(e)) # will return the max element index(position)

tensor(3)


In [41]:
#argmin
print(torch.argmin(e)) # will return the min element index(position)

tensor(2)


# **Matrix Operation**

In [44]:
f = torch.randint(size=(2,3), low=0, high=10)
g = torch.randint(size=(3,2), low=0, high=10)

print(f)
print(g)

tensor([[9, 7, 0],
        [7, 6, 7]])
tensor([[0, 6],
        [1, 2],
        [6, 0]])


In [47]:
# matrix multiplication
print(torch.matmul(f,g))

tensor([[ 7, 68],
        [48, 54]])


In [48]:
vector1 = torch.tensor([1,2])
vector2 = torch.tensor([3,4])

#dot product
print(torch.dot(vector1, vector2)) #do multiplication between two vectors of same size(1D matrix)(1*3+2*4)

tensor(11)


In [49]:
# transpose
print(f)
print(torch.transpose(f, 0, 1)) # here 0 and 1 are dimention(dim), so 0, 1 means asked to transpose column into row

tensor([[9, 7, 0],
        [7, 6, 7]])
tensor([[9, 7],
        [7, 6],
        [0, 7]])


In [50]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

tensor([[9., 4., 0.],
        [3., 8., 1.],
        [5., 5., 0.]])

In [51]:
# determinant
torch.det(h)

tensor(-25.)

In [52]:
# inverse
torch.inverse(h)

tensor([[ 0.2000,  0.0000, -0.1600],
        [-0.2000,  0.0000,  0.3600],
        [ 1.0000,  1.0000, -2.4000]])

# **Comparison Operations**

In [53]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

tensor([[1, 9, 4],
        [5, 8, 8]])
tensor([[8, 3, 8],
        [5, 6, 3]])


In [54]:
# greater than
print(i>j)

# less than
print(i<j)

# equal to
print(i == j)

# not equal to
print(i != j)

# greater than equal to
print(i>=j)

# less than equal to
print(i<=j)

tensor([[False,  True, False],
        [False,  True,  True]])
tensor([[ True, False,  True],
        [False, False, False]])
tensor([[False, False, False],
        [ True, False, False]])
tensor([[ True,  True,  True],
        [False,  True,  True]])
tensor([[False,  True, False],
        [ True,  True,  True]])
tensor([[ True, False,  True],
        [ True, False, False]])


# **Special Operations**

In [61]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
print(k)

tensor([[4., 3., 3.],
        [8., 4., 6.]])


In [56]:
# log
torch.log(k)

tensor([[1.9459, 1.3863, 2.1972],
        [1.3863, 0.0000, 1.9459]])

In [57]:
# exp
torch.exp(k)

tensor([[1.0966e+03, 5.4598e+01, 8.1031e+03],
        [5.4598e+01, 2.7183e+00, 1.0966e+03]])

In [58]:
# sqrt
torch.sqrt(k)

tensor([[2.6458, 2.0000, 3.0000],
        [2.0000, 1.0000, 2.6458]])

In [59]:
# sigmoid
torch.sigmoid(k)

tensor([[0.9991, 0.9820, 0.9999],
        [0.9820, 0.7311, 0.9991]])

In [62]:
# softmax
torch.softmax(k, dim=0)

tensor([[0.0180, 0.2689, 0.0474],
        [0.9820, 0.7311, 0.9526]])

In [63]:
# relu
torch.relu(k)

tensor([[4., 3., 3.],
        [8., 4., 6.]])

# **Inplace Operations**

In [2]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.3302, 0.7940, 0.1624],
        [0.9480, 0.1212, 0.3660]])
tensor([[0.5641, 0.1058, 0.4826],
        [0.3479, 0.3411, 0.4181]])


In [3]:
m + n # add m and n, create new tensor with m + n

tensor([[0.8942, 0.8998, 0.6450],
        [1.2959, 0.4622, 0.7841]])

In [4]:
m.add_(n) # add m and n, inplace m + n value into the tensor m

tensor([[0.8942, 0.8998, 0.6450],
        [1.2959, 0.4622, 0.7841]])

In [5]:
m.neg_()

tensor([[-0.8942, -0.8998, -0.6450],
        [-1.2959, -0.4622, -0.7841]])

In [6]:
m.relu_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [7]:
m

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [None]:
# so any operation name with "underscore _ " at the end means it will inplace after the end of the operation.

# **Copying a Tensor**

In [8]:
a = torch.rand(2,3)
a

tensor([[0.3632, 0.6797, 0.8484],
        [0.2224, 0.5467, 0.3152]])

In [9]:
b = a.clone()

In [10]:
a

tensor([[0.3632, 0.6797, 0.8484],
        [0.2224, 0.5467, 0.3152]])

In [11]:
b

tensor([[0.3632, 0.6797, 0.8484],
        [0.2224, 0.5467, 0.3152]])

# **Tensor Operation on GPU**

In [2]:
torch.cuda.is_available()

True

In [3]:
device = torch.device('cuda') # used to store gpu device in a variable which is here "device"

In [4]:
# creating new tensor on GPU
torch.rand(size=(2,3), device=device)

tensor([[0.2686, 0.9324, 0.6145],
        [0.3798, 0.2300, 0.4673]], device='cuda:0')

In [8]:
# moving an existing tensor to GPU
a = torch.rand(2,3)
print(a) # this tensor "a" is created on CPU

b = a.to(device=device) # moving tensor "a" to the GPU tensor "b"
print(b)

tensor([[0.9990, 0.5603, 0.5210],
        [0.5666, 0.2272, 0.2989]])


In [9]:
# after moving to GPU any operation with that tensor will happen in the GPU
b + 5

tensor([[5.9990, 5.5603, 5.5210],
        [5.5666, 5.2272, 5.2989]], device='cuda:0')

Checking operation runtime CPU vs GPU

In [10]:
import time

# Define size of the matrices
size = 10000 #Large size for perfomance comparison

# create random matrices on CPU
matrix_cpu1 = torch.randn(size, size)
matrix_cpu2 = torch.randn(size, size)

# measure time on cpu
start_time = time.time()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2) # matrix multiplication on CPU
cpu_time = time.time() - start_time

print(f"Time on CPU: {cpu_time: .4f} seconds")

# matrix to GPU
matrix_gpu1 = matrix_cpu1.to('cuda')
matrix_gpu2 = matrix_cpu2.to('cuda')

# measure time on gpu
start_time = time.time()
result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2) # matrix multiplication on GPU
torch.cuda.synchronize() # ensure all gpu operations are complete
gpu_time = time.time() - start_time

print(f"Time on GPU: {gpu_time: .4f} seconds")

# compare results
print("\nspeedup (CPU time / GPU time): ", cpu_time / gpu_time)

Time on CPU:  17.5516 seconds
Time on GPU:  0.7753 seconds

speedup (CPU time / GPU time):  22.63900882349685


# **Reshaping Tensors**

In [11]:
a = torch.ones(4,4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [12]:
# reshape
a.reshape(2, 2, 2, 2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [13]:
# flatten
a.flatten()  # convert multidimentional tensor to a one dimentional tensor.

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [14]:
b = torch.rand(2, 3, 4)
b

tensor([[[0.8317, 0.1003, 0.7068, 0.6803],
         [0.5209, 0.9755, 0.9537, 0.9495],
         [0.5528, 0.6635, 0.6174, 0.8003]],

        [[0.6286, 0.3325, 0.0711, 0.0250],
         [0.3711, 0.5075, 0.6207, 0.9919],
         [0.4277, 0.8171, 0.9320, 0.5171]]])

In [19]:
# permute
print(b.shape)

print(b.permute(2, 0, 1).shape) # so permute allow to change shape by moving one shape to another in the existing shape (here 2, 0, 1 is the idx no of existing shape)
print(b.permute(1, 0, 2).shape) # so permute allow to change shape by moving one shape to another in the existing shape (here 2, 0, 1 is the idx no of existing shape)
print(b.permute(1, 2, 0).shape) # so permute allow to change shape by moving one shape to another in the existing shape (here 2, 0, 1 is the idx no of existing shape)

torch.Size([2, 3, 4])
torch.Size([4, 2, 3])
torch.Size([3, 2, 4])
torch.Size([3, 4, 2])


In [18]:
# unsqueeze
# image size
c= torch.rand(226, 226, 3)

print(c.unsqueeze(0).shape) #this will add a new dimention in index 0 (position).
print(c.unsqueeze(1).shape) #this will add a new dimention in index 1 (position).
print(c.unsqueeze(2).shape) #this will add a new dimention in index 2 (position).

torch.Size([1, 226, 226, 3])
torch.Size([226, 1, 226, 3])
torch.Size([226, 226, 1, 3])


In [20]:
# Squeeze
d = torch.rand(1,20)
print(d)

d.squeeze(0) # remove dimention from the mentioned index (here it is index 0)

tensor([[0.1932, 0.8297, 0.5261, 0.9758, 0.9457, 0.8077, 0.0998, 0.4752, 0.9937,
         0.6420, 0.1235, 0.8967, 0.2348, 0.2690, 0.6394, 0.4413, 0.7800, 0.5255,
         0.6422, 0.2179]])


tensor([0.1932, 0.8297, 0.5261, 0.9758, 0.9457, 0.8077, 0.0998, 0.4752, 0.9937,
        0.6420, 0.1235, 0.8967, 0.2348, 0.2690, 0.6394, 0.4413, 0.7800, 0.5255,
        0.6422, 0.2179])

# **NumPy and PyTorch**

In [21]:
import numpy as np

In [22]:
a = torch.tensor([1,2,3])
a

tensor([1, 2, 3])

In [23]:
# convert tensor to numpy array
b = a.numpy()
b

array([1, 2, 3])

In [24]:
type(b)

numpy.ndarray

In [25]:
c = np.array([1,2,3])
c

array([1, 2, 3])

In [26]:
# convert numpy array to tensor
torch.from_numpy(c)

tensor([1, 2, 3])