In [17]:
import torch
print(torch.__version__)

2.6.0+cu124


In [18]:
if torch.cuda.is_available():
    print("CUDA is available.")
    print("Number of CUDA devices:", torch.cuda.device_count())
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available.")

CUDA is available.
Number of CUDA devices: 1
Device name: Tesla T4


## Creating Tensor

In [19]:
# using empty -> creates a memory for this size
x = torch.empty(2,3)
print(x)
type(x)

tensor([[3.5397e-16, 4.5832e-41, 6.1115e-32],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])


torch.Tensor

In [20]:
# using zeros - can be used to initialize biases
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [21]:
# using ones
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [22]:
# using rand - random value between 0 & 1
torch.rand(2,3)

tensor([[0.1546, 0.3500, 0.8590],
        [0.8866, 0.3668, 0.5939]])

In [23]:
# manual seed
torch.manual_seed(1729) # will always have same rand values
torch.rand(2,3)

tensor([[0.3126, 0.3791, 0.3087],
        [0.0736, 0.4216, 0.0691]])

In [24]:
# using tensor
torch.tensor([[1,2,3],[4,5,6]])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [25]:
# other ways
#arange
print(torch.arange(0,10,2))

# linspace
print(torch.linspace(0,10,10))

# using eye (Identity Matrix)
print(torch.eye(5))

tensor([0, 2, 4, 6, 8])
tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])


## Tensor shape

In [26]:
x = torch.tensor([[1,2,3],[5,6,7]])
x

tensor([[1, 2, 3],
        [5, 6, 7]])

In [27]:
x.shape

torch.Size([2, 3])

In [28]:
torch.empty_like(x)

tensor([[    3078641164681456, -7487508346140878841, -9214075854466395896],
        [   99080296783605763, -1152534472197537552,    67555506255906304]])

In [29]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [30]:
torch.rand_like(x) # rand generates float between 0 to 1

RuntimeError: "check_uniform_bounds" not implemented for 'Long'

In [48]:
torch.rand_like(x, dtype=torch.float32)

tensor([[0.8703, 0.6888, 0.1889],
        [0.9039, 0.3524, 0.2360]])

## Tensor Data Type

In [49]:
x.dtype

torch.int64

In [50]:
# assign data type
torch.tensor([[1,2,3],[5,6.0,7.0]], dtype=torch.int)

tensor([[1, 2, 3],
        [5, 6, 7]], dtype=torch.int32)

In [51]:
torch.tensor([[1,2,3],[5,6,7]], dtype=torch.float64)

tensor([[1., 2., 3.],
        [5., 6., 7.]], dtype=torch.float64)

In [52]:
# using to()
x = x.to(torch.float32)

In [53]:
x.dtype

torch.float32

## Different Data types in Pytorch

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


## Mathematical Operations

In [54]:
## Scalar Operation
x=torch.rand(2,2)
x+2
x-2
x*2
x/2
(100*x) // 3
((100*x) // 3) % 2
x**2

tensor([[0.6301, 0.9295],
        [0.0721, 0.7458]])

In [55]:
## Element wise operation between two tensors
x=torch.rand(2,2)
y=torch.rand(2,2)
print(x)
print(y)
x+y
x-y
x*y
x/y

tensor([[0.6218, 0.2521],
        [0.2401, 0.0160]])
tensor([[0.3254, 0.0441],
        [0.7365, 0.5185]])


tensor([[1.9106, 5.7185],
        [0.3260, 0.0309]])

In [56]:
# round
x=torch.rand(2,2)
print(x)
print(x.round())

tensor([[0.7219, 0.4771],
        [0.2640, 0.0832]])
tensor([[1., 0.],
        [0., 0.]])


In [57]:
# ceil # floor # clamp -> to put the tensors in a particular range
x.clamp(2, 3)

tensor([[2., 2.],
        [2., 2.]])

## Reduction Operation

In [58]:
import torch
e = torch.randint(size = (2,3), low = 0, high = 10,dtype=torch.float32)
e

tensor([[3., 6., 4.],
        [8., 5., 3.]])

In [59]:
# sum
torch.sum(e)

# sum along colums
torch.sum(e, 0)

# sum along rows
torch.sum(e, 1)

tensor([13., 16.])

In [60]:
## mean
torch.mean(e)
## mean along columns
torch.mean(e, 0)
## mean along rows
torch.mean(e, 1)

tensor([4.3333, 5.3333])

In [61]:
# max and min
torch.max(e)
torch.min(e)

tensor(3.)

In [62]:
# product
torch.prod(e)

tensor(8640.)

In [63]:
# standard deviation
torch.std(e)

# variance
torch.var(e)

# argmax - Give the postion of the maximum number in the tensor
torch.argmax(e)

# argmin - Give the position of the minimum number in the tensor
torch.argmin(e)

tensor(0)

## Matrix Operation

In [64]:
a = torch.randint(size = (2,3), low = 0, high = 10,dtype=torch.float32)
b = torch.randint(size = (3,2), low = 0, high = 10,dtype=torch.float32)
print(a)
print(b)

tensor([[3., 5., 6.],
        [6., 3., 9.]])
tensor([[3., 5.],
        [4., 2.],
        [0., 8.]])


In [65]:
# matrix multiplication
torch.matmul(a,b)

tensor([[ 29.,  73.],
        [ 30., 108.]])

In [66]:
# dot product
vector1 = torch.rand(3)
vector2 = torch.rand(3)
print(vector1)
print(vector2)
torch.dot(vector1, vector2)

tensor([0.6341, 0.8873, 0.5026])
tensor([0.0250, 0.4842, 0.8439])


tensor(0.8696)

In [67]:
# transpose
torch.transpose(a, 0, 1)

tensor([[3., 6.],
        [5., 3.],
        [6., 9.]])

In [68]:
# determinant
p = torch.randint(size = (3,3), low = 0, high = 10,dtype=torch.float32)
print(p)
torch.det(p)

tensor([[7., 5., 4.],
        [3., 8., 8.],
        [0., 8., 5.]])


tensor(-147.)

In [69]:
# inverse
torch.inverse(p)

tensor([[ 0.1633, -0.0476, -0.0544],
        [ 0.1020, -0.2381,  0.2993],
        [-0.1633,  0.3810, -0.2789]])

## Comparison Operators
- Comparing 2 matrices


In [70]:
import torch
x = torch.randint(size = (2,3), low = 0, high = 10,dtype=torch.float32)
y = torch.randint(size = (2,3), low = 0, high = 10,dtype=torch.float32)
print(x)
print(y)

tensor([[5., 0., 2.],
        [1., 9., 3.]])
tensor([[0., 3., 3.],
        [6., 3., 4.]])


In [71]:
# greater than
x>y
x<y
x == y
x != y


tensor([[True, True, True],
        [True, True, True]])

## Special Functions

In [72]:
z = torch.randint(size= (2,3), low = 0, high = 10, dtype=torch.float32)
print(z)

tensor([[5., 4., 0.],
        [4., 9., 0.]])


In [73]:
# log
torch.log(z)

tensor([[1.6094, 1.3863,   -inf],
        [1.3863, 2.1972,   -inf]])

In [74]:
# exp
torch.exp(z)

tensor([[1.4841e+02, 5.4598e+01, 1.0000e+00],
        [5.4598e+01, 8.1031e+03, 1.0000e+00]])

In [75]:
# sqrt
torch.sqrt(z)

tensor([[2.2361, 2.0000, 0.0000],
        [2.0000, 3.0000, 0.0000]])

In [76]:
# sigmoid
torch.sigmoid(z)

tensor([[0.9933, 0.9820, 0.5000],
        [0.9820, 0.9999, 0.5000]])

In [77]:
# softmax - softmax expects the data to be in float
torch.softmax(z, dim=0)

tensor([[0.7311, 0.0067, 0.5000],
        [0.2689, 0.9933, 0.5000]])

In [78]:
# relu (-ve value to 0 , rest as it is)
torch.relu(z)

tensor([[5., 4., 0.],
        [4., 9., 0.]])

## Inplace Operators

In [79]:
import torch
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.3902, 0.2752, 0.7893],
        [0.0590, 0.7548, 0.0293]])
tensor([[0.2749, 0.3410, 0.9311],
        [0.7347, 0.9818, 0.0898]])


In [80]:
m+n # will allocate memory to save this result but storing large tensors will be a problem

tensor([[0.6651, 0.6162, 1.7204],
        [0.7937, 1.7366, 0.1191]])

In [81]:
m.add_(n) # to make the permanent changes in the tensor instead of storing in a seperate memory

tensor([[0.6651, 0.6162, 1.7204],
        [0.7937, 1.7366, 0.1191]])

In [82]:
m

tensor([[0.6651, 0.6162, 1.7204],
        [0.7937, 1.7366, 0.1191]])

In [83]:
n.relu_()

tensor([[0.2749, 0.3410, 0.9311],
        [0.7347, 0.9818, 0.0898]])

In [84]:
n

tensor([[0.2749, 0.3410, 0.9311],
        [0.7347, 0.9818, 0.0898]])

## Copying a Tensor

In [85]:
a = torch.rand(2,3)
a

tensor([[0.0841, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

In [86]:
b = a
b

tensor([[0.0841, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

In [87]:
a[0][0] = 0
a

tensor([[0.0000, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

In [88]:
b # The changes done in a is reflected in b which is not desirable in some cases

tensor([[0.0000, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

In [89]:
print(id(a))
print(id(b))

140469953195056
140469953195056


In [90]:
# instead of using assignment operator we should use clone function
b = a.clone()
b

tensor([[0.0000, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

In [91]:
a[0][0] = 10
a

tensor([[10.0000,  0.8782,  0.6802],
        [ 0.0988,  0.9399,  0.4570]])

In [92]:
b

tensor([[0.0000, 0.8782, 0.6802],
        [0.0988, 0.9399, 0.4570]])

## Tensor Operation on GPU

In [93]:
import torch
torch.cuda.is_available()

True

In [94]:
device = torch.device("cuda")

In [95]:
# creating a new tensor on GPU
torch.rand((2,3),device=device)

tensor([[0.0024, 0.6778, 0.2441],
        [0.6812, 0.9678, 0.6866]], device='cuda:0')

## Comparing CPU and GPU performance difference

In [96]:
import torch
import time
size = 10000

# In CPU
mat1 = torch.randn(size,size)
mat2 = torch.randn(size,size)

# Measuring time
start_time = time.time()
result = torch.matmul(mat1,mat2)

cpu_time = time.time() - start_time
print("Time on CPU:",cpu_time)

# Moving the metrics to GPU
mat1_gpu = mat1.to('cuda')
mat2_gpu = mat2.to('cuda')

# Measuring time
start_time = time.time()
result = torch.matmul(mat1_gpu,mat2_gpu)

gpu_time = time.time() - start_time
print("Time on GPU:",gpu_time)


Time on CPU: 18.846004962921143
Time on GPU: 0.000255584716796875


## Reshaping Tensors

In [97]:
a = torch.ones(4,4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [98]:
# reshape - To reshape the product of original tensor must be same as the reshaped tensor
a.reshape(2,2,2,2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [99]:
# flatten - to make everything in one dimension
a.flatten()


tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [100]:
# permute
b = torch.rand(2,3,4)
b

tensor([[[0.9502, 0.8672, 0.1327, 0.8549],
         [0.6770, 0.5256, 0.7988, 0.3025],
         [0.8994, 0.6601, 0.3722, 0.8262]],

        [[0.9326, 0.0828, 0.4096, 0.0072],
         [0.7757, 0.9349, 0.6352, 0.9472],
         [0.3727, 0.8973, 0.3909, 0.0313]]])

In [101]:
b.permute(2,0,1)

tensor([[[0.9502, 0.6770, 0.8994],
         [0.9326, 0.7757, 0.3727]],

        [[0.8672, 0.5256, 0.6601],
         [0.0828, 0.9349, 0.8973]],

        [[0.1327, 0.7988, 0.3722],
         [0.4096, 0.6352, 0.3909]],

        [[0.8549, 0.3025, 0.8262],
         [0.0072, 0.9472, 0.0313]]])

In [102]:
b.permute(2,0,1).shape

torch.Size([4, 2, 3])

In [103]:
# unsqueeze - To change the dimension to batches to insert in model
c = torch.rand(226,226,3) # image size
c.unsqueeze(0).shape

torch.Size([1, 226, 226, 3])

In [104]:
c.unsqueeze(1).shape

torch.Size([226, 1, 226, 3])

In [105]:
c.unsqueeze(2).shape

torch.Size([226, 226, 1, 3])

In [107]:
# Squeeze - to reduce the dimension
p = torch.rand(1,20)
p.squeeze().shape

torch.Size([20])

## NumPy and PyTorch

In [108]:
import numpy as np

In [110]:
aa = torch.tensor([1,2,3])
# To convert to numpy array
b = aa.numpy()
type(b)

numpy.ndarray

In [111]:
dd = np.array([1,2,3])
# To convert to torch tensor
cc = torch.from_numpy(dd)
type(cc)

torch.Tensor