In [5]:
import torch
print(torch.__version__)

2.5.1+cu124


In [6]:
if torch.cuda.is_available():
    print("CUDA is available")
    print("Number of CUDA devices:", torch.cuda.device_count())
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available")

CUDA is available
Number of CUDA devices: 1
Device name: Tesla T4


### Creating a Tensor

In [6]:
# using empty
a = torch.empty(2, 3)
# Does not assign values shows the values already existing in that memory space

In [8]:
# check type
type(a)

torch.Tensor

In [10]:
# using zeros
torch.zeros(2, 3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [11]:
# using ones
torch.ones(2, 3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [12]:
# using rand
torch.rand(2, 3)

tensor([[0.7072, 0.2618, 0.8588],
        [0.5660, 0.1724, 0.1038]])

In [13]:
# manual seed
torch.manual_seed(42)
torch.rand(2, 3)

tensor([[0.8823, 0.9150, 0.3829],
        [0.9593, 0.3904, 0.6009]])

In [14]:
# using tensor
torch.tensor([[1, 2, 3], [4, 5, 6]])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [15]:
# Other ways

# using arange
print("Using arange -> ", torch.arange(0, 10, 2))

# using linspace (Linearly spaced)
print("Using linspace -> ", torch.linspace(0, 10, 10))

# using eye (Identity matrix)
print("Using eye -> ", torch.eye(5))

# using full
print("Using full -> ", torch.full((3, 3), 5))

Using arange ->  tensor([0, 2, 4, 6, 8])
Using linspace ->  tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
Using eye ->  tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])
Using full ->  tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])


### Tensor Shape

In [18]:
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [19]:
x.shape

torch.Size([2, 3])

In [20]:
# If you want to create tensors of simailar shape of a tensor then use empty_like, zeros_like, ones_like
torch.empty_like(x)

tensor([[0, 0, 0],
        [0, 1, 0]])

In [21]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [22]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

In [27]:
# torch.rand_like(x) -> This will give an error because x has integers but rand_like will generate floats
torch.rand_like(x, dtype=torch.float32)

tensor([[0.2566, 0.7936, 0.9408],
        [0.1332, 0.9346, 0.5936]])

### Tensor datatypes

In [23]:
# find datatype
x.dtype

torch.int64

In [24]:
# assign data type
torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [26]:
# using to()
print(x)
x.to(torch.float32)

tensor([[1, 2, 3],
        [4, 5, 6]])


tensor([[1., 2., 3.],
        [4., 5., 6.]])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


### Mathematical Operations on Tensors

#### 1. Scalar Operation

In [28]:
x = torch.rand(2, 2)
x

tensor([[0.8694, 0.5677],
        [0.7411, 0.4294]])

In [29]:
# addition
print(x + 2)

# subtraction
print(x - 2)

# mutliplication
print(x * 2)

# division
print(x / 2)

# int division
print((x * 100)//3)

# mod
print(x % 2)

# exponentiation
print(x ** 2)

tensor([[2.8694, 2.5677],
        [2.7411, 2.4294]])
tensor([[-1.1306, -1.4323],
        [-1.2589, -1.5706]])
tensor([[1.7388, 1.1354],
        [1.4822, 0.8588]])
tensor([[0.4347, 0.2839],
        [0.3705, 0.2147]])
tensor([[28., 18.],
        [24., 14.]])
tensor([[0.8694, 0.5677],
        [0.7411, 0.4294]])
tensor([[0.7559, 0.3223],
        [0.5492, 0.1844]])


#### 2. Element Wise Operation

In [30]:
a = torch.rand(2, 3)
b = torch.rand(2, 3)

print(a)
print(b)

tensor([[0.8854, 0.5739, 0.2666],
        [0.6274, 0.2696, 0.4414]])
tensor([[0.2969, 0.8317, 0.1053],
        [0.2695, 0.3588, 0.1994]])


In [31]:
# add
a + b

tensor([[1.1824, 1.4056, 0.3719],
        [0.8969, 0.6284, 0.6407]])

In [32]:
# subtract
a - b

tensor([[ 0.5885, -0.2578,  0.1613],
        [ 0.3580, -0.0892,  0.2420]])

In [33]:
# multiply
a * b

tensor([[0.2629, 0.4773, 0.0281],
        [0.1691, 0.0967, 0.0880]])

In [34]:
# division
a / b

tensor([[2.9821, 0.6900, 2.5313],
        [2.3282, 0.7515, 2.2139]])

In [35]:
# mod
a % b

tensor([[0.2916, 0.5739, 0.0560],
        [0.0885, 0.2696, 0.0426]])

In [36]:
c = torch.tensor([1, -2, 3, -4])

In [37]:
# abs
torch.abs(c)

tensor([1, 2, 3, 4])

In [38]:
# negative
torch.neg(c)

tensor([-1,  2, -3,  4])

In [39]:
d = torch.tensor([1.2, 1.5, 1.8])
d

tensor([1.2000, 1.5000, 1.8000])

In [40]:
# round
torch.round(d)

tensor([1., 2., 2.])

In [41]:
# ceil
torch.ceil(d)

tensor([2., 2., 2.])

In [42]:
# floor
torch.floor(d)

tensor([1., 1., 1.])

In [43]:
# clamp -> you can keep the tensors elements in a range
torch.clamp(d, min=1.1, max=1.7)

tensor([1.2000, 1.5000, 1.7000])

#### 3. Reduction Operations

In [47]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
f = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)

print(e)
print(f)

tensor([[3., 4., 9.],
        [6., 2., 0.]])
tensor([[6., 2., 7.],
        [9., 7., 3.]])


In [48]:
# sum
print(torch.sum(e))

# sum along columns
print(torch.sum(e, dim=0))

# sum along rows
print(torch.sum(e, dim=1))

tensor(24.)
tensor([9., 6., 9.])
tensor([16.,  8.])


In [49]:
# mean
print(torch.mean(e))

# mean along columns
print(torch.mean(e, dim=0))

# mean along rows
print(torch.mean(e, dim=1))

tensor(4.)
tensor([4.5000, 3.0000, 4.5000])
tensor([5.3333, 2.6667])


In [50]:
# median
print(torch.median(e))

# median along columns
print(torch.median(e, dim=0))

# median along rows
print(torch.median(e, dim=1))

tensor(3.)
torch.return_types.median(
values=tensor([3., 2., 0.]),
indices=tensor([0, 1, 1]))
torch.return_types.median(
values=tensor([4., 2.]),
indices=tensor([1, 1]))


In [51]:
# max and min
print(torch.max(e))
print(torch.min(e))

tensor(9.)
tensor(0.)


In [53]:
# product
print(torch.prod(e))

tensor(0.)


In [54]:
# standard deviation
print(torch.std(e))

tensor(3.1623)


In [55]:
# variance
torch.var(e)

tensor(10.)

In [56]:
# argmax -> Position of max item
torch.argmax(e)

tensor(2)

In [57]:
# argmin -> Position of min item
torch.argmin(e)

tensor(5)

#### 4. Matrix Operation

In [58]:
g = torch.randint(size=(2,3), low=0, high=10)
h = torch.randint(size=(3,2), low=0, high=10)

print(g)
print(h)

tensor([[3, 4, 3],
        [7, 0, 9]])
tensor([[0, 9],
        [6, 9],
        [5, 4]])


In [59]:
# matrix multiplication
torch.matmul(g, h)

tensor([[39, 75],
        [45, 99]])

In [60]:
vector1 = torch.tensor([1, 2, 3])
vector2 = torch.tensor([4, 5, 6])

# dot product
torch.dot(vector1, vector2)

tensor(32)

In [62]:
# transpose
torch.transpose(f, 0, 1)

tensor([[6., 9.],
        [2., 7.],
        [7., 3.]])

In [65]:
# determinant
torch.det(torch.matmul(g, h).to(torch.float32))

tensor(485.9998)

In [67]:
# inverse
torch.inverse(torch.matmul(g, h).to(torch.float32))

tensor([[ 0.2037, -0.1543],
        [-0.0926,  0.0802]])

#### 5. Comparison Operations

In [68]:
i = torch.randint(size=(2, 3), low=0, high=10)
j = torch.randint(size=(2, 3), low=0, high=10)

print(i)
print(j)

tensor([[8, 8, 6],
        [0, 0, 0]])
tensor([[0, 1, 3],
        [0, 1, 1]])


In [69]:
# greater than
print(j > j)

# less than
print(i < j)

# equal to
print(i == j)

# not equal to
print(i != j)

# greater than equal to
print(i >= j)

# less than equal to
print(i <= j)

tensor([[False, False, False],
        [False, False, False]])
tensor([[False, False, False],
        [False,  True,  True]])
tensor([[False, False, False],
        [ True, False, False]])
tensor([[ True,  True,  True],
        [False,  True,  True]])
tensor([[ True,  True,  True],
        [ True, False, False]])
tensor([[False, False, False],
        [ True,  True,  True]])


#### 6. Special Functions

In [70]:
k = torch.randint(size=(2, 3), low=0, high=10)

print(k)

tensor([[7, 9, 4],
        [3, 8, 9]])


In [71]:
# log
torch.log(k)

tensor([[1.9459, 2.1972, 1.3863],
        [1.0986, 2.0794, 2.1972]])

In [72]:
# exp
torch.exp(k)

tensor([[1096.6332, 8103.0840,   54.5981],
        [  20.0855, 2980.9580, 8103.0840]])

In [73]:
# sqrt
torch.sqrt(k)

tensor([[2.6458, 3.0000, 2.0000],
        [1.7321, 2.8284, 3.0000]])

In [74]:
# sigmoid
torch.sigmoid(k)

tensor([[0.9991, 0.9999, 0.9820],
        [0.9526, 0.9997, 0.9999]])

In [77]:
# softmax
k = torch.randint(size=(2, 3), low=0, high=10, dtype=torch.float32)

print(k)
torch.softmax(k, dim=0)

tensor([[6., 3., 2.],
        [0., 9., 8.]])


tensor([[0.9975, 0.0025, 0.0025],
        [0.0025, 0.9975, 0.9975]])

In [78]:
# relu
torch.relu(k)

tensor([[6., 3., 2.],
        [0., 9., 8.]])

### Inplace Operations

In [79]:
m = torch.rand(2, 3)
n = torch.rand(2, 3)

print(m)
print(n)

tensor([[0.2477, 0.6524, 0.6057],
        [0.3725, 0.7980, 0.8399]])
tensor([[0.1374, 0.2331, 0.9578],
        [0.3313, 0.3227, 0.0162]])


In [80]:
# Sometimes there are very big tensors and you don't want a new tensor to be created so we use inplace
m.add_(n) # This is inplace operation
print(m)
print(n)

tensor([[0.3851, 0.8855, 1.5635],
        [0.7038, 1.1208, 0.8561]])
tensor([[0.1374, 0.2331, 0.9578],
        [0.3313, 0.3227, 0.0162]])


In [81]:
m.relu_() # This is inplace operation
# When ever you want to perform inplace operation then write it after the object and use _ which signify inplace operation

tensor([[0.3851, 0.8855, 1.5635],
        [0.7038, 1.1208, 0.8561]])

### Copying a Tensor

In [82]:
a = torch.rand(2, 3)
print(a)
b = a
print(b)

# In this case if you make changes in a then the changes will be reflected in b which is not desirable

tensor([[0.2137, 0.6249, 0.4340],
        [0.1371, 0.5117, 0.1585]])
tensor([[0.2137, 0.6249, 0.4340],
        [0.1371, 0.5117, 0.1585]])


In [83]:
b = a.clone() # This will create a completely independent copy

### Tensor Operations on GPU

In [7]:
torch.cuda.is_available()

True

In [8]:
device = torch.device('cuda')

In [9]:
# To run your tensor operations on GPU either you have to create the tensors on gpu itself or you have to move the exisiting tensors on cpu on the gpu

# Creating a new tensor on GPU
torch.rand((2, 3), device=device)

tensor([[0.4220, 0.5361, 0.6003],
        [0.6543, 0.0068, 0.6750]], device='cuda:0')

In [14]:
# Moving an existing tensor to GPU
a = torch.rand(2, 3) # this tensor is been created on cpu
print(a)

b = a.to(device)
print(b)

tensor([[0.9050, 0.4191, 0.9823],
        [0.2971, 0.7622, 0.9573]])
tensor([[0.9050, 0.4191, 0.9823],
        [0.2971, 0.7622, 0.9573]], device='cuda:0')


In [16]:
b + 5 # This operations is performed on gpu

tensor([[5.9050, 5.4191, 5.9823],
        [5.2971, 5.7622, 5.9573]], device='cuda:0')

### Comparing CPU and GPU tensor operations speed

In [18]:
import time

size = 10000

matrix_cpu1 = torch.rand(size, size)
matrix_cpu2 = torch.rand(size, size)

start_time1 = time.time()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2)
end_time1 = time.time()
print("CPU time:", end_time1 - start_time1)

matrix_gpu1 = matrix_cpu1.to(device)
matrix_gpu2 = matrix_cpu2.to(device)

start_time2 = time.time()
result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2)
end_time2 = time.time()
print("GPU time:", end_time2 - start_time2)

CPU time: 18.118367910385132
GPU time: 0.041734933853149414


In [20]:
# speed up
print("Speed up:", (end_time1 - start_time1) / (end_time2 - start_time2))

Speed up: 434.12954658409933


### Reshaping Tensors

In [21]:
a = torch.ones(4, 4)
a

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [22]:
# reshape -> Multiplication of size should be same
a.reshape(2, 2, 2, 2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [23]:
# flatten -> Gives 1D tensor
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [24]:
b = torch.rand(2, 3, 4)
b

tensor([[[0.8224, 0.0033, 0.3980, 0.4494],
         [0.7140, 0.4645, 0.3907, 0.9580],
         [0.2661, 0.6172, 0.8202, 0.2697]],

        [[0.9109, 0.8416, 0.2678, 0.4606],
         [0.4440, 0.3801, 0.1722, 0.0331],
         [0.9729, 0.3197, 0.8083, 0.9337]]])

In [28]:
# permute
print(b.permute(2, 0, 1).shape)
b.permute(2, 0, 1)

torch.Size([4, 2, 3])


tensor([[[0.8224, 0.7140, 0.2661],
         [0.9109, 0.4440, 0.9729]],

        [[0.0033, 0.4645, 0.6172],
         [0.8416, 0.3801, 0.3197]],

        [[0.3980, 0.3907, 0.8202],
         [0.2678, 0.1722, 0.8083]],

        [[0.4494, 0.9580, 0.2697],
         [0.4606, 0.0331, 0.9337]]])

In [31]:
# unsqueeze -> You can add a dimension at any place you want
c = torch.rand(256, 256, 3)
print(c.unsqueeze(0).shape)
print(c.unsqueeze(1).shape)

torch.Size([1, 256, 256, 3])
torch.Size([256, 1, 256, 3])


In [32]:
# squeeze
d = torch.rand(1, 256, 256, 3)
print(d.shape)
print(d.squeeze(0).shape)

torch.Size([1, 256, 256, 3])
torch.Size([256, 256, 3])


### NumPy and PyTorch

In [33]:
import numpy as np

In [34]:
a = torch.tensor([1, 2, 3])
a

tensor([1, 2, 3])

In [35]:
b = a.numpy()
b

array([1, 2, 3])

In [36]:
type(b)

numpy.ndarray

In [37]:
c = np.array([1, 2, 3])
c

array([1, 2, 3])

In [38]:
# Converting a numpy array to pytorch tensor
torch.from_numpy(c)

tensor([1, 2, 3])