In [5]:
import torch
print(torch.__version__)

2.5.1


In [4]:
if torch.cuda.is_available():
    print("GPU is available! Using GPU:")
    print(torch.cuda.get_device_name(0))
    # print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU.")

GPU is available! Using GPU:
NVIDIA GeForce GTX 1650


## Creating a Tensor

In [44]:
# using empty
a = torch.empty(2,3)
a
# a[0,1] # OR
# a[1][2]

tensor([[3.9727e-23, 1.1308e-42, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])

In [45]:
# check type
type(a)

torch.Tensor

In [46]:
# using zeros
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [47]:
# using ones
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [81]:
# using rand
torch.rand(size = (2,3))

tensor([[0.5277, 0.2472, 0.7909],
        [0.4235, 0.0169, 0.2209]])

In [62]:
# use of seed
torch.rand(2,3)

tensor([[0.4818, 0.8853, 0.5784],
        [0.6169, 0.3535, 0.2339]])

In [82]:
# manual_seed
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [83]:
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [84]:
# using tensor
torch.tensor([[1,2,3],[4,5,6]])

tensor([[1, 2, 3],
        [4, 5, 6]])

In [87]:
# other ways

# arange
print("using arange ->", torch.arange(0,10,2))

# using linspace
print("using linspace ->", torch.linspace(0,10,10))

# using eye
print("using eye ->", torch.eye(n=5)) #m = no of cols

# using full
print("using full ->", torch.full(size=(3, 3), fill_value=5))

using arange -> tensor([0, 2, 4, 6, 8])
using linspace -> tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])
using eye -> tensor([[1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])
using full -> tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])


## Tensor Shapes

In [88]:
x = torch.tensor([[1,2,3],[4,5,6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [89]:
x.shape

torch.Size([2, 3])

In [92]:
# same shape as `x`, values can be anything
torch.empty_like(input = x)

tensor([[4579660422986072064, 4635705220378015972, 4661726013461248228],
        [4681742012764214613, 4692750812811507940,                   0]])

In [93]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [94]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

In [98]:
# torch.rand_like(x) # must mention datatype
t1 = torch.rand_like(x, dtype=torch.float32)
t1

tensor([[0.4440, 0.9478, 0.7445],
        [0.4892, 0.2426, 0.7003]])

## Tensor Data Types

In [100]:
# find data type
x.dtype

torch.int64

In [103]:
# assign data type
torch.tensor([1.0,2.9,3.3], dtype=torch.int32)

tensor([1, 2, 3], dtype=torch.int32)

In [105]:
torch.tensor([1,2,3], dtype=torch.float64)

tensor([1., 2., 3.], dtype=torch.float64)

In [106]:
# using to()
x.to(torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


## Mathematical operations

### 1. Scalar operation

In [107]:
x = torch.rand(2,2)
x

tensor([[0.5277, 0.2472],
        [0.7909, 0.4235]])

In [108]:
# addition
x + 2
# substraction
x - 2
# multiplication
x * 3
# division
x / 3
# int division
(x * 100)//3
# mod
((x * 100)//3)%2
# power
x**2

tensor([[0.2785, 0.0611],
        [0.6255, 0.1793]])

### 2. Element wise operation

In [109]:
a = torch.rand(2,3)
b = torch.rand(2,3)

print(a)
print(b)

tensor([[0.0169, 0.2209, 0.9535],
        [0.7064, 0.1629, 0.8902]])
tensor([[0.5163, 0.0359, 0.6476],
        [0.3430, 0.3182, 0.5261]])


In [110]:
# add
a + b
# sub
a - b
# multiply
a * b
# division
a / b
# power
a ** b
# mod
a % b

tensor([[0.0169, 0.0056, 0.3059],
        [0.0204, 0.1629, 0.3641]])

In [111]:
c = torch.tensor([1, -2, 3, -4])

In [112]:
# abs
torch.abs(c)

tensor([1, 2, 3, 4])

In [113]:
# negative
torch.neg(c)

tensor([-1,  2, -3,  4])

In [122]:
d = torch.tensor([1.9, 2.3, 3.7, 4.4])
d

tensor([1.9000, 2.3000, 3.7000, 4.4000])

In [124]:
# round
torch.round(d)

tensor([2., 2., 4., 4.])

In [125]:
d.round()

tensor([2., 2., 4., 4.])

In [128]:
d.round_() # inplace modification

tensor([2., 2., 4., 4.])

In [129]:
d

tensor([2., 2., 4., 4.])

In [130]:
# ceil
torch.ceil(d) # Or d.ceil()

tensor([2., 2., 4., 4.])

In [131]:
# floor
torch.floor(d)

tensor([2., 2., 4., 4.])

In [133]:
# clamp
torch.clamp(d, min=2, max=4)

tensor([2., 2., 4., 4.])

### 3. Reduction operation

In [134]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
e

tensor([[5., 9., 8.],
        [9., 7., 9.]])

In [137]:
print( torch.sum(e) )
# sum along columns
print( torch.sum(e, dim=0) )
# sum along rows
print( torch.sum(e, dim=1) )

tensor(47.)
tensor([14., 16., 17.])
tensor([22., 25.])


In [140]:
# mean
# torch.mean(e)
# mean along col
# torch.mean(e, dim=0)
# mean along row
torch.mean(e, dim=1)

tensor([7.3333, 8.3333])

In [144]:
# median
torch.median(e) # OR e.median()

tensor(8.)

In [147]:
# max and min
torch.max(e)
torch.min(e)

tensor(5.)

In [150]:
# product
torch.prod(e)
# torch.prod(e, dim=1)

tensor([360., 567.])

In [151]:
# standard deviation
torch.std(e)

tensor(1.6021)

In [152]:
# variance
torch.var(e)

tensor(2.5667)

In [154]:
# argmax
torch.argmax(e)

tensor(1)

In [None]:
# argmin
torch.argmin(e)

### 4. Matrix operations

In [173]:
f = torch.randint(size=(2,3), low=0, high=10)
g = torch.randint(size=(3,2), low=0, high=10)

print(f)
print(g)

tensor([[6, 6, 8],
        [4, 9, 0]])
tensor([[7, 9],
        [2, 7],
        [0, 2]])


In [174]:
# matrix multiplcation
# torch.mm(f, g)       # 2-D Matrix multiplication
torch.matmul(f, g)     # n-D

tensor([[ 54, 112],
        [ 46,  99]])

In [175]:
# OR
f.matmul(g)

tensor([[ 54, 112],
        [ 46,  99]])

In [179]:
vector1 = torch.tensor([1, 2])
vector2 = torch.tensor([3, 4])

# dot product
torch.dot(vector1, vector2)

tensor(11)

In [184]:
# transpose
torch.transpose(input=f, dim0=0, dim1=1)

tensor([[6, 4],
        [6, 9],
        [8, 0]])

In [193]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

tensor([[0., 2., 2.],
        [3., 7., 1.],
        [5., 0., 2.]])

In [186]:
# determinant
torch.det(h)

tensor(-18.)

In [196]:
# inverse
torch.inverse(h)

tensor([[-0.1944,  0.0556,  0.1667],
        [ 0.0139,  0.1389, -0.0833],
        [ 0.4861, -0.1389,  0.0833]])

### 5. Comparison operations

In [198]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

tensor([[7, 8, 1],
        [2, 3, 2]])
tensor([[7, 9, 3],
        [9, 7, 7]])


In [199]:
# greater than
i > j
# less than
i < j
# equal to
i == j
# not equal to
i != j
# greater than equal to

# less than equal to

tensor([[False,  True,  True],
        [ True,  True,  True]])

### 6. Special functions

In [200]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
k

tensor([[5., 9., 2.],
        [2., 4., 8.]])

In [201]:
# log
torch.log(k)

tensor([[1.6094, 2.1972, 0.6931],
        [0.6931, 1.3863, 2.0794]])

In [202]:
# exp
torch.exp(k)

tensor([[1.4841e+02, 8.1031e+03, 7.3891e+00],
        [7.3891e+00, 5.4598e+01, 2.9810e+03]])

In [203]:
# sqrt
torch.sqrt(k)

tensor([[2.2361, 3.0000, 1.4142],
        [1.4142, 2.0000, 2.8284]])

In [204]:
# sigmoid
torch.sigmoid(k)

tensor([[0.9933, 0.9999, 0.8808],
        [0.8808, 0.9820, 0.9997]])

In [205]:
# softmax
torch.softmax(k, dim=0)

tensor([[0.9526, 0.9933, 0.0025],
        [0.0474, 0.0067, 0.9975]])

In [207]:
# relu
torch.relu(k) # OR
# k.relu()

tensor([[5., 9., 2.],
        [2., 4., 8.]])

## Inplace Operations

In [208]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.3134, 0.2983, 0.3436],
        [0.2028, 0.9792, 0.4947]])
tensor([[0.3617, 0.9687, 0.0359],
        [0.3041, 0.9867, 0.1290]])


In [209]:
m.add_(n)

tensor([[0.6751, 1.2670, 0.3795],
        [0.5069, 1.9659, 0.6237]])

In [211]:
m

tensor([[0.6751, 1.2670, 0.3795],
        [0.5069, 1.9659, 0.6237]])

In [212]:
n

tensor([[0.3617, 0.9687, 0.0359],
        [0.3041, 0.9867, 0.1290]])

In [213]:
torch.relu(m)

tensor([[0.6751, 1.2670, 0.3795],
        [0.5069, 1.9659, 0.6237]])

In [214]:
m.relu_()

tensor([[0.6751, 1.2670, 0.3795],
        [0.5069, 1.9659, 0.6237]])

In [215]:
m

tensor([[0.6751, 1.2670, 0.3795],
        [0.5069, 1.9659, 0.6237]])

## Copying a Tensor

In [230]:
a = torch.rand(2,3)
a

tensor([[0.5772, 0.3771, 0.2440],
        [0.8994, 0.1041, 0.9193]])

In [217]:
b = a # here b is referencing memory of a

In [218]:
b

tensor([[0.6887, 0.1637, 0.0899],
        [0.3139, 0.1219, 0.3516]])

In [219]:
a[0][0] = 0

In [220]:
a

tensor([[0.0000, 0.1637, 0.0899],
        [0.3139, 0.1219, 0.3516]])

In [221]:
b

tensor([[0.0000, 0.1637, 0.0899],
        [0.3139, 0.1219, 0.3516]])

In [222]:
id(a)

2972417657824

In [223]:
id(b)

2972417657824

In [231]:
b = a.clone()

In [229]:
b.copy_(a)

tensor([[0.0000, 0.1637, 0.0899],
        [0.3139, 0.1219, 0.3516]])

In [232]:
id(a) == id(b)

False

In [225]:
a

tensor([[0.0000, 0.1637, 0.0899],
        [0.3139, 0.1219, 0.3516]])

In [233]:
b

tensor([[0.5772, 0.3771, 0.2440],
        [0.8994, 0.1041, 0.9193]])

In [234]:
a[0][0] = 10

In [235]:
a

tensor([[10.0000,  0.3771,  0.2440],
        [ 0.8994,  0.1041,  0.9193]])

In [236]:
b

tensor([[0.5772, 0.3771, 0.2440],
        [0.8994, 0.1041, 0.9193]])

In [237]:
id(a)

2972417661504

In [238]:
id(b)

2972417661584

# Tensor Operations on GPU

In [239]:
torch.cuda.is_available()

True

In [245]:
gpu = torch.device('cuda')
gpu

device(type='cuda')

In [247]:
t1 = torch.rand(2,3)  #cpu

In [248]:
# torch.rand((2,3), device=gpu) #same
t2 = torch.rand(2,3, device=gpu)
t2

tensor([[0.8344, 0.7001, 0.5361],
        [0.8883, 0.0220, 0.7938]], device='cuda:0')

In [249]:
t3 = t1.to(gpu)  #copy from cpu to gpu, not inplace
t1, t3

(tensor([[0.4964, 0.0589, 0.9840],
         [0.5836, 0.6737, 0.4738]]),
 tensor([[0.4964, 0.0589, 0.9840],
         [0.5836, 0.6737, 0.4738]], device='cuda:0'))

### Speed comparison

### **Comparison for PyTorch**

| Feature                     | GTX 1650 (Mobile)                   | Ryzen 5 3550H                       |
|-----------------------------|-------------------------------------|-------------------------------------|
| **Role**                    | GPU (accelerates tensor ops)        | CPU (general-purpose tasks)         |
| **PyTorch Usage**           | Training/inference acceleration     | Data pre-processing, small models   |
| **Tensor Computations**     | Faster, parallelized (CUDA cores)   | Slower, limited by fewer cores      |
| **Memory**                  | 4GB GDDR5 (VRAM, fast access)       | Uses system RAM (slower)            |
| **Parallelism**             | High (CUDA cores - 896)             | Moderate (4 cores, 8 threads)       |
| **Limitations**             | Limited VRAM for large models       | Not optimized for heavy tensor ops  |
| **Best Use Case**           | Training small/medium models        | Pre-processing, light inference     |

---

### Please Note:
- **GTX 1650 (Mobile)**: The better option for PyTorch workloads involving heavy tensor computations, such as training or inference on models that fit into 4GB VRAM.
- **Ryzen 5 3550H**: Adequate for CPU-bound tasks like pre-processing, model evaluation for smaller models, and managing data pipelines. If your dataset or model exceeds the GTX 1650's VRAM, consider techniques like **gradient checkpointing**, **model sharding**, or running on the CPU (although slower).

In [250]:
import time

size = 10000 # Large size for performance comparison

# Create random matrices on CPU

matrix_cpu1 = torch.randn(size, size)
matrix_cpu2 = torch.randn(size, size)

# Perform matrix multiplication
start_time = time.time()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2) # "Matrix multiplication on CPU
cpu_time = time.time() - start_time

# Measure time on CPU
print(f"Time on CPU: {cpu_time:.4f} seconds")

# Check if GPU/CUDA is available
if torch.cuda.is_available():
    # Move matrices to GPU
    matrix_gpu1 = matrix_cpu1.to('cuda')
    matrix_gpu2 = matrix_cpu2.to('cuda')

    # Measure time on GPU
    start_time = time.time()
    result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2)
    
    # Synchronize to ensure all GPU operations are complete and calculate time
    torch.cuda.synchronize()
    gpu_time = time.time() - start_time

    print(f"Time taken on GPU: {gpu_time:.4f} seconds")
    print(f'Performance gain: {round(cpu_time / gpu_time)}')
else:
    print("CUDA is not available. GPU computation cannot be performed.")

Time on CPU: 32.8641 seconds
Time taken on GPU: 6.4059 seconds
Performance gain: 5


# Reshaping Tensors (not inplace)

In [251]:
a = torch.ones(4,6)
a

tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])

In [255]:
a.reshape(2,3,4) # think in opposit way like -> 4 items first, then spread similar 3 times,  then spread similar 2 times

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [257]:
a.shape

torch.Size([4, 6])

In [258]:
a.reshape(-1,3)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [259]:
a.reshape(1,-1) #same as (1,24)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.]])

In [260]:
# Flatten - 1D tensor
a.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1.])

In [261]:
b = torch.randint(low=1,high=100,size=(2,3,4))
b

tensor([[[86, 34, 48,  2],
         [84,  6, 39, 61],
         [83, 26, 47, 79]],

        [[ 8, 44, 73, 18],
         [34,  1, 42, 83],
         [56, 53, 61, 17]]])

In [262]:
b.permute(2,0,1)

tensor([[[86, 84, 83],
         [ 8, 34, 56]],

        [[34,  6, 26],
         [44,  1, 53]],

        [[48, 39, 47],
         [73, 42, 61]],

        [[ 2, 61, 79],
         [18, 83, 17]]])

In [264]:
b.permute(2,0,1).shape

torch.Size([4, 2, 3])

In [278]:
# Swap only 2 axes
y = b.transpose(dim0=1, dim1=2).shape  # Swaps dimensions 1 and 2
y

torch.Size([2, 4, 3])

In [279]:
# 3D tensor with shape [2, 3, 4]
x = torch.randint(1,20, (2, 3, 4))

# Swap dimensions 1 and 2
y = torch.transpose(x, 1, 2) # torch.transpose(x, 0, 1) is equivalent to x.t()

print("Original :", x.shape)  # torch.Size([2, 3, 4])
print("Transposed :", y.shape)  # torch.Size([2, 4, 3])

Original : torch.Size([2, 3, 4])
Transposed : torch.Size([2, 4, 3])


In [286]:
# Unsqueze -  extra dim (of 1) added in specified position
c = torch.rand(226,226,3)  # Typical image size
c.shape

torch.Size([226, 226, 3])

In [304]:
c.unsqueeze(dim=0).shape

torch.Size([1, 226, 226, 3])

In [290]:
c.unsqueeze(dim=2).shape

torch.Size([226, 226, 1, 3])

In [298]:
# Squeeze -> removes extra dim (where its 1)
d = torch.rand(1,20,1,1,2)
d.shape

torch.Size([1, 20, 1, 1, 2])

In [300]:
# d.squeeze(1) # No effect as 1st dimension is 20 ie !=1
d.squeeze().shape # by default, dim=None (removes all 1s)

torch.Size([20, 2])

In [302]:
d.squeeze(dim=2).shape  # fyi, it can be tuple

torch.Size([1, 20, 2])

# Numpy <--> Tensor

In [305]:
import numpy as np

In [306]:
t = torch.tensor([5,4,42])
t , type(t)

(tensor([ 5,  4, 42]), torch.Tensor)

In [307]:
arr = t.numpy()
arr , type(arr)

(array([ 5,  4, 42], dtype=int64), numpy.ndarray)

In [308]:
a = np.random.randint(1,100,(5,2))
a

array([[51, 84],
       [88, 54],
       [97, 73],
       [55,  7],
       [33, 51]])

In [309]:
ta = torch.from_numpy(a) 
ta

tensor([[51, 84],
        [88, 54],
        [97, 73],
        [55,  7],
        [33, 51]], dtype=torch.int32)

In [310]:
ta_np = ta.numpy()
print(ta_np)

[[51 84]
 [88 54]
 [97 73]
 [55  7]
 [33 51]]
