In [None]:
import torch
print(torch.__version__)

In [None]:
# Report which device PyTorch can use; the device name is only queried when CUDA is present.
if not torch.cuda.is_available():
    print("GPU not available. Using CPU.")
else:
    # Two output lines: a banner, then the name of CUDA device 0.
    print("GPU is available! Using GPU:", torch.cuda.get_device_name(0), sep="\n")

## Creating a Tensor

In [None]:
# using empty: allocates a 2x3 tensor WITHOUT initialising it, so the values
# displayed are whatever was already in that memory (not guaranteed zeros)
a = torch.empty(2,3)
a
# single elements can be read with either indexing form:
# a[0,1] # OR
# a[1][2]

In [None]:
# check type
type(a)

In [None]:
# using ones
one = torch.ones(1)
one

In [None]:
one.item()

In [None]:
# using zeros
torch.zeros(2,3)

In [None]:
torch.eye(3)

In [None]:
# using rand
torch.rand(size = (2,3))

In [None]:
# why a seed is useful: with no seed set, every run of this cell draws different values
torch.rand(2,3)

In [None]:
# manual_seed
torch.manual_seed(100)
torch.rand(2,3)

In [None]:
# re-seed immediately before each draw that must be reproducible:
# the same seed followed by the same call gives the same values as the previous cell
torch.manual_seed(100)
torch.rand(2,3)

In [None]:
# Other tensor constructors (arguments spelled out by keyword where that helps)

# arange: half-open range [start, end) walked in increments of `step`
print("using arange ->", torch.arange(start=0, end=10, step=2))

# linspace: `steps` evenly spaced points from start to end, both included
print("using linspace ->", torch.linspace(start=0, end=10, steps=10))

# eye: identity matrix with n rows; the column count m defaults to n
print("using eye ->", torch.eye(5))

# full: tensor of the given size with every element set to fill_value
print("using full ->", torch.full((3, 3), 5))

## Tensor Shapes

In [None]:
x = torch.tensor([[1,2,3],[4,5,6]])
x

In [None]:
x.shape

In [None]:
# same shape as `x`, values can be anything
torch.empty_like(input = x)

In [None]:
torch.zeros_like(x)

In [None]:
torch.ones_like(x)

In [None]:
# x was built from Python ints, so it has an integer dtype; rand_like draws
# floats in [0, 1) and cannot store them in an integer tensor, so a floating
# dtype has to be passed explicitly.
# torch.rand_like(x) # fails for the integer x: must mention datatype
t1 = torch.rand_like(x, dtype=torch.float32)
t1

## Tensor Data Types

In [None]:
# find data type
x.dtype

In [None]:
# assign data type
torch.tensor([1.0,2.9,3.3], dtype=torch.int32)

In [None]:
torch.tensor([1,2,3], dtype=torch.float64)

In [None]:
# type conversion
x.to(torch.float32)
# x.float()

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format: keeps the 8-bit exponent (dynamic range) of `float32` but has fewer mantissa bits, so reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs and recent GPUs. |
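| **8-bit Floating Point**  | `torch.float8_e4m3fn`, `torch.float8_e5m2` | Ultra-low-precision floating point (there is no plain `torch.float8`; the variants differ in how the 8 bits are split between exponent and mantissa). Used for experimental applications and extreme memory-constrained environments (less common). |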
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


## Mathematical operations

### 1. Scalar operation

In [None]:
x = torch.rand(2,2)
x

In [None]:
# Scalar operations: the scalar is applied to every element of x.
# Every result is printed, because a notebook cell only displays the value of
# its last expression and bare expressions before it are silently discarded.
print("addition ->", x + 2)
print("subtraction ->", x - 2)
print("multiplication ->", x * 3)
print("division ->", x / 3)
# x holds values in [0, 1), so it is scaled up first to make // and % non-trivial
print("int division ->", (x * 100) // 3)
print("mod ->", ((x * 100) // 3) % 2)
print("power ->", x ** 2)

### 2. Element wise operation

In [None]:
a = torch.rand(2,3)
b = torch.rand(2,3)

print(a)
print(b)

In [None]:
# Element-wise operations: a and b have the same shape, and each operator
# combines the elements found at the same position.
# Every result is printed, because a notebook cell only displays the value of
# its last expression and bare expressions before it are silently discarded.
print("add ->", a + b)
print("sub ->", a - b)
print("multiply ->", a * b)  # OR, torch.mul(a, b)
print("division ->", a / b)
print("power ->", a ** b)
print("mod ->", a % b)

In [None]:
c = torch.tensor([1, -2, 3, -4])

In [None]:
# abs
torch.abs(c)

In [None]:
# negative
torch.neg(c)

In [None]:
d = torch.tensor([1.9, 2.3, 3.7, 4.4])
d

In [None]:
# round
torch.round(d)

In [None]:
d.round()

In [None]:
d

In [None]:
d.round_() # inplace modification

In [None]:
d

In [None]:
# ceil
torch.ceil(d) # Or d.ceil()

In [None]:
# floor
torch.floor(d)

In [None]:
# clamp
torch.clamp(d, min=2, max=4)

### 3. Reduction operation

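N.B.: `dim=i` → combine the values that differ only in their index along axis `i`, then remove axis `i` from the result.

In the new shape, that dimension is eliminated (e.g. summing a `(2, 3)` tensor over `dim=0` gives a result of shape `(3,)`).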

In [None]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
e

In [None]:
print( torch.sum(e) )
# sum along columns
print( torch.sum(e, dim=0) ) # dim 0 (left most) is reduced ie catch 1 row, merging other rows into it, producing the output
# sum along rows
print( torch.sum(e, dim=1) )

In [None]:
# mean
# torch.mean(e)
# mean along col
# torch.mean(e, dim=0)
# mean along row
torch.mean(e, dim=1)

In [None]:
# median
torch.median(e) # OR e.median()

In [None]:
# max and min over all elements of e.
# Both are printed: as two bare expressions only the last one (min) would be
# displayed by the notebook and the max would be silently discarded.
print("max ->", torch.max(e))
print("min ->", torch.min(e))

In [None]:
# product
torch.prod(e)
# torch.prod(e, dim=1)

In [None]:
# standard deviation
torch.std(e)

In [None]:
# variance
torch.var(e)

In [None]:
e

In [None]:
# argmax - flatten and check
torch.argmax(e)

In [None]:
# argmin
torch.argmin(e)

In [None]:
# more complex reduction: x has shape (2, 2, 2); summing over one axis
# collapses that axis and leaves a (2, 2) result
x = torch.tensor(
    [
        [[1, 2], [3, 4]],
        [[5, 6], [7, 8]],
    ]
)
# one printed result per axis, in the order dim=0, dim=1, dim=2
print(*(x.sum(dim=axis) for axis in (0, 1, 2)), sep="\n")

### 4. Matrix operations

In [None]:
# Two random integer matrices with values in [0, 10) and compatible inner
# dimensions, so that (2x3) @ (3x2) is defined.
f = torch.randint(0, 10, (2, 3))
g = torch.randint(0, 10, (3, 2))
print(f, g, sep="\n")

In [None]:
# matrix multiplication: f is (2x3) and g is (3x2), so the product is (2x2)
# torch.mm(f, g)       # 2-D matrices only
torch.matmul(f, g)     # general n-D version

In [None]:
# OR
# f @ g
f.matmul(g)

In [None]:
# dot product - defined for 1-D tensors only; torch.matmul or @ would give the same value here
vector1, vector2 = torch.tensor([1, 2]), torch.tensor([3, 4])

vector1.dot(vector2)

In [None]:
# transpose
torch.transpose(input=f, dim0=0, dim1=1)

In [None]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

In [None]:
# determinant
torch.det(h)

In [None]:
# inverse
torch.inverse(h)

### 5. Comparison operations

In [None]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

In [None]:
# Element-wise comparisons: each one yields a bool tensor with one entry per
# position of i and j.
# Every result is printed, because a notebook cell only displays the value of
# its last expression and bare expressions before it are silently discarded.
print("greater than ->", i > j)
print("less than ->", i < j)
print("equal to ->", i == j)
print("not equal to ->", i != j)
print("greater than equal to ->", i >= j)
print("less than equal to ->", i <= j)
### 6. Special functions

In [None]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
k

In [None]:
# log
torch.log(k)

In [None]:
# exp
torch.exp(k)

In [None]:
# sqrt
torch.sqrt(k)

In [None]:
torch.sin(k) # OR
# k.sin()

In [None]:
# sigmoid
torch.sigmoid(k)

In [None]:
# softmax
torch.softmax(k, dim=0)

In [None]:
# relu
torch.relu(k) # OR
# k.relu()

## Inplace Operations

In [None]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

In [None]:
m.add_(n)

In [None]:
m

In [None]:
n

In [None]:
torch.relu(m)

In [None]:
m.relu_()

In [None]:
m

## Copying a Tensor

In [None]:
a = torch.rand(2,3)
a

In [None]:
b = a # here b is referencing memory of a
id(a) , id(b)

In [None]:
b = a.clone()

In [None]:
b.copy_(a)

In [None]:
id(a) == id(b)

In [None]:
a

In [None]:
b

In [None]:
a[0][0] = 10

In [None]:
a

In [None]:
b # b is not changed as it is a clone

# Tensor Operations on GPU

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 'cuda', 'cuda:0', 'cuda:1', 'cpu' etc

In [None]:
t1 = torch.rand(2,3)  #cpu

In [None]:
# create tensor on GPU
t2 = torch.rand(2,3, device=device) 
t2

In [None]:
# .to() is not in-place: t1 stays on the CPU and t3 is the tensor on `device`.
# With a CUDA device this copies the data from CPU to GPU; when `device` is
# 'cpu' the tensor is already where it should be, so no copy is made and t3 is t1.
t3 = t1.to(device)
t1, t3

### Speed comparison

### **Comparison for PyTorch**

| Feature                     | GTX 1650 (Mobile)                   | Ryzen 5 3550H                       |
|-----------------------------|-------------------------------------|-------------------------------------|
| **Role**                    | GPU (accelerates tensor ops)        | CPU (general-purpose tasks)         |
| **PyTorch Usage**           | Training/inference acceleration     | Data pre-processing, small models   |
| **Tensor Computations**     | Faster, parallelized (CUDA cores)   | Slower, limited by fewer cores      |
| **Memory**                  | 4GB GDDR5 (VRAM, fast access)       | Uses system RAM (slower)            |
| **Parallelism**             | High (CUDA cores - 896)             | Moderate (4 cores, 8 threads)       |
| **Limitations**             | Limited VRAM for large models       | Not optimized for heavy tensor ops  |
| **Best Use Case**           | Training small/medium models        | Pre-processing, light inference     |

---

### Please Note:
- **GTX 1650 (Mobile)**: The better option for PyTorch workloads involving heavy tensor computations, such as training or inference on models that fit into 4GB VRAM.
- **Ryzen 5 3550H**: Adequate for CPU-bound tasks like pre-processing, model evaluation for smaller models, and managing data pipelines. If your dataset or model exceeds the GTX 1650's VRAM, consider techniques like **gradient checkpointing**, **model sharding**, or running on the CPU (although slower).

In [None]:
import time

size = 10000 # Large size for performance comparison

# Create random matrices on CPU
matrix_cpu1 = torch.randn(size, size)
matrix_cpu2 = torch.randn(size, size)

# Time the matrix multiplication on CPU
start_time = time.perf_counter()
result_cpu = torch.matmul(matrix_cpu1, matrix_cpu2)
cpu_time = time.perf_counter() - start_time

print(f"Time on CPU: {cpu_time:.4f} seconds")

# Check if GPU/CUDA is available
if torch.cuda.is_available():
    # Move matrices to GPU (outside the timed region, so transfer cost is excluded)
    matrix_gpu1 = matrix_cpu1.to('cuda')
    matrix_gpu2 = matrix_cpu2.to('cuda')

    # Warm-up run: the first CUDA matmul also pays one-time start-up costs
    # (CUDA context / cuBLAS initialisation). Without it those costs would be
    # counted as compute time and the GPU speed-up would be understated.
    torch.matmul(matrix_gpu1, matrix_gpu2)
    # CUDA work is launched asynchronously: wait until the warm-up has really
    # finished before starting the clock.
    torch.cuda.synchronize()

    # Time the matrix multiplication on GPU
    start_time = time.perf_counter()
    result_gpu = torch.matmul(matrix_gpu1, matrix_gpu2)

    # Synchronize so the timer stops only after the GPU has completed the work
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start_time

    print(f"Time taken on GPU: {gpu_time:.4f} seconds")
    print(f'Performance gain: {round(cpu_time / gpu_time)}')
else:
    print("CUDA is not available. GPU computation cannot be performed.")

# Reshaping Tensors (not inplace)

In [None]:
a = torch.ones(4,6)
a

In [None]:
a.reshape(2,3,4) # first 2 blocks, then each block has 3 rows and 4 columns
# like flattening first, then reshaping

In [None]:
a.shape # shows in this format (dim0 , dim1, dim2, ... )

In [None]:
a.reshape(-1,3) 

In [None]:
a.reshape(1,-1) #same as (1,24)

In [None]:
# Flatten - 1D tensor
a.flatten()

In [None]:
b = torch.randint(low=1,high=100,size=(2,3,4))
b

In [None]:
b.permute(2,0,1)
# new shape of the tensor will be (4, 2, 3).

In [None]:
b.permute(2,0,1).shape

In [None]:
# Swap only 2 axes
y = b.transpose(dim0=1, dim1=2)  # flips row <-> colm
b, y

In [None]:
b.shape, y.shape

In [None]:
# Unsqueeze - an extra dimension of size 1 is inserted at the specified position
# (c is the 3-D input used by the unsqueeze cells that follow)
c = torch.rand(226,226,3)  # Typical image size
c.shape

In [None]:
c.unsqueeze(dim=0).shape

In [None]:
c.unsqueeze(dim=2).shape

In [None]:
# Squeeze -> removes extra dim (where its 1)
d = torch.rand(1,20,1,1,2)
d.shape

In [None]:
# d.squeeze(1) # No effect as 1st dimension is 20 ie !=1
d.squeeze().shape # by default, dim=None (removes all 1s)

In [None]:
d.squeeze(dim=2).shape  # fyi, it can be tuple

# Numpy <--> Tensor

In [None]:
import numpy as np
np.__version__

In [None]:
# 1-D tensor holding the three values 5, 4 and 42 (shape (3,)).
# NOTE: torch.tensor takes the DATA, not the shape, so this is not a
# 5x4x42 tensor; a tensor of that shape would need e.g. torch.rand(5, 4, 42).
t = torch.tensor([5,4,42])
t , type(t)

In [None]:
# .numpy() on a CPU tensor does not copy: arr and t share the same memory,
# so an in-place change to one is visible through the other
arr = t.numpy()
arr , type(arr)

In [None]:
a = np.random.randint(1,100,(5,2))
a

In [None]:
# from_numpy does not copy either: ta and the ndarray `a` share the same memory,
# and ta takes its dtype from the array
ta = torch.from_numpy(a) 
ta

In [None]:
ta_np = ta.numpy()
print(ta_np)