In [1]:
import torch
print(torch.__version__)

2.8.0+cu126


In [2]:
if torch.cuda.is_available():
    print("GPU is available!")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not available. Using CPU.")

GPU is available!
Using GPU: Tesla T4


# Creating a Tensor

In [3]:
#empty
a = torch.empty(2,3)
print(a)
type(a)

tensor([[2.3822e-44, 0.0000e+00, 5.4992e-11],
        [0.0000e+00, 3.3631e-44, 0.0000e+00]])


torch.Tensor

In [4]:
torch.zeros(2,3)

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [5]:
torch.ones(2,3)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [6]:
#random
torch.rand(2,3)

tensor([[0.2608, 0.1082, 0.3679],
        [0.6145, 0.4139, 0.4981]])

In [7]:
torch.rand(2,3)

tensor([[0.8282, 0.8681, 0.5463],
        [0.6939, 0.3963, 0.5759]])

In [8]:
#manual seed
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [9]:
torch.manual_seed(100)
torch.rand(2,3)

tensor([[0.1117, 0.8158, 0.2626],
        [0.4839, 0.6765, 0.7539]])

In [10]:
torch.tensor([[1,5,2],[3,6,4]])

tensor([[1, 5, 2],
        [3, 6, 4]])

In [11]:
torch.arange(0,10)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
torch.arange(0,10,2)

tensor([0, 2, 4, 6, 8])

In [13]:
torch.linspace(0,10,10)

tensor([ 0.0000,  1.1111,  2.2222,  3.3333,  4.4444,  5.5556,  6.6667,  7.7778,
         8.8889, 10.0000])

In [14]:
torch.eye(5)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [15]:
torch.full((3, 3), 5)

tensor([[5, 5, 5],
        [5, 5, 5],
        [5, 5, 5]])

# Tensor Shapes

In [16]:
x = torch.tensor([[1,2,3],[4,5,6]])
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [17]:
x.shape

torch.Size([2, 3])

In [18]:
torch.empty_like(x)

tensor([[134493568224656,       779600592,       779370704],
        [      779385360,               0,       742801504]])

In [19]:
torch.zeros_like(x)

tensor([[0, 0, 0],
        [0, 0, 0]])

In [20]:
torch.ones_like(x)

tensor([[1, 1, 1],
        [1, 1, 1]])

In [21]:
torch.rand_like(x, dtype=torch.float32)

tensor([[0.2627, 0.0428, 0.2080],
        [0.1180, 0.1217, 0.7356]])

# Tensor Data Types

In [22]:
x.dtype

torch.int64

In [23]:
torch.tensor([1.0,2.0,3.0], dtype=torch.int32)

tensor([1, 2, 3], dtype=torch.int32)

In [24]:
torch.tensor([1,2,3], dtype=torch.float64)

tensor([1., 2., 3.], dtype=torch.float64)

In [25]:
x.to(torch.int32)

tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)

In [26]:
x.to(torch.float32)

tensor([[1., 2., 3.],
        [4., 5., 6.]])

| **Data Type**             | **Dtype**         | **Description**                                                                                                                                                                |
|---------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **32-bit Floating Point** | `torch.float32`   | Standard floating-point type used for most deep learning tasks. Provides a balance between precision and memory usage.                                                         |
| **64-bit Floating Point** | `torch.float64`   | Double-precision floating point. Useful for high-precision numerical tasks but uses more memory.                                                                               |
| **16-bit Floating Point** | `torch.float16`   | Half-precision floating point. Commonly used in mixed-precision training to reduce memory and computational overhead on modern GPUs.                                            |
| **BFloat16**              | `torch.bfloat16`  | Brain floating-point format with reduced precision compared to `float16`. Used in mixed-precision training, especially on TPUs.                                                |
| **8-bit Floating Point**  | `torch.float8`    | Ultra-low-precision floating point. Used for experimental applications and extreme memory-constrained environments (less common).                                               |
| **8-bit Integer**         | `torch.int8`      | 8-bit signed integer. Used for quantized models to save memory and computation in inference.                                                                                   |
| **16-bit Integer**        | `torch.int16`     | 16-bit signed integer. Useful for special numerical tasks requiring intermediate precision.                                                                                    |
| **32-bit Integer**        | `torch.int32`     | Standard signed integer type. Commonly used for indexing and general-purpose numerical tasks.                                                                                  |
| **64-bit Integer**        | `torch.int64`     | Long integer type. Often used for large indexing arrays or for tasks involving large numbers.                                                                                  |
| **8-bit Unsigned Integer**| `torch.uint8`     | 8-bit unsigned integer. Commonly used for image data (e.g., pixel values between 0 and 255).                                                                                    |
| **Boolean**               | `torch.bool`      | Boolean type, stores `True` or `False` values. Often used for masks in logical operations.                                                                                      |
| **Complex 64**            | `torch.complex64` | Complex number type with 32-bit real and 32-bit imaginary parts. Used for scientific and signal processing tasks.                                                               |
| **Complex 128**           | `torch.complex128`| Complex number type with 64-bit real and 64-bit imaginary parts. Offers higher precision but uses more memory.                                                                 |
| **Quantized Integer**     | `torch.qint8`     | Quantized signed 8-bit integer. Used in quantized models for efficient inference.                                                                                              |
| **Quantized Unsigned Integer** | `torch.quint8` | Quantized unsigned 8-bit integer. Often used for quantized tensors in image-related tasks.                                                                                     |


# Mathematical operations

## Scalar operation

In [27]:
x = torch.rand(2,2)
x

tensor([[0.7118, 0.7876],
        [0.4183, 0.9014]])

In [28]:
#addition
x + 2
#subtraction
x - 2
#multiplication
x * 3
#division
x / 3
#int division
(x * 100)//3
#mod
((x * 100)//3)%2
#power
x**2

tensor([[0.5066, 0.6203],
        [0.1750, 0.8125]])

## Element wise operation

In [29]:
a = torch.rand(2,3)
b = torch.rand(2,3)

print(a)
print(b)

tensor([[0.9969, 0.7565, 0.2239],
        [0.3023, 0.1784, 0.8238]])
tensor([[0.5557, 0.9770, 0.4440],
        [0.9478, 0.7445, 0.4892]])


In [30]:
#add
a + b
#sub
a - b
#multiply
a * b
#division
a / b
#power
a ** b
#mod
a % b

tensor([[0.4411, 0.7565, 0.2239],
        [0.3023, 0.1784, 0.3346]])

In [31]:
c = torch.tensor([1, -2, 3, -4])

In [32]:
torch.abs(c)

tensor([1, 2, 3, 4])

In [33]:
torch.neg(c)

tensor([-1,  2, -3,  4])

In [34]:
d = torch.tensor([1.9, 2.3, 3.7, 4.4])

In [35]:
torch.round(d)

tensor([2., 2., 4., 4.])

In [36]:
torch.ceil(d)

tensor([2., 3., 4., 5.])

In [37]:
torch.floor(d)

tensor([1., 2., 3., 4.])

In [38]:
torch.clamp(d, min=2, max=3)

tensor([2.0000, 2.3000, 3.0000, 3.0000])

## Reduction operation

In [39]:
e = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
e

tensor([[8., 0., 7.],
        [0., 0., 9.]])

In [40]:
# sum
torch.sum(e)

tensor(24.)

In [41]:
# sum along columns
torch.sum(e, dim=0)

tensor([ 8.,  0., 16.])

In [42]:
# sum along rows
torch.sum(e, dim=1)

tensor([15.,  9.])

In [43]:
# mean
torch.mean(e)

tensor(4.)

In [44]:
# mean along col
torch.mean(e, dim=0)

tensor([4., 0., 8.])

In [45]:
# median
torch.median(e)

tensor(0.)

In [46]:
torch.max(e)

tensor(9.)

In [47]:
torch.min(e)

tensor(0.)

In [48]:
# product
torch.prod(e)

tensor(0.)

In [49]:
# standard deviation
torch.std(e)

tensor(4.4272)

In [50]:
# variance
torch.var(e)

tensor(19.6000)

In [51]:
torch.argmax(e)

tensor(5)

In [52]:
torch.argmin(e)

tensor(1)

## Matrix operations

In [53]:
f = torch.randint(size=(2,3), low=0, high=10)
g = torch.randint(size=(3,2), low=0, high=10)

print(f)
print(g)

tensor([[5, 7, 3],
        [9, 4, 0]])
tensor([[5, 7],
        [5, 9],
        [9, 7]])


In [54]:
# matrix multiplcation
torch.matmul(f, g)

tensor([[ 87, 119],
        [ 65,  99]])

In [55]:
vector1 = torch.tensor([1, 2])
vector2 = torch.tensor([3, 4])

# dot product
torch.dot(vector1, vector2)

tensor(11)

In [56]:
# transpose
torch.transpose(f, 0, 1)

tensor([[5, 9],
        [7, 4],
        [3, 0]])

In [57]:
h = torch.randint(size=(3,3), low=0, high=10, dtype=torch.float32)
h

tensor([[5., 9., 8.],
        [9., 7., 9.],
        [2., 6., 7.]])

In [58]:
# determinant
torch.det(h)

tensor(-110.)

In [59]:
# inverse
torch.inverse(h)

tensor([[ 0.0455,  0.1364, -0.2273],
        [ 0.4091, -0.1727, -0.2455],
        [-0.3636,  0.1091,  0.4182]])

## Comparison operations

In [60]:
i = torch.randint(size=(2,3), low=0, high=10)
j = torch.randint(size=(2,3), low=0, high=10)

print(i)
print(j)

tensor([[7, 8, 3],
        [6, 1, 5]])
tensor([[5, 0, 4],
        [3, 8, 8]])


In [61]:
# greater than
i > j

tensor([[ True,  True, False],
        [ True, False, False]])

In [62]:
# less than
i < j

tensor([[False, False,  True],
        [False,  True,  True]])

In [63]:
# equal to
i == j

tensor([[False, False, False],
        [False, False, False]])

In [64]:
# not equal to
i != j

tensor([[True, True, True],
        [True, True, True]])

In [65]:
# greater than equal to
i >= j

tensor([[ True,  True, False],
        [ True, False, False]])

In [66]:
# less than equal to
i <= j

tensor([[False, False,  True],
        [False,  True,  True]])

## Special functions

In [67]:
k = torch.randint(size=(2,3), low=0, high=10, dtype=torch.float32)
k

tensor([[3., 3., 5.],
        [0., 6., 4.]])

In [68]:
# log
torch.log(k)

tensor([[1.0986, 1.0986, 1.6094],
        [  -inf, 1.7918, 1.3863]])

In [69]:
# exp
torch.exp(k)

tensor([[ 20.0855,  20.0855, 148.4132],
        [  1.0000, 403.4288,  54.5981]])

In [70]:
# sqrt
torch.sqrt(k)

tensor([[1.7321, 1.7321, 2.2361],
        [0.0000, 2.4495, 2.0000]])

In [71]:
# sigmoid
torch.sigmoid(k)

tensor([[0.9526, 0.9526, 0.9933],
        [0.5000, 0.9975, 0.9820]])

In [72]:
# softmax
torch.softmax(k, dim=0)

tensor([[0.9526, 0.0474, 0.7311],
        [0.0474, 0.9526, 0.2689]])

In [73]:
# relu
torch.relu(k)

tensor([[3., 3., 5.],
        [0., 6., 4.]])

# Inplace Operations

In [74]:
m = torch.rand(2,3)
n = torch.rand(2,3)

print(m)
print(n)

tensor([[0.6574, 0.3451, 0.0453],
        [0.9798, 0.5548, 0.6868]])
tensor([[0.4920, 0.0748, 0.9605],
        [0.3271, 0.0103, 0.9516]])


In [75]:
m.add_(n)

tensor([[1.1494, 0.4199, 1.0058],
        [1.3069, 0.5650, 1.6384]])

In [76]:
m

tensor([[1.1494, 0.4199, 1.0058],
        [1.3069, 0.5650, 1.6384]])

In [77]:
n

tensor([[0.4920, 0.0748, 0.9605],
        [0.3271, 0.0103, 0.9516]])

In [78]:
torch.relu(m)

tensor([[1.1494, 0.4199, 1.0058],
        [1.3069, 0.5650, 1.6384]])

In [79]:
m.relu_()

tensor([[1.1494, 0.4199, 1.0058],
        [1.3069, 0.5650, 1.6384]])

In [80]:
m

tensor([[1.1494, 0.4199, 1.0058],
        [1.3069, 0.5650, 1.6384]])

# Copying a Tensor

In [81]:
a = torch.rand(2,3)
a

tensor([[0.2855, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [82]:
b = a

In [83]:
b

tensor([[0.2855, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [84]:
a[0][0] = 0
a

tensor([[0.0000, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [85]:
b

tensor([[0.0000, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [86]:
id(a)

134488880634672

In [87]:
id(b)

134488880634672

In [88]:
#main copy
b = a.clone()

In [89]:
a

tensor([[0.0000, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [90]:
b

tensor([[0.0000, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [91]:
a[0][0] = 10
a

tensor([[10.0000,  0.2324,  0.9141],
        [ 0.7668,  0.1659,  0.4393]])

In [92]:
b

tensor([[0.0000, 0.2324, 0.9141],
        [0.7668, 0.1659, 0.4393]])

In [93]:
id(a)

134488880634672

In [94]:
id(b)

134488880641312

# Reshaping Tensors

In [95]:
w =  torch.ones(4,4)
w

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [96]:
w.reshape(2,2,2,2)

tensor([[[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]],


        [[[1., 1.],
          [1., 1.]],

         [[1., 1.],
          [1., 1.]]]])

In [97]:
w.flatten()

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [98]:
x = torch.rand(2,3,4)
x

tensor([[[0.2243, 0.8935, 0.0497, 0.1780],
         [0.3011, 0.1893, 0.9186, 0.2131],
         [0.3957, 0.6017, 0.4234, 0.5224]],

        [[0.4175, 0.0340, 0.9157, 0.3079],
         [0.6269, 0.8277, 0.6594, 0.0887],
         [0.4890, 0.5887, 0.7340, 0.8497]]])

In [99]:
x.permute(2,0,1)

tensor([[[0.2243, 0.3011, 0.3957],
         [0.4175, 0.6269, 0.4890]],

        [[0.8935, 0.1893, 0.6017],
         [0.0340, 0.8277, 0.5887]],

        [[0.0497, 0.9186, 0.4234],
         [0.9157, 0.6594, 0.7340]],

        [[0.1780, 0.2131, 0.5224],
         [0.3079, 0.0887, 0.8497]]])

In [100]:
x.permute(2,0,1).shape

torch.Size([4, 2, 3])

In [101]:
x.permute(2,1,0)

tensor([[[0.2243, 0.4175],
         [0.3011, 0.6269],
         [0.3957, 0.4890]],

        [[0.8935, 0.0340],
         [0.1893, 0.8277],
         [0.6017, 0.5887]],

        [[0.0497, 0.9157],
         [0.9186, 0.6594],
         [0.4234, 0.7340]],

        [[0.1780, 0.3079],
         [0.2131, 0.0887],
         [0.5224, 0.8497]]])

In [102]:
x.permute(2,1,0).shape

torch.Size([4, 3, 2])

In [103]:
y = torch.rand(226,226,3)
y.unsqueeze(0).shape

torch.Size([1, 226, 226, 3])

In [104]:
z = torch.rand(1,20)
z.squeeze(0).shape

torch.Size([20])

# NumPy and PyTorch

PyTorch tensors are fundamentally similar to NumPy arrays as both handle multi-dimensional numerical data, but tensors are optimized for deep learning with key features like GPU acceleration and automatic differentiation (Autograd).

In [105]:
import numpy as np
i = np.array([1,2,3])
i

array([1, 2, 3])

In [106]:
torch.from_numpy(i)

tensor([1, 2, 3])

# Tensor Operations on GPU

In [107]:
torch.cuda.is_available()

True

In [108]:
device = torch.device('cuda')

In [109]:
torch.rand((2,3), device=device)

tensor([[0.3563, 0.0303, 0.7088],
        [0.2009, 0.0224, 0.9896]], device='cuda:0')

In [110]:
a =  torch.rand(2,3)
a

tensor([[0.6106, 0.0260, 0.9428],
        [0.8335, 0.6601, 0.2328]])

In [111]:
b = a.to(device)
b

tensor([[0.6106, 0.0260, 0.9428],
        [0.8335, 0.6601, 0.2328]], device='cuda:0')

In [113]:
import torch
import time

size = 10000
matrix_cpu1 = torch.randn(size, size)
matrix_cpu2 = torch.randn(size, size)

start_time = time.time()
result_cpu = torch. matmul(matrix_cpu1, matrix_cpu2)
cpu_time = time.time() - start_time

print(f"Time on CPU: {cpu_time: .4f} seconds")

matrix_gpu1 = matrix_cpu1.to('cuda')
matrix_gpu2 = matrix_cpu2.to('cuda')

start_time = time.time()
result_gpu = torch. matmul(matrix_gpu1, matrix_gpu2)
torch.cuda.synchronize() #ensure all GPU operations are complete
gpu_time = time.time() - start_time

print(f"Time on GPU: {gpu_time: .4f} seconds")

Time on CPU:  7.3700 seconds
Time on GPU:  0.5119 seconds
