In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import torch
print(torch.__version__)

2.8.0+cu126


In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


## Scalar, Tensor and Matrix

In [None]:
x = torch.tensor(7)
x, x.ndim, x.item(), x.shape # in PyTorch a scalar has no dimensions (ndim == 0, empty shape)

(tensor(7), 0, 7, torch.Size([]))

In [None]:
x = torch.tensor([7])
x, x.ndim, x.item(), x.shape

(tensor([7]), 1, 7, torch.Size([1]))

In [None]:
x = torch.tensor([7,6])
x, x.ndim,  x.shape # still 1 dimension (a vector); it just holds 2 elements

(tensor([7, 6]), 1, torch.Size([2]))

In [None]:
x[0].item(), x[1].item()

(7, 6)

In [None]:
x = torch.tensor([[7,8], [9,10]]) # matrix

x.ndim, x.shape

(2, torch.Size([2, 2]))

## Selecting a Row, Column, Element

In [None]:
x[0], x[1] # accessing row

(tensor([7, 8]), tensor([ 9, 10]))

In [None]:
x[0][1], x[0,1] # both work, the second is faster since 1st is two successive indexing

(tensor(8), tensor(8))

In [None]:
x[:,0], x[:,1] # accessing column

(tensor([7, 9]), tensor([ 8, 10]))

In [None]:
x = torch.tensor([[[1,2,4],
                   [3,5,6],
                   [2,10,3]]])   # tensorrr-sama

In [None]:
x.ndim, x.shape # shape look weird  !!...it means we have ONE 3x3 tensor

(3, torch.Size([1, 3, 3]))

we can think of it like "1 image of size 3x3"

In [None]:
x[0]

tensor([[ 1,  2,  4],
        [ 3,  5,  6],
        [ 2, 10,  3]])

In [None]:
x[:,:,:]

tensor([[[ 1,  2,  4],
         [ 3,  5,  6],
         [ 2, 10,  3]]])

In [None]:
x[0,:,:]

tensor([[ 1,  2,  4],
        [ 3,  5,  6],
        [ 2, 10,  3]])

In [None]:
x[0,0,:]

tensor([1, 2, 4])

In [None]:
x[0,:,0]

In [None]:
x[:,0,0]

tensor([1])

In [None]:
x[0,0,0]

tensor(1)

## Analogy with Image

In [None]:
x = torch.tensor([[[1,2,4],
                   [3,5,6],
                   [2,10,3]],

                  [[3,4,5],
                  [12,23,45],
                  [56,12,43]]])

In [None]:
x.ndim, x.shape # 2 images of size 3x3

(3, torch.Size([2, 3, 3]))

In [None]:
x[0] # selecting first image

tensor([[ 1,  2,  4],
        [ 3,  5,  6],
        [ 2, 10,  3]])

In [None]:
x[0,:,:]

tensor([[ 1,  2,  4],
        [ 3,  5,  6],
        [ 2, 10,  3]])

In [None]:
x[:,0,:] # first row for each image...maybe useless??

tensor([[1, 2, 4],
        [3, 4, 5]])

In [None]:
x[:,:,0] # first column of each image (one value per row)

tensor([[ 1,  3,  2],
        [ 3, 12, 56]])

Interpretation:

Dimension 0 → 2 (like a batch size of 2).

Dimension 1 → 3 (height, rows).

Dimension 2 → 3 (width, columns).

So yes — you can think of it as 2 images of size 3×3.

⚠️ But one nuance: in vision tasks, PyTorch usually uses the NCHW convention:

N = batch size

C = channels

H = height

W = width

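Here your tensor is [2, 3, 3]. That matches N, H, W, i.e. no channel dimension yet (grayscale).

If you wanted RGB images, you'd expect shape [2, 3, 3, 3]: read as (N, C, H, W) in the channels-first convention, or as (N, H, W, C) in the channels-last convention. Because H = W = C = 3 here, both conventions happen to give the same shape — either way, each pixel has 3 channels.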

In [None]:
# 54 consecutive integers (0..53) laid out as a 4-D tensor of shape 2 x 3 x 3 x 3.
x = torch.arange(0, 54).reshape((2, 3, 3, 3))

print("Shape:", x.shape)
print(x)

Shape: torch.Size([2, 3, 3, 3])
tensor([[[[ 0,  1,  2],
          [ 3,  4,  5],
          [ 6,  7,  8]],

         [[ 9, 10, 11],
          [12, 13, 14],
          [15, 16, 17]],

         [[18, 19, 20],
          [21, 22, 23],
          [24, 25, 26]]],


        [[[27, 28, 29],
          [30, 31, 32],
          [33, 34, 35]],

         [[36, 37, 38],
          [39, 40, 41],
          [42, 43, 44]],

         [[45, 46, 47],
          [48, 49, 50],
          [51, 52, 53]]]])


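How to interpret: 2 RGB images of size 3x3, read in channels-last (NHWC) order

Axis 0 → 2 images (batch).

Axis 1 → 3 rows (height).

Axis 2 → 3 cols (width).

Axis 3 → 3 channels/features per pixel. (Under PyTorch's usual NCHW convention, axis 1 would be the channels instead.)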

In [None]:
x = torch.rand(4) # 1 dim so 1 bracket
x

tensor([0.6928, 0.2906, 0.4711, 0.8806])

In [None]:
x = torch.rand(3,4) # 2 dimensions so 2 brackets
x

tensor([[0.3084, 0.1980, 0.3757, 0.7809],
        [0.8052, 0.6669, 0.0620, 0.0819],
        [0.7246, 0.5777, 0.5551, 0.1689]])

In [None]:
x = torch.rand(1,3,4) # 3 dimensions so 3 brackets
x

tensor([[[0.7086, 0.5962, 0.3027, 0.6741],
         [0.2717, 0.8067, 0.2763, 0.2155],
         [0.5973, 0.9163, 0.5194, 0.3911]]])

A tensor's shape doesn't carry an "image" or "batch" interpretation on its own. Interpretation only comes from context:

In ML, we usually impose meaning:

[1,3,4] → "batch of 1 sample, each of size 3×4."

[3,4] → could mean "one sample with 3 features and 4 timesteps," or "3 samples each with 4 features," depending on how you treat the axes.

Without that convention, it's just numbers in a grid.

In [None]:
x = torch.rand(2,3,4) # 3 dimensions so 3 brackets...........2 mono images of size 3x4
x

tensor([[[0.6815, 0.9277, 0.3684, 0.3456],
         [0.4703, 0.8811, 0.0911, 0.1918],
         [0.4440, 0.0894, 0.6446, 0.0855]],

        [[0.0181, 0.8578, 0.2318, 0.0015],
         [0.7572, 0.3111, 0.6815, 0.5139],
         [0.6068, 0.8450, 0.1117, 0.7629]]])

## Ones and Zero tensors

In [None]:
# Two tensors of shape (3, 4) in the default dtype: one filled with 0s, one with 1s.
zeros = torch.zeros(3, 4)
ones = torch.ones(3, 4)

(zeros, ones)

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [None]:
torch.range(0,10) # deprecated

  torch.range(0,10) # deprecated


tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [None]:
torch.arange(0,10) # NOT deprecated: this is the recommended replacement for torch.range; the end value (10) is excluded

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
x = torch.tensor([1,2,3,4])
x.dtype

torch.int64

In [None]:
x = torch.tensor([1.,2.,3.,4.])
x.dtype

torch.float32

In [None]:
x = torch.tensor([1,2,3,4], dtype = torch.float16)
x.dtype

torch.float16

## Tensor Arguments - dtype, shape, grad

In [None]:
# other important parameters while creating tensors

x = torch.tensor([1.,2.,3.,4.], dtype = torch.float32, # element type; several other dtypes exist, see the torch.dtype documentation
                                device = None, # where the tensor lives ("cpu" or "cuda"); None uses the current default device
                                requires_grad = True) # whether autograd should record operations on this tensor to compute gradients

In [None]:
x.element_size() #4 each element uses 4 bytes (32 bits = 4 bytes).

4

In [None]:
# x.storage() returns the deprecated TypedStorage (and warns on torch 2.x); untyped_storage() is the supported accessor.
x.untyped_storage().nbytes() # 16 because x has 4 elements (each 4 bytes)

16

In [None]:
# Two equal-valued (but independent) 2x2 float32 matrices for the multiplication examples.
x = torch.tensor([[1., 2.], [3., 4.]], dtype=torch.float32)
y = x.clone()

## Multiplication

In [None]:
x*y # does element-wise

tensor([[ 1.,  4.],
        [ 9., 16.]])

In [None]:
torch.matmul(x,y)

tensor([[ 7., 10.],
        [15., 22.]])

In [None]:
x @ y #does mat-mul just like abovw

tensor([[ 7., 10.],
        [15., 22.]])

## Tensor-  min, max, sum, mean

In [None]:
x = torch.tensor([1,2,3,4])

torch.min(x), x.min(), torch.max(x), x.max()

(tensor(1), tensor(1), tensor(4), tensor(4))

In [None]:
torch.sum(x), x.sum()

(tensor(10), tensor(10))

In [None]:
# torch.mean(x) / x.mean() fail here because x holds integers (int64): mean() needs a floating-point (or complex) dtype.
# Casting fixes it; float32 and float64 both work.
torch.mean(x.type(torch.float32))

tensor(2.5000)

In [None]:
x.type(torch.float32).mean() # this also work

tensor(2.5000)

In [None]:
x.mean(dtype=torch.float32)

tensor(2.5000)

`x.mean(dtype=torch.float32)` is more efficient 🚀 because:

PyTorch doesn't create a whole new float32 copy of x.

Instead, it tells the mean kernel: "accumulate and output in float32."

So only the computation is done in float32, no extra tensor allocation.

In [None]:
x.argmin() # finds position of min value

tensor(0)

In [None]:
x.argmax() # finds position of max value; commonly used to pick the predicted class from softmax outputs

tensor(3)

## 🔄 Difference between `reshape()` and `view()` in PyTorch


In [None]:
x = torch.arange(1.,10.)
x

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [None]:
 # x.reshape(1,7) is invalid: x has 9 elements, which cannot fill a 1x7 shape

 x.reshape(1,9)

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
x.reshape(9,1)

tensor([[1.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])

In [None]:
xx = x.reshape(3,3)
xx

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [None]:
#changing the view
xxx = x.view(1,9)
xxx

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

Both `.view()` and `.reshape()` change the **shape** of a tensor, but they differ in how they handle **memory**.


### 🧠 Key Difference

- **`view()`** → Never copies; only works if the tensor is **contiguous in memory** (stored in one continuous block).  
- **`reshape()`** → Works in **all cases**; it returns a view when it can, and if the tensor isn't contiguous it will **make a copy** behind the scenes.


### 🧩 Example

```python
x = torch.arange(6)
y = x.view(2, 3)       # ✅ Works fine
z = x.reshape(2, 3)    # ✅ Same result


a = torch.arange(6).view(2, 3)
b = a.t()               # Transposed → Non-contiguous

b.view(6)               # ❌ RuntimeError
b.reshape(6)            # ✅ Works (makes a copy)
```


## torch Stack and permute

In [None]:
torch.stack([x,x,x]) # stacks tensors vertically (along dim=0)

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
torch.stack([x,x,x], dim=0) # stacks tensors vertically (along dim=0)

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
torch.stack([x,x,x], dim=1) # stacks tensors horizontally (along a new dim=1): each input becomes a column

tensor([[1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [9., 9., 9.]])

In [None]:
x = torch.randn(2, 3, 4)
x

tensor([[[ 0.5663, -0.6957,  0.5359, -1.3692],
         [-2.0371, -0.8621,  0.4828, -0.3460],
         [ 0.0294,  1.1338,  0.3389, -0.8915]],

        [[ 0.8913, -1.1242,  0.1890,  0.8281],
         [-0.5499, -0.2697,  0.5652,  0.1030],
         [-2.0573,  0.9260, -1.2318, -0.1442]]])

In [None]:
x.permute(1,0,2)

tensor([[[ 0.5663, -0.6957,  0.5359, -1.3692],
         [ 0.8913, -1.1242,  0.1890,  0.8281]],

        [[-2.0371, -0.8621,  0.4828, -0.3460],
         [-0.5499, -0.2697,  0.5652,  0.1030]],

        [[ 0.0294,  1.1338,  0.3389, -0.8915],
         [-2.0573,  0.9260, -1.2318, -0.1442]]])

It reorders existing dimensions, i.e. (0,1,2) => (1,0,2),
so, in this case, the shape changes from (2,3,4) to (3,2,4).

In [None]:
x

tensor([[[ 0.5663, -0.6957,  0.5359, -1.3692],
         [-2.0371, -0.8621,  0.4828, -0.3460],
         [ 0.0294,  1.1338,  0.3389, -0.8915]],

        [[ 0.8913, -1.1242,  0.1890,  0.8281],
         [-0.5499, -0.2697,  0.5652,  0.1030],
         [-2.0573,  0.9260, -1.2318, -0.1442]]])

## 🔄 Understanding `torch.permute`

**What it does:**  
`permute()` reorders existing dimensions of a tensor.  
It doesn't change data or add new ones — it only changes how you *index* the same data in memory.

```python
x = torch.randn(2, 3, 4)
x_permuted = x.permute(1, 0, 2)
print(x.shape)          # (2, 3, 4)
print(x_permuted.shape) # (3, 2, 4)
```

A common use is reordering tensor dimensions for model compatibility.
Example:
(batch, height, width, channels) → (batch, channels, height, width)
before feeding into a PyTorch `Conv2d` layer.

## torch Squeeze and unsqueeze


In [None]:
x = torch.arange(1,11)
x

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
x.size()

torch.Size([10])

In [None]:
xx = x.reshape(1,10)
xx

tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])

In [None]:
xx.size()

torch.Size([1, 10])

In [None]:
xxx = torch.squeeze(xx) # removes every size-1 dimension: shape [1, 10] becomes [10]
xxx # display the result (an assignment alone produces no cell output)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
xx.size(), xxx.size()

(torch.Size([1, 10]), torch.Size([10]))

## Difference between reshape() and permute()

reshape changes the sizes of dimensions, not their order.
permute reorders existing axes — it doesn't change how many axes exist or how long they are.

In [None]:
x = torch.randn(2, 3, 4)
x

tensor([[[-1.0416,  0.9143, -0.6903,  1.3059],
         [ 2.3172, -0.3119,  0.0691,  0.1537],
         [ 0.8972, -0.0977, -0.0300,  0.4406]],

        [[ 0.3402, -1.7755,  0.4092,  0.7672],
         [-0.3362, -0.2492,  0.6483, -0.4623],
         [ 1.0981, -1.4715, -0.0363,  0.6668]]])

In [None]:
x.reshape(1,4,-1)

tensor([[[-1.0416,  0.9143, -0.6903,  1.3059,  2.3172, -0.3119],
         [ 0.0691,  0.1537,  0.8972, -0.0977, -0.0300,  0.4406],
         [ 0.3402, -1.7755,  0.4092,  0.7672, -0.3362, -0.2492],
         [ 0.6483, -0.4623,  1.0981, -1.4715, -0.0363,  0.6668]]])

In [None]:
x.reshape(1,4,1,-1)

tensor([[[[-1.0416,  0.9143, -0.6903,  1.3059,  2.3172, -0.3119]],

         [[ 0.0691,  0.1537,  0.8972, -0.0977, -0.0300,  0.4406]],

         [[ 0.3402, -1.7755,  0.4092,  0.7672, -0.3362, -0.2492]],

         [[ 0.6483, -0.4623,  1.0981, -1.4715, -0.0363,  0.6668]]]])

In [None]:
#x.permute(1,4,1) # this won't work: x has just 3 dimensions (0,1,2), and each must be listed exactly once

x.permute(1,0,2) # swap first 2 axes (0,1)

tensor([[[-1.0416,  0.9143, -0.6903,  1.3059],
         [ 0.3402, -1.7755,  0.4092,  0.7672]],

        [[ 2.3172, -0.3119,  0.0691,  0.1537],
         [-0.3362, -0.2492,  0.6483, -0.4623]],

        [[ 0.8972, -0.0977, -0.0300,  0.4406],
         [ 1.0981, -1.4715, -0.0363,  0.6668]]])

### 🔸 When to Use `reshape` vs `permute`

#### 🟢 Use `reshape` when:
- You flatten or unflatten tensors. `x = x.reshape(batch_size, -1)`
- You merge or split dimensions. `x = x.reshape(batch, channels * height * width)`
- Axis order stays the same — only the *shape* changes.
- Example use case: preparing input for a `Linear` layer.

#### 🔵 Use `permute` when:
- You reorder axes (change their meaning). `x = x.permute(0, 3, 1, 2)` converts NHWC → NCHW
- You transpose dimensions. `x = x.permute(1, 0)` → swaps rows and columns
- Different frameworks expect different axis orders (e.g., PyTorch vs NumPy).

#### ⚡ Rule of Thumb:
- "Same data, different grouping" → `reshape`  
- "Same data, different orientation" → `permute`

# GPU access with PyTorch

In [None]:
import torch
torch.cuda.is_available()

False

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [None]:
torch.cuda.device_count() # count number of devices

0

## putting tensors and models on gpu

'cpu'

In [None]:
x = torch.tensor([1,2,3])
x.device

device(type='cpu')

## move tensor to GPU if available

In [None]:
xx = x.to(device)
xx.device

device(type='cpu')

## numpy works only with cpu, so we might need to move back

In [None]:
xx.numpy() # this doesn't work if tensor is on gpu, we need to first copy to cpu
xx

tensor([1, 2, 3])

In [None]:
xxx = xx.cpu().numpy() # moving to cpu, then converting
xxx

array([1, 2, 3])

PyTorch added .cpu() and .cuda() before .to() existed.
They're convenience wrappers specialized for the two most common moves:

``` python
x = x.cuda()   # same as x.to("cuda")  
x = x.cpu()    # same as x.to("cpu")
```

They do the same work as `.to()` (so they are not meaningfully faster); they are simply shorter to read for these simple cases.

✅ Summary

Use .to(device) for general, portable code.

Use .cpu() / .cuda() when you know where it's going.