<a href="https://colab.research.google.com/github/Signeemmanuel/research-journey-2025/blob/master/Notebooks/Phase_1_Foundation/Day_005_PyTorch_Shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import numpy as np

print(f"Pytorch version {torch.__version__}")

Pytorch version 2.9.0+cu126


In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x7ec8db7496d0>

# The Anatomy of tensors
Creating tensors and inspecting their properties.

In [3]:
# Create a 3-Rank tensor (eg. represent a simplified image: Channel, Height, Width)
x = torch.rand(3, 4, 4)

print("Tensor x:\n", x)
print("\n-----Properties-----\n")
print(f"Shape: {x.shape}")
print(f"Data Type: {x.dtype}")
print(f"Device: {x.device}")
print(f"Total Elements: {x.numel()}")

Tensor x:
 tensor([[[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936],
         [0.8694, 0.5677, 0.7411, 0.4294]],

        [[0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317],
         [0.1053, 0.2695, 0.3588, 0.1994],
         [0.5472, 0.0062, 0.9516, 0.0753]],

        [[0.8860, 0.5832, 0.3376, 0.8090],
         [0.5779, 0.9040, 0.5547, 0.3423],
         [0.6343, 0.3644, 0.7104, 0.9464],
         [0.7890, 0.2814, 0.7886, 0.5895]]])

-----Properties-----

Shape: torch.Size([3, 4, 4])
Data Type: torch.float32
Device: cpu
Total Elements: 48


# View vs. Reshape
Understanding memory layout

In [4]:
# We want to flatten the image pixel per channel
# Target (3, 16)

# METHOD 1: View
# View works only on contigous tensors (tensors stored in a single block of memory)
z_view = x.view(3, 16)
print(f"View Shape: {z_view.shape}")

# METHOD 2: reshape
# reshape works on any tensors. it will copy data if neccessary to make it contiguous
z_reshape = x.reshape(3, 16)
print(f"Reshape shape: {z_reshape.shape}")

# Why use view()?
# view() is faster because it never copies data. It just changes the metadata (stride).
# If you try to view() a non-contiguous tensor, PyTorch throws an error.

View Shape: torch.Size([3, 16])
Reshape shape: torch.Size([3, 16])


# Permute (Crucial for Computer Vision)
Concept changing dimension order (NHWC vs NCHW)

In [5]:
# Dummy batch of images
# N (Batch) = 2
# C (Channels) = 3
# H (Height) = 224
# W (Width) = 224
batch_images = torch.zeros(2, 3, 244, 244)
print(f"Original Shape: {batch_images.shape}")

# PROBLEM: Matplotlib and OpenCV expect images as (Height, Width, Channels).
# We need to move dim 1 (C) to the end

# ACTION: permute dimension (0, 2, 3, 1)
# keep (0) N at pos 0
# Move (1) C at pos 3
# Move (2) H at pos 1
# Move (3) W at pos 2
images_for_plotting = batch_images.permute(0, 2, 3, 1)
print(f"Permuted shape (OpenCV Format): {images_for_plotting.shape}")

# WARNING: Permute breaks continuity
print(f"Is Contiguous: {images_for_plotting.is_contiguous()}")

# # If we try to view this now it will crash
try:
  images_for_plotting.view(2, -1) # Try to flatten
except RuntimeError as e:
  print(f"\n Expected Error: {e}")


# FIX: Cal .contiguous() before .view()
flattened = images_for_plotting.contiguous().view(2, -1)
print(f"Fixed shape: {flattened.shape}")

Original Shape: torch.Size([2, 3, 244, 244])
Permuted shape (OpenCV Format): torch.Size([2, 244, 244, 3])
Is Contiguous: False

 Expected Error: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
Fixed shape: torch.Size([2, 178608])


# Squeeze and Unsqueeze
Concepts: Adding or removing dimensions of size 1(Broadcasting prep).

In [6]:
# Unsqueeze: Add a dimension (often for batching)
img = torch.zeros(3, 224, 224) # Single image
print(f"Single Image shape: {img.shape}")

# The Model expects (Batch, C, H, W). We need to add batch dim at dim index 0.
img_batch = img.unsqueeze(1)
print(f"Batch Image shape: {img_batch.shape}")

# Squeeze: Remove dimension of size 1
# Useful when a model outputs shape (Batch, 1) but you need batch for loss calculation
model_output = torch.zeros(10, 1)
print(f"Model output: {model_output.shape}")

squeeze_output = model_output.squeeze(1) # Remove dim at index 1
print(f"Squeeze ouput: {squeeze_output.shape}")

Single Image shape: torch.Size([3, 224, 224])
Batch Image shape: torch.Size([3, 1, 224, 224])
Model output: torch.Size([10, 1])
Squeeze ouput: torch.Size([10])


# Practical Challenge (Flattening Linear Layer)
This is exactly what happens inside CNN classifyer head

In [7]:
# Simulate the output of a convolutional layer
# Batch = 128, Channel = 64, FeatureMap = 4x4

conv_output = torch.rand(128, 64, 4, 4)

# We want to feed this into a Linear Layer (Dense).
# Linear layers expect (Batch_Size, Input_Features).
# We must flatten (64, 4, 4) into a single vector of size 64*4*4 = 1024.

# Correct way:
# Keep dim 0 (Batch) as is.
# Flatten everything else (-1 tells PyTorch to figure out the number).
flat_output = conv_output.view(128, -1)

print(f"Conv Output: {conv_output.shape}")
print(f"Linear Input: {flat_output.shape}")

assert flat_output.shape == (128, 1024)
print("Transformation correct.")

Conv Output: torch.Size([128, 64, 4, 4])
Linear Input: torch.Size([128, 1024])
Transformation correct.
