In [1]:
#-----creating a dummy tensor-----#
import torch
from PIL import Image
import torchvision.transforms as transforms
image = Image.open('magic_lamp.png')
transform = transforms.Compose([
    transforms.PILToTensor()
])
tensor = transform(image)
print(f"sizeOfImage:","50x50")
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")


sizeOfImage: 50x50
Shape of tensor: torch.Size([4, 50, 50])
Datatype of tensor: torch.uint8
Device tensor is stored on: cpu


In [2]:
from torch import nn
#-----creating a dummy tensor-----#
import torch
from PIL import Image
import torchvision.transforms as transforms
image = Image.open('magic_lamp.png')
transform = transforms.Compose([
    transforms.PILToTensor()
])
tensor = transform(image)
print(f"sizeOfImage:","50x50")
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(50*50*4, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork()
print(model)

sizeOfImage: 50x50
Shape of tensor: torch.Size([4, 50, 50])
Datatype of tensor: torch.uint8
Device tensor is stored on: cpu
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=10000, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [3]:
import torch
from torch import nn
from PIL import Image
import torchvision.transforms as transforms

# Load the image and convert to proper tensor format
image = Image.open('magic_lamp.png')
transform = transforms.Compose([
    transforms.Resize((50, 50)),  # Ensure 50x50 size
    transforms.ToTensor()  # Converts to float32 and normalizes to [0,1]
])
dummy_tensor = transform(image)

print(f"sizeOfImage: 50x50")
print(f"Shape of tensor: {dummy_tensor.shape}")
print(f"Datatype of tensor: {dummy_tensor.dtype}")
#print(f"Device tensor is stored on: {dummy_tensor.device}")

# Get the actual number of channels from the tensor
channels = dummy_tensor.shape[0]

class NeuralNetwork(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(50*50*input_channels, 512),  #
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork(channels)
#print(model)

# Add batch dimension for the model
input_tensor = dummy_tensor.unsqueeze(0)
logits = model(input_tensor)
print(f"Input->Output:   {dummy_tensor.shape}->{logits.shape}")
print(f"Output logits: {logits}")


sizeOfImage: 50x50
Shape of tensor: torch.Size([4, 50, 50])
Datatype of tensor: torch.float32
Input->Output:   torch.Size([4, 50, 50])->torch.Size([1, 10])
Output logits: tensor([[-0.0059, -0.0186,  0.0160,  0.0523, -0.0003,  0.0428,  0.0967, -0.0012,
         -0.0351, -0.1542]], grad_fn=<AddmmBackward0>)


In [4]:
# Method 1: Simple Sequential
import torch
from torch import nn
from PIL import Image
import torchvision.transforms as transforms

# Load the image and convert to proper tensor format
image = Image.open('magic_lamp.png')
transform = transforms.Compose([
    transforms.Resize((50, 50)),  # Ensure 50x50 size
    transforms.ToTensor()  # Converts to float32
])
dummy_tensor = transform(image)
input_tensor = dummy_tensor.flatten()

print(f"sizeOfImage: 50x50")
print(f"Shape of tensor: {dummy_tensor.shape}")
print(f"Datatype of tensor: {dummy_tensor.dtype}")
print(f"Shape of tensor: {input_tensor.shape}")
print(f"Datatype of tensor: {input_tensor.dtype}")
#print(f"Device tensor is stored on: {dummy_tensor.device}")

model_sequential = nn.Sequential(
    nn.Linear(10000, 1000),  
    nn.ReLU(),
    nn.Linear(1000, 100),            
    nn.ReLU(),
    nn.Linear(100, 10),        
    nn.ReLU(),
    nn.Dropout(),  #randomly sets some neurnons to zero
)

# Debug function to see shapes at each layer
def debug_sequential_shapes(model, input_tensor):
    x = input_tensor
    print(f"Input shape: {x.shape}")
    
    for i, layer in enumerate(model):
        x = layer(x)
        print(f"After layer {i+1} ({layer.__class__.__name__}): {x.shape}")
    
    return x
print(debug_sequential_shapes(model_sequential,input_tensor))




sizeOfImage: 50x50
Shape of tensor: torch.Size([4, 50, 50])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([10000])
Datatype of tensor: torch.float32
Input shape: torch.Size([10000])
After layer 1 (Linear): torch.Size([1000])
After layer 2 (ReLU): torch.Size([1000])
After layer 3 (Linear): torch.Size([100])
After layer 4 (ReLU): torch.Size([100])
After layer 5 (Linear): torch.Size([10])
After layer 6 (ReLU): torch.Size([10])
After layer 7 (Dropout): torch.Size([10])
tensor([0.0986, 0.0000, 0.0473, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0675,
        0.0000], grad_fn=<MulBackward0>)


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. LINEAR LAYER
# Applies a linear transformation: y = xW^T + b
# Commonly used as the final layer in classifiers or for dimensionality changes
linear = nn.Linear(3, 2)  # Maps 3 features to 2 features
x = torch.tensor([[1.0, 2.0, 3.0]])
output = linear(x)
print("nn.Linear(3, 2) - Fully Connected Layer")
print("Description: Applies linear transformation with learnable weights and bias")
print("Use case: Classification heads, feature transformation")
print("Input :", x.shape)  # torch.Size([1, 3])
print("Output:", output.shape)  # torch.Size([1, 2])
print()

# 2. 2D CONVOLUTIONAL LAYER
# Applies 2D convolution operation using learnable filters/kernels
# Essential for computer vision tasks to detect features like edges, textures
conv = nn.Conv2d(in_channels=4, out_channels=6, kernel_size=5)
x = torch.randn(1, 4, 32, 32)  # Batch=1, Channels=4, Height=32, Width=32
output = conv(x)
print("nn.Conv2d(in_channels=4, out_channels=6, kernel_size=5)")
print("Description: Applies 2D convolution with 6 filters of size 5x5")
print("Use case: Feature extraction in images, CNNs")
print("Input :", x.shape)  # torch.Size([1, 4, 32, 32])
print("Output:", output.shape)  # torch.Size([1, 6, 28, 28])
print()

# 3. MAX POOLING LAYER
# Downsamples by taking the maximum value in each pooling window
# Reduces spatial dimensions while retaining important features
maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
x = torch.randn(1, 3, 28, 28)
output = maxpool(x)
print("nn.MaxPool2d(kernel_size=2, stride=2)")
print("Description: Downsamples by taking max value in 2x2 windows")
print("Use case: Reducing spatial dimensions, translation invariance")
print("Input :", x.shape)  # torch.Size([1, 3, 28, 28])
print("Output:", output.shape)  # torch.Size([1, 3, 14, 14])
print()

# 4. BATCH NORMALIZATION LAYER
# Normalizes inputs by adjusting and scaling activations
# Accelerates training and provides regularization effect
batchnorm = nn.BatchNorm2d(3)  # 3 channels to normalize
x = torch.randn(4, 3, 16, 16)  # Batch of 4 images
output = batchnorm(x)
print("nn.BatchNorm2d(3)")
print("Description: Normalizes activations across batch dimension")
print("Use case: Stabilizing training, faster convergence")
print("Input :", x.shape)  # torch.Size([4, 3, 16, 16])
print("Output:", output.shape)  # torch.Size([4, 3, 16, 16])
print()

# 5. DROPOUT LAYER
# Randomly sets input elements to zero during training
# Prevents overfitting by forcing network to not rely on specific neurons
dropout = nn.Dropout(p=0.5)  # 50% probability of zeroing elements
x = torch.randn(2, 10)
output = dropout(x)
print("nn.Dropout(p=0.5)")
print("Description: Randomly zeros 50% of input elements during training")
print("Use case: Regularization, preventing overfitting")
print("Input :", x.shape)  # torch.Size([2, 10])
print("Output:", output.shape)  # torch.Size([2, 10])
print()

# 6. LSTM LAYER
# Long Short-Term Memory - processes sequential data with memory cells
# Can capture long-term dependencies in sequences
lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2, batch_first=True)
x = torch.randn(3, 5, 10)  # Batch=3, Sequence=5, Features=10
output, (h_n, c_n) = lstm(x)
print("nn.LSTM(input_size=10, hidden_size=20, num_layers=2)")
print("Description: Processes sequences with memory cells and gates")
print("Use case: NLP, time series, any sequential data")
print("Input :", x.shape)  # torch.Size([3, 5, 10])
print("Output:", output.shape)  # torch.Size([3, 5, 20])
print("Hidden:", h_n.shape)  # torch.Size([2, 3, 20])
print("Cell  :", c_n.shape)  # torch.Size([2, 3, 20])
print()


nn.Linear(3, 2) - Fully Connected Layer
Description: Applies linear transformation with learnable weights and bias
Use case: Classification heads, feature transformation
Input : torch.Size([1, 3])
Output: torch.Size([1, 2])

nn.Conv2d(in_channels=4, out_channels=6, kernel_size=5)
Description: Applies 2D convolution with 6 filters of size 5x5
Use case: Feature extraction in images, CNNs
Input : torch.Size([1, 4, 32, 32])
Output: torch.Size([1, 6, 28, 28])

nn.MaxPool2d(kernel_size=2, stride=2)
Description: Downsamples by taking max value in 2x2 windows
Use case: Reducing spatial dimensions, translation invariance
Input : torch.Size([1, 3, 28, 28])
Output: torch.Size([1, 3, 14, 14])

nn.BatchNorm2d(3)
Description: Normalizes activations across batch dimension
Use case: Stabilizing training, faster convergence
Input : torch.Size([4, 3, 16, 16])
Output: torch.Size([4, 3, 16, 16])

nn.Dropout(p=0.5)
Description: Randomly zeros 50% of input elements during training
Use case: Regularization, 