# Learning Rate Scheduler
PyTorch provides `torch.optim.lr_scheduler` to adjust the learning rate dynamically.

## StepLR (Reduce LR Every Few Epochs)

In [1]:
import torch
import torch.optim as optim

# Toy model: a single linear layer taking 10 input features and producing 1 output
model = torch.nn.Linear(in_features=10, out_features=1)

# Adam optimizer over the model's parameters, starting at a learning rate of 0.01
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

# StepLR: multiply the learning rate by `gamma` once every `step_size` scheduler steps
scheduler = optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=5,
    gamma=0.1,
)

# Simulated training: 10 epochs with no forward/backward pass
for epoch in range(10):
    # Stand-in for a real parameter update; it must come before scheduler.step()
    optimizer.step()
    # Advance the schedule by one epoch
    scheduler.step()
    # The value shown is the rate in effect *after* this epoch's scheduler step,
    # i.e. the rate the next epoch would train with.
    print(f"Epoch {epoch+1}, Learning Rate: {scheduler.get_last_lr()[0]:.6f}")


Epoch 1, Learning Rate: 0.010000
Epoch 2, Learning Rate: 0.010000
Epoch 3, Learning Rate: 0.010000
Epoch 4, Learning Rate: 0.010000
Epoch 5, Learning Rate: 0.001000
Epoch 6, Learning Rate: 0.001000
Epoch 7, Learning Rate: 0.001000
Epoch 8, Learning Rate: 0.001000
Epoch 9, Learning Rate: 0.001000
Epoch 10, Learning Rate: 0.000100


## Exponential Decay

In [2]:
# Start from a fresh optimizer: the one created in the StepLR cell has already been
# stepped through that schedule (its learning rate is no longer 0.01), so attaching
# another scheduler to it would make the decay start from the wrong value.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# ExponentialLR multiplies the learning rate by `gamma` on every scheduler.step()
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

## Cosine Annealing

In [3]:
# Start from a fresh optimizer: the one created in the StepLR cell has already been
# stepped through that schedule (its learning rate is no longer 0.01), so the cosine
# curve would otherwise be computed from an already-decayed rate.
optimizer = optim.Adam(model.parameters(), lr=0.01)

# CosineAnnealingLR follows a cosine curve from the initial learning rate down to
# eta_min (0 by default) over T_max scheduler steps
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)


# Weight Initialization

Proper weight initialization helps prevent vanishing/exploding gradients.

## Xavier Initialization (Glorot)

In [4]:
import torch.nn as nn

def init_weights(m):
    """Re-initialize the weight of an ``nn.Linear`` module with Xavier (Glorot) uniform values.

    Intended to be passed to ``Module.apply``. Modules of any other type are
    ignored, and the bias of a linear layer is left unchanged.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)

# Module.apply calls init_weights on every submodule and on the model itself,
# then returns the model, which the notebook displays as the cell output
model.apply(fn=init_weights)


Linear(in_features=10, out_features=1, bias=True)

## He Initialization (Kaiming)

In [5]:
def init_weights(m):
    """Re-initialize the weight of an ``nn.Linear`` module with He (Kaiming) uniform values.

    The gain is chosen for a ReLU nonlinearity. Intended to be passed to
    ``Module.apply``. Modules of any other type are ignored, and the bias of a
    linear layer is left unchanged.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')

# Run init_weights over every submodule and the model itself; the returned model
# is displayed as the cell output
model.apply(fn=init_weights)


Linear(in_features=10, out_features=1, bias=True)

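## Zero Initialization (Not Recommended)

Setting every weight to zero gives all units in a layer identical outputs and identical gradients, so they can never learn different features.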

In [6]:
def init_weights(m):
    """Set every weight of an ``nn.Linear`` module to zero.

    Intended to be passed to ``Module.apply``. Modules of any other type are
    ignored, and the bias of a linear layer is left unchanged.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.zeros_(m.weight)

# Run init_weights over every submodule and the model itself; the returned model
# is displayed as the cell output
model.apply(fn=init_weights)


Linear(in_features=10, out_features=1, bias=True)