## Step-by-Step Guide to Implementing a Custom Optimizer in PyTorch

### (1) Import Necessary Libraries


In [1]:
import torch
from torch.optim.optimizer import Optimizer


### (2) Define a custom optimizer class

Create a class that inherits from `torch.optim.Optimizer`.

In [4]:
class SGD_no_momentum(Optimizer):
    """Plain stochastic gradient descent without momentum.

    Every call to :meth:`step` applies ``p <- p - lr * p.grad`` to each
    parameter that currently has a gradient.

    Args:
        params: Iterable of parameters to optimize, or of dicts defining
            parameter groups (forwarded unchanged to ``Optimizer``).
        lr: Learning rate stored as the default for every parameter group.
    """

    def __init__(self, params, lr=0.001):
        # Per-group hyperparameters; a group that does not set 'lr' itself
        # inherits this default from the base class.
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and
                returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` when no closure
            was supplied.
        """
        loss = None
        if closure is not None:
            # The closure typically calls backward(), so gradient tracking
            # must be on even if the caller invoked step() under no_grad().
            with torch.enable_grad():
                loss = closure()

        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            # Each group may carry its own learning rate.
            for group in self.param_groups:
                lr = group['lr']
                for p in group['params']:
                    if p.grad is None:
                        continue  # Parameter took no part in the backward pass
                    # In-place p <- p - lr * grad, using the keyword form of
                    # add_ (the positional add_(-lr, grad) overload is deprecated).
                    p.add_(p.grad, alpha=-lr)

        return loss

    def adjust_learning_rate(self, new_lr):
        """Set the learning rate of every parameter group to ``new_lr``."""
        for group in self.param_groups:
            group['lr'] = new_lr

### (3) Use the optimizer in a simple model

In [18]:
import torch
import torch.nn as nn

# Single-feature linear model: learns a weight and a bias
model = nn.Linear(1, 1)

# Mean-squared-error objective, minimized with the custom optimizer
criterion = nn.MSELoss()
optimizer = SGD_no_momentum(model.parameters(), lr=0.01)

# Synthetic data: targets follow y = 2x + 1 plus noise scaled by 0.1
x = torch.randn(100, 1)
noise = 0.1 * torch.randn(100, 1)
y = 2 * x + 1 + noise

# Full-batch gradient descent
num_epochs = 100
for epoch in range(num_epochs):
    # Drop gradients left over from the previous iteration
    optimizer.zero_grad()

    # Forward pass: predictions and their loss against the targets
    output = model(x)
    loss = criterion(output, y)

    # Backward pass, then one parameter update
    loss.backward()
    optimizer.step()

    # Report progress on every tenth epoch (epochs 1, 11, 21, ...)
    if not epoch % 10:
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

# Inference on two unseen inputs; no gradients are needed here
test_x = torch.tensor([[0.5], [1.0]])
with torch.no_grad():
    test_y = model(test_x)
print("Predicted values:", test_y)

Epoch 1/100, Loss: 2.4916
Epoch 11/100, Loss: 1.7430
Epoch 21/100, Loss: 1.2204
Epoch 31/100, Loss: 0.8554
Epoch 41/100, Loss: 0.6005
Epoch 51/100, Loss: 0.4224
Epoch 61/100, Loss: 0.2979
Epoch 71/100, Loss: 0.2109
Epoch 81/100, Loss: 0.1501
Epoch 91/100, Loss: 0.1075
Predicted values: tensor([[1.7556],
        [2.6341]])


Not finished - provide more explanations!