In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [9]:
# Feature matrix: one row per house -> (size in sq ft, bedrooms, age)
X = torch.tensor(
    [
        [1400, 3, 20],
        [1600, 4, 15],
        [1700, 3, 10],
        [1875, 3, 5],
        [1100, 2, 25],
    ],
    dtype=torch.float32,
)

# Target column vector, shape (5, 1): prices in $1000s, one per house
Y = torch.tensor([245, 312, 279, 308, 199], dtype=torch.float32).unsqueeze(1)

# Per-feature statistics taken over the rows (dim=0).
# Tensor.std() applies Bessel's correction by default, i.e. this is the sample std.
X_mean = X.mean(dim=0)
X_std = X.std(dim=0)
print(X_mean, X_std)

# Standardize each column to zero mean / unit std so the features share a scale
X_norm = (X - X_mean).div(X_std)
print(X_norm)

tensor([1535.,    3.,   15.]) tensor([297.6995,   0.7071,   7.9057])
tensor([[-0.4535,  0.0000,  0.6325],
        [ 0.2183,  1.4142,  0.0000],
        [ 0.5543,  0.0000, -0.6325],
        [ 1.1421,  0.0000, -1.2649],
        [-1.4612, -1.4142,  1.2649]])


In [12]:
class HousePriceNN(nn.Module):
    """Single linear layer mapping house features to a price prediction.

    Args:
        in_features: Number of input features per sample. Defaults to 3,
            matching the (size, bedrooms, age) columns used in this notebook.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features=3, out_features=1):
        super().__init__()
        # Kept under the name `layer1` so state_dict keys stay `layer1.weight` / `layer1.bias`.
        self.layer1 = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Apply the linear layer to `x`; the last dim of `x` must equal `in_features`."""
        return self.layer1(x)

In [30]:
# Mean-squared-error criterion shared by every training run below
loss_fn = nn.MSELoss()

# Five independently initialised networks, created in the same order as before:
# `model` is a spare (it is re-created in the next cell); model1..model4 get
# one optimizer each for the comparison.
model, model1, model2, model3, model4 = (HousePriceNN() for _ in range(5))

# One optimizer per network, all starting from the same learning rate of 0.01
sgd = optim.SGD(params=model1.parameters(), lr=0.01)
momentum_sgd = optim.SGD(params=model2.parameters(), lr=0.01, momentum=0.9)
adam = optim.Adam(params=model3.parameters(), lr=0.01)
rmsprop = optim.RMSprop(params=model4.parameters(), lr=0.01)

In [31]:
# Baseline run: plain SGD on a freshly initialised model, full-batch updates
model = HousePriceNN()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 100
for i in range(1, epochs + 1):
    # Clear gradients left over from the previous iteration before the new backward pass
    optimizer.zero_grad()

    predictions = model(X_norm)
    loss = loss_fn(predictions, Y)
    loss.backward()
    optimizer.step()

    # Report every 20th epoch; the loss shown was computed before this epoch's update
    if not i % 20:
        print(f"epoch: {i}, loss: {loss}")

def train_step(optimizer, name, model, i):
    """Run one full-batch optimisation step and occasionally log the loss.

    Reads the module-level ``X_norm``, ``Y`` and ``loss_fn``.

    Args:
        optimizer: Optimizer that updates ``model``'s parameters.
        name: Label printed in front of the loss value.
        model: Network to evaluate on ``X_norm``.
        i: Zero-based step index; controls when the loss is printed.

    Returns:
        The same ``model`` object that was passed in.
    """
    optimizer.zero_grad()
    step_loss = loss_fn(model(X_norm), Y)
    step_loss.backward()
    optimizer.step()

    # Log on every 40th step and on step 399 (the last step of the 400-step run below)
    should_log = i == 399 or i % 40 == 0
    if should_log:
        print(f"{i}: {name}: Loss = {step_loss.item():.4f}")
    return model

# Compare the optimizers: each one trains its own model for 400 steps.
# train_step returns the model it was given, so the original model1..model4
# bindings already point at the trained networks.
comparison_runs = (
    (sgd, "SGD", model1),
    (momentum_sgd, "Momentum SGD", model2),
    (adam, "Adam", model3),
    (rmsprop, "RMSprop", model4),
)
for i in range(400):
    for run_optimizer, run_label, run_model in comparison_runs:
        train_step(run_optimizer, run_label, run_model, i)

epoch: 20, loss: 34090.5234375
epoch: 40, loss: 15137.5966796875
epoch: 60, loss: 6756.50244140625
epoch: 80, loss: 3033.2578125
epoch: 100, loss: 1375.1597900390625
0: SGD: Loss = 73738.2656
0: Momentum SGD: Loss = 73789.9062
0: Adam: Loss = 74127.0000
0: RMSprop: Loss = 74109.6875
40: SGD: Loss = 14439.7549
40: Momentum SGD: Loss = 217.9328
40: Adam: Loss = 73835.4453
40: RMSprop: Loss = 73264.7656
80: SGD: Loss = 2894.0308
80: Momentum SGD: Loss = 39.7863
80: Adam: Loss = 73546.8516
80: RMSprop: Loss = 72839.7344
120: SGD: Loss = 607.1400
120: Momentum SGD: Loss = 35.6121
120: Adam: Loss = 73261.3281
120: RMSprop: Loss = 72487.2344
160: SGD: Loss = 151.3872
160: Momentum SGD: Loss = 34.7928
160: Adam: Loss = 72978.8203
160: RMSprop: Loss = 72168.0469
200: SGD: Loss = 59.9659
200: Momentum SGD: Loss = 34.2301
200: Adam: Loss = 72699.2578
200: RMSprop: Loss = 71867.7109
240: SGD: Loss = 41.3708
240: Momentum SGD: Loss = 33.6882
240: Adam: Loss = 72422.5781
240: RMSprop: Loss = 71579.2

In [32]:
# Instantiate a 1-D batch-norm layer for 10 features. Nothing is assigned: as the
# cell's last expression, the layer's repr (with its default settings) is displayed.
nn.BatchNorm1d(10)

BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

In [35]:
import torch.optim as optim

# Simple model without BatchNorm
class NoBatchNorm(nn.Module):
    """Linear layer (3 -> 10) followed by ReLU, with no normalisation layer."""

    def __init__(self):
        super(NoBatchNorm, self).__init__()
        self.fc = nn.Linear(in_features=3, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(fc(x))`` for an input whose last dimension is 3."""
        hidden = self.fc(x)
        activated = self.relu(hidden)
        return activated

x = torch.randn(100, 3)

# Create the model BEFORE the optimizer so the optimizer is bound to this
# network's parameters rather than to whatever `model` an earlier cell left behind.
model = NoBatchNorm()
optimizer = optim.SGD(model.parameters(), lr=0.1)  # Initial LR = 0.1

# Using StepLR: multiply the learning rate by gamma after every `step_size` scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(30):
    model(x)  # Your training loop
    # PyTorch expects optimizer.step() before scheduler.step() and warns otherwise.
    # No backward pass ran, so every gradient is None and this call updates nothing.
    optimizer.step()
    scheduler.step()  # Update learning rate

    print(f"Epoch {epoch+1}, Learning Rate: {scheduler.get_last_lr()}")


Epoch 1, Learning Rate: [0.1]
Epoch 2, Learning Rate: [0.1]
Epoch 3, Learning Rate: [0.1]
Epoch 4, Learning Rate: [0.1]
Epoch 5, Learning Rate: [0.1]
Epoch 6, Learning Rate: [0.1]
Epoch 7, Learning Rate: [0.1]
Epoch 8, Learning Rate: [0.1]
Epoch 9, Learning Rate: [0.1]
Epoch 10, Learning Rate: [0.010000000000000002]
Epoch 11, Learning Rate: [0.010000000000000002]
Epoch 12, Learning Rate: [0.010000000000000002]
Epoch 13, Learning Rate: [0.010000000000000002]
Epoch 14, Learning Rate: [0.010000000000000002]
Epoch 15, Learning Rate: [0.010000000000000002]
Epoch 16, Learning Rate: [0.010000000000000002]
Epoch 17, Learning Rate: [0.010000000000000002]
Epoch 18, Learning Rate: [0.010000000000000002]
Epoch 19, Learning Rate: [0.010000000000000002]
Epoch 20, Learning Rate: [0.0010000000000000002]
Epoch 21, Learning Rate: [0.0010000000000000002]
Epoch 22, Learning Rate: [0.0010000000000000002]
Epoch 23, Learning Rate: [0.0010000000000000002]
Epoch 24, Learning Rate: [0.0010000000000000002]
Epoch



In [37]:
# Fresh SGD optimizer over the current model's parameters, starting at lr = 0.1
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

# ExponentialLR multiplies the learning rate by gamma on every scheduler step
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

for epoch in range(10):
    # Optimizer first, scheduler second; no gradients exist here, so only the LR changes
    optimizer.step()
    scheduler.step()

    print(f"Epoch {epoch+1}, Learning Rate: {scheduler.get_last_lr()}")


Epoch 1, Learning Rate: [0.09000000000000001]
Epoch 2, Learning Rate: [0.08100000000000002]
Epoch 3, Learning Rate: [0.07290000000000002]
Epoch 4, Learning Rate: [0.06561000000000002]
Epoch 5, Learning Rate: [0.05904900000000002]
Epoch 6, Learning Rate: [0.05314410000000002]
Epoch 7, Learning Rate: [0.04782969000000002]
Epoch 8, Learning Rate: [0.043046721000000024]
Epoch 9, Learning Rate: [0.03874204890000002]
Epoch 10, Learning Rate: [0.03486784401000002]


In [41]:
# Plateau-driven schedule: halve the LR once the monitored loss has gone
# more than `patience` epochs without improving.
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=3,
    factor=0.5,
)

for epoch in range(10):
    # Simulated loss: 1, 1/2, 1/3 for the first three epochs, then flat at 1/3
    loss = 1.0 / (min(epoch, 2) + 1)
    scheduler.step(loss)

    print(f"Epoch {epoch+1}, Learning Rate: {scheduler.get_last_lr()}, loss: {loss}")


Epoch 1, Learning Rate: [0.1], loss: 1.0
Epoch 2, Learning Rate: [0.1], loss: 0.5
Epoch 3, Learning Rate: [0.1], loss: 0.3333333333333333
Epoch 4, Learning Rate: [0.1], loss: 0.3333333333333333
Epoch 5, Learning Rate: [0.1], loss: 0.3333333333333333
Epoch 6, Learning Rate: [0.1], loss: 0.3333333333333333
Epoch 7, Learning Rate: [0.05], loss: 0.3333333333333333
Epoch 8, Learning Rate: [0.05], loss: 0.3333333333333333
Epoch 9, Learning Rate: [0.05], loss: 0.3333333333333333
Epoch 10, Learning Rate: [0.05], loss: 0.3333333333333333
