In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

# A single hand-written training example: one scalar feature (kept as a
# 1x1 float matrix) and its integer class index.
y = torch.tensor([0], dtype=torch.long)
X = torch.tensor([[2.4]], dtype=torch.float)

In [2]:
# Show the feature tensor, then the label tensor, one per line.
print(X, y, sep="\n")

tensor([[2.4000]])
tensor([0])


In [4]:
# Define the model
class SoftmaxRegression(nn.Module):
    """Small classifier: Linear -> Sigmoid -> Linear.

    ``forward`` returns the raw scores of the second linear layer; no softmax
    is applied inside the module.

    Args:
        in_features: Size of each input sample (default 1).
        hidden_features: Width of the sigmoid hidden layer (default 1).
        num_classes: Number of output scores per sample (default 2).
        init_std: Standard deviation of the zero-mean normal distribution
            used to initialise both weight matrices (default 10.0).
    """

    def __init__(self, in_features=1, hidden_features=1, num_classes=2, init_std=10.0):
        super().__init__()
        self.linear1 = nn.Linear(in_features, hidden_features)
        self.linear2 = nn.Linear(hidden_features, num_classes)
        self.sigmoid = nn.Sigmoid()

        # Re-initialise every layer: weights ~ N(0, init_std^2), biases = 0.
        # Layers are visited in definition order so that, for a fixed seed,
        # the random draws happen in the same sequence as before.
        # NOTE(review): with the default init_std of 10 the hidden
        # pre-activation can be large enough to saturate the sigmoid, which
        # makes linear1's gradients tiny -- confirm this is intended.
        for layer in (self.linear1, self.linear2):
            init.normal_(layer.weight, mean=0.0, std=init_std)
            init.zeros_(layer.bias)

    def forward(self, x):
        """Return the unnormalised class scores for ``x``.

        Args:
            x: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes``.
        """
        hidden = self.sigmoid(self.linear1(x))
        return self.linear2(hidden)

model = SoftmaxRegression()

In [5]:
# Inspect the first linear layer: weight, then bias.
print(model.linear1.weight, model.linear1.bias, sep="\n")

Parameter containing:
tensor([[6.7488]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [6]:
# Inspect the second linear layer: weight, then bias.
print(model.linear2.weight, model.linear2.bias, sep="\n")

Parameter containing:
tensor([[ 9.8082],
        [13.3538]], requires_grad=True)
Parameter containing:
tensor([0., 0.], requires_grad=True)


In [7]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# The only training sample, with a leading batch dimension restored by
# unsqueeze(0). It is the same on every pass, so build and report it once.
xi = X[0].unsqueeze(0)
yi = y[0].unsqueeze(0)

print(f'xi: {xi}')
print(f'yi: {yi}')

# Training loop: max_epoch alone decides how many optimisation steps run.
max_epoch = 1
for epoch in range(max_epoch):
    # Zero the gradients
    optimizer.zero_grad()

    # Forward pass
    outputs = model(xi)
    # detach() gives a graph-free view for printing; it replaces the legacy
    # `.data` attribute and prints the same values.
    print(f'outputs: {outputs.detach()}')

    loss = criterion(outputs, yi)
    # .item() extracts the Python float from the 0-dim loss tensor.
    print(f'loss: {loss.item()}')

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

xi: tensor([[2.4000]])
yi: tensor([0])
outputs: tensor([[ 9.8082, 13.3538]])
loss: 3.574042797088623


In [8]:
# Gradients currently stored on the first linear layer: weight, then bias.
print(model.linear1.weight.grad, model.linear1.bias.grad, sep="\n")

tensor([[9.8596e-07]])
tensor([4.1082e-07])


In [9]:
# Gradients currently stored on the second linear layer: weight, then bias.
print(model.linear2.weight.grad, model.linear2.bias.grad, sep="\n")

tensor([[-0.9720],
        [ 0.9720]])
tensor([-0.9720,  0.9720])


In [10]:
# Re-inspect the first linear layer (weight, then bias) to see what the
# optimizer step changed.
print(model.linear1.weight, model.linear1.bias, sep="\n")

Parameter containing:
tensor([[6.7488]], requires_grad=True)
Parameter containing:
tensor([-4.1082e-09], requires_grad=True)


In [11]:
# Re-inspect the second linear layer (weight, then bias) to see what the
# optimizer step changed.
print(model.linear2.weight, model.linear2.bias, sep="\n")

Parameter containing:
tensor([[ 9.8179],
        [13.3441]], requires_grad=True)
Parameter containing:
tensor([ 0.0097, -0.0097], requires_grad=True)
