![soft_conversion](./figs/soft_conv.png)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

torch.manual_seed(1)

<torch._C.Generator at 0x10633cdf0>

In [3]:
from torch.utils.data import DataLoader, TensorDataset

def get_data():
    x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
    y_train = [2, 2, 2, 1, 1, 1, 0, 0]
    x_train = torch.FloatTensor(x_train)
    y_train = torch.LongTensor(y_train)

    # one hot encoding for y
    y_one_hot = torch.zeros(8, 3)
    y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)

    return DataLoader(dataset=TensorDataset(x_train, y_one_hot), batch_size=2, shuffle=True)

## Low level softmax

In [5]:
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optim = torch.optim.SGD([W, b], lr=0.1)

In [6]:
epochs = 100000
loader = get_data()

for epoch in range(1 + epochs):
    for data in loader:
        X, y = data

        forward = F.softmax(torch.matmul(X, W) + b, dim=1)
        loss = (y * -torch.log(forward)).sum(dim=1).mean()

        optim.zero_grad()
        loss.backward()
        optim.step()

    if epoch % 10000 == 0:
        print(f'Epoch: {epoch:4d}/{epochs:4d}, loss: {loss:.4f} ')


Epoch:    0/100000, loss: 5.8315 
Epoch: 10000/100000, loss: 0.0138 
Epoch: 20000/100000, loss: 0.0115 
Epoch: 30000/100000, loss: 0.0078 
Epoch: 40000/100000, loss: 0.0026 
Epoch: 50000/100000, loss: 0.0011 
Epoch: 60000/100000, loss: 0.0000 
Epoch: 70000/100000, loss: 0.0037 
Epoch: 80000/100000, loss: 0.0017 
Epoch: 90000/100000, loss: 0.0015 
Epoch: 100000/100000, loss: nan 


## High level softmax

In [7]:
W = torch.zeros((4, 3), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

optim = torch.optim.SGD([W, b], lr=0.1)

In [8]:
epochs = 100000
loader = get_data()

for epoch in range(1 + epochs):
    for data in loader:
        X, y = data

        z = torch.matmul(X, W) + b
        loss = F.cross_entropy(z, y)

        optim.zero_grad()
        loss.backward()
        optim.step()

    if epoch % 10000 == 0:
        print(f'Epoch: {epoch:4d}/{epochs:4d}, loss: {loss:.4f} ')


Epoch:    0/100000, loss: 11.5029 
Epoch: 10000/100000, loss: 0.0144 
Epoch: 20000/100000, loss: 0.0063 
Epoch: 30000/100000, loss: 0.0078 
Epoch: 40000/100000, loss: 0.0001 
Epoch: 50000/100000, loss: 0.0027 
Epoch: 60000/100000, loss: 0.0018 
Epoch: 70000/100000, loss: 0.0020 
Epoch: 80000/100000, loss: 0.0014 
Epoch: 90000/100000, loss: 0.0012 
Epoch: 100000/100000, loss: 0.0024 
