# Softmax and Crossentropy

## Softmax

$$S(y_i)=\frac{e^y_{i}}{\sum{e^y_{i}}}$$

 

In [2]:
import torch
import torch.nn as nn
import numpy as np

In [3]:
# sofmax function

def softmax(x):
  return np.exp(x) / np.sum(np.exp(x), axis = 0)

x = np.array([2.0,1.0,0.1,0.4])
outputs = softmax(x)
print('softmax numpy:', outputs)

softmax numpy: [0.58161698 0.21396493 0.08699165 0.11742644]


In [6]:
x = torch.tensor([2.0,1.0,0.1,0.4])
outputs = torch.softmax(x, dim = 0)
print('softmax torch:', outputs)

softmax torch: tensor([0.5816, 0.2140, 0.0870, 0.1174])


## Cross-Entropy

$$D(\hat{Y}, Y)=-\frac{1}{N}\sum Y_{i}\times \log(\hat{Y_{i}})$$

In [9]:
def cross_entropy(actual, pred):
  loss = -np.sum(actual*np.log(pred))
  return loss / float(pred.shape[0])

Y = np.array([1,0,0])

# y_pred prob
Y_pred_good = np.array([0.7, 0.2,0.1])
Y_pred_bad = np.array([0.1,0.3,0.6])
l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

print(f'Loss 1 numpy: {l1:.4f}')
print(f'Loss 2 numpy: {l2:.4f}')

Loss 1 numpy: 0.1189
Loss 2 numpy: 0.7675


In [16]:
# pytorch 

loss = nn.CrossEntropyLoss()

Y = torch.tensor([0])

# nsamples x nclasses = 1x3
Y_pred_good = torch.tensor([[2.0,1.0,0.3]])
Y_pred_bad = torch.tensor([[0.4,2.3,0.23]])

l1 = loss(Y_pred_good,Y)
l2 = loss(Y_pred_bad, Y)

print(l1.item())
print(l2.item())

_, predictions1 = torch.max(Y_pred_good,1)
_, predict ions2 = torch.max(Y_pred_bad,1)

print(predictions1)
print(predictions2)

0.438618004322052
2.1435375213623047
tensor([0])
tensor([1])


In [24]:
# multiclass

x = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

n_samples, n_features = x.shape
print(n_samples, n_features)

x_test = torch.tensor([5], dtype=torch.float32)
input_size = n_features
output_size = n_features

class NeuralNet(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super(NeuralNet, self).__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, num_classes)

  def forward(self,x):
    out = self.linear1(x)
    out = self.relu(out)
    out = self.linear2(out)

    return out #no softmax at the end

model = NeuralNet(input_size=input_size, hidden_size=5, num_classes = 1)
#criterion = nn.CrossEntropyLoss() #applies softmax

# training
learning_rate = 0.01
n_iters = 100

criterion =  nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate) # stochastic gradient descent

for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = model(x)

  # loss
  l = criterion(y, y_pred)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  optimizer.step()

  # zero gradients
  optimizer.zero_grad()

  if epoch % 10 == 0:
    print(f'epoch {epoch+1}, loss = {l:.8f}')

print(f'Prediction before training: f(5) = {model(x_test).item():.3f}')


4 1
epoch 1, loss = 27.93873978
epoch 11, loss = 0.00007824
epoch 21, loss = 0.00006234
epoch 31, loss = 0.00004970
epoch 41, loss = 0.00003962
epoch 51, loss = 0.00003159
epoch 61, loss = 0.00002519
epoch 71, loss = 0.00002008
epoch 81, loss = 0.00001602
epoch 91, loss = 0.00001277
Prediction before training: f(5) = 10.005
