### **Softmax Function and Cross Entropy Loss in Pytorch:**



In [12]:
import torch
import torch.nn as nn
import numpy as np

In [13]:
# softmax using numpy
def softmax(x):
  """Return the softmax of ``x`` taken along axis 0.

  Computes ``exp(x_i) / sum_j exp(x_j)``. The maximum along axis 0 is
  subtracted before exponentiating: this does not change the result
  mathematically, but it keeps ``np.exp`` from overflowing to ``inf``
  (which would turn the ratio into ``nan``) for large inputs.

  Args:
    x: array-like of raw scores.

  Returns:
    ``np.ndarray`` of the same shape as ``x`` whose entries along axis 0
    are non-negative and sum to 1.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalise, and np.max would raise on an empty array.
    return np.exp(x)
  shifted = x - np.max(x, axis=0)
  exps = np.exp(shifted)  # computed once, reused for the normaliser
  return exps / np.sum(exps, axis=0)  # e^x / sum(e^x)

# Demo: convert three raw scores into probabilities with the NumPy softmax.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print("softmax using numpy:", outputs)

softmax using numpy: [0.65900114 0.24243297 0.09856589]


In [14]:
# Softmax using torch: normalise a 1-D tensor of raw scores over dim 0.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print("softmax using torch", outputs)

softmax using torch tensor([0.6590, 0.2424, 0.0986])


**CrossEntropy loss** can be used in binary and multiclass classification.

> The lower the loss, the better the predictions match the true labels (and, usually, the higher the accuracy).

In [15]:
# crossentropy loss using numpy

def cross_entropy(true, pred, eps=1e-15):
  """Return the cross-entropy loss ``-sum(true * log(pred))``.

  The result is the summed loss; it is not divided by the number of
  samples.

  Args:
    true: one-hot encoded target(s), array-like.
    pred: predicted probabilities, same shape as ``true``.
    eps: lower bound applied to ``pred`` before taking the log. Without
      it, a predicted probability of exactly 0 gives ``log(0) = -inf``,
      and ``0 * -inf`` is ``nan`` in NumPy, so even a perfect one-hot
      prediction would produce ``nan``.

  Returns:
    The loss as a NumPy floating-point scalar.
  """
  true = np.asarray(true)
  # Bound only from below: probabilities already >= eps are left untouched.
  pred = np.maximum(np.asarray(pred), eps)
  loss = -np.sum(true * np.log(pred))  # -sum(y * log(y'))
  return loss

# The target y must be one-hot encoded:
#   class 0 -> [1 0 0], class 1 -> [0 1 0], class 2 -> [0 0 1]
y = np.array([1, 0, 0])

# Two probability vectors: one favouring the true class 0, one favouring class 2.
y_predG = np.array([0.7, 0.2, 0.1])  # good prediction
y_predB = np.array([0.1, 0.3, 0.6])  # bad prediction

l1, l2 = cross_entropy(y, y_predG), cross_entropy(y, y_predB)
print(f"loss1: {l1:.4f}")
print(f"loss2: {l2:.4f}")

loss1: 0.3567
loss2: 2.3026


**Careful!!**

> *nn.CrossEntropyLoss* combines **nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)**,
so do not add a softmax in the last layer.

> Y holds class labels (class indices), not one-hot vectors!
Y_pred holds raw scores (logits), with no softmax applied.


In [16]:
# Cross-entropy loss using torch, for a single sample.
loss = nn.CrossEntropyLoss()

# The target is the class index of the sample, not a one-hot vector.
Y = torch.tensor([0])

# Raw scores (logits), shape n_samples x n_classes = 1 x 3.
Y_predG = torch.tensor([[2.0, 1.0, 0.1]])  # largest score on class 0
Y_predB = torch.tensor([[0.5, 2.0, 0.3]])  # largest score on class 1

l1, l2 = loss(Y_predG, Y), loss(Y_predB, Y)
print(l1.item())
print(l2.item())

# Predicted class = index of the largest score along dim 1.
_, pred1 = torch.max(Y_predG, dim=1)
_, pred2 = torch.max(Y_predB, dim=1)
print(pred1)
print(pred2)

0.4170299470424652
1.840616226196289
tensor([0])
tensor([1])


In [17]:
# Same loss, now evaluated on a batch of three samples.
loss = nn.CrossEntropyLoss()

# One class index per sample.
Y = torch.tensor([2, 0, 1])

# Raw scores (logits), shape n_samples x n_classes = 3 x 3.
Y_predG = torch.tensor([
  [0.5, 1.0, 2.1],
  [2.0, 1.0, 0.1],
  [0.5, 3.0, 2.1],
])
Y_predB = torch.tensor([
  [2.0, 1.0, 0.1],
  [0.1, 1.0, 2.1],
  [0.1, 3.0, 0.1],
])

l1, l2 = loss(Y_predG, Y), loss(Y_predB, Y)
print(l1.item())
print(l2.item())

# Predicted class per sample = index of the largest score along dim 1.
_, pred1 = torch.max(Y_predG, dim=1)
_, pred2 = torch.max(Y_predB, dim=1)
print(pred1)
print(pred2)

0.414427250623703
1.6018242835998535
tensor([2, 0, 1])
tensor([0, 2, 1])


### **A multiclass problem in pytorch**

In [18]:
import torch
import torch.nn as nn

class NeuralNetMultiClass(nn.Module):
  """Two-layer fully connected network producing one raw score per class.

  Structure: Linear(input_size, hidden_size) -> ReLU -> Linear(hidden_size, num_classes).
  """

  def __init__(self, input_size: int, hidden_size: int, num_classes: int) -> None:
    super().__init__()
    # Layers are declared in the order forward() applies them.
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw scores (logits) for ``x``; no softmax is applied."""
    hidden = self.relu(self.linear1(x))
    return self.linear2(hidden)

# 28*28 input features, 5 hidden units, 3 output classes.
model = NeuralNetMultiClass(input_size=28 * 28, hidden_size=5, num_classes=3)
# CrossEntropyLoss applies LogSoftmax + NLLLoss itself, so the model returns raw scores.
criterion = nn.CrossEntropyLoss()

### **A binary classification problem in PyTorch**

In [19]:
import torch
import torch.nn as nn

class NeuralNetBinaryClass(nn.Module):
  """Two-layer fully connected network with a single sigmoid output.

  Structure: Linear(input_size, hidden_size) -> ReLU -> Linear(hidden_size, 1) -> sigmoid.
  """

  def __init__(self, input_size: int, hidden_size: int) -> None:
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.linear2 = nn.Linear(hidden_size, 1)

  def forward(self, x):
    """Return the sigmoid of the single output score for ``x``."""
    hidden = self.relu(self.linear1(x))
    # Sigmoid at the end squashes the score into (0, 1).
    return torch.sigmoid(self.linear2(hidden))

# 28*28 input features, 5 hidden units, one output.
model = NeuralNetBinaryClass(input_size=28 * 28, hidden_size=5)
# BCELoss takes probabilities, which the model's final sigmoid produces.
criterion = nn.BCELoss()

### **Activation Function**

If there is no activation function, our model is just a stack of linear layers, which is itself a linear model and cannot learn complex functions.

An activation function is applied to each weighted sum of inputs so that the model can learn complex (non-linear) patterns and give better results.

> *With activation functions, our model can also handle non-linear data.*

> *Step, ReLU, leaky ReLU, tanh, softmax and sigmoid are some activation functions.*

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# option 1 : create nn models
class NeuralNet(nn.Module):
  """Binary classifier whose activations are stored as ``nn`` modules.

  Structure: Linear(input_size, hidden_size) -> ReLU -> Linear(hidden_size, 1) -> Sigmoid.
  """

  def __init__(self, input_size: int, hidden_size: int) -> None:
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()  # activation after the hidden layer
    self.linear2 = nn.Linear(hidden_size, 1)
    self.sigmoid = nn.Sigmoid()  # activation on the output

  def forward(self, x):
    """Run ``x`` through both linear layers, each followed by its activation."""
    hidden = self.relu(self.linear1(x))
    return self.sigmoid(self.linear2(hidden))

# option 2 : use activation function directly in forward pass (not in init)
class NeuralNet(nn.Module):
  """Binary classifier that calls the activation functions directly in forward().

  Structure: Linear(input_size, hidden_size) -> relu -> Linear(hidden_size, 1) -> sigmoid.
  Only the linear layers are stored as attributes; the activations are
  plain ``torch`` function calls.
  """

  def __init__(self, input_size, hidden_size):
    # Zero-argument super() always refers to this class. Naming another
    # class here makes instantiation fail with a TypeError.
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, 1)

  def forward(self, x):
    """Return the sigmoid of the single output score for ``x``."""
    out = torch.relu(self.linear1(x))
    out = torch.sigmoid(self.linear2(out))
    return out


**Some activation functions are available in:**

**1) nn modules (used inside __init__):**
nn.ReLU(),
nn.Tanh(),
nn.LeakyReLU(),
nn.Softmax(),
nn.Sigmoid()

**2) torch package (used inside forward function):**
torch.relu(),
torch.sigmoid(),
torch.softmax(),
torch.tanh()

>*If some functions are not available in the torch package, they are available
in the **torch.nn.functional** package (imported as **F**).*

>F.leaky_relu(),
F.relu()....etc.