<a href="https://colab.research.google.com/github/HabibMbow94/Convolution_Neural_Network/blob/main/Activation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORT LIBRARIES AND DATASETS
___

In [None]:
import random

import imageio
from tqdm import tqdm 
import matplotlib.pyplot as plt
import numpy as np
import PIL
import skimage.transform
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
import torchvision
from IPython import display
from torchvision import datasets, transforms
import torchvision.transforms as transforms


In [None]:
# Define the logistic sigmoid function
def sigmoid_f(x):
  """Return the element-wise logistic sigmoid 1 / (1 + exp(-x)) of tensor ``x``."""
  denominator = torch.exp(-x) + 1
  return 1 / denominator

# Define the derivative of the sigmoid function
def d_sigmoid_f(x):
  """Return the element-wise derivative of the sigmoid: s(x) * (1 - s(x))."""
  s = sigmoid_f(x)
  return s*(1-s)

In [None]:
# Define the hyperbolic tangent function
def tanh_f(x):
  """Return the element-wise hyperbolic tangent of tensor ``x``.

  The textbook form (e^x - e^-x) / (e^x + e^-x) overflows to inf / inf = NaN
  once |x| is large enough for exp to overflow. This version only ever
  exponentiates a non-positive number, so every intermediate stays in
  (0, 1] and the result is finite for all finite inputs:

      tanh(x) = sign(x) * (1 - e^(-2|x|)) / (1 + e^(-2|x|))
  """
  negative = x < 0
  # |x| written with torch.where so that the gradient at x == 0 follows the
  # "x" branch (slope 1), which keeps d tanh / dx == 1 at the origin.
  magnitude = torch.where(negative, -x, x)
  decay = torch.exp(-2 * magnitude)
  tan_h = (1 - decay) / (1 + decay)
  # Restore the sign: tanh is an odd function.
  return torch.where(negative, -tan_h, tan_h)

# Define the derivative of the hyperbolic tangent function
def dtanh(x):
  """Return the element-wise derivative of tanh: 1 - tanh(x)**2."""
  t = tanh_f(x)
  return 1 - torch.square(t)

In [None]:
# Define the ReLU (rectified linear unit) function
def reLU_f(x):
  """Return the element-wise ReLU max(x, 0) of ``x``.

  Args:
    x: a tensor, or anything ``torch.as_tensor`` accepts (e.g. a Python number).

  Returns:
    A tensor with the same shape, dtype and device as the (converted) input,
    where every non-positive entry is replaced by zero.
  """
  x = torch.as_tensor(x)
  # zeros_like keeps dtype and device aligned with x, so this works on CPU
  # and GPU tensors alike and stays differentiable with respect to x.
  return torch.where(x > 0, x, torch.zeros_like(x))

# # Define the derivative of the ReLU function
# def d_reLU_f(x):
#   d_relu = torch.max(0, 1)
#   return d_relu

In [None]:
def swish(x):
  """Return the Swish activation x * sigmoid(x), element-wise."""
  gate = sigmoid_f(x)
  return x*gate

def d_swish(x):
  """Return the element-wise derivative of Swish, f(x) = x * sigmoid(x).

  By the product rule, with s = sigmoid(x) and s' = s * (1 - s):

      f'(x) = s + x * s * (1 - s)
  """
  s = sigmoid_f(x)
  # The last factor must be (1 - s); using (1 - swish(x)) there is not the
  # derivative of x * sigmoid(x).
  d = s + x*s*(1 - s)
  return d

In [None]:
def softplus(x):
  """Return the element-wise softplus log(1 + exp(x)) of tensor ``x``.

  Evaluating log(1 + exp(x)) directly overflows to inf once exp(x) exceeds
  the dtype's range. The equivalent form used here only exponentiates a
  non-positive number and therefore stays finite for all finite inputs:

      softplus(x) = max(x, 0) + log(1 + exp(-|x|))
  """
  negative = x < 0
  # Both pieces are built with torch.where so that at x == 0 the gradient
  # follows the "x" branch; the two contributions then add up to the exact
  # slope sigmoid(0) = 0.5.
  positive_part = torch.where(negative, torch.zeros_like(x), x)
  magnitude = torch.where(negative, -x, x)
  return positive_part + torch.log1p(torch.exp(-magnitude))

def mish(x):
  """Return the Mish activation x * tanh(softplus(x)), element-wise."""
  gate = tanh_f(softplus(x))
  return x*gate

def d_mish(beta,x):
  """Return the element-wise derivative of Mish, f(x) = x * tanh(softplus(x)).

  With t = tanh(softplus(x)) and d softplus / dx = sigmoid(x):

      f'(x) = t + x * sigmoid(x) * (1 - t**2)

  Args:
    beta: unused; kept only so existing calls with two arguments keep working.
    x: input tensor.
  """
  # Use t directly instead of mish(x) / x, which evaluates to 0 / 0 = NaN
  # at x == 0.
  t = tanh_f(softplus(x))
  d = t + x*sigmoid_f(x)*(1 - torch.square(t))
  return d

__Dataset__
Load MNIST and define train/test functions as before. Please make sure you read the code carefully and understand what it is doing.

In [None]:

# Seed PyTorch's global random number generator.
torch.manual_seed(0)

# Number of samples in each mini-batch produced by the data loaders.
batch_size = 64

# MNIST training and test splits, stored under /tmp/mnist (downloaded when
# requested via download=True); every image goes through ToTensor.
mnist_train = datasets.MNIST(
    root="/tmp/mnist", train=True, download=True, transform=transforms.ToTensor()
)
mnist_test = datasets.MNIST(
    root="/tmp/mnist", train=False, download=True, transform=transforms.ToTensor()
)

# Batched iterators over both splits; only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(
    dataset=mnist_train, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=mnist_test, batch_size=batch_size, shuffle=False
)

In [None]:
# Pull one mini-batch from the training loader and look at its first image.
# The built-in next() is the Python 3 iterator protocol; the iterator object
# is not required to expose a .next() method.
batch, targets = next(iter(train_loader))
image, target = batch[0], targets[0]
print("Size of the image:", image.size())

# Convolutional Neural Network (CNN)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Define the training function
def train(model, criterion, data_loader, optimizer, num_epochs):
    """Run a basic optimisation loop over ``data_loader`` for ``num_epochs`` epochs.

    The model is switched to training mode and moved to the module-level
    ``device``. At the end of every epoch the exponential moving average of
    the per-batch loss is printed. Nothing is returned.
    """
    model.train()
    model.to(device)

    # Smoothed loss; initialised from the first batch's loss.
    ema_loss = None

    for epoch in tqdm(range(num_epochs)):
        for data, target in data_loader:
            # Forward pass with inputs and labels on the chosen device.
            inputs = data.to(device)
            output = model(inputs)
            labels = target.to(device)
            loss = criterion(output.to(device), labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Read the loss out with .item() before folding it into the average.
            batch_loss = loss.item()
            if ema_loss is not None:
                ema_loss += (batch_loss - ema_loss) * 0.01
            else:
                ema_loss = batch_loss

        # Report the smoothed loss once per epoch.
        print(
            "Train Epoch: {} \ttrain Loss: {:.6f}".format(epoch, ema_loss),
        )


def test(model, data_loader, name_of_ac, use_pytorch):
    """Print the classification accuracy of ``model`` on ``data_loader``.

    ``name_of_ac`` and ``use_pytorch`` are only used to label the printed
    report. The model is put in evaluation mode and gradients are disabled
    while predictions are computed. Nothing is returned.
    """
    model.eval()
    correct = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data, target in data_loader:
            logits = model(data.to(device))

            # Predicted class = index of the largest output per sample.
            pred = torch.argmax(logits, dim=1, keepdim=True)

            # Compare on the CPU against the labels reshaped like the predictions.
            hits = pred.cpu().eq(target.view_as(pred))
            correct += hits.sum().item()

    percent = 100.0 * correct / len(data_loader.dataset)
    mode = "using" if use_pytorch else "without"

    print(f"Accuracy from {name_of_ac.upper()} activation function "+ mode + f" pytorch implementation : {correct} / {len(data_loader.dataset)} ({percent:.0f}%)")
    print("\n"*3)

# IMPLEMENTATION OF THE CONVOLUTIONAL NEURAL NETWORK

In [None]:
class DNN(nn.Module):
  """Simple convolutional network: five 3x3 convolutions and a linear classifier.

  Args:
    image_side_size: side length of the square input image. It is also used
      as the channel count of the first two convolutions (and doubled for the
      next two).
    num_classes: number of outputs of the final linear layer.
    act_fc: activation name, one of "relu", "tanh", "swish", "mish".
    use_pytorch_ac: if True use PyTorch's built-in activation, otherwise the
      hand-written function of the same name defined in this notebook.
    in_channels: number of channels of the input image.

  Raises:
    KeyError: if ``act_fc`` is not one of the supported names.
  """

  def __init__(self, image_side_size, num_classes, act_fc, use_pytorch_ac = False, in_channels=1):
    super().__init__()
    # Only build the table that is actually requested, so the built-in
    # variant does not depend on the hand-written functions being defined.
    if use_pytorch_ac:
      # torch.tanh is the non-deprecated spelling of F.tanh.
      activations = {"relu":F.relu, "tanh" : torch.tanh, "swish": F.silu, "mish": F.mish}
    else:
      activations = {"relu":reLU_f, "tanh" : tanh_f, "swish": swish, "mish": mish}
    # Positional Conv2d arguments: kernel_size, stride, padding.
    self.conv1 = nn.Conv2d(in_channels,image_side_size, 3,2,3)
    self.conv2 = nn.Conv2d(image_side_size, image_side_size,3,1,3)
    self.conv3 = nn.Conv2d(image_side_size,2*image_side_size, 3,1,2)
    self.conv4 = nn.Conv2d(2*image_side_size,2*image_side_size,3,1,1)
    self.conv5 = nn.Conv2d(2*image_side_size,1,3,1,0)
    # Spatial side after each layer, for an input of side n:
    #   conv1: (n + 3) // 2 + 1, conv2: +4, conv3: +2, conv4: +0, conv5: -2
    # i.e. (n + 3) // 2 + 5 overall, which equals n - 8 = 20 for n = 28.
    feature_side = (image_side_size + 3) // 2 + 5
    self.linear = nn.Linear(feature_side*feature_side, num_classes)
    self.activation_functions = activations[act_fc]


  def forward(self, x):
    """Map a batch of images to ``num_classes`` raw scores per image."""
    x = self.conv1(x)
    x = self.activation_functions(x)
    # conv2 must run here: without it the flattened feature map does not
    # match the input size of the linear layer.
    x = self.conv2(x)
    x = self.activation_functions(x)
    x = self.conv3(x)
    x = self.activation_functions(x)
    x = self.conv4(x)
    x = self.activation_functions(x)
    x = self.conv5(x)
    x = self.activation_functions(x)
    # conv5 has a single output channel; flatten it per sample.
    x = self.linear(x.view(x.size(0), -1))
    return x

# Cross-entropy loss; my_function passes it to train() as the training criterion.
criterion = nn.CrossEntropyLoss()

In [None]:
def my_function(a_f, use_pytorch = False, num_epochs=10, lr=0.01, momentum=0.9):
  """Train a fresh DNN on MNIST with one activation and print its test accuracy.

  Args:
    a_f: activation name forwarded to DNN ("relu", "tanh", "swish" or "mish").
    use_pytorch: if True use PyTorch's built-in activation, otherwise the
      hand-written one.
    num_epochs: number of passes over the training loader.
    lr: learning rate of the SGD optimizer.
    momentum: momentum of the SGD optimizer.
  """
  # 28 is the MNIST image side, 10 the number of digit classes.
  conv_model = DNN(28, 10, act_fc = a_f, use_pytorch_ac = use_pytorch)
  optimizer = torch.optim.SGD(conv_model.parameters(), lr=lr, momentum=momentum)
  train(conv_model, criterion, train_loader, optimizer, num_epochs=num_epochs)
  test(conv_model, test_loader,name_of_ac=a_f, use_pytorch=use_pytorch)

In [None]:
# Train and evaluate one network per activation, using PyTorch's built-in implementations.
pytorch_activation_names = ('tanh', 'relu', 'swish', 'mish')
for a_c in pytorch_activation_names:
  my_function(a_c, use_pytorch=True)

```markdown

num_epochs=5 



0%|          | 0/5 [00:00<?, ?it/s]/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:1933: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.
  warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
 20%|██        | 1/5 [00:16<01:05, 16.35s/it]Train Epoch: 0 	train Loss: 0.288599
 40%|████      | 2/5 [00:32<00:48, 16.20s/it]Train Epoch: 1 	train Loss: 0.185475
 60%|██████    | 3/5 [00:48<00:32, 16.14s/it]Train Epoch: 2 	train Loss: 0.123489
 80%|████████  | 4/5 [01:04<00:16, 16.25s/it]Train Epoch: 3 	train Loss: 0.110499
100%|██████████| 5/5 [01:20<00:00, 16.19s/it]Train Epoch: 4 	train Loss: 0.074640

Accuracy from TANH activation function using pytorch implementation : 9769 / 10000 (97.6900%)




 20%|██        | 1/5 [00:16<01:04, 16.14s/it]Train Epoch: 0 	train Loss: 0.190975
 40%|████      | 2/5 [00:32<00:48, 16.17s/it]Train Epoch: 1 	train Loss: 0.112485
 60%|██████    | 3/5 [00:48<00:32, 16.11s/it]Train Epoch: 2 	train Loss: 0.080574
 80%|████████  | 4/5 [01:04<00:16, 16.03s/it]Train Epoch: 3 	train Loss: 0.067419
100%|██████████| 5/5 [01:20<00:00, 16.07s/it]Train Epoch: 4 	train Loss: 0.057721

Accuracy from RELU activation function using pytorch implementation : 9832 / 10000 (98.3200%)




 20%|██        | 1/5 [00:16<01:05, 16.37s/it]Train Epoch: 0 	train Loss: 2.300586
 40%|████      | 2/5 [00:32<00:48, 16.14s/it]Train Epoch: 1 	train Loss: 0.284398
 60%|██████    | 3/5 [00:48<00:32, 16.06s/it]Train Epoch: 2 	train Loss: 0.108999
 80%|████████  | 4/5 [01:04<00:16, 16.04s/it]Train Epoch: 3 	train Loss: 0.087341
100%|██████████| 5/5 [01:20<00:00, 16.10s/it]Train Epoch: 4 	train Loss: 0.063580

Accuracy from SWISH activation function using pytorch implementation : 9799 / 10000 (97.9900%)




 20%|██        | 1/5 [00:17<01:09, 17.50s/it]Train Epoch: 0 	train Loss: 0.485544
 40%|████      | 2/5 [00:33<00:50, 16.82s/it]Train Epoch: 1 	train Loss: 0.143080
 60%|██████    | 3/5 [00:50<00:33, 16.54s/it]Train Epoch: 2 	train Loss: 0.089500
 80%|████████  | 4/5 [01:06<00:16, 16.40s/it]Train Epoch: 3 	train Loss: 0.075455
100%|██████████| 5/5 [01:22<00:00, 16.44s/it]Train Epoch: 4 	train Loss: 0.057078

Accuracy from MISH activation function using pytorch implementation : 9801 / 10000 (98.0100%)

```

In [None]:
# Define the ReLU (rectified linear unit) function
def reLU_f(x):
  """Return the element-wise ReLU max(x, 0) of ``x``.

  Args:
    x: a tensor, or anything ``torch.as_tensor`` accepts (e.g. a Python number).

  Returns:
    A tensor with the same shape, dtype and device as the (converted) input,
    where every non-positive entry is replaced by zero.
  """
  # A Python ``x if x > 0 else 0`` only works for single values; convert to a
  # tensor and select element-wise so whole feature maps are supported.
  x = torch.as_tensor(x)
  return torch.where(x > 0, x, torch.zeros_like(x))

# Define the derivative of the ReLU function
def d_reLU_f(x):
  """Return the element-wise derivative of ReLU: 1 where x > 0, else 0.

  Args:
    x: a tensor, or anything ``torch.as_tensor`` accepts.

  Returns:
    A tensor of zeros and ones with the same shape and dtype as the
    (converted) input. The derivative at exactly 0 is taken to be 0.
  """
  x = torch.as_tensor(x)
  d_relu = (x > 0).to(x.dtype)
  return d_relu

In [None]:
# Quick sanity check on a positive input; pass a tensor, since reLU_f
# operates on tensors rather than plain Python numbers.
reLU_f(torch.tensor(1.0))

In [None]:
# Repeat the experiment with the hand-written activations ('relu' is not in this list).
custom_activation_names = ('tanh', 'swish', 'mish')
for a_c in custom_activation_names:
  my_function(a_c, use_pytorch=False)

```markdown

for a_c in ['tanh','swish','mish']:
  my_function(a_c, use_pytorch=False)

  

 20%|██        | 1/5 [00:16<01:06, 16.67s/it]Train Epoch: 0 	train Loss: 0.313319
 40%|████      | 2/5 [00:33<00:49, 16.62s/it]Train Epoch: 1 	train Loss: 0.292987
 60%|██████    | 3/5 [00:50<00:33, 16.73s/it]Train Epoch: 2 	train Loss: 0.273542
 80%|████████  | 4/5 [01:07<00:16, 16.92s/it]Train Epoch: 3 	train Loss: 0.267651
100%|██████████| 5/5 [01:24<00:00, 16.82s/it]Train Epoch: 4 	train Loss: 0.256078

Accuracy from TANH activation function without pytorch implementation : 9229 / 10000 (92.2900%)




 20%|██        | 1/5 [00:15<01:03, 15.78s/it]Train Epoch: 0 	train Loss: 0.344519
 40%|████      | 2/5 [00:30<00:46, 15.36s/it]Train Epoch: 1 	train Loss: 0.311537
 60%|██████    | 3/5 [00:45<00:30, 15.26s/it]Train Epoch: 2 	train Loss: 0.296040
 80%|████████  | 4/5 [01:00<00:15, 15.14s/it]Train Epoch: 3 	train Loss: 0.287173
100%|██████████| 5/5 [01:15<00:00, 15.17s/it]Train Epoch: 4 	train Loss: 0.284760

Accuracy from SWISH activation function without pytorch implementation : 9216 / 10000 (92.1600%)




 20%|██        | 1/5 [00:17<01:11, 17.80s/it]Train Epoch: 0 	train Loss: 0.322612
 40%|████      | 2/5 [00:35<00:53, 17.91s/it]Train Epoch: 1 	train Loss: 0.299359
 60%|██████    | 3/5 [00:53<00:35, 17.70s/it]Train Epoch: 2 	train Loss: 0.266518
 80%|████████  | 4/5 [01:10<00:17, 17.59s/it]Train Epoch: 3 	train Loss: 0.270700
100%|██████████| 5/5 [01:28<00:00, 17.60s/it]Train Epoch: 4 	train Loss: 0.256243

Accuracy from MISH activation function without pytorch implementation : 9283 / 10000 (92.8300%)
```