<a href="https://colab.research.google.com/github/Tamilselvan-AIengineer/Deep_Learning_Techniques_-21AIC301J/blob/main/activation_function.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Device configuration: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Mini-batch size shared by the training and test loaders.
BATCH_SIZE = 64

# MNIST dataset (images are 28x28)
# `torch.flatten` is used instead of an anonymous lambda: it performs the same
# reshape to a 1-D tensor, but being a named function it can be pickled, which
# a lambda cannot (relevant if the loaders are ever given worker processes).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(torch.flatten),  # flatten 28x28 -> 784
])

# The training split is downloaded into ./data on first use; the test split is
# read from the same root directory.
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform)

# Shuffle only the training data; the test order is kept fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Feed-forward Neural Network
class FFNN(nn.Module):
    """Feed-forward classifier with two hidden layers and one shared activation.

    The forward pass is ``fc1 -> activation -> fc2 -> activation -> fc3`` and
    returns the raw output of ``fc3`` (logits); no softmax is applied.

    Args:
        activation_fn: Callable applied to the output of each hidden layer,
            e.g. ``nn.ReLU()``. The same object is used after both layers.
        input_dim: Number of input features per sample. Defaults to 784.
        hidden_dims: Pair ``(first, second)`` giving the widths of the two
            hidden layers. Defaults to ``(128, 64)``.
        num_classes: Number of output logits per sample. Defaults to 10.

    Raises:
        ValueError: If ``hidden_dims`` does not contain exactly two entries.
    """

    def __init__(self, activation_fn, *, input_dim=784, hidden_dims=(128, 64), num_classes=10):
        super().__init__()
        first_hidden, second_hidden = hidden_dims
        # Layers are created in fc1, fc2, fc3 order and keep these attribute
        # names so that parameter names in the state dict stay the same.
        self.fc1 = nn.Linear(input_dim, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)
        self.activation = activation_fn

    def forward(self, x):
        """Return the logits produced for the input batch ``x``."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)  # logits
        return x

# Evaluation helper
def _evaluate_accuracy(model, loader):
    """Return the percentage of samples in ``loader`` that ``model`` labels correctly.

    The model is switched to evaluation mode and gradients are disabled while
    the loader is iterated. The predicted class of a sample is the index of
    its largest logit.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # Gradient tracking is already off inside no_grad(), so the logits
            # are used directly rather than through the legacy `.data` accessor.
            _, predicted = torch.max(model(images), 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total


# Train function
def train_model(activation_fn, epochs=5, lr=0.001):
    """Train a new ``FFNN`` and return its test accuracy in percent.

    A fresh model is built with ``activation_fn``, trained on the module-level
    ``train_loader`` with cross-entropy loss and the Adam optimizer, and then
    scored on the module-level ``test_loader``. After every epoch the mean of
    the per-batch losses is printed.

    Args:
        activation_fn: Activation passed to ``FFNN``.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to Adam.

    Returns:
        Accuracy on ``test_loader`` as a percentage (0-100).
    """
    model = FFNN(activation_fn).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass: clear stale gradients, backpropagate, update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

    # Evaluation
    return _evaluate_accuracy(model, test_loader)

# Study different activations
activations = {
    "ReLU": nn.ReLU(),
    "Sigmoid": nn.Sigmoid(),
    "Tanh": nn.Tanh(),
}

# Seed applied before every run so that each activation starts from the same
# random-number-generator state; otherwise differences in accuracy can come
# from the random draw rather than from the activation itself.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 0

# Maps activation name -> test accuracy in percent.
results = {}
for name, act_fn in activations.items():
    print(f"\nTraining with activation: {name}")
    torch.manual_seed(SEED)
    acc = train_model(act_fn, epochs=5)
    results[name] = acc

print("\nFinal Test Accuracy:")
for act, acc in results.items():
    print(f"{act}: {acc:.2f}%")


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 481kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.45MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.81MB/s]



Training with activation: ReLU
Epoch [1/5], Loss: 0.3372
Epoch [2/5], Loss: 0.1403
Epoch [3/5], Loss: 0.0963
Epoch [4/5], Loss: 0.0728
Epoch [5/5], Loss: 0.0558

Training with activation: Sigmoid
Epoch [1/5], Loss: 0.6972
Epoch [2/5], Loss: 0.2206
Epoch [3/5], Loss: 0.1556
Epoch [4/5], Loss: 0.1168
Epoch [5/5], Loss: 0.0922

Training with activation: Tanh
Epoch [1/5], Loss: 0.3340
Epoch [2/5], Loss: 0.1378
Epoch [3/5], Loss: 0.0939
Epoch [4/5], Loss: 0.0692
Epoch [5/5], Loss: 0.0510

Final Test Accuracy:
ReLU: 96.96%
Sigmoid: 96.93%
Tanh: 97.65%
