## ex3.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

class PRelu(nn.Module):
    """Parametric ReLU: ``max(0, x) + a * min(0, x)`` with a learnable slope ``a``.

    Args:
        num_parameters: Number of learnable slopes. ``a`` has shape
            ``(num_parameters,)`` and is broadcast against the input, so it
            has to be 1 or match the input's last dimension.
        init: Value every slope starts from (0.25 by default).
    """

    def __init__(self, num_parameters=1, init=0.25):
        super().__init__()
        self.num_parameters = num_parameters
        # Create the parameter already initialised instead of allocating
        # uninitialised memory and patching it afterwards through ``.data``.
        self.a = nn.Parameter(torch.full((num_parameters,), float(init)))

    def forward(self, x):
        """Return the element-wise PReLU activation of ``x``."""
        pos = torch.nn.functional.relu(x)
        # min(0, x) is taken directly: the former ``(x - |x|) * 0.5`` doubled x
        # before halving it, which overflows to -inf for very large negative x.
        neg = self.a * torch.clamp(x, max=0)
        return pos + neg

class Net(nn.Module):
    """Fully connected classifier: Linear(784, 256) -> PRelu(256) -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.prelu1 = PRelu(256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Flatten every dimension after the first and return the output of ``fc2``."""
        hidden = self.prelu1(self.fc1(x.flatten(1)))
        return self.fc2(hidden)

def train(model, train_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the percentage of samples whose arg-max prediction equals the target.
        Both are ``0.0`` when the loader yields nothing.
    """
    model.train()
    loss_sum = 0.0
    num_batches = 0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        # Batches are counted here rather than via len(train_loader) so that
        # loaders without __len__ (generators, iterable datasets) also work.
        num_batches += 1
        total += targets.size(0)
        correct += outputs.argmax(dim=1).eq(targets).sum().item()

    # Guard both divisions so an empty loader does not raise ZeroDivisionError.
    avg_loss = loss_sum / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0

    return avg_loss, accuracy

def evaluate(model, test_loader, criterion, device):
    """Measure loss and accuracy over ``test_loader`` without updating the model.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the percentage of samples whose arg-max prediction equals the target.
        Both are ``0.0`` when the loader yields nothing.
    """
    model.eval()
    loss_sum = 0.0
    num_batches = 0
    correct = 0
    total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            loss_sum += criterion(outputs, targets).item()
            # Batches are counted here rather than via len(test_loader) so
            # that loaders without __len__ also work.
            num_batches += 1
            total += targets.size(0)
            correct += outputs.argmax(dim=1).eq(targets).sum().item()

    # Guard both divisions so an empty loader does not raise ZeroDivisionError.
    avg_loss = loss_sum / num_batches if num_batches else 0.0
    accuracy = 100.0 * correct / total if total else 0.0

    return avg_loss, accuracy

def main(num_epochs=10, batch_size=64, lr=0.01, momentum=0.9,
         data_root='./data', seed=42):
    """Train ``Net`` on MNIST and report train/test metrics after every epoch.

    Called without arguments it reproduces the original fixed configuration.

    Args:
        num_epochs: Number of passes over the training set.
        batch_size: Batch size used by both data loaders.
        lr: SGD learning rate.
        momentum: SGD momentum.
        data_root: Directory MNIST is downloaded to / read from.
        seed: Value passed to ``torch.manual_seed``.

    Returns:
        Dict with the per-epoch lists ``train_loss``, ``train_acc``,
        ``test_loss`` and ``test_acc`` (e.g. for plotting).
    """
    torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Normalisation constants kept from the original script
    # (presumably the MNIST mean / std -- confirm if the dataset changes).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    train_dataset = datasets.MNIST(
        root=data_root, train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(
        root=data_root, train=False, download=True, transform=transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize the model and optimizer
    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()

    # Per-epoch metrics; returned so they are not lost once the loop finishes.
    history = {
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': [],
    }

    # Training loop
    for epoch in range(num_epochs):
        train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
        test_loss, test_acc = evaluate(model, test_loader, criterion, device)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['test_loss'].append(test_loss)
        history['test_acc'].append(test_acc)

        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.2f}%")
        print()

    return history


# Run the experiment only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()


Epoch 1/10:
Train Loss: 0.2919 | Train Accuracy: 91.43%
Test Loss: 0.1607 | Test Accuracy: 95.12%

Epoch 2/10:
Train Loss: 0.1333 | Train Accuracy: 96.06%
Test Loss: 0.1052 | Test Accuracy: 96.74%

Epoch 3/10:
Train Loss: 0.0889 | Train Accuracy: 97.35%
Test Loss: 0.0808 | Test Accuracy: 97.48%

Epoch 4/10:
Train Loss: 0.0654 | Train Accuracy: 98.03%
Test Loss: 0.0784 | Test Accuracy: 97.53%

Epoch 5/10:
Train Loss: 0.0510 | Train Accuracy: 98.48%
Test Loss: 0.0699 | Test Accuracy: 97.81%

Epoch 6/10:
Train Loss: 0.0391 | Train Accuracy: 98.82%
Test Loss: 0.0676 | Test Accuracy: 97.87%

Epoch 7/10:
Train Loss: 0.0302 | Train Accuracy: 99.12%
Test Loss: 0.0663 | Test Accuracy: 97.89%

Epoch 8/10:
Train Loss: 0.0235 | Train Accuracy: 99.40%
Test Loss: 0.0591 | Test Accuracy: 98.09%

Epoch 9/10:
Train Loss: 0.0188 | Train Accuracy: 99.52%
Test Loss: 0.0612 | Test Accuracy: 98.10%

Epoch 10/10:
Train Loss: 0.0151 | Train Accuracy: 99.64%
Test Loss: 0.0597 | Test Accuracy: 98.09%



Comparison with the previous module:

ex3 introduces the Parametric ReLU (PReLU) activation function, which extends the traditional ReLU with a learnable slope for negative inputs (here a weight vector holding one slope per hidden unit). This allows for adaptive activation patterns and gives the model additional flexibility. ex2, on the other hand, implements the dropout technique, which randomly sets elements of its input to zero during training. This helps prevent overfitting by encouraging the model to learn more robust representations and by reducing its reliance on specific features.