<a href="https://colab.research.google.com/github/Redcoder815/Deep_Learning_PyTorch/blob/main/07DropOut.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms
from torchvision import datasets

In [None]:
def dropout_layer(X, dropout):
    """Apply inverted dropout to ``X``.

    Each element is zeroed independently with probability ``dropout`` and the
    surviving elements are divided by ``1 - dropout``, so the expected value
    of the output matches the input.

    Args:
        X: Input tensor.
        dropout: Drop probability in the closed interval [0, 1].

    Returns:
        A tensor with the same shape as ``X``. ``dropout == 0`` returns ``X``
        unchanged and ``dropout == 1`` returns all zeros.

    Raises:
        ValueError: If ``dropout`` is outside [0, 1].
    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if not 0 <= dropout <= 1:
        raise ValueError(f'dropout must be in [0, 1], got {dropout}')
    if dropout == 1:
        return torch.zeros_like(X)
    if dropout == 0:
        # torch.rand samples from [0, 1) and can return exactly 0, which the
        # strict ``>`` below would drop; skip the mask so nothing is lost.
        return X
    # Sample the mask on X's device so inputs that are not on the default
    # device do not trigger a device-mismatch error.
    mask = (torch.rand(X.shape, device=X.device) > dropout).float()
    return mask * X / (1.0 - dropout)

In [None]:
# Sanity check: run dropout_layer on a small 2x8 tensor for p = 0, 0.5 and 1.
X = torch.arange(16, dtype=torch.float32).reshape((2, 8))
for label, p in (('dropout_p = 0:', 0),
                 ('dropout_p = 0.5:', 0.5),
                 ('dropout_p = 1:', 1)):
    print(label, dropout_layer(X, p))

dropout_p = 0: tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
dropout_p = 0.5: tensor([[ 0.,  0.,  4.,  0.,  0., 10., 12., 14.],
        [16., 18.,  0., 22., 24.,  0.,  0., 30.]])
dropout_p = 1: tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])


In [None]:
class DropoutMLPScratch(nn.Module):
    """MLP with two hidden layers that uses the hand-written ``dropout_layer``.

    The linear layers are lazy, so their input sizes are inferred on the
    first forward pass. ``lr`` is accepted by the constructor but is not
    used by this class.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2, lr):
        super().__init__()
        self.lin1 = nn.LazyLinear(num_hiddens_1)
        self.lin2 = nn.LazyLinear(num_hiddens_2)
        self.lin3 = nn.LazyLinear(num_outputs)
        self.relu = nn.ReLU()
        # Drop probabilities applied after the first and second hidden layer.
        self.dropout_1, self.dropout_2 = dropout_1, dropout_2

    def forward(self, X):
        """Return the output of ``lin3`` for a batch ``X`` (flattened per sample)."""
        flat = X.reshape((X.shape[0], -1))
        hidden = self.relu(self.lin1(flat))
        # Dropout is applied only in training mode (after model.train());
        # after model.eval() both dropout steps are bypassed.
        if self.training:
            hidden = dropout_layer(hidden, self.dropout_1)
        hidden = self.relu(self.lin2(hidden))
        if self.training:
            hidden = dropout_layer(hidden, self.dropout_2)
        return self.lin3(hidden)

In [None]:
# Constructor arguments unpacked into the MLP classes via **hparams.
# NOTE: 'lr' is accepted by the constructors but never read by them; the
# optimizer's learning rate is set separately where the optimizer is built.
hparams = {'num_outputs':10, 'num_hiddens_1':256, 'num_hiddens_2':256,
           'dropout_1':0.5, 'dropout_2':0.5, 'lr':0.1}

In [None]:
# Training configuration.
# NOTE: `lr` is defined here, but the optimizer cells pass 0.001 explicitly.
batch_size, lr, max_epochs = 256, 0.1, 3

# Fashion-MNIST train / validation splits, downloaded to ../data if missing
# and converted to tensors on access.
transformer = transforms.ToTensor()
mnist_train = datasets.FashionMNIST(
    root="../data", train=True, transform=transformer, download=True)
mnist_val = datasets.FashionMNIST(
    root="../data", train=False, transform=transformer, download=True)

# Only the training batches are shuffled.
train_iter = data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
val_iter = data.DataLoader(
    mnist_val, batch_size=batch_size, shuffle=False, num_workers=4)



In [None]:
# Build the from-scratch dropout MLP (whichever DropoutMLPScratch definition
# is currently in scope) from the shared hyperparameters.
model = DropoutMLPScratch(**hparams)

In [None]:
# Adam over `model`'s parameters with an explicit learning rate of 0.001
# (the separately defined `lr` value is not used here).
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
# Cross-entropy on raw logits; the default reduction averages over the batch.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Train `model` for `max_epochs` epochs, reporting the per-sample training
# loss, training accuracy and validation accuracy after each epoch.
for epoch in range(max_epochs):
    model.train()  # enable dropout for the training pass
    train_loss_sum, train_accuracy_sum, n = 0.0, 0.0, 0
    for x, y in train_iter:
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch = y.numel()
        # loss_fn returns the batch *mean*, so weight it by the batch size:
        # dividing by n below then yields a true per-sample average.
        # .item() converts to a Python float so no autograd history is
        # accumulated across iterations.
        train_loss_sum += loss.item() * batch
        predicted_labels = torch.argmax(y_pred, dim=1)
        train_accuracy_sum += (predicted_labels == y).float().sum().item()
        n += batch

    model.eval()  # disable dropout for validation
    test_accuracy_sum, test_n = 0.0, 0
    with torch.no_grad():
        for x, y in val_iter:
            y_pred = model(x)
            predicted_labels = torch.argmax(y_pred, dim=1)
            test_accuracy_sum += (predicted_labels == y).float().sum().item()
            test_n += y.numel()
    test_accuracy = test_accuracy_sum / test_n
    print(f'Epoch {epoch + 1}, Loss: {train_loss_sum / n:.4f}, Train Accuracy: {train_accuracy_sum / n:.4f}, Validation Accuracy: {test_accuracy:.4f}')


Epoch 1, Loss: 0.0030, Train Accuracy: 0.7236, Validation Accuracy: 0.8270
Epoch 2, Loss: 0.0019, Train Accuracy: 0.8285, Validation Accuracy: 0.8462
Epoch 3, Loss: 0.0017, Train Accuracy: 0.8417, Validation Accuracy: 0.8496


In [None]:
class DropoutMLPScratch(nn.Module):
    """MLP with two hidden layers, explicit ``nn.Linear`` sizes and the
    hand-written ``dropout_layer``.

    Args:
        num_outputs: Size of the output layer.
        num_hiddens_1: Width of the first hidden layer.
        num_hiddens_2: Width of the second hidden layer.
        dropout_1: Drop probability applied after the first hidden layer.
        dropout_2: Drop probability applied after the second hidden layer.
        lr: Accepted for signature compatibility; not used by this class.
        num_inputs: Number of features per sample after flattening.
            Defaults to 784, the value that was previously hard-coded.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2, lr, num_inputs=784):
        super().__init__()
        self.lin1 = nn.Linear(num_inputs, num_hiddens_1)
        self.lin2 = nn.Linear(num_hiddens_1, num_hiddens_2)
        self.lin3 = nn.Linear(num_hiddens_2, num_outputs)
        self.relu = nn.ReLU()
        self.dropout_1 = dropout_1
        self.dropout_2 = dropout_2

    def forward(self, X):
        """Return the output of ``lin3`` for a batch ``X`` (flattened per sample)."""
        H1 = self.relu(self.lin1(X.reshape((X.shape[0], -1))))
        # Dropout is applied only in training mode; eval mode bypasses it.
        if self.training:
            H1 = dropout_layer(H1, self.dropout_1)
        H2 = self.relu(self.lin2(H1))
        if self.training:
            H2 = dropout_layer(H2, self.dropout_2)
        return self.lin3(H2)

In [None]:
# Same hyperparameter values as declared earlier in the notebook, repeated
# here so the cells below can be run on their own.
# NOTE: 'lr' is accepted by the model constructors but never read by them.
hparams = {'num_outputs':10, 'num_hiddens_1':256, 'num_hiddens_2':256,
           'dropout_1':0.5, 'dropout_2':0.5, 'lr':0.1}

In [None]:
# Instantiate the DropoutMLPScratch definition currently in scope
# (presumably the explicit nn.Linear variant -- depends on cell execution order).
model_linear_scratch = DropoutMLPScratch(**hparams)

With Sequential

In [None]:
class DropoutMLP(nn.Module):
    """MLP with two hidden layers built from ``nn.Sequential`` and ``nn.Dropout``.

    The linear layers are lazy, so their input sizes are inferred on the
    first forward pass. ``lr`` is accepted by the constructor but is not
    used by this class.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2, lr):
        super().__init__()
        layers = [
            nn.Flatten(),
            # First hidden block.
            nn.LazyLinear(num_hiddens_1),
            nn.ReLU(),
            nn.Dropout(dropout_1),
            # Second hidden block.
            nn.LazyLinear(num_hiddens_2),
            nn.ReLU(),
            nn.Dropout(dropout_2),
            # Output layer.
            nn.LazyLinear(num_outputs),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through the sequential stack and return its output."""
        return self.net(X)

In [None]:
# Instantiate the nn.Sequential-based dropout MLP from the shared hyperparameters.
model_sequential = DropoutMLP(**hparams)

In [None]:
# Rebind `optimizer` to Adam over model_sequential's parameters, with an
# explicit learning rate of 0.001.
optimizer = torch.optim.Adam(model_sequential.parameters(), lr = 0.001)
# Cross-entropy on raw logits; the default reduction averages over the batch.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# Train `model_sequential` for `max_epochs` epochs, reporting the per-sample
# training loss, training accuracy and validation accuracy after each epoch.
for epoch in range(max_epochs):
    # Toggle the mode of the network that is actually being trained here.
    # Toggling a different module would leave model_sequential's nn.Dropout
    # layers active during validation.
    model_sequential.train()
    train_loss_sum, train_accuracy_sum, n = 0.0, 0.0, 0
    for x, y in train_iter:
        y_pred = model_sequential(x)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch = y.numel()
        # loss_fn returns the batch *mean*, so weight it by the batch size:
        # dividing by n below then yields a true per-sample average.
        # .item() converts to a Python float so no autograd history is
        # accumulated across iterations.
        train_loss_sum += loss.item() * batch
        predicted_labels = torch.argmax(y_pred, dim=1)
        train_accuracy_sum += (predicted_labels == y).float().sum().item()
        n += batch

    model_sequential.eval()  # disable dropout for validation
    test_accuracy_sum, test_n = 0.0, 0
    with torch.no_grad():
        for x, y in val_iter:
            y_pred = model_sequential(x)
            predicted_labels = torch.argmax(y_pred, dim=1)
            test_accuracy_sum += (predicted_labels == y).float().sum().item()
            test_n += y.numel()
    test_accuracy = test_accuracy_sum / test_n
    print(f'Epoch {epoch + 1}, Loss: {train_loss_sum / n:.4f}, Train Accuracy: {train_accuracy_sum / n:.4f}, Validation Accuracy: {test_accuracy:.4f}')

Epoch 1, Loss: 0.0029, Train Accuracy: 0.7272, Validation Accuracy: 0.8007
Epoch 2, Loss: 0.0019, Train Accuracy: 0.8278, Validation Accuracy: 0.8248
Epoch 3, Loss: 0.0017, Train Accuracy: 0.8438, Validation Accuracy: 0.8309


In [None]:
class DropoutMLP_linear(nn.Module):
    """MLP with two hidden layers built from ``nn.Sequential`` with explicit
    ``nn.Linear`` sizes and ``nn.Dropout``.

    Args:
        num_outputs: Size of the output layer.
        num_hiddens_1: Width of the first hidden layer.
        num_hiddens_2: Width of the second hidden layer.
        dropout_1: Drop probability applied after the first hidden layer.
        dropout_2: Drop probability applied after the second hidden layer.
        lr: Accepted for signature compatibility; not used by this class.
        num_inputs: Number of features per sample after flattening.
            Defaults to 784, the value that was previously hard-coded.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2, lr, num_inputs=784):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(), nn.Linear(num_inputs, num_hiddens_1), nn.ReLU(),
            nn.Dropout(dropout_1), nn.Linear(num_hiddens_1, num_hiddens_2), nn.ReLU(),
            nn.Dropout(dropout_2), nn.Linear(num_hiddens_2, num_outputs))

    def forward(self, X):
        """Run ``X`` through the sequential stack and return its output."""
        return self.net(X)

In [None]:
# Instantiate the nn.Sequential dropout MLP that uses explicit nn.Linear sizes.
model_linear_sequential = DropoutMLP_linear(**hparams)

With nn.Parameter

In [None]:
class DropoutMLPScratch(nn.Module):
    """MLP with two hidden layers whose weights are raw ``nn.Parameter`` tensors.

    Weights are drawn from a normal distribution scaled by 0.01 and biases
    start at zero. The input is flattened to 784 features per sample.

    Args:
        num_outputs: Size of the output layer.
        num_hiddens_1: Width of the first hidden layer.
        num_hiddens_2: Width of the second hidden layer.
        dropout_1: Drop probability applied after the first hidden layer.
        dropout_2: Drop probability applied after the second hidden layer.
        lr: Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, num_outputs, num_hiddens_1, num_hiddens_2,
                 dropout_1, dropout_2, lr):
        super().__init__()
        input_dim = 784
        self.W1 = nn.Parameter(torch.randn(input_dim, num_hiddens_1) * 0.01)
        self.b1 = nn.Parameter(torch.zeros(num_hiddens_1))

        self.W2 = nn.Parameter(torch.randn(num_hiddens_1, num_hiddens_2) * 0.01)
        self.b2 = nn.Parameter(torch.zeros(num_hiddens_2))

        self.W3 = nn.Parameter(torch.randn(num_hiddens_2, num_outputs) * 0.01)
        self.b3 = nn.Parameter(torch.zeros(num_outputs))

        self.relu = nn.ReLU()
        # forward() reads these in training mode; without storing them here
        # the first training step raises AttributeError.
        self.dropout_1 = dropout_1
        self.dropout_2 = dropout_2

    def forward(self, X):
        """Return the output-layer result for a batch ``X`` (flattened per sample)."""
        X = X.reshape((X.shape[0], -1))  # Flatten the input
        H1 = self.relu(torch.matmul(X, self.W1) + self.b1)
        # Dropout is applied only in training mode; eval mode bypasses it.
        if self.training:
            H1 = dropout_layer(H1, self.dropout_1)
        H2 = self.relu(torch.matmul(H1, self.W2) + self.b2)
        if self.training:
            H2 = dropout_layer(H2, self.dropout_2)
        return torch.matmul(H2, self.W3) + self.b3