In [11]:
import torch
import torchvision

## Load dataset

In [71]:
# Convert each PIL image to a float tensor as it is read from the dataset.
trans = torchvision.transforms.ToTensor()

# Training split of Fashion-MNIST, downloaded into ./data when not already there.
mnist_train = torchvision.datasets.FashionMNIST(
    root="./data",
    train=True,
    transform=trans,
    download=True,
)

# Held-out test split, stored in the same directory.
mnist_test = torchvision.datasets.FashionMNIST(
    root="./data",
    train=False,
    transform=trans,
    download=True,
)

## The dropout layer

In [35]:
def dropout_layer(X, p_keep):
    """Apply inverted dropout to ``X``.

    Each element of ``X`` is kept independently with probability ``p_keep``
    and zeroed otherwise. Surviving elements are divided by ``p_keep`` so the
    expected value of the output equals ``X``; this means no rescaling is
    needed at test time.

    Args:
        X: Input tensor.
        p_keep: Probability of keeping an element; must lie in ``[0, 1]``.

    Returns:
        A tensor shaped like ``X``. ``X`` itself is returned when
        ``p_keep == 1`` and an all-zero tensor when ``p_keep == 0``.

    Raises:
        AssertionError: If ``p_keep`` is outside ``[0, 1]``.
    """
    assert 0 <= p_keep <= 1

    if p_keep == 1:
        # Keep everything: nothing to sample.
        return X
    elif p_keep == 0:
        # Drop everything; also avoids dividing by zero below.
        return torch.zeros_like(X)

    # Per-element keep probabilities, then one Bernoulli draw per element.
    keep_prob = torch.ones_like(X) * p_keep
    keep_mask = torch.bernoulli(keep_prob)

    # Rescale the survivors so that E[output] == X.
    return X * keep_mask / p_keep

## Define the model parameters

In [36]:
# Layer widths of the MLP.
num_inputs = 784      # size of the flattened input fed to the first layer
num_outputs = 10      # size of the output layer
num_hiddens1 = 256    # width of the first hidden layer
num_hiddens2 = 256    # width of the second hidden layer

## Define the model

In [78]:
# Keep probabilities for the dropout applied after each hidden layer.
p1_keep = 0.8  # after the first hidden layer
p2_keep = 0.5  # after the second hidden layer

class Net(torch.nn.Module):
    """Three-layer MLP with hand-written dropout after each hidden layer.

    Architecture: Linear -> ReLU -> dropout -> Linear -> ReLU -> dropout -> Linear.
    Dropout uses the notebook-level ``dropout_layer`` helper with the
    notebook-level keep probabilities ``p1_keep`` and ``p2_keep``, and is only
    applied while ``self.training`` is true.

    Args:
        num_inputs: Number of features per example after flattening.
        num_outputs: Number of output units.
        num_hiddens1: Width of the first hidden layer.
        num_hiddens2: Width of the second hidden layer.
        is_training: Initial value of ``self.training``; when false, dropout
            is skipped in ``forward``.
    """

    def __init__(self, num_inputs, num_outputs, num_hiddens1, num_hiddens2,
                 is_training=True):
        super().__init__()
        self.num_inputs = num_inputs
        # ``training`` is the standard nn.Module mode flag, so net.train() and
        # net.eval() also switch the dropout in forward() on and off.
        self.training = is_training
        self.lin1 = torch.nn.Linear(num_inputs, num_hiddens1)
        self.lin2 = torch.nn.Linear(num_hiddens1, num_hiddens2)
        self.lin3 = torch.nn.Linear(num_hiddens2, num_outputs)
        self.relu = torch.nn.ReLU()

    def forward(self, X):
        """Return the output-layer activations for the batch ``X``.

        ``X`` is reshaped to ``(-1, self.num_inputs)`` before the first layer.
        """
        # Flatten with the width this instance was built with, not the
        # notebook-global ``num_inputs``, so the model stays self-consistent.
        H1 = self.relu(self.lin1(X.reshape(-1, self.num_inputs)))
        # Use dropout only when training.
        if self.training:
            H1 = dropout_layer(H1, p1_keep)
        H2 = self.relu(self.lin2(H1))
        if self.training:
            H2 = dropout_layer(H2, p2_keep)
        out = self.lin3(H2)
        return out

## Training

In [84]:
num_epochs = 3
lr = 0.1
net = Net(num_inputs, num_outputs, num_hiddens1, num_hiddens2,
          is_training=True)
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)
# Keep the DataLoader objects themselves rather than iter(DataLoader): a
# one-shot iterator is exhausted after the first epoch, which would leave every
# later epoch with no batches. A DataLoader starts a fresh pass each time it is
# looped over.
iter_train = torch.utils.data.DataLoader(mnist_train, batch_size=256, shuffle=True, num_workers=4)
iter_test = torch.utils.data.DataLoader(mnist_test, batch_size=256, shuffle=True, num_workers=4)

## start training
for epoch in range(num_epochs):
    for X, y in iter_train:
        # Gradients accumulate across backward() calls, so clear them before
        # computing this batch's gradients.
        trainer.zero_grad()
        y_hat = net(X)
        # CrossEntropyLoss with its default reduction already yields a scalar.
        ce_loss = loss(y_hat, y)
        ce_loss.backward()
        trainer.step()


## Use PyTorch built-in dropout

In [None]:
# The same MLP assembled from PyTorch's built-in layers.
net = torch.nn.Sequential(
    torch.nn.Flatten(),
    # First fully connected layer, followed by dropout.
    torch.nn.Linear(784, 256),
    torch.nn.ReLU(),
    # nn.Dropout takes the probability of *dropping* a unit, hence 1 - keep.
    torch.nn.Dropout(1 - p1_keep),
    # Second fully connected layer, followed by dropout.
    torch.nn.Linear(256, 256),
    torch.nn.ReLU(),
    torch.nn.Dropout(1 - p2_keep),
    # Output layer.
    torch.nn.Linear(256, 10),
)


def init_weights(m):
    """Re-initialise the weights of a linear layer from N(0, 0.01**2).

    Intended for use with ``Module.apply``. Modules that are not
    ``torch.nn.Linear`` (or a subclass of it) are left untouched, and biases
    are not modified.

    Args:
        m: The module to (possibly) initialise in place.
    """
    # isinstance rather than an exact type comparison, so Linear subclasses
    # are initialised as well.
    if isinstance(m, torch.nn.Linear):
        torch.nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights)

num_epochs = 3
lr = 0.1
# Train the Sequential model defined above. Do not rebuild ``net`` here:
# re-assigning it would discard the built-in-dropout model and its
# initialisation before any training happened.
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)
# Keep the DataLoader objects themselves rather than iter(DataLoader): a
# one-shot iterator is exhausted after the first epoch, which would leave every
# later epoch with no batches.
iter_train = torch.utils.data.DataLoader(mnist_train, batch_size=256, shuffle=True, num_workers=4)
iter_test = torch.utils.data.DataLoader(mnist_test, batch_size=256, shuffle=True, num_workers=4)

## start training
for epoch in range(num_epochs):
    for X, y in iter_train:
        # Gradients accumulate across backward() calls, so clear them before
        # computing this batch's gradients.
        trainer.zero_grad()
        y_hat = net(X)
        # CrossEntropyLoss with its default reduction already yields a scalar.
        ce_loss = loss(y_hat, y)
        ce_loss.backward()
        trainer.step()