In [1]:
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import include.d2l_pytorch as d2l

In [2]:
# Toy input: the integers 0..15 arranged as a 2 x 8 tensor.
X = torch.arange(16).reshape(2, 8)
# Drop probability 0: no element is expected to be dropped.
d2l.dropout(X, 0)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])

In [3]:
# Drop probability 0.5: in the recorded output below, the surviving entries
# appear at twice their original value and the remaining entries are zero.
d2l.dropout(X, 0.5)


tensor([[ 0.,  0.,  4.,  6.,  8., 10., 12., 14.],
        [ 0., 18.,  0., 22., 24.,  0.,  0., 30.]])

In [4]:
# Drop probability 1.0: the recorded output below is all zeros.
d2l.dropout(X, 1.0)

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])

In [5]:
# Layer widths of the three-layer perceptron: input -> hidden1 -> hidden2 -> output.
num_inputs, num_outputs = 784, 10
num_hiddens1, num_hiddens2 = 256, 256


def _gaussian_weight(rows, cols):
    """Return a trainable (rows, cols) float tensor sampled from N(0, 0.01**2) via NumPy."""
    samples = np.random.normal(0, 0.01, size=(rows, cols))
    return torch.tensor(samples, dtype=torch.float, requires_grad=True)


def _zero_bias(width):
    """Return a trainable all-zero bias vector of length `width`."""
    return torch.zeros(width, requires_grad=True)


# Weights are drawn in the order W1, W2, W3 so the NumPy random stream is
# consumed exactly as if each tensor were built inline.
W1, b1 = _gaussian_weight(num_inputs, num_hiddens1), _zero_bias(num_hiddens1)
W2, b2 = _gaussian_weight(num_hiddens1, num_hiddens2), _zero_bias(num_hiddens2)
W3, b3 = _gaussian_weight(num_hiddens2, num_outputs), _zero_bias(num_outputs)

# Flat list of every trainable tensor, in layer order.
params = [W1, b1, W2, b2, W3, b3]

In [6]:
# Drop probabilities applied after the first and second hidden layers.
drop_prob1, drop_prob2 = 0.2, 0.5


def net(X, is_training=True):
    """Forward pass of the from-scratch three-layer perceptron.

    Args:
        X: input tensor; it is viewed as (-1, num_inputs) before the first layer.
        is_training: when true, d2l.dropout is applied after each hidden
            layer's ReLU (with drop_prob1, then drop_prob2); when false,
            dropout is skipped entirely.

    Returns:
        The output-layer result `H2 @ W3 + b3` (no softmax is applied here).
    """
    activations = X.view(-1, num_inputs)
    hidden_layers = ((W1, b1, drop_prob1), (W2, b2, drop_prob2))
    for weight, bias, drop_prob in hidden_layers:
        # Affine transform followed by ReLU.
        activations = torch.relu(activations @ weight + bias)
        if is_training:
            activations = d2l.dropout(activations, drop_prob)
    return activations @ W3 + b3


In [7]:
# Training hyperparameters for the from-scratch model.
# NOTE(review): lr = 100.0 is far larger than the 0.5 given to torch.optim.SGD
# further down; presumably d2l.train_ch3's built-in update rescales the step
# (e.g. by batch_size) -- confirm against d2l.train_ch3 before changing it.
num_epochs = 5
lr = 100.0
batch_size = 256

loss = nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# No optimizer is passed here: training runs on the raw `params` list and `lr`.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0045, train acc 0.562, test acc 0.735
epoch 2, loss 0.0022, train acc 0.791, test acc 0.802


In [None]:
def _linear_relu_dropout(in_features, out_features, drop_prob):
    """Return the modules of one hidden block: Linear, ReLU, then Dropout(drop_prob)."""
    return [nn.Linear(in_features, out_features), nn.ReLU(), nn.Dropout(drop_prob)]


# Same architecture as `net`, built from torch.nn modules. The modules are
# created in the same order and land at the same Sequential indices (0..7).
net_simple = nn.Sequential(
    d2l.FlattenLayer(),
    *_linear_relu_dropout(num_inputs, num_hiddens1, drop_prob1),
    *_linear_relu_dropout(num_hiddens1, num_hiddens2, drop_prob2),
    nn.Linear(num_hiddens2, num_outputs),
)

# Re-initialise every parameter yielded by net_simple.parameters()
# (weights and biases alike) from a normal distribution with std 0.01.
for param in net_simple.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

In [None]:
# Plain SGD over every parameter of net_simple, learning rate 0.5.
optimizer = torch.optim.SGD(params=net_simple.parameters(), lr=0.5)
# The two slots that carried `params` and `lr` in the from-scratch call are
# None here; the optimizer object is supplied instead.
d2l.train_ch3(
    net_simple, train_iter, test_iter, loss,
    num_epochs, batch_size, None, None, optimizer,
)


epoch 1, loss 0.0044, train acc 0.572, test acc 0.748
epoch 2, loss 0.0023, train acc 0.783, test acc 0.804
epoch 3, loss 0.0019, train acc 0.820, test acc 0.792
epoch 4, loss 0.0017, train acc 0.839, test acc 0.823
epoch 5, loss 0.0016, train acc 0.848, test acc 0.769
