In [50]:
import torch
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Download and load the training data
train_set = datasets.MNIST('./datasets/', download=True, train=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Download and load the test data
test_set = datasets.FashionMNIST('./datasets/', download=True, train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=True)

In [51]:
from torch import nn, optim
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(64, 10)
        
    def forward(self, x):
        # making sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.log_softmax(self.fc3(x), dim=1)
        
        return x

In [52]:
model = Network()

images, labels = next(iter(test_loader))

# Get the class probabilities
ps = torch.exp(model(images))

# Make sure the shape is appropriate, we should get 10 class probabilities for 64 examples
print(ps.shape)

torch.Size([64, 10])


In [53]:
top_p, top_class = ps.topk(1, dim=1)

# Look at the most likely classes for the first 10 examples
print(top_class[:10,:])

tensor([[4],
        [6],
        [6],
        [4],
        [4],
        [7],
        [6],
        [4],
        [4],
        [6]])


In [54]:
equals = (top_class == labels.view(*top_class.shape))
equals.shape

torch.Size([64, 1])

In [55]:
accuracy = torch.mean(equals.type(torch.FloatTensor))
print(f'Accuracy: {accuracy.item()*100}%')

Accuracy: 4.6875%


In [56]:
model = Network()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 5
steps = 0

train_losses, test_losses = [], []
for e in range(epochs):
    training_loss = 0
    for images, labels in train_loader:
        
        optimizer.zero_grad()
        
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        
        training_loss += loss.item()
        
    test_loss = 0
    accuracy = 0
        
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for images, labels in test_loader:
            log_ps = model(images)
            test_loss += criterion(log_ps, labels)
                
            ps = torch.exp(log_ps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = (top_class == labels.view(*top_class.shape))
            accuracy += torch.mean(equals.type(torch.FloatTensor))
                
    train_losses.append(training_loss/len(train_loader))
    test_losses.append(test_loss/len(test_loader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(training_loss/len(train_loader)),
          "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
          "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))
print("Done!")

Epoch: 1/5..  Training Loss: 0.338..  Test Loss: 5.466..  Test Accuracy: 0.067
Epoch: 2/5..  Training Loss: 0.175..  Test Loss: 6.150..  Test Accuracy: 0.072
Epoch: 3/5..  Training Loss: 0.137..  Test Loss: 6.059..  Test Accuracy: 0.062
Epoch: 4/5..  Training Loss: 0.120..  Test Loss: 6.576..  Test Accuracy: 0.075
Epoch: 5/5..  Training Loss: 0.111..  Test Loss: 7.131..  Test Accuracy: 0.090
Done!


In [57]:
## Defining the model with dropout added

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)

        # Now with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))

        # output so no dropout here
        x = F.log_softmax(self.fc3(x), dim=1)

        return x

In [58]:
## Training the model with dropout, and monitor the training progress with the validation loss and accuracy

model = Network()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 10
steps = 0

train_losses, test_losses = [], []
for e in range(epochs):
    training_loss = 0
    for images, labels in train_loader:
        
        optimizer.zero_grad()
        
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        
        training_loss += loss.item()
        
    test_loss = 0
    accuracy = 0
        
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        model.eval()
        for images, labels in test_loader:
            log_ps = model(images)
            test_loss += criterion(log_ps, labels)
                
            ps = torch.exp(log_ps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = (top_class == labels.view(*top_class.shape))
            accuracy += torch.mean(equals.type(torch.FloatTensor))
        
    model.train()
        
    train_losses.append(training_loss/len(train_loader))
    test_losses.append(test_loss/len(test_loader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1]),
          "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))
print("Done!")

Epoch: 1/10..  Training Loss: 0.459..  Test Loss: 4.719..  Test Accuracy: 0.074
Epoch: 2/10..  Training Loss: 0.291..  Test Loss: 5.627..  Test Accuracy: 0.053
Epoch: 3/10..  Training Loss: 0.258..  Test Loss: 5.832..  Test Accuracy: 0.054
Epoch: 4/10..  Training Loss: 0.241..  Test Loss: 6.073..  Test Accuracy: 0.049
Epoch: 5/10..  Training Loss: 0.234..  Test Loss: 5.474..  Test Accuracy: 0.066
Epoch: 6/10..  Training Loss: 0.224..  Test Loss: 6.083..  Test Accuracy: 0.070
Epoch: 7/10..  Training Loss: 0.218..  Test Loss: 6.165..  Test Accuracy: 0.063
Epoch: 8/10..  Training Loss: 0.210..  Test Loss: 6.802..  Test Accuracy: 0.059
Epoch: 9/10..  Training Loss: 0.210..  Test Loss: 7.090..  Test Accuracy: 0.062
Epoch: 10/10..  Training Loss: 0.200..  Test Loss: 6.756..  Test Accuracy: 0.071
Done!


In [1]:
%matplotlib inline
import helper

images, labels = next(iter(train_loader))

img = images[0].view(1, 784)
# Turn off gradients to speed up this part
with torch.no_grad():
    logps = model(img)

# Output of the network are log-probabilities, need to take exponential for probabilities
ps = torch.exp(logps)
helper.view_classify(img.view(1, 28, 28), ps)