In [1]:
import torch
from torch.nn import functional as F
from torchvision import datasets, transforms
import helper

Exercise: Implement the validation loop below and print out the total accuracy after the loop. You can largely copy and paste the code from above, but I suggest typing it in because writing it out yourself is essential for building the skill. In general you'll always learn more by typing it rather than copy-pasting. You should be able to get an accuracy above 80%.

In [2]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download = True, train = True, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 64, shuffle = True)

testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download = True, train = False, transform = transform)
testloader = torch.utils.data.DataLoader(testset, batch_size = 64, shuffle = True)

In [3]:
from torch import nn, optim
import torch.nn.functional as F

class Classifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim = 1)
        
        return x

In [4]:
model = Classifier()

images, labels = next(iter(testloader))

ps = torch.exp(model(images))

print(ps.shape)

torch.Size([64, 10])


In [5]:
top_p, top_class = ps.topk(1, dim=1)
# Look at the most likely classes for the first 10 examples
print(top_class[:10,:])

tensor([[8],
        [0],
        [0],
        [0],
        [0],
        [8],
        [0],
        [7],
        [0],
        [0]])


In [6]:
equals = top_class == labels.view(*top_class.shape)

In [7]:
accuracy = torch.mean(equals.type(torch.FloatTensor))

In [8]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.003)

epochs = 15
steps = 0

train_losses, test_losses = [], []

for e in range(epochs):
    
    train_loss = 0
    test_loss = 0
    
    for images, labels in trainloader:
        
        optimizer.zero_grad()
        
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    with torch.no_grad():
        for images, labels in testloader:

            ps = model(images)
            loss = criterion(ps, labels)
            test_loss += loss.item()
            
            _, top_class = ps.topk(1, dim =1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy = torch.mean(equals.type(torch.FloatTensor))
            
            
        print(f'Accuracy: {accuracy.item()*100}%')
        print(f"Training loss: {train_loss/len(trainloader)}")
        print(f"Test loss: {test_loss/len(testloader)}")
        
        train_losses.append(train_loss)
        test_losses.append(test_loss)

Accuracy: 87.5%
Training loss: 0.5133283532091549
Test loss: 0.4327923274913411
Accuracy: 87.5%
Training loss: 0.3930982140494562
Test loss: 0.43635325494465554
Accuracy: 93.75%
Training loss: 0.3609409800200447
Test loss: 0.3971940689975289
Accuracy: 81.25%
Training loss: 0.33569927110886777
Test loss: 0.3890457047019035
Accuracy: 87.5%
Training loss: 0.31594785996305663
Test loss: 0.4127750051249364
Accuracy: 93.75%
Training loss: 0.3060169090260702
Test loss: 0.3635257576016863
Accuracy: 93.75%
Training loss: 0.2904417391286603
Test loss: 0.4017871641049719
Accuracy: 81.25%
Training loss: 0.2824273955926839
Test loss: 0.3586532253368645
Accuracy: 81.25%
Training loss: 0.2729918110464364
Test loss: 0.36395327661447463
Accuracy: 81.25%
Training loss: 0.26957833429754796
Test loss: 0.3707465647607093
Accuracy: 93.75%
Training loss: 0.2603224001959888
Test loss: 0.3645322471857071
Accuracy: 87.5%
Training loss: 0.2515451711600523
Test loss: 0.37044148711831704
Accuracy: 93.75%
Training 

Exercise: Add dropout to your model and train it on Fashion-MNIST again. See if you can get a lower validation loss or higher accuracy.

In [9]:
## TODO: Define your model with dropout added
from torch import nn, optim
import torch.nn.functional as F

class Classifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.fc1  = nn.Linear(784, 256)
        self.fc2  = nn.Linear(256, 128)
        self.fc3  = nn.Linear(128, 64)
        self.fc4  = nn.Linear(64, 10)
        
        self.drop = nn.Dropout(p = 0.2)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = self.drop(F.relu(self.fc1(x)))
        x = self.drop(F.relu(self.fc2(x)))
        x = self.drop(F.relu(self.fc3(x)))
        x = F.log_softmax(self.fc4(x), dim = 1)
        
        return x

In [17]:
## TODO: Train your model with dropout, and monitor the training progress with the validation loss and accuracy

model2 = Classifier()
optimizer = optim.Adam(model2.parameters(), lr = 0.003)
epochs = 15
for e in range(epochs):
    
    train_loss = 0
    accuracy_score = 0
    
    for images, labels in trainloader:

        optimizer.zero_grad()
        
        outputs = model2(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    print(train_loss/len(trainloader))
    
    with torch.no_grad():
        model2.eval()
        
        for images, labels in testloader:

            ps = torch.exp(model2(images))
            
            _, top_class = ps.topk(1, dim =1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy = torch.mean(equals.type(torch.FloatTensor))
            
            accuracy_score += accuracy
            model2.train()
            
        print(accuracy_score, accuracy_score/len(testloader))

0.6065709929285782
tensor(127.3906) tensor(0.8114)
0.4840193868700121
tensor(129.4688) tensor(0.8246)
0.4527153757684775
tensor(130.6562) tensor(0.8322)
0.4377439555201703
tensor(130.4375) tensor(0.8308)
0.422101357026395
tensor(131.4062) tensor(0.8370)
0.4125680218277964
tensor(132.6094) tensor(0.8446)
0.4075646700698938
tensor(131.3281) tensor(0.8365)
0.4036918084568052
tensor(132.0625) tensor(0.8412)
0.3920085062064342
tensor(133.5312) tensor(0.8505)
0.3852792697102785
tensor(132.7656) tensor(0.8456)
0.3883709457319683
tensor(132.9688) tensor(0.8469)
0.3819753069645052
tensor(133.5156) tensor(0.8504)
0.37365271045423265
tensor(132.8281) tensor(0.8460)
0.3796292755490681
tensor(133.0156) tensor(0.8472)
0.3715645669937642
tensor(133.7500) tensor(0.8519)


In [12]:
model.train()

Classifier(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)

In [None]:
model2.train()

# SAVING MODELS

In [None]:
model = fc_model.Network(784, 10, [512, 256, 128]) # --> input, output, hidden layers
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
fc_model.train(model, trainloader, testloader, criterion, optimizer, epochs=2)

In [18]:
print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", model.state_dict().keys())

Our model: 

 Classifier(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
) 

The state dict keys: 

 odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])


In [19]:
torch.save(model.state_dict(), 'checkpoint.pth')

In [20]:
state_dict = torch.load('checkpoint.pth')
print(state_dict.keys())

odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])


In [21]:
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [each.out_features for each in model.hidden_layers],
              'state_dict': model.state_dict()}

torch.save(checkpoint, 'checkpoint.pth')

In [None]:
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = fc_model.Network(checkpoint['input_size'],
                             checkpoint['output_size'],
                             checkpoint['hidden_layers'])
    model.load_state_dict(checkpoint['state_dict'])
    
    return model

In [None]:
model = load_checkpoint('checkpoint.pth')
print(model)

In [None]:
model.