In [1]:
# Inference = make predictions
# Avoid overfitting through regularization such as dropout

import torch
from torchvision import datasets, transforms

# Define a transform
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MINIST_data/', download = True, train = True, transform = transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size = 64, shuffle = True)

# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MINIST_data', download = True, train = False, transform = transform)
testloader = torch.utils.data.DataLoader(testset, batch_size = 64, shuffle = True)

In [2]:
# create model
from torch import nn, optim

class Classifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        n_features = 784
        n_hidden = [256, 128, 64]
        n_output = 10
        self.h1 = nn.Linear(n_features, n_hidden[0])
        self.h2 = nn.Linear(n_hidden[0], n_hidden[1])
        self.h3 = nn.Linear(n_hidden[1], n_hidden[2])
        self.output = nn.Linear(n_hidden[2], n_output)
        
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim = 1)
        
    def forward(self, x):
            x = x.view(x.shape[0], -1)
            
            x = self.relu(self.h1(x))
            x = self.relu(self.h2(x))
            x = self.relu(self.h3(x))
            x = self.softmax(self.output(x))

            return x


In [3]:
# forward pass
model = Classifier()
images, labels = next(iter(testloader))
# get loss
ps = torch.exp(model(images))
ps.shape

torch.Size([64, 10])

In [4]:
top_p, top_class = ps.topk(1, dim = 1)
#print(top_class[:10,:])
#print(top_p[top_class[:10, :]])

# to equal the shapes : convert to vector 64,64 from 64,1 and 64
print(top_class.shape, labels.shape)
equals = top_class == labels
print(equals.shape, top_class.shape, labels.shape)
#print(top_class[0:10], "labels" , labels[0:10], equals[0:10])

equals = top_class == labels.view(*top_class.shape)
print(equals.shape)
print(top_class[0:15], "labels" , labels[0:15], equals[0:15])

torch.Size([64, 1]) torch.Size([64])
torch.Size([64, 64]) torch.Size([64, 1]) torch.Size([64])
torch.Size([64, 1])
tensor([[4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [2],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4],
        [4]]) labels tensor([3, 6, 3, 9, 7, 8, 1, 8, 0, 5, 5, 0, 4, 1, 4]) tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [1]], dtype=torch.uint8)


In [5]:
accuracy = torch.mean(equals.type(torch.FloatTensor))
# get accuracy of untrained data
print(f'Accuracy: {accuracy.item()*100}%')

Accuracy: 21.875%


In [11]:
from torch import optim
epoch = 8
train_error = 0

optimizer = optim.SGD( model.parameters(), lr = 0.003)
criterion = nn.NLLLoss()

for i in range(epoch):
    train_error = 0
    for images, labels in trainloader:
        optimizer.zero_grad() #restart
        
        output = model(images) # forward pass
        loss = criterion(output, labels) # calculate error
        loss.backward() # back pass , get the gradients
        
        optimizer.step() # update weight
        
        train_error += loss.item()
    else:
        print(f'error sum {train_error/len(trainloader)}')


error sum 0.3684121687878678
error sum 0.36424563884703337
error sum 0.35963521714308366
error sum 0.3558622234220952
error sum 0.35185300039330014
error sum 0.34786177139038216
error sum 0.3441995717664517
error sum 0.34050450045893443
error sum 0.33744214358392044
error sum 0.3333165336932455
error sum 0.32995239128944465
error sum 0.32633348889569486
error sum 0.32339180637397236
error sum 0.3194832546529231
error sum 0.3173463902176062
error sum 0.31402613792909995
error sum 0.31114578038168106
error sum 0.30791513870424553
error sum 0.3053410117274154
error sum 0.30257028283309073


In [27]:
        
# check the test data

test_error = 0
for images, labels in testloader:
    output = model(images)
    loss = criterion(output, labels)
    test_error += loss
else:
    print(f'test error: {test_error/len(testloader)}')
    
train_error = 0
for images, labels in testloader:
    output = model(images)
    loss = criterion(output, labels)
    train_error += loss
else:
    print(f'train error: {train_error/len(trainloader)}')

test error: 0.35815516114234924
train error: 0.059776101261377335


In [29]:
# Early stopping strategy: stop in early step aprox 8 - 10 epochs
# Other way is DROP OUT

class Clasifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.hid1 = nn.Linear(784, 256)
        self.hid2 = nn.Linear(256, 128)
        self.hid3 = nn.Linear(128, 64)
        self.out = nn.LogSoftmax(dim = 1)
        
        # dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p = 0.2)
        self.relu = nn.ReLU()
        self.softmax = nn.LogSoftmax(dim = 1)
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.dropout(self.relu(self.hid1(x)))
        x = self.dropout(self.relu(self.hid2(x)))
        x = self.dropout(self.relu(self.hid3(x)))
        x = self.softmax(self.output(x))
        
        return x

### Using Dropout only for training to prevent overfitting, no validation

In [None]:
# training with dropout

from torch import optim
epoch = 8
train_error = 0

optimizer = optim.SGD( model.parameters(), lr = 0.003)
criterion = nn.NLLLoss()

for i in range(epoch):
    train_error = 0
    for images, labels in trainloader:
        optimizer.zero_grad() #restart
        
        output = model(images) # forward pass
        loss = criterion(output, labels) # calculate error
        loss.backward() # back pass , get the gradients
        
        optimizer.step() # update weight
        
        train_error += loss.item()
    else:
        print(f'error sum {train_error/len(trainloader)}')

In [31]:
# Use model.eval() to turn off dropout, return turn on with model.train()
test_error = 0
# turn off gradients
with torch.no_grad():

    # set model to evaluation mode
    model.eval()

    # validation pass here
    for images, labels in testloader:
        output = model(images)
        loss = criterion(output, labels)
        test_error += loss
    else:
        print(f'test error: {test_error/len(testloader)}')
        

Help on built-in function mean:

mean(...)
    .. function:: mean(input) -> Tensor
    
    Returns the mean value of all elements in the :attr:`input` tensor.
    
    Args:
        input (Tensor): the input tensor
    
    Example::
    
        >>> a = torch.randn(1, 3)
        >>> a
        tensor([[ 0.2294, -0.5481,  1.3288]])
        >>> torch.mean(a)
        tensor(0.3367)
    
    .. function:: mean(input, dim, keepdim=False, out=None) -> Tensor
    
    Returns the mean value of each row of the :attr:`input` tensor in the given
    dimension :attr:`dim`. If :attr:`dim` is a list of dimensions,
    reduce over all of them.
    
    
    If :attr:`keepdim` is ``True``, the output tensor is of the same size
    as :attr:`input` except in the dimension(s) :attr:`dim` where it is of size 1.
    Otherwise, :attr:`dim` is squeezed (see :func:`torch.squeeze`), resulting in the
    output tensor having 1 (or ``len(dim)``) fewer dimension(s).
    
    
    Args:
        input (Tensor): 