In [12]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch import optim

In [13]:
#Next, we will define transforms for the preprocessing of our image data:
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,)),])


In [14]:
#Let's define the batch_size to divide our dataset into chunks to be fed into the model:
batch_size = 64

In [15]:
# Next, we will pull the dataset from torchvision and apply the transform and create batches. 
# For this, we will first create a training dataset

trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/',download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

In [57]:
# Now, let's create the testset:
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)

In [17]:
#Now our main task is to define the neural network class, which has to be a subclass of nn.Module:
class FashionNetwork(nn.Module):
    
    #we define the init method for the class:
    def __init__(self):
        super().__init__()
        
        #We need to define the layers for our model within init. 
        #The first hidden layer looks like the following:
        self.hidden1 = nn.Linear(784, 256)
        
        #Now we will define the second hidden layer:
        self.hidden2 = nn.Linear(256, 128)
        
        #Then we will define our output layer:
        self.output = nn.Linear(128, 10)
        
        #We will define our softmax activation for our last layer:
        self.softmax = nn.Softmax(dim=1)
        
        #Finally, we will define the activation function in the inner layers:
        self.activation = nn.ReLU()
    
    #Let's start with the forward() method in the class, passing in the input:
    def forward(self, x):
        
        #Now we will move the input to the first hidden layer, with 256 nodes:
        x = self.hidden1(x)
        
        #Next, we pass the outputs from the first hidden layer through the activation
        #function, which in our case is ReLU:
        x = self.activation(x)
        
        #We will repeat the same for the second layer, which has 128 nodes, and pass it
        #through ReLU
        x = self.hidden2(x)
        x = self.activation(x)
        
        #Now we pass the last output layer, with 10 output classes:
        x = self.output(x)
        
        #Then we will push the output using the softmax function
        output = self.softmax(x)
        
        #Finally, we return the output tensor:
        return output 


In [18]:
#We will then create the network object:
model = FashionNetwork()

#Let's have a quick look at our model:
print(model)

FashionNetwork(
  (hidden1): Linear(in_features=784, out_features=256, bias=True)
  (hidden2): Linear(in_features=256, out_features=128, bias=True)
  (output): Linear(in_features=128, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
  (activation): ReLU()
)


In [19]:
#We will start by importing the optim module:
from torch import optim

In [20]:
#Next, we will create an optimizer object. We will use the Adam optimizer and pass model parameters:
optimizer = optim.Adam(model.parameters())


In [21]:
#To check for the defaults of the optimizer, you can do the following:
optimizer.defaults

{'lr': 0.001,
 'betas': (0.9, 0.999),
 'eps': 1e-08,
 'weight_decay': 0,
 'amsgrad': False}

In [22]:
#You can also add the learning rate as an additional parameter:
optimizer = optim.Adam(model.parameters(), lr=3e-3)

#Now we will start training our model, starting with the number of epochs:
epoch = 10

criterion = nn.NLLLoss()

In [23]:
# We will then start the loop:
for _ in range(epoch):
    
    #We initialize running_loss as 0:
    running_loss = 0
    
    #We will iterate through each image in training the image loader, which we defined
    #in an earlier recipe in this chapter: Defining the neural network class:
    
    for image, label in trainloader:
        
        #We then reset the gradients to zero
        optimizer.zero_grad()
        
        #Next, we will reshape the image:
        image = image.view(image.shape[0],-1)
        
        #Then we get the prediction from the model:
        pred = model(image)
        
        #Then we calculate the loss/error:
        loss = criterion(pred, label)
        
        #Then we call the .backward() method on the loss:
        loss.backward()
        
        #Then we call the .step() method on the optimizer:
        optimizer.step()
        
        #Then we append to the running loss:
        running_loss += loss.item()
        
        #Finally, we will print the loss after each epoch:
    print(f'Training loss: {running_loss/len(trainloader):.4f}')



Training loss: -0.7199
Training loss: -0.7410
Training loss: -0.7368
Training loss: -0.7277
Training loss: -0.7356
Training loss: -0.7370
Training loss: -0.7388
Training loss: -0.7402
Training loss: -0.7363
Training loss: -0.7421


In [31]:
pred[3]

tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SelectBackward>)

In [32]:
pred[2]

tensor([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.7162e-35, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00],
       grad_fn=<SelectBackward>)

In [36]:
image.view(image.shape[0],-1)

torch.Size([32, 784])

In [37]:
image.shape[0]

32

In [41]:
len(trainloader)

938

In [43]:
label

tensor([8, 3, 5, 5, 3, 7, 9, 5, 3, 3, 7, 5, 8, 8, 4, 4, 3, 5, 9, 4, 5, 3, 3, 1,
        3, 7, 9, 0, 0, 8, 2, 5])

In [50]:
image.view(image.shape[0],-1).size()

torch.Size([32, 784])

In [51]:
image.size()

torch.Size([32, 1, 28, 28])

In [52]:
28*28

784