In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import helper

In [None]:
# Preprocessing applied to every image: convert it to a tensor, then
# normalize the single grayscale channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split (download=True is passed, so the data is fetched into
# F_MNIST_data/ when needed), served in shuffled mini-batches of 64.
trainset = datasets.FashionMNIST(
    root='F_MNIST_data/', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)

# Test split, same preprocessing and batch size; this loader shuffles as well.
testset = datasets.FashionMNIST(
    root='F_MNIST_data/', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=64, shuffle=True)

In [None]:
# Sanity check of the data pipeline: grab one mini-batch from the training
# loader and display its first image with the project-local `helper` module.
image, label = next(iter(trainloader))
helper.imshow(image[0]);

In [None]:
import torch.nn as nn
from collections import OrderedDict

# Small CNN classifier for 1x28x28 inputs.
# Both 3x3 convolutions use stride 1 and padding 1, so they keep the 28x28
# spatial size and only change the number of channels; the 2x2 max-pool is
# the only layer that shrinks height and width.
model = nn.Sequential(OrderedDict(
    # (1, 28, 28) -> (32, 28, 28)
    conv1=nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
    relu1=nn.ReLU(),
    # (32, 28, 28) -> (64, 28, 28)
    conv2=nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
    relu2=nn.ReLU(),
    # (64, 28, 28) -> (64, 14, 14)
    maxpool1=nn.MaxPool2d(kernel_size=2, stride=2),
    # (64, 14, 14) -> 12544-element vector per sample, as nn.Linear expects
    flatten=nn.Flatten(),
    # 12544 -> 256
    fc1=nn.Linear(64 * 14 * 14, 256),
    relu5=nn.ReLU(),
    # 256 -> 10 raw class scores (no softmax layer in the network)
    fc2=nn.Linear(256, 10),
))

# Bare expression so the notebook displays the layer summary.
model

In [None]:
# Optimization setup for `model`: loss function, optimizer and LR schedule.
from torch.optim import lr_scheduler

# Cross-entropy loss, applied directly to the model's raw output scores.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters with a base learning rate of 1e-3.
learning_rate = 0.001
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# StepLR with step_size=1 and gamma=0.7: each scheduler.step() call scales
# the learning rate by 0.7. It has no effect unless step() is called.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=0.7)


In [None]:
# Train the network.
# Uses `model`, `trainloader`, `criterion`, `optimizer` and `scheduler`
# created in the earlier cells.
epochs = 20
print_every = 40
steps = 0
# Initialised once, outside the epoch loop: `steps` keeps counting across
# epochs, so resetting the sum at the start of every epoch would make the
# first report of an epoch divide a partial sum by `print_every` whenever the
# number of batches per epoch is not a multiple of `print_every`.
running_loss = 0

# Make sure the model is in training mode (matters if it was switched to
# eval mode by an inference cell before this cell is re-run).
model.train()

for e in range(epochs):
    for images, labels in trainloader:
        steps += 1
        # Clear gradients, as they accumulate across backward passes
        optimizer.zero_grad()
        # Forward pass
        out = model(images)
        # Loss calculation
        loss = criterion(out, labels)
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()

        # Report the mean loss over the last `print_every` batches
        running_loss += loss.item()
        if steps % print_every == 0:
            print("Epoch: {}/{}... ".format(e+1, epochs),
                  "Loss: {:.4f}".format(running_loss/print_every))

            running_loss = 0

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates of that epoch. Without this call the scheduler is never applied
    # and the learning rate stays at its initial value.
    scheduler.step()

In [None]:
# Test out the network on a single image from the test set.
# Uses `model` and `testloader` from the earlier cells.

# Switch to evaluation mode for inference.
model.eval()

dataiter = iter(testloader)
images, labels = next(dataiter)
# Take the first image of the batch and give it a leading batch dimension:
# the convolutional model takes (batch, channel, height, width) input.
img = images[0].view(1, 1, 28, 28)

# No gradients are needed for inference. Call the module itself rather than
# .forward() so that any registered hooks run.
with torch.no_grad():
    logits = model(img)
    # Turn the raw class scores into probabilities
    ps = F.softmax(logits, dim=1)

# Plot the image and probabilities. A non-mutating view provides the
# (1, 28, 28) shape instead of resizing `img` in place, which would alter a
# tensor that shares storage with `images`.
helper.view_classify(img.view(1, 28, 28), ps, version='Fashion')