In [22]:
import numpy as np
import matplotlib.pyplot as plt
import time
from PIL import Image

import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms, models

from collections import OrderedDict

In [23]:
# Hyper-parameters shared by the cells below.
tr_batchsize = 64  # mini-batch size handed to the training DataLoader
val_test_batchsize = 32  # mini-batch size handed to the validation and test DataLoaders
epochs = 60  # number of epochs passed to train_eval
lr = 0.001  # learning rate passed to the optimizer

In [24]:
# Pick the compute device once for the whole notebook:
# the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    print(f'Found {torch.cuda.device_count()} GPUs.')
    # Edit the index here to select a different CUDA device.
    deviceFlag = torch.device('cuda:0')
else:
    deviceFlag = torch.device('cpu')

print(f'Now the deivce is set to {deviceFlag}')

Found 1 GPUs.
Now the deivce is set to cuda:0


# VALIDATION FUNCTION

In [25]:
def validation(model, validateloader, ValCriterion):
    '''
    Evaluate `model` on every batch produced by `validateloader`.

    Parameters
    ----------
    model : nn.Module whose output has one score per class along dim 1.
    validateloader : iterable of (images, labels) batches.
    ValCriterion : loss callable `criterion(output, labels)` returning a scalar
        tensor. It is assumed to use the default 'mean' reduction, so that
        weighting each batch loss by its size gives a per-sample mean.

    Returns
    -------
    (mean_loss, accuracy) : tuple of floats, both averaged over samples rather
        than over batches, so a short final batch is not over-weighted.
        Returns (0.0, 0.0) when the loader yields no samples.

    Notes
    -----
    The function does not switch the model to eval mode; the caller is
    expected to call `model.eval()` first. Gradients are disabled here.
    '''
    # Follow the device the model actually lives on instead of relying on a
    # notebook-level global that may differ from the training device.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cpu')  # parameter-free model

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        # A DataLoader is directly iterable; each item is an (images, labels) pair.
        for images, labels in validateloader:
            images = images.to(device)
            labels = labels.to(device)

            output = model(images)
            batch_size = labels.size(0)

            # .item() extracts the Python scalar; re-weight by the batch size
            loss_sum += ValCriterion(output, labels).item() * batch_size

            # argmax over log-probabilities equals argmax over probabilities,
            # so no torch.exp() is needed to pick the predicted class.
            correct += (output.argmax(dim = 1) == labels).sum().item()
            seen += batch_size

    if seen == 0:
        return 0.0, 0.0

    return loss_sum / seen, correct / seen

# TRAINING + VALIDATION

In [26]:
def _validate_and_report(model, validateloader, criterion, header, mean_train_loss):
    '''Run one validation pass, print a progress line, and return the model to train mode.'''
    model.eval()

    # No gradients are needed for validation; this saves memory and compute.
    with torch.no_grad():
        validation_loss, val_acc = validation(model, validateloader, criterion)

    display = header
    display += f'Train_loss: {round(mean_train_loss, 4)}, '
    display += f'Valid_loss: {round(validation_loss, 4)}, '
    display += f'Valid_Acc: {round(val_acc, 4)}'
    print(display)

    model.train()


def train_eval(model, traindataloader, validateloader, TrCriterion, optimizer, epochs, deviceFlag_train, eval_itrs = 40):
    '''
    Train `model` and periodically evaluate it on the validation set.

    Parameters
    ----------
    model : nn.Module to optimise (moved to `deviceFlag_train` in place).
    traindataloader : iterable of (inputs, labels) training batches.
    validateloader : iterable of (images, labels) validation batches.
    TrCriterion : loss callable used for both training and validation.
    optimizer : torch optimizer already bound to the model parameters.
    epochs : number of passes over `traindataloader`.
    deviceFlag_train : device the model and the batches are sent to.
    eval_itrs : validate every `eval_itrs` training iterations within an
        epoch (default 40, the value previously hard-coded).

    Raises
    ------
    ValueError : if `eval_itrs` is smaller than 1.

    Notes
    -----
    A validation pass also runs at the end of every epoch for the batches not
    yet reported. Without it, a loader with fewer than `eval_itrs` batches per
    epoch would never be validated, because the iteration counter restarts at
    every epoch.
    '''
    if eval_itrs < 1:
        raise ValueError('eval_itrs must be a positive integer')

    # Move the model to the training device first
    model.to(deviceFlag_train)

    # Report the batch size of the loader actually passed in, not a global
    batch_size = getattr(traindataloader, 'batch_size', None)
    print(f'The training batchsize is {batch_size}.')

    # ! THE EPOCH LOOP !
    for e in range(epochs):
        # Restart the timer every epoch so the printed duration is per-epoch
        since = time.time()
        itrs = 0

        # Training behaviour: dropout active, batch-norm uses batch statistics
        model.train()

        # Running loss and batch count since the last progress report
        training_loss_running = 0
        batches_since_report = 0

        # ! THE BATCH LOOP !
        for inputs, labels in traindataloader:
            itrs += 1

            # .to() returns a copy of the tensor on the target device
            inputs = inputs.to(deviceFlag_train)
            labels = labels.to(deviceFlag_train)

            # Clear the gradients accumulated by the previous iteration
            optimizer.zero_grad()

            # Forward pass, loss, backward pass, parameter update
            outputs = model(inputs)
            train_loss = TrCriterion(outputs, labels)
            train_loss.backward()
            optimizer.step()

            # .item() pulls the Python scalar out of the 0-dim loss tensor
            training_loss_running += train_loss.item()
            batches_since_report += 1

            # ----------- Validate every eval_itrs iterations ---------- #
            if itrs % eval_itrs == 0:
                header = f'Epoch: {e + 1}/{epochs}, itrs: {itrs}, '
                _validate_and_report(model, validateloader, TrCriterion, header,
                                     training_loss_running / batches_since_report)
                training_loss_running = 0
                batches_since_report = 0

        # ----------- Validate the tail of the epoch ---------- #
        if batches_since_report > 0:
            header = f'Epoch: {e + 1}/{epochs}, itrs: {itrs}, '
            _validate_and_report(model, validateloader, TrCriterion, header,
                                 training_loss_running / batches_since_report)

        elapsed = time.time() - since
        print(f'Epoch {e + 1} takes {round(elapsed, 4)} sec')

# TEST FUNCTION

In [27]:
def test_acc(model, test_loader, deviceFlag_test):

    # for testing, it is actually do validation on the test set
    model.eval()

    model.to(deviceFlag_test)

    since = time.time()

    # In .eval() mode, set the context manager to turn-off grads
    with torch.no_grad():
        acc = 0

        # iter() gives images and labels in batches
        for inputs, labels in iter(test_loader):
            
            inputs = inputs.to(deviceFlag_test)
            labels = labels.to(deviceFlag_test)

            # Do a forward pass
            output = model.forward(inputs)
            # convert the log likelihood to scalar
            prob = torch.exp(output)

            equals = (labels.data == prob.max(dim = 1)[1])

            acc += equals.type(torch.FloatTensor).mean().item()

        end = time.time()
        elapsed = end - since

        print(f'Test_acc: {round(acc, 4)}, tiem_spent: {round(elapsed, 2)} sec')

# Checkpoint Creation

In [28]:
def save_checkpoint(model, optimizer, trainingdataset, saved_pth):
    '''
    Serialise the model weights and the class mapping with torch.save.

    The dataset's `class_to_idx` mapping is first attached to `model` as an
    attribute and then stored next to the state dict. The 'arch' entry is
    always the literal string 'vgg19'. `optimizer` is accepted but its state
    is currently not written to the checkpoint.
    '''
    # Keep the class <-> index conversion on the model object itself
    class_mapping = trainingdataset.class_to_idx
    model.class_to_idx = class_mapping

    # The checkpoint is a plain dict; extend it with whatever is needed later
    # (e.g. optimizer_state_dict = optimizer.state_dict()).
    chkpt = dict(
        arch = 'vgg19',
        class_to_idx = model.class_to_idx,
        model_state_dict = model.state_dict(),
    )

    torch.save(chkpt, saved_pth)

# Checkpoint Loading

In [29]:
def load_checkpoint(chkpt_path):
    '''
    Rebuild a VGG-19 based classifier from a checkpoint dictionary.

    Parameters
    ----------
    chkpt_path : path or file-like object readable by torch.load. The stored
        dict must provide 'arch', 'class_to_idx' and 'model_state_dict'.

    Returns
    -------
    nn.Module : VGG-19 with the custom 102-way classifier head, the stored
        weights loaded, and a `class_to_idx` attribute. The parameters of the
        original network are frozen; the new classifier head stays trainable.

    Raises
    ------
    ValueError : if the checkpoint was not saved for the 'vgg19' architecture.
    '''
    # map_location lets a checkpoint written on a GPU be opened on a machine
    # without CUDA; the caller moves the model to its device afterwards.
    chkpt = torch.load(chkpt_path, map_location = 'cpu')

    # Fail loudly instead of printing and then crashing on an undefined model
    arch = chkpt.get('arch')
    if arch != 'vgg19':
        raise ValueError(f"Unsupported network architecture in checkpoint: {arch!r} (expected 'vgg19')")

    # Every weight is overwritten by load_state_dict below, so there is no
    # point in downloading the ImageNet weights first.
    model = models.vgg19()

    # Turn off .requires_grad for all parameters of the stock network
    for params in model.parameters():
        params.requires_grad = False

    model.class_to_idx = chkpt['class_to_idx']

    # Re-create an empty classifier with the same layout that was saved
    classifier = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(25088, 4096)),
        ('relu', nn.ReLU()),
        ('drop', nn.Dropout(p = 0.5)),
        ('fc2', nn.Linear(4096, 102)),
        ('output', nn.LogSoftmax(dim = 1))
    ]))

    # Attach the classifier head
    model.classifier = classifier

    # Copy the stored parameters into the freshly constructed model
    model.load_state_dict(chkpt['model_state_dict'])

    return model

# Image Preprocessing

In [30]:
def image_preprocessing(img_pth):
    '''
    Load an image file and prepare it for the network.

    Steps: convert to RGB, resize so the short side is 256 px (aspect ratio
    kept), centre-crop to 224 x 224, scale to [0, 1], normalise with the
    per-channel mean/std below, and move the channel axis first.

    Parameters
    ----------
    img_pth : path (or file object) accepted by PIL.Image.open.

    Returns
    -------
    numpy.ndarray of shape (3, 224, 224), dtype float64.
    '''
    # Close the file handle as soon as the pixels are decoded. convert('RGB')
    # guarantees three channels, so grayscale / RGBA / palette images do not
    # break the per-channel normalisation below.
    with Image.open(img_pth) as opened:
        pil_image = opened.convert('RGB')

    # -------- Resize with aspect ratio maintained --------- #
    # Image.thumbnail() only ever shrinks, so an image whose short side is
    # below 256 px would be cropped out of bounds. resize() handles both
    # directions, like the Resize(256) step of the evaluation transforms.
    width, height = pil_image.size
    scale = 256 / min(width, height)
    new_size = (max(256, round(width * scale)), max(256, round(height * scale)))
    pil_image = pil_image.resize(new_size, Image.BILINEAR)

    # --------- Centre crop ----------- #
    # PIL boxes are (left, upper, right, lower) with the origin at the top-left
    left = (pil_image.width - 224) // 2
    upper = (pil_image.height - 224) // 2
    pil_image = pil_image.crop((left, upper, left + 224, upper + 224))

    # --------- Convert to np then normalise ----------- #
    np_image = np.array(pil_image) / 255
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    np_image = (np_image - mean) / std

    # --------- Transpose [H, W, Ch] -> [Ch, H, W] for PyTorch ----------#
    np_image = np_image.transpose([2, 0, 1])

    return np_image

# Image Display

In [31]:
def imshow(pt_image, ax = None, title = None):
    '''
    Draw a normalised, channel-first image on a matplotlib axis.

    `pt_image` is an array laid out as [Ch, H, W]. It is transposed to
    [H, W, Ch], de-normalised with the per-channel mean/std below, clipped to
    [0, 1] and shown. A new figure/axis is created when `ax` is None, and
    `title` is applied when given. Returns the axis that was drawn on.
    '''
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    target_ax = ax
    if target_ax is None:
        _, target_ax = plt.subplots()

    # [Ch, H, W] -> [H, W, Ch], then undo the normalisation. Values outside
    # [0, 1] would render as noise, hence the clip.
    hwc_image = pt_image.transpose((1, 2, 0))
    restored = np.clip(hwc_image * channel_std + channel_mean, 0, 1)

    if title is not None:
        target_ax.set_title(title)

    # Axes.imshow (matplotlib), not this notebook-level function
    target_ax.imshow(restored)

    return target_ax

# Prediction Function

In [32]:
def predict(img_pth, model, trainingdataset, topk):
    '''
    Classify a single image file and return the `topk` most likely classes.

    1. preprocess the image with image_preprocessing;
    2. run a forward pass (eval mode, gradients disabled);
    3. use tensor.topk(k) to get the k highest probabilities and class indices;
    4. map the indices to class names by inverting `class_to_idx`.

    Parameters
    ----------
    img_pth : path of the image to classify.
    model : nn.Module returning log-probabilities (torch.exp is applied).
    trainingdataset : object exposing a `class_to_idx` mapping.
        NOTE(review): confirm the dataset class in use provides this attribute.
    topk : number of classes to return.

    Returns
    -------
    (top_probs, top_classname) : two lists of length `topk`, in the same
        order, most probable class first.
    '''
    np_img = image_preprocessing(img_pth)

    # Run on whatever device the model lives on instead of assuming CUDA
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cpu')  # parameter-free model

    # float32 tensor, [Ch, H, W] -> [Batch, Ch, H, W]
    pt_img = torch.from_numpy(np_img).float().unsqueeze(0).to(device)

    # Dropout / batch-norm must be in inference mode for a single-image
    # prediction; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(pt_img)
    finally:
        model.train(was_training)

    probs = torch.exp(output)

    # Pick out the topk from all classes
    top_probs, top_indices = probs.topk(topk)

    # Plain Python lists on the CPU; indices stay integers
    top_probs = top_probs[0].cpu().tolist()
    top_indices = top_indices[0].cpu().tolist()

    # Invert the class_to_idx dict into an idx_to_class dict
    idx_to_class = {value: key for key, value in trainingdataset.class_to_idx.items()}

    # A list (not a set) keeps the names aligned with top_probs
    top_classname = [idx_to_class[index] for index in top_indices]

    return top_probs, top_classname

# Dataset Loading

In [33]:
# Training pipeline: random rotation / crop / flip for augmentation, then
# tensor conversion and per-channel normalisation.
training_transforms = transforms.Compose([
    transforms.RandomRotation(degrees = 30),
    transforms.RandomResizedCrop(size = 224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation
# (RGB mean & std estimated on ImageNet).
validation_transforms = transforms.Compose([
    transforms.Resize(size = 256),
    transforms.CenterCrop(size = 224),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225]),
])

# Test pipeline: identical to the validation pipeline.
testing_transforms = transforms.Compose([
    transforms.Resize(size = 256),
    transforms.CenterCrop(size = 224),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                         std = [0.229, 0.224, 0.225]),
])

# Open the three splits with torchvision.datasets.Flowers102, downloading
# them into ./dataset when they are not there yet. Each split gets its own
# transform pipeline.
train_dataset = datasets.Flowers102(
    root = './dataset',
    split = 'train',
    transform = training_transforms,
    download = True,
)
valid_dataset = datasets.Flowers102(
    root = './dataset',
    split = 'val',
    transform = validation_transforms,
    download = True,
)
test_dataset = datasets.Flowers102(
    root = './dataset',
    split = 'test',
    transform = testing_transforms,
    download = True,
)

# Wrap every dataset in a torch.utils.data.DataLoader, which yields the
# samples batch by batch.
# Only the training data is shuffled; validation and test keep their order.
# The training batch size does not have to match the evaluation batch size.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size = tr_batchsize,
                                           shuffle = True)

validate_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size = val_test_batchsize,
                                              shuffle = False)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size = val_test_batchsize,
                                          shuffle = False)

In [34]:
class VGG16(nn.Module):
    '''
    VGG-16 style network with batch normalisation after every convolution.

    Thirteen 3x3 convolution blocks (`layer1` .. `layer13`, five of them
    followed by 2x2 max-pooling) feed three fully connected stages (`fc`,
    `fc1`, `fc2`). The first linear layer expects 7*7*512 features, i.e. the
    five poolings must reduce the input to 7x7 - inputs are therefore expected
    to be 224 x 224.

    `forward` returns log-probabilities (log-softmax over the classes), which
    is what `nn.NLLLoss` and the `torch.exp(output)` calls in this notebook
    assume. `nn.CrossEntropyLoss` gives the same loss on this output as on the
    raw logits, because log-softmax is idempotent.

    Parameters
    ----------
    num_classes : size of the output layer (default 102).
    '''

    def __init__(self, num_classes=102):
        super(VGG16, self).__init__()
        # Attribute names and the order of the sub-modules are kept stable so
        # that state_dict keys do not change.
        self.layer1 = self._conv_block(3, 64)
        self.layer2 = self._conv_block(64, 64, pool=True)
        self.layer3 = self._conv_block(64, 128)
        self.layer4 = self._conv_block(128, 128, pool=True)
        self.layer5 = self._conv_block(128, 256)
        self.layer6 = self._conv_block(256, 256)
        self.layer7 = self._conv_block(256, 256, pool=True)
        self.layer8 = self._conv_block(256, 512)
        self.layer9 = self._conv_block(512, 512)
        self.layer10 = self._conv_block(512, 512, pool=True)
        self.layer11 = self._conv_block(512, 512)
        self.layer12 = self._conv_block(512, 512)
        self.layer13 = self._conv_block(512, 512, pool=True)
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*512, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        '''Conv3x3 -> BatchNorm -> ReLU, optionally followed by 2x2 max-pooling.'''
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        return nn.Sequential(*layers)

    def forward(self, x):
        '''Return log-probabilities of shape (batch, num_classes) for image batch `x`.'''
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        # Flatten everything except the batch dimension
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        # NLLLoss expects log-probabilities; raw logits would make the loss
        # unbounded below and the exp() of the output meaningless.
        return torch.log_softmax(out, dim=1)

# Pretrained VGG

In the future, this will be replaced with our own model.

In [35]:
# Build the network and place it on the selected device in one step
# (nn.Module.to returns the module itself).
model = VGG16().to(deviceFlag)

# Last expression of the cell: display the layer structure
model

VGG16(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=

In [36]:
# The network is trained from scratch, so every parameter has to stay
# trainable. The loop used to assign a misspelled attribute (a silent no-op);
# freezing all parameters for real would leave the optimizer nothing to update.
for params in model.parameters():
    params.requires_grad = True

# Define Loss Function and Optimizer

In [38]:
# Loss: negative log-likelihood. NLLLoss expects log-probabilities as input,
# so the model output has to be a log-softmax.
criterion = nn.NLLLoss()

# Alternative loss: cross entropy (takes raw logits)
# criterion = nn.CrossEntropyLoss()

# Optimizer 1: Adam over all model parameters
optimizer = optim.Adam(params = model.parameters(), lr = lr)

# Optimizer 2: SGD with momentum and weight decay
# optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay = 0.005, momentum = 0.9) 

In [None]:
# Train `model` for `epochs` epochs on `train_loader`, validating on `validate_loader`.
train_eval(model, train_loader, validate_loader, criterion, optimizer, epochs, deviceFlag)

The training batchsize is 64.


In [None]:
# Evaluate the trained model on the held-out test split.
test_acc(model, test_loader, deviceFlag)

In [None]:
# Display the device selected at the top of the notebook.
deviceFlag

In [None]:
# total_step = len(train_loader)

# for epoch in range(epochs):
#     for i, (images, labels) in enumerate(train_loader):  
#         # Move tensors to the configured device
#         images = images.to(deviceFlag)
#         labels = labels.to(deviceFlag)
        
#         # Forward pass
#         outputs = model(images)
#         loss = criterion(outputs, labels)
        
#         # Backward and optimize
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
#                    .format(epoch+1, epochs, i+1, total_step, loss.item()))
            
#     # Validation
#     with torch.no_grad():
#         correct = 0
#         total = 0
#         for images, labels in validate_loader:
#             images = images.to(deviceFlag)
#             labels = labels.to(deviceFlag)
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
#             del images, labels, outputs
    
#         print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total)) 


In [None]:
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.to(deviceFlag)
#         labels = labels.to(deviceFlag)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#         del images, labels, outputs

#     print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))   