In [47]:
import numpy as np
import matplotlib.pyplot as plt
import time
from PIL import Image

import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms, models

from collections import OrderedDict

In [48]:
# Optimisation settings used by the training cells below.
epochs = 60   # number of passes over the training loader
lr = 1e-3     # learning rate handed to the optimizer

# Mini-batch sizes for the DataLoader objects.
tr_batchsize = 32         # training loader
val_test_batchsize = 16   # validation and test loaders

In [49]:
# Use the first CUDA device whenever one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    print(f'Found {torch.cuda.device_count()} GPUs.')
    deviceFlag = torch.device('cuda:0')  # change the index to pick a different GPU
else:
    deviceFlag = torch.device('cpu')

print(f'Now the deivce is set to {deviceFlag}')

Found 1 GPUs.
Now the deivce is set to cuda:0


# VALIDATION FUNCTION

In [50]:
def validation(model, validate_loader, val_criterion):
    """Accumulate loss and accuracy of ``model`` over every batch of a loader.

    The model's train/eval mode is left untouched; callers switch to
    ``model.eval()`` themselves before calling this.

    Args:
        model: Network to evaluate. Each batch is moved to the device of the
            model's first parameter (batches are left where they are when the
            model has no parameters), so no module-level device variable is
            needed.
        validate_loader: Iterable yielding ``(images, labels)`` batches.
        val_criterion: Loss callable invoked as ``val_criterion(output, labels)``.

    Returns:
        ``(loss_sum, acc_sum)`` as plain Python numbers: the sum of the
        per-batch loss values and the sum of the per-batch accuracy fractions.
        Divide both by the number of batches to obtain averages.
    """
    # Follow the model's own device instead of relying on a global flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    val_loss_running = 0
    acc = 0

    # Gradients are never needed here; this is a no-op if the caller already
    # disabled them.
    with torch.no_grad():
        for images, labels in validate_loader:
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)

            output = model(images)
            # .item() extracts the Python scalar from the 0-dim loss tensor
            val_loss_running += val_criterion(output, labels).item()

            # exp() is monotonic, so the argmax of the raw outputs is the same
            # class the exponentiated probabilities would select.
            predicted = output.argmax(dim=1)
            acc += (predicted == labels).float().mean().item()

    return val_loss_running, acc

# TRAINING + VALIDATION

In [51]:
import math

def train_eval(model, train_data_loader, validate_loader, train_criterion, optimizer, epoches, device_flag):
    """Train ``model`` and report validation metrics at the end of every epoch.

    Progress is written to stdout using carriage returns so that the current
    line is overwritten in place.

    Args:
        model: Network to train; it is moved to ``device_flag``.
        train_data_loader: Loader of ``(inputs, labels)`` batches. Must support
            ``len()`` and expose a ``dataset`` attribute that supports ``len()``.
        validate_loader: Loader handed to ``validation``; must support ``len()``.
        train_criterion: Loss callable, used for training and validation alike.
        optimizer: Optimizer that updates the model parameters.
        epoches: Number of passes over ``train_data_loader``.
        device_flag: Device the model and the training batches are moved to.

    Raises:
        ValueError: If ``train_data_loader`` has no batches.
    """
    batches_per_epoch = len(train_data_loader)
    if batches_per_epoch == 0:
        raise ValueError("train_data_loader yields no batches")

    # Validation runs every `eval_itrs` training batches, i.e. once per epoch.
    eval_itrs = batches_per_epoch

    # validation() returns sums over batches; this turns them into averages.
    val_batches = max(len(validate_loader), 1)

    # first setting the device used for training
    model.to(device_flag)

    total_images = len(train_data_loader.dataset)
    batch_size = math.ceil(total_images / batches_per_epoch)

    print(f'The training batchsize is {batch_size}.')

    # set the timer
    since = time.time()

    # ! THE EPOCH LOOP !
    for e in range(epoches):
        epoch_since = time.time()
        epoch_text = f"[{e + 1}/{epoches}]"
        itrs = 0

        next_validation = eval_itrs

        # Activate training behaviour (dropout, batch-norm statistics updates)
        model.train()

        # Per-epoch accumulators
        training_loss_running = 0
        images_used = 0

        #  ! THE BATCH LOOP !
        for inputs, labels in train_data_loader:
            # Kept in a variable because its length is reused below to blank
            # out the progress line before the validation message is shown.
            iter_text = "\r{} {} / {} - {:.2f}%.".format(epoch_text,
                                                         images_used, total_images,
                                                         (images_used/total_images)*100)
            print(iter_text, end="")
            itrs += 1

            # .to() returns a copy of the tensors on the targeted device
            inputs = inputs.to(device_flag)
            labels = labels.to(device_flag)

            # Clear the gradients computed in the previous iteration
            optimizer.zero_grad()

            # Forward pass, loss, backward pass, parameter update
            outputs = model(inputs)
            train_loss = train_criterion(outputs, labels)
            train_loss.backward()
            optimizer.step()

            # .item() takes the Python scalar out of the 0-dim loss tensor
            training_loss_running += train_loss.item()

            # ----------- Perform Validation (Evaluation) Every eval_itrs iterations ----------
            if itrs >= next_validation:
                # Move the next validation check
                next_validation = itrs + eval_itrs

                model.eval()

                # Turn off gradients for validation to save memory & computation
                with torch.no_grad():
                    valid_text = f"\r{epoch_text} Validating..."
                    print(f"\r{valid_text}{' ' * (len(iter_text) - len(valid_text))}", end="")

                    validation_loss, val_acc = validation(model, validate_loader, train_criterion)

                    # Convert the per-batch sums into per-batch averages so the
                    # loss is comparable to the training loss and the accuracy
                    # stays within 0-100%.
                    validation_loss = validation_loss / val_batches
                    val_acc = val_acc / val_batches

                    batch_text = f"{epoch_text} Batch: {itrs}"
                    print("\r{1}{2}\n{0} | Train Loss: {3:.4f}\n{0} | Valid Loss: {4:.4f}\n{0} | Valid Accepted: {5:.10f}%\n".format(
                        epoch_text, batch_text,
                        " " * (len(valid_text) - len(batch_text)),
                        training_loss_running / itrs,
                        validation_loss,
                        val_acc*100
                    ))

                model.train()

            images_used += inputs.size(0)

        end = time.time()

        print('\r{} Epoch took {:.4f} sec ({:.4f} total so far), Average Loss of Batches: {:.4f}\n\n'.format(
            epoch_text,
            end - epoch_since,
            end - since,
            training_loss_running / max(itrs, 1)
        ))

# TEST FUNCTION

In [52]:
def test_acc(model, test_loader, device_flag):

    # for testing, it is actually do validation on the test set
    model.eval()

    model.to(device_flag)

    since = time.time()

    total_check = len(test_loader.dataset)

    # In .eval() mode, set the context manager to turn-off grads
    with torch.no_grad():
        acc = 0

        checked = 0

        # iter() gives images and labels in batches
        for inputs, labels in test_loader:
            print("\r{} / {} - {:.2f}%.".format(checked, total_check, (checked / total_check) * 100), end="")
            
            inputs = inputs.to(device_flag)
            labels = labels.to(device_flag)

            # Do a forward pass
            output = model(inputs)
            # convert the log likelihood to scalar

            _, predicted = torch.max(output.data, 1)

            acc += predicted.type(torch.FloatTensor).mean().item()

            checked += labels.size(0)

        end = time.time()
        elapsed = end - since
        print("\r{} / {} predicted correctly. The accuracy of the model is {:.4f}%. ({:.2f}s taken)".format(acc, checked, (acc / checked) * 100, elapsed))

# Checkpoint Creation

In [53]:
def save_checkpoint(model, optimizer, trainingdataset, saved_pth):
    """Write a checkpoint dictionary for ``model`` with ``torch.save``.

    The checkpoint holds three entries: ``'arch'`` (always the string
    ``'vgg19'``), ``'class_to_idx'`` and ``'model_state_dict'``. As a side
    effect the class mapping is also attached to the model as
    ``model.class_to_idx``.

    Args:
        model: Module whose ``state_dict()`` is stored.
        optimizer: Accepted for interface compatibility; its state is
            currently not written to the checkpoint.
        trainingdataset: Object exposing a ``class_to_idx`` attribute.
        saved_pth: Destination passed straight to ``torch.save``.
    """
    class_mapping = trainingdataset.class_to_idx

    # Keep the mapping on the model too, so it travels with the object.
    model.class_to_idx = class_mapping

    torch.save(
        dict(
            arch='vgg19',
            class_to_idx=class_mapping,
            model_state_dict=model.state_dict(),
        ),
        saved_pth,
    )

# Checkpoint Loading

In [54]:
def load_checkpoint(chkpt_path):
    """Rebuild the VGG19-based classifier stored in a checkpoint.

    Args:
        chkpt_path: Path or file-like object accepted by ``torch.load``. The
            checkpoint must be a dict with the keys ``'arch'``,
            ``'class_to_idx'`` and ``'model_state_dict'``.

    Returns:
        A ``models.vgg19`` network whose classifier is replaced by the
        102-way head defined below, with the checkpoint weights loaded and
        ``class_to_idx`` attached as an attribute.

    Raises:
        ValueError: If the checkpoint was saved for an architecture other
            than ``'vgg19'``.
    """
    # NOTE(security): torch.load unpickles its input; only open checkpoints
    # from a trusted source.
    # map_location keeps GPU-saved checkpoints loadable on CPU-only machines;
    # load_state_dict copies the values into the model's own tensors anyway.
    chkpt = torch.load(chkpt_path, map_location='cpu')

    arch = chkpt['arch']
    if arch != 'vgg19':
        # Fail loudly here rather than hitting an undefined `model` below.
        raise ValueError(
            f"Unsupported checkpoint architecture {arch!r}; expected 'vgg19'"
        )

    # Every weight is overwritten by load_state_dict() below (strict loading),
    # so fetching the pretrained ImageNet weights would be wasted work.
    model = models.vgg19(pretrained = False)

    # Freeze all parameters of the base network. The classifier attached
    # afterwards is created fresh, so its parameters still require gradients.
    for params in model.parameters():
        params.requires_grad = False

    model.class_to_idx = chkpt['class_to_idx']

    # Re-create the classifier head with the layer names used in the checkpoint
    classifier = nn.Sequential(OrderedDict([
        ('fc1', nn.Linear(25088, 4096)),
        ('relu', nn.ReLU()),
        ('drop', nn.Dropout(p = 0.5)),
        ('fc2', nn.Linear(4096, 102)),
        ('output', nn.LogSoftmax(dim = 1))
    ]))

    # Attach the classifier head
    model.classifier = classifier

    # Copy the stored parameters into the freshly constructed model
    model.load_state_dict(chkpt['model_state_dict'])

    return model

# Dataset Loading

In [55]:
# Per-channel RGB mean / std estimated on ImageNet, shared by every pipeline.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]


def _build_eval_transforms():
    """Return a fresh deterministic pipeline: resize, center-crop to 224, normalize."""
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
    ])


# Training pipeline: random augmentation followed by the shared normalization.
training_transforms = transforms.Compose([
    transforms.RandomRotation(90),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
])

# Validation and test use identical, augmentation-free pipelines
# (two separate Compose objects, one per split).
validation_transforms = _build_eval_transforms()
testing_transforms = _build_eval_transforms()

# Load the three Flowers102 splits with torchvision.datasets.Flowers102.
# download=True is passed for every split; each split gets its own transform pipeline.
_DATA_ROOT = './dataset'

train_dataset = datasets.Flowers102(root=_DATA_ROOT, split='train',
                                    transform=training_transforms, download=True)
valid_dataset = datasets.Flowers102(root=_DATA_ROOT, split='val',
                                    transform=validation_transforms, download=True)
test_dataset = datasets.Flowers102(root=_DATA_ROOT, split='test',
                                   transform=testing_transforms, download=True)

# Wrap each dataset in a torch.utils.data.DataLoader that serves it batch by batch.
# Only the training loader shuffles; validation and test keep the dataset order.
# Training and evaluation batch sizes are configured independently.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=tr_batchsize,
                                           shuffle=True)

validate_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=val_test_batchsize,
                                              shuffle=False)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=val_test_batchsize,
                                          shuffle=False)

In [56]:
class VGG16(nn.Module):
    """Small VGG-style CNN classifier that outputs log-probabilities.

    Despite its name, this is a reduced network: four 3x3 convolutions with
    two 2x2 max-pools (``layer1``) followed by a three-layer fully connected
    head ending in ``LogSoftmax`` (``fc``).

    Args:
        num_classes: Number of output classes.
        input_size: Height/width of the square input images. The two
            max-pools reduce each side by a factor of 4, which determines
            the size of the first linear layer.
    """

    def __init__(self, num_classes=102, input_size=224):
        super(VGG16, self).__init__()

        # Feature extractor. The 3x3 convolutions use padding=1 and keep the
        # spatial size; each max-pool halves it.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        # Classifier head operating on the flattened feature map.
        self.fc = nn.Sequential(
            nn.Flatten(),
            # in_features = channels * height * width of the layer1 output
            # (64 * 56 * 56 = 200704 for the default 224x224 input)
            nn.Linear(self._flat_features(input_size), 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
            nn.LogSoftmax(dim=1))

    @staticmethod
    def _flat_features(input_size):
        """Number of features produced by ``layer1`` for a square input."""
        # Two stride-2 pools: side -> side // 4; the last conv has 64 channels.
        return 64 * (input_size // 4) ** 2

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        out = self.layer1(x)
        out = self.fc(out)
        return out

In [58]:
# Build the network and place it on the selected device
# (nn.Module.to moves the module in place and returns it).
model = VGG16().to(deviceFlag)

# Bare expression so the notebook displays the module structure.
model

VGG16(
  (layer1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=200704, out_features=1024, bias=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Linear(in_features=512, out_features=102, bias=True)
    (6): LogSoftmax(dim=1)
  )
)

In [59]:
# for params in model.parameters():
#     params.requires_grad = False

# Define Loss Function and Optimizer

In [60]:
# Optimizer: Adam over every model parameter with the configured learning rate.
# Alternative kept for reference:
# optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay = 0.005, momentum = 0.9)
optimizer = optim.Adam(params=model.parameters(), lr=lr)

# Loss: cross entropy.
# NOTE(review): the model already ends in LogSoftmax, and CrossEntropyLoss is documented
# to apply log-softmax itself; nn.NLLLoss() is the usual pairing. Confirm which is intended.
# criterion = nn.NLLLoss()
criterion = nn.CrossEntropyLoss()

In [None]:
def train_classifier(print_every=16):
    """Train the module-level ``model`` and report metrics every ``print_every`` batches.

    Reads the notebook globals ``model``, ``deviceFlag``, ``epochs``,
    ``train_loader``, ``validate_loader``, ``criterion`` and ``optimizer``.

    Args:
        print_every: Number of training batches between two validation
            reports. The step counter runs across epochs, so a reporting
            window may span an epoch boundary.
    """
    steps = 0

    # Loss accumulated since the last report. It is cleared only after a
    # report, never at an epoch boundary, so dividing by `print_every` always
    # gives the true mean over the reporting window.
    running_loss = 0

    model.to(deviceFlag)

    for e in range(epochs):

        model.train()

        for images, labels in train_loader:

            steps += 1

            images, labels = images.to(deviceFlag), labels.to(deviceFlag)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:

                model.eval()

                # Turn off gradients for validation, saves memory and computations
                with torch.no_grad():
                    validation_loss, accuracy = validation(model, validate_loader, criterion)

                # validation() returns sums over batches; average them here
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}... ".format(running_loss/print_every),
                      "Validation Loss: {:.3f}... ".format(validation_loss/len(validate_loader)),
                      "Validation Accuracy: {:.3f}".format(accuracy/len(validate_loader)))

                running_loss = 0
                model.train()
# Start training; progress lines are printed to the cell output.
train_classifier()

Epoch: 1/60..  Training Loss: 5.121...  Validation Loss: 4.625...  Validation Accuracy: 0.010
Epoch: 1/60..  Training Loss: 4.627...  Validation Loss: 4.624...  Validation Accuracy: 0.010
Epoch: 2/60..  Training Loss: 4.610...  Validation Loss: 4.562...  Validation Accuracy: 0.011
Epoch: 2/60..  Training Loss: 4.531...  Validation Loss: 4.453...  Validation Accuracy: 0.022
Epoch: 3/60..  Training Loss: 4.430...  Validation Loss: 4.380...  Validation Accuracy: 0.019
Epoch: 3/60..  Training Loss: 4.361...  Validation Loss: 4.225...  Validation Accuracy: 0.027
Epoch: 4/60..  Training Loss: 4.256...  Validation Loss: 4.154...  Validation Accuracy: 0.037
Epoch: 4/60..  Training Loss: 4.184...  Validation Loss: 4.052...  Validation Accuracy: 0.041
Epoch: 5/60..  Training Loss: 4.046...  Validation Loss: 3.943...  Validation Accuracy: 0.047
Epoch: 5/60..  Training Loss: 4.093...  Validation Loss: 4.042...  Validation Accuracy: 0.035
Epoch: 6/60..  Training Loss: 3.983...  Validation Loss: 4.0

In [None]:
# train_eval(model, train_loader, validate_loader, criterion, optimizer, epochs, deviceFlag)

In [None]:
# test_acc(model, test_loader, deviceFlag)

In [None]:
def test_accuracy(model, test_loader):

    # Do validation on the test set
    model.eval()
    model.to(deviceFlag)

    with torch.no_grad():
    
        accuracy = 0
    
        for images, labels in iter(test_loader):
    
            images, labels = images.to(deviceFlag), labels.to(deviceFlag)
    
            output = model.forward(images)

            probabilities = torch.exp(output)
        
            equality = (labels.data == probabilities.max(dim=1)[1])
        
            accuracy += equality.type(torch.FloatTensor).mean()
        
        print("Test Accuracy: {}".format(accuracy/len(test_loader)))    
        
        
# Evaluate the trained model on the held-out test loader and print its accuracy.
test_accuracy(model, test_loader)


In [None]:
# torch.save(model.state_dict(), "2023-05-06--03-00-model.pt")

In [None]:
# total_step = len(train_loader)

# for epoch in range(epochs):
#     for i, (images, labels) in enumerate(train_loader):  
#         # Move tensors to the configured device
#         images = images.to(deviceFlag)
#         labels = labels.to(deviceFlag)
        
#         # Forward pass
#         outputs = model(images)
#         loss = criterion(outputs, labels)
        
#         # Backward and optimize
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#     print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
#                    .format(epoch+1, epochs, i+1, total_step, loss.item()))
            
#     # Validation
#     with torch.no_grad():
#         correct = 0
#         total = 0
#         for images, labels in validate_loader:
#             images = images.to(deviceFlag)
#             labels = labels.to(deviceFlag)
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
#             del images, labels, outputs
    
#         print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / total)) 


In [None]:
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         images = images.to(deviceFlag)
#         labels = labels.to(deviceFlag)
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#         del images, labels, outputs

#     print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))   