## Optimisers example using Iris dataset ##

Use this script to experiment with different optimiser settings using the well-known Iris dataset.

Run the first code cell to do all the setup. Then you can try various optimisers (and other settings) in the code cell after that.

In [None]:
#
# Get dataset
#
!wget -nv https://github.com/IS-pillar-3/datasets/raw/main/iris.csv
#
# Libraries
#
import torch
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import math
import torch.nn as nn
import torch.nn.functional as F
#
# Use the GPU when CUDA is available, otherwise fall back to the CPU
#
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#
# Data loader class
#
class IrisDataSet(torch.utils.data.Dataset):
    """Iris data held in memory and served as wrap-around training batches.

    The CSV is read, shuffled and label-encoded once on construction. All rows
    start out in the training set; ``get_test`` and ``get_vali`` each carve a
    random subset out of it. Indexing with a batch number returns
    ``batch_size`` consecutive training rows, wrapping round to the start of
    the training set whenever its end is reached. Because ``__getitem__``
    never raises ``IndexError``, iterating over an instance does not stop by
    itself -- the caller decides how many batches to draw.
    """

    def __init__(self, data_path, test_split, vali_split, batch_size):
        """Load the CSV and prepare the tensors and split sizes.

        Args:
            data_path: Location of the CSV, passed to ``pandas.read_csv``.
                The first four columns are used as features and the fifth as
                the label; the column named ``class`` is label-encoded
                (presumably that is the fifth column -- confirm against the
                data file).
            test_split: Fraction of all rows reserved for the test set.
            vali_split: Fraction of the rows remaining after the test split
                that is reserved for the validation set.
            batch_size: Number of rows returned by each ``__getitem__`` call.
        """
        self.test_split = test_split
        # Bug fix: this was previously assigned from test_split, so the
        # vali_split argument was silently ignored.
        self.vali_split = vali_split
        self.batch_size = batch_size

        # Read the data and shuffle the rows once up front.
        self.iris_df = pd.read_csv(data_path)
        self.iris_df = self.iris_df.sample(frac=1).reset_index(drop=True)
        self.no_rows = self.iris_df.shape[0]

        # Recode the categorical dependent variable as integer class ids.
        le = LabelEncoder()
        self.iris_df["class"] = le.fit_transform(self.iris_df["class"])

        # Everything starts in the training set. Labels are forced to int64
        # because CrossEntropyLoss expects class-index targets of that dtype.
        self.x_train = torch.from_numpy(np.array(self.iris_df.iloc[:, 0:4], dtype=np.float32))
        self.y_train = torch.from_numpy(np.array(self.iris_df.iloc[:, 4], dtype=np.int64))

        # The dataframe is no longer needed once the tensors exist.
        self.iris_df = None

        # The validation size is a fraction of what is left after the test
        # rows have been removed.
        self.test_size = int(np.floor(self.test_split * self.no_rows))
        self.vali_size = int(np.floor(self.vali_split * (self.no_rows - self.test_size)))

    def _split_off(self, size):
        """Remove ``size`` randomly chosen rows from the training set and return them."""
        # Permute the rows that are currently in the training set (not the
        # original row count), so splits can be taken in any order.
        idxs = torch.randperm(self.x_train.shape[0])
        taken, kept = idxs[:size], idxs[size:]

        x_out = torch.index_select(self.x_train, 0, taken)
        y_out = torch.index_select(self.y_train, 0, taken)

        self.x_train = torch.index_select(self.x_train, 0, kept)
        self.y_train = torch.index_select(self.y_train, 0, kept)

        return x_out, y_out

    def get_test(self):
        """Split ``test_size`` random rows out of the training set.

        Returns:
            Tuple ``(x_test, y_test)``; the rows are removed from the
            training set.
        """
        return self._split_off(self.test_size)

    def get_vali(self):
        """Split ``vali_size`` random rows out of the training set.

        Returns:
            Tuple ``(x_vali, y_vali)``; the rows are removed from the
            training set.
        """
        return self._split_off(self.vali_size)

    def __len__(self):
        """Return the number of rows currently in the training set."""
        return self.x_train.shape[0]

    def __getitem__(self, batch_idx):
        """Return batch number ``batch_idx`` from the training set.

        The batch starts at row ``batch_idx * batch_size`` (modulo the
        training-set length) and contains ``batch_size`` consecutive rows,
        wrapping round to row 0 as often as needed.

        Returns:
            Tuple ``(x, y)`` of feature and label tensors, or
            ``(None, None)`` when ``batch_size`` is not positive.

        Raises:
            ZeroDivisionError: If the training set is empty.
        """
        if self.batch_size <= 0:
            # Kept from the original implementation: nothing to collect.
            return None, None

        leng = len(self)
        first_row = batch_idx * self.batch_size % leng
        # Consecutive row numbers, wrapped round the end of the training set.
        rows = (first_row + torch.arange(self.batch_size)) % leng

        return self.x_train[rows], self.y_train[rows]
#
# Class that defines the model structure
#
class Model(nn.Module):
    """Small fully connected classifier: 4 inputs -> 5 hidden units -> 3 outputs."""

    def __init__(self):
        """Create the two linear layers that hold the learned parameters."""
        super().__init__()
        # NB: nn.Linear uses bias=True by default.
        self.fc1 = nn.Linear(4, 5)
        self.fc2 = nn.Linear(5, 3)

    def forward(self, x):
        """Return the output of the second linear layer for input ``x``.

        The hidden layer uses a sigmoid activation. No softmax is applied
        here; it is handled by the cross entropy loss.
        """
        hidden = torch.sigmoid(self.fc1(x))
        return self.fc2(hidden)
#
# Function to get validation loss
#
def validate(model, x_vali, y_vali, loss_fn=None):
    """Return the loss of ``model`` on a validation set as a Python float.

    The model is switched to evaluation mode and left in it; call
    ``model.train()`` before resuming training.

    Args:
        model: The network to evaluate.
        x_vali: Validation features.
        y_vali: Validation targets.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar
            tensor. Defaults to the module-level ``loss_function``.

    Returns:
        The validation loss.
    """
    if loss_fn is None:
        loss_fn = loss_function
    #
    model.eval()
    #
    # Evaluate on whichever device the model's parameters live on
    #
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x_vali = x_vali.to(first_param.device)
        y_vali = y_vali.to(first_param.device)
    #
    # Forward pass only: no_grad stops autograd from recording the graph
    #
    with torch.no_grad():
        output = model(x_vali)
        loss = loss_fn(output, y_vali)
    #
    return loss.item()
#
#

### Model training ###

Vary the optimizer (and other parameters) and see how this affects the training plot.

In [None]:
#
# Train the model
# Instantiate model and move it to the device the training batches are copied to
# (without this the model stays on the CPU while the batches go to the GPU)
#
model = Model().to(device)
#
# Optimisers to experiment with.
# NB: every assignment below replaces the previous one, so only the LAST
# optimizer defined is actually used. Comment out or reorder to try another.
#
# Define SGD optimizer
#
optimizer = torch.optim.SGD(model.parameters(), lr=0.4)
optimizer = torch.optim.SGD(model.parameters(), lr=0.4, momentum=0.9)
optimizer = torch.optim.SGD(model.parameters(), lr=0.4, momentum=0.9, nesterov=True)
#
# RMSprop optimizer
#
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.04, alpha=0.99, eps=1e-08, weight_decay=0.1,
                                momentum=0.5, centered=False)
#
#
# Define Adam optimizer
#
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-08, weight_decay=0, lr=0.4)
#
# Define loss
#
loss_function = torch.nn.CrossEntropyLoss()
#
# Instantiate data loader (test split 0.2, validation split 0.2, batch size 48)
#
iris_loader = IrisDataSet("./iris.csv", 0.2, 0.2, 96 // 2)
#
# Get a test set
#
#x_test, y_test = iris_loader.get_test()
#
# Get a validation set
#
#x_vali, y_vali = iris_loader.get_vali()
#
# Train function
#
def train(model, iris_loader, epoch, batches_in_epoch, report_batches, opt=None, loss_fn=None):
    """Train ``model`` for one epoch and return the sampled batch losses.

    Args:
        model: The network to train; it is switched to training mode.
        iris_loader: Iterable yielding ``(x, y)`` batches. It may be
            endless: at most ``batches_in_epoch`` batches are drawn.
        epoch: Epoch number; only used by the optional progress print.
        batches_in_epoch: Number of batches (an int) to train on.
        report_batches: The loss is recorded for every batch whose number is
            a multiple of this value, starting with batch 0.
        opt: Optimiser to step. Defaults to the module-level ``optimizer``.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar
            tensor. Defaults to the module-level ``loss_function``.

    Returns:
        List of the recorded losses as Python floats.
    """
    if opt is None:
        opt = optimizer
    if loss_fn is None:
        loss_fn = loss_function
    #
    # Train one epoch
    #
    model.train()
    #
    # Batches must live on the same device as the model's parameters; fall
    # back to the global device only for a model without parameters
    #
    first_param = next(model.parameters(), None)
    batch_device = device if first_param is None else first_param.device
    #
    losses = []
    #
    # zip stops once the range is exhausted, so no surplus batch is fetched
    #
    for batch_number, (x, y) in zip(range(batches_in_epoch), iris_loader):
        #
        x = x.to(batch_device)
        y = y.to(batch_device)
        #
        opt.zero_grad()
        #
        output = model(x)
        #
        loss = loss_fn(output, y)
        #
        loss.backward()
        #
        opt.step()
        #
        if batch_number % report_batches == 0:
            #
            # Comment in to see the detail
            #
            #print("Epoch", epoch, ", batch_number", batch_number, ", loss.item()",
            #     loss.item())
            #
            losses.append(loss.item())
        #
    #
    return losses
#
# Do training
#
# train_losses keeps every recorded loss per epoch; epoch_losses keeps only
# the last recorded loss of each epoch, which is what gets plotted
train_losses, epoch_losses = [], []
#
for epoch in range(500):
    #
    this_epoch = train(model, iris_loader, epoch, batches_in_epoch=2, report_batches=10)
    #
    train_losses.append(this_epoch)
    epoch_losses.append(this_epoch[-1])
#
# Plot the loss curve: one point per epoch
#
plt.plot(epoch_losses, color="red")
plt.ylabel("Loss")
plt.xlabel("Epoch number")
plt.show()
#