## Optimisers example using Iris dataset ##

Use this script to experiment with different optimizer settings using the well-known Iris dataset.

Run the first code cell to do all the setup. Then you can try various optimisers (and other settings) in the code cell after that.

In [None]:
#
# GET DATA & LIBRARIES
#
# Get dataset
#
!wget -nv https://github.com/IS-pillar-3/datasets/raw/main/iris.csv
#
# Libraries
#
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import math
import random
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
#
# GPU
#
# Pick the CUDA device when PyTorch reports one, otherwise stay on the CPU
#
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
#
# Make seed quite random
#
# NB: this seeds Python's built-in `random` module only
#
random.seed(a=datetime.now().microsecond)
#
# DATA SET CLASS
#
class IrisDataset(torch.utils.data.Dataset):
    #
    # Map-style dataset over the iris CSV: item idx is a (features, label) pair
    #
    # The dataframe is kept on self.iris_df; its first four columns are read as
    # the features and its fifth column ("class", label-encoded) as the target
    #
    def __init__(self, data_path):
        #
        # Get the data, shuffle it and encode the classes
        #
        # data_path: path of the CSV file, passed straight to pd.read_csv
        #
        self.iris_df = pd.read_csv(data_path)
        self.iris_df = self.iris_df.sample(frac=1).reset_index(drop=True)
        #
        # Replace the values of the "class" column by integer codes
        #
        le = LabelEncoder()
        self.iris_df["class"] = le.fit_transform(self.iris_df["class"])
    #
    #
    def __len__(self):
        #
        # Rows in dataframe
        #
        return self.iris_df.shape[0]
    #
    #
    def __getitem__(self, idx):
        #
        # Return row idx as a pair of tensors (x, y):
        #   x - columns 0..3 as a float32 tensor of 4 values
        #   y - column 4 (the encoded class) as a 0-d int64 tensor
        #
        x = np.array(self.iris_df.iloc[idx, 0:4], dtype=np.float32)
        #
        # Force int64: CrossEntropyLoss takes class-index targets as 64-bit
        # integers, and the integer width stored in the dataframe may not be
        # 64-bit on every platform
        #
        y = np.array(self.iris_df.iloc[idx, 4], dtype=np.int64)
        #
        return torch.from_numpy(x), torch.from_numpy(y)
        #
    #
#
#
# Test cases
#
# Load the dataset, then show its size and two sample rows
#
iris_dset = IrisDataset("./iris.csv")
#
print(len(iris_dset))
#
x, y = iris_dset[0]
print(x, y)
#
#
x, y = iris_dset[12]
print(x, y)
#
# DATA LOADERS
#
# Work out which row indices go to the training, test and validation sets
#
# Thanks to: https://stackoverflow.com/questions/50544730/
#            how-do-i-split-a-custom-dataset-into-training-and-test-datasets
#
# test_split is a fraction of all rows; vali_split is a fraction of what is
# left once the test rows have been taken out
#
test_split = 0.2
vali_split = 0.2
#
no_rows = len(iris_dset)
idxs    = list(range(no_rows))
#
# Shuffle in place, then carve the index list up from the front
#
np.random.shuffle(idxs)
#
# Test rows are the tail of the shuffled list
#
test_size = int(np.floor(test_split * no_rows))
temp_rows = idxs[:no_rows - test_size]
test_rows = idxs[no_rows - test_size:]
#
# Validation rows are the tail of the remainder, training rows the head
#
vali_size  = int(np.floor(vali_split * len(temp_rows)))
train_size = len(temp_rows) - vali_size
train_rows = temp_rows[:train_size]
vali_rows  = temp_rows[train_size:]
#
print(train_size, test_size, vali_size)
#
# Set up samplers and loaders
#
# All three loaders wrap the same iris_dset; each sampler restricts its loader
# to one of the index lists built above (train_rows / test_rows / vali_rows)
#
# Training data is served in mini-batches of 10
#
train_sampler     = torch.utils.data.SubsetRandomSampler(train_rows)
iris_train_loader = torch.utils.data.DataLoader(iris_dset, batch_size=10, sampler=train_sampler)
#
# Walk the loader once just to list the batch numbers it produces
#
batch_nos = []
for batch_number, (x, y) in enumerate(iris_train_loader):
        batch_nos.append(batch_number)
#
print("Train batches")
print(batch_nos)
#
# Test loader: batch_size equals the number of test rows, so the whole test
# split arrives as a single batch (left in x_test / y_test after the loop)
#
if test_size > 0:
    test_sampler     = torch.utils.data.SubsetRandomSampler(test_rows)
    iris_test_loader = torch.utils.data.DataLoader(iris_dset, batch_size=test_size, sampler=test_sampler)
    #
    batch_nos = []
    for batch_number, (x_test, y_test) in enumerate(iris_test_loader):
        batch_nos.append(batch_number)
    #
    print("Test batches")
    print(batch_nos)
#
# Validation loader: again one batch holding the whole validation split
#
# NB: x_vali / y_vali stay bound after this loop and are what the training
#     cell passes to validate(); they only exist when vali_size > 0
#
if vali_size > 0:
    vali_sampler      = torch.utils.data.SubsetRandomSampler(vali_rows)
    iris_vali_loader  = torch.utils.data.DataLoader(iris_dset, batch_size=vali_size, sampler=vali_sampler)
    #
    batch_nos = []
    for batch_number, (x_vali, y_vali) in enumerate(iris_vali_loader):
        batch_nos.append(batch_number)
    #
    print("Vali batches")
    print(batch_nos)
#
# MODEL
#
# Class that defines the model structure
#
class Model(nn.Module):
    #
    # Two-layer fully connected classifier: 4 inputs -> 5 hidden -> 3 outputs
    #
    def __init__(self):
        #
        # Initialise the nn.Module machinery first
        #
        super().__init__()
        #
        # Layers with learned parameters (nn.Linear uses bias=True by default)
        #
        #   fc1: 4 input features -> 5 hidden units
        #   fc2: 5 hidden units   -> 3 output scores
        #
        self.fc1 = nn.Linear(4, 5)
        self.fc2 = nn.Linear(5, 3)
        #
    #
    #
    def forward(self, x):
        #
        # fc1, then a sigmoid on the hidden units, then fc2
        #
        # The raw fc2 scores are returned: no softmax is applied here because
        # the cross entropy loss takes care of it
        #
        hidden = torch.sigmoid(self.fc1(x))
        #
        return self.fc2(hidden)
    #
#
# Instantiate the model
#
# Put its parameters on the selected device (GPU if available) before any
# optimizer is built from model.parameters()
#
model = Model().to(device)
#

### Model training ###

Vary the optimizer (and other parameters) and see how this affects the training and validation plot.

In [None]:
#
# Train the model
#
# Define optimizer and loss functions
#
# Every assignment below rebinds the same name, so the LAST `optimizer = ...`
# that is left uncommented is the one train() steps with. To try one of the
# earlier optimizers, comment out the assignments that follow it
#
#
# Define SGD optimizer
#
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, nesterov=True)
#
# RMSprop optimizer
#
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.005, alpha=0.99, eps=1e-08, weight_decay=0,
                                momentum=0.9, centered=False)
#
# Define Adam optimizer
#
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), eps=1e-08, weight_decay=0, lr=0.005)
#
# Define loss function
#
loss_function = torch.nn.CrossEntropyLoss()
#
# Function to get validation loss  
#
def validate(model, x_vali, y_vali):
    #
    # Return the loss of `model` on one batch, as a Python float
    #
    #   model  - the network to evaluate; it is left in eval mode
    #   x_vali - batch of inputs fed to model(...)
    #   y_vali - matching targets handed to the module-level loss_function
    #
    # No parameters are updated
    #
    model.eval()
    #
    # Put the batch where the model's parameters live so the forward pass
    # cannot fail on a CPU / GPU mismatch
    #
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x_vali = x_vali.to(first_param.device)
        y_vali = y_vali.to(first_param.device)
    #
    # Only a forward pass; no_grad keeps autograd from recording it
    # (eval mode on its own does not switch gradient tracking off)
    #
    with torch.no_grad():
        output = model(x_vali)
        loss = loss_function(output, y_vali)
    #
    return loss.item()
#
#
# Train function
#
def train(model, iris_train_loader, epoch, verbose, report_batches):
    #
    # Train one epoch
    #
    #   model             - network to train; switched to train mode
    #   iris_train_loader - iterable yielding (x, y) batches
    #   epoch             - epoch number, only used in the progress print
    #   verbose           - print the loss of every reported batch when true
    #   report_batches    - a batch is reported when its number is a multiple
    #                       of this value (so batch 0 is always reported)
    #
    # Uses the module-level `optimizer` and `loss_function`
    #
    # Returns the list of losses (Python floats) of the reported batches
    #
    model.train()
    #
    # Batches are sent to the device the model's parameters are on, so model
    # and data always agree (GPU when the model was moved there, else CPU)
    #
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else torch.device("cpu")
    #
    losses = []
    #
    for batch_number, (x, y) in enumerate(iris_train_loader):
        #
        x = x.to(batch_device)
        y = y.to(batch_device)
        #
        # Clear old gradients, forward pass, backward pass, parameter update
        #
        optimizer.zero_grad()
        #
        output = model(x)
        #
        loss = loss_function(output, y)
        #
        loss.backward()
        #
        optimizer.step()
        #
        if batch_number % report_batches == 0:
            #
            # Read the scalar once and reuse it for printing and recording
            #
            loss_value = loss.item()
            if verbose:
                print("Epoch", epoch, ", batch_number", batch_number, ", loss.data.item()",
                      loss_value)
            #
            losses.append(loss_value)
        #
    #
    return losses
#
# Run the epochs
#
# For every epoch keep the last loss reported by train() and the loss on the
# validation batch measured straight after that epoch
#
epoch_losses = []
vali_losses  = []
#
for epoch in range(500):
    reported = train(model, iris_train_loader, epoch, verbose=False, report_batches=10)
    epoch_losses.append(reported[-1])
    #
    vali_losses.append(validate(model, x_vali, y_vali))
#
# Plot loss functions
#
# Legend entries: training loss is drawn in red, validation loss in blue
#
red_patch  = mpatches.Patch(label="train", color="red")
blue_patch = mpatches.Patch(label="vali",  color="blue")
#
# One curve per loss list, indexed by epoch
#
plt.plot(epoch_losses, color="red")
plt.plot(vali_losses, color="blue")
#
plt.xlabel("Epoch number")
plt.ylabel("Loss")
plt.legend(loc="upper left", handles=[red_patch, blue_patch])
#
plt.show()
#