In [None]:
# %% Deep learning - Section 9.80
#    Code challenge 7: effects of mini-batch size

#    1) Copy code from Sec_09_078_batching_regularisation.ipynb file
#    2) Run a mini-batch size parametric experiment by setting the batch size to 2^n, n=1,..6
#    3) Set the learning rate to 0.001
#    4) Store train and test accuracies over epoch, for each batch size
#    5) Plot the accuracies

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Import Iris dataset

iris = sns.load_dataset('iris')

# Feature matrix: the first four columns of the dataframe, converted to a float tensor
feature_columns = iris.columns[0:4]
data = torch.tensor(iris[feature_columns].values).float()

# Integer class codes: each species name is mapped to its position in the tuple below
labels = torch.zeros(len(data),dtype=torch.long)
for class_code,species_name in enumerate(('setosa','versicolor','virginica')):
    labels[iris.species==species_name] = class_code


In [None]:
# Plotting

# Markers only (no connecting lines), plotted against the dataframe row index
iris_ax = iris.plot(marker='o',linestyle='none',figsize=(12,6))

iris_ax.set_xlabel('Sample number')
iris_ax.set_ylabel('Value')
iris_ax.set_title('Iris dataset features')

# Write the figure to disk before it is shown
plt.savefig('figure42_code_challenge_7.png')

plt.show()

# Colab helper: push the saved file to the browser
files.download('figure42_code_challenge_7.png')


In [None]:
# %% Split into train and test data

# 80/20 train/test partition with scikit-learn
train_data,test_data,train_labels,test_labels = train_test_split(
    data,
    labels,
    test_size=0.2)

# Pair features and labels into PyTorch datasets
test_data  = TensorDataset(test_data,test_labels)
train_data = TensorDataset(train_data,train_labels)

# The whole test set is served as one single batch (no mini-batching when testing)
n_test_samples = len(test_data)
test_loader    = DataLoader(test_data,batch_size=n_test_samples)
# > the train_loader is not created here: it is rebuilt for every batch size in the parametric experiment


In [None]:
# %% Function to generate the model

def gen_model(lr=0.001):
    """
    Build a fresh classifier together with its loss function and optimizer.

    Parameters
    ----------
    lr : float, optional
        Learning rate handed to the SGD optimizer. The default (0.001) is the
        value requested by the code challenge; pass another value to explore
        the interaction between learning rate and batch size.

    Returns
    -------
    ANN : nn.Sequential
        Fully connected network 4 -> 64 -> 64 -> 3 with ReLU activations.
    loss_fun : nn.CrossEntropyLoss
        Loss function (expects the raw, un-normalised outputs of ANN).
    optimizer : torch.optim.SGD
        Plain SGD over the parameters of ANN.
    """

    # Architecture
    ANN = nn.Sequential(
             nn.Linear(4,64),
             nn.ReLU(),
             nn.Linear(64,64),
             nn.ReLU(),
             nn.Linear(64,3))

    # Loss function
    loss_fun = nn.CrossEntropyLoss()

    # Optimizer (learning rate is a parameter; the challenge asks for 0.001)
    optimizer = torch.optim.SGD(ANN.parameters(),lr=lr)

    return ANN,loss_fun,optimizer


In [None]:
# %% Function to train the model

# Parameters
num_epochs = 500

def train_model():
    """
    Train the current model for num_epochs epochs and track its performance.

    The function takes no arguments: it reads the notebook-level names ANN,
    loss_fun, optimizer, train_loader, test_loader and num_epochs, so all of
    them must be defined before the call.

    Returns
    -------
    train_acc : list
        Per-epoch training accuracy (%), averaged over the mini-batches.
    test_acc : list
        Per-epoch accuracy (%) on the first batch yielded by test_loader.
    losses : list
        Per-epoch training loss, averaged over the mini-batches.
    """

    # Initialise accuracies
    train_acc = []
    test_acc  = []
    losses    = []

    # Loop over epochs
    for epoch_i in range(num_epochs):

        batch_acc  = []
        batch_loss = []

        for X,y in train_loader:

            # Forward propagation and loss
            yHat = ANN(X)
            loss = loss_fun(yHat,y)

            # Only now do backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Batch training accuracy
            batch_acc.append( 100*torch.mean((torch.argmax(yHat,axis=1)==y).float()).item() )
            batch_loss.append(loss.item())

        # Average accuracy from batch
        train_acc.append(np.mean(batch_acc))
        losses.append(np.mean(batch_loss))

        # Test accuracy (pure inference: gradient tracking is switched off so that
        # no autograd graph is built for the test forward pass)
        X,y = next(iter(test_loader))
        with torch.no_grad():
            pred_labels = torch.argmax(ANN(X),axis=1)
        test_acc.append(  100*torch.mean((pred_labels==y).float()).item() )

    # Function output
    return train_acc,test_acc,losses


In [None]:
# %% Parametric experiment over mini-batches size

# Exponents n of the tested batch sizes 2^n, n = 1..6
batch_size_exp = np.arange(1,7)
n_conditions   = len(batch_size_exp)

# Test loader serving the whole test set as one batch
test_loader = DataLoader(test_data,batch_size=test_data.tensors[0].shape[0])

# Result matrices: one row per epoch, one column per batch size
train_acc,test_acc,losses = (np.zeros((num_epochs,n_conditions)) for _ in range(3))

for col,exponent in enumerate(batch_size_exp):

    # Shuffled training loader for this batch size (incomplete last batch is dropped)
    batch_size   = int(2**exponent)
    train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last=True)

    # Fresh model for every condition; train_model() picks these names up from the notebook scope
    ANN,loss_fun,optimizer = gen_model()
    epoch_train_acc,epoch_test_acc,epoch_losses = train_model()

    train_acc[:,col] = epoch_train_acc
    test_acc[:,col]  = epoch_test_acc
    losses[:,col]    = epoch_losses


In [None]:
# %% Functions for 1D smoothing filter

# Improved for edge effects - adaptive window
def smooth_adaptive(x,k):
    """
    Moving-average filter whose window shrinks at the edges of the signal.

    For every index i the output is the mean of x[i-k//2 : i+k//2+1], clipped
    to the valid index range, so no padding is needed and the first and last
    samples are averaged over fewer points.

    Parameters
    ----------
    x : array-like
        Signal to smooth (iterated along its first axis).
    k : int
        Nominal window length; the window actually spans 2*(k//2)+1 samples.

    Returns
    -------
    smoothed : np.ndarray
        Smoothed signal with the same shape as x. The dtype is always
        floating: integer input is promoted to float64 so that the window
        means are not truncated, float input keeps its own dtype.
    """
    x = np.asarray(x)

    # An integer output buffer would silently truncate every mean written into it
    out_dtype = x.dtype if np.issubdtype(x.dtype,np.inexact) else np.float64
    smoothed  = np.zeros_like(x,dtype=out_dtype)
    half_k    = k // 2

    for i in range(len(x)):
        start       = max(0, i-half_k)
        end         = min(len(x), i+half_k + 1)
        smoothed[i] = np.mean(x[start:end])

    return smoothed


In [None]:
# %% Plotting

fig,ax = plt.subplots(1,2,figsize=(17,7))

# One colour of the plasma colormap per batch-size condition
n_conditions = len(batch_size_exp)
cmaps        = plt.cm.plasma(np.linspace(.1,.9,n_conditions))

# Make the legend easier to read (actual batch sizes rather than exponents)
leglabels = [2**int(exponent) for exponent in batch_size_exp]

# Left panel: train accuracy; right panel: test accuracy
panels = zip(ax,(train_acc,test_acc),('Train accuracy','Test accuracy'))
for axis,accuracy,panel_title in panels:

    # Smoothed learning curve of every condition
    for col in range(n_conditions):
        axis.plot(smooth_adaptive(accuracy[:,col],20),color=cmaps[col])

    # Common features
    axis.set_title(panel_title)
    axis.legend(leglabels)
    axis.set_xlabel('Epoch')
    axis.set_ylabel('Accuracy (%)')
    axis.set_ylim([50,101])
    axis.grid()

plt.savefig('figure43_code_challenge_7.png')

plt.show()

# Colab helper: push the saved file to the browser
files.download('figure43_code_challenge_7.png')


In [None]:
# %% Exercise 1
#    Why are the minibatch sizes specified in powers of 2? That's partly because DL is developed by math/computer nerds,
#    but it's also partly an attempt to optimize computation speed, because computer memory comes in powers of two.
#    But 2**N is not a specific requirement. Adjust the code to use batch sizes corresponding to six linearly spaced
#    integers between 2 and 50.

# Easily done by changing the right variable

# %% Modified parametric experiment over mini-batches size

# Six linearly spaced integer batch sizes between 2 and 50 (used as they are, not as exponents)
batch_size_exp = np.linspace(2,50,6,dtype=int)
test_loader    = DataLoader(test_data,batch_size=test_data.tensors[0].shape[0])

# Result matrices: one row per epoch, one column per batch size
results_shape = (num_epochs,len(batch_size_exp))
train_acc     = np.zeros(results_shape)
test_acc      = np.zeros(results_shape)
losses        = np.zeros(results_shape)

for col in range(len(batch_size_exp)):

    # Shuffled training loader for this batch size (incomplete last batch is dropped)
    batch_size   = int(batch_size_exp[col])
    train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True,drop_last=True)

    # Fresh model for every condition; train_model() picks these names up from the notebook scope
    ANN,loss_fun,optimizer = gen_model()
    train_acc[:,col],test_acc[:,col],losses[:,col] = train_model()


In [None]:
# %% Exercise 2
#    Why did I ask you to set the learning rate to .001? Try this experiment again using higher and lower learning rates.
#    What do you conclude about batch sizes and learning rate?

# Assuming a constant number of epochs (500), varying the learning rate (e.g., to 0.01 and 0.0005) of course makes
# the model accuracy collapse for small lr values; however, there seems to be a complex interaction with the batch
# size: with a larger lr, the learning curve is relatively stable (except for very large batches), while for a
# smaller lr, the learning curve is highly dependent on the batch size. However, one must also take into account
# that the Iris dataset is quite homogeneous (i.e., small batches work better)
