In [3]:
import numpy as np

In [4]:
def sigmoid(z):
    '''
    Logistic sigmoid 1 / (1 + exp(-z)), evaluated element-wise.

    The value is computed as exp(-log(1 + exp(-z))) through np.logaddexp,
    so exp() is never evaluated on a large positive number. The direct
    formula overflows for large negative z and emits a RuntimeWarning.

    Parameters
    ----------
    z : scalar or array accepted by numpy ufuncs

    Returns
    -------
    Value(s) between 0 and 1, same shape as z.
    '''
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, -z))


def predict_probability(X, theta):
    '''
    Probability of the positive class for each row of X.

    The linear score is the matrix product of X and theta; the score is
    then passed through the sigmoid function.
    '''
    return sigmoid(np.matmul(X, theta))


def predict_class(probability, threshold=0.5):
    '''
    Turn probabilities into hard 0/1 class predictions.

    Parameters
    ----------
    probability : scalar or array-like of probabilities
    threshold : cut-off; a probability greater than or equal to it is
        classified as positive (1), anything below as negative (0).
        Defaults to 0.5.

    Returns
    -------
    numpy array of 0s and 1s with the same shape as probability.
    '''
    # asarray lets plain Python lists be compared element-wise as well
    return np.where(np.asarray(probability) >= threshold, 1, 0)


def loss_function(predictions, labels, eps=1e-15):
    '''
    Mean log loss (binary cross-entropy) between predictions and labels.

    Parameters
    ----------
    predictions : array of predicted probabilities of the positive class
    labels : array of true labels, used as the weights of the two log terms
    eps : predictions are clipped to [eps, 1 - eps] before taking logs.
        Without this, a prediction of exactly 0 or 1 makes log() return
        -inf and the mean becomes inf or nan. The default is sized for
        float64; use a larger value for lower-precision inputs.

    Returns
    -------
    The mean of the per-sample losses.
    '''
    clipped = np.clip(predictions, eps, 1 - eps)
    loss = -labels * np.log(clipped) - (1 - labels) * np.log(1 - clipped)
    return np.mean(loss)


def init_params(dims, seed=None):
    '''
    Initialise the weight parameters to standard-normal random numbers and
    the momentum (velocity) to zeros of the same shape.

    Parameters
    ----------
    dims : int or tuple of ints, shape of the weight array
    seed : optional seed for reproducible weights. When None (default) the
        weights are drawn from numpy's global random state, so a caller's
        np.random.seed(...) is still honoured. Otherwise a dedicated
        generator seeded with this value is used.

    Returns
    -------
    (theta, v_t) : the random weights and the all-zero velocity
    '''
    if seed is None:
        theta = np.random.standard_normal(dims)
    else:
        theta = np.random.default_rng(seed).standard_normal(dims)
    v_t = np.zeros_like(theta)
    return theta, v_t


def plotLosses(train_losses, val_losses):
    '''
    Plot the training and validation losses acquired during training.

    Both curves are drawn on one figure against the epoch index, with about
    ten ticks on the x axis. The figure is displayed with plt.show().

    Parameters
    ----------
    train_losses : sequence of training losses, one per epoch
    val_losses : sequence of validation losses, one per epoch

    Returns
    -------
    None
    '''
    n_epochs = len(train_losses)
    # round(n/10) is 0 for runs of 5 epochs or fewer, and np.arange rejects
    # a step of zero, so fall back to one tick per epoch
    tick_step = max(1, round(n_epochs / 10))

    plt.figure(figsize=(10,5))
    plt.plot(np.arange(n_epochs), train_losses, color='b', label='Training')
    plt.plot(np.arange(len(val_losses)), val_losses, color='r', label='Validation')
    plt.xticks(np.arange(0, n_epochs, step=tick_step))
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()
    return

In [5]:
def train_LogReg(X_train, y_train, X_val, y_val, max_epochs=200, lr=0.01, gamma=0.9):
    '''
    Function to train a logistic regression model using full-batch gradient
    descent with momentum.

    Each epoch computes the loss and gradient on the whole training set,
    applies a momentum update to the weights, then evaluates the updated
    weights on the validation set. Training stops after max_epochs, or
    earlier once a convergence criterion on the validation loss is met.

    Parameters
    ----------
    X_train, X_val : 2-D feature arrays, one row per sample
    y_train, y_val : labels (assumed 1-D, one 0/1 entry per row -- TODO
        confirm against callers)
    max_epochs : maximum number of weight updates
    lr : learning rate applied to the gradient
    gamma : momentum coefficient applied to the previous velocity

    Returns
    -------
    dict with keys
        'best_loss'    : lowest validation loss seen
        'best_epoch'   : epoch index at which it was seen
        'best_weights' : copy of the weights that produced 'best_loss'
                         (the initial weights if the loss never improved)
        'train_losses' : training loss of every epoch that was run
        'val_losses'   : validation loss of every epoch that was run
    '''
    #initialise weights to random numbers, and velocity to all 0s
    theta, v_t = init_params(X_train.shape[1])

    #train and val losses of every epoch
    train_losses = []
    val_losses = []

    #track model parameters; the loss lists are the same objects as the ones
    #appended to below, so the returned history is complete on every exit path
    model_dict = {
        'best_loss' : np.inf,
        'best_epoch' : 0,
        'best_weights' : theta.copy(),
        'train_losses' : train_losses,
        'val_losses' : val_losses
    }

    #print progress about 10 times per run; never less than every epoch,
    #otherwise max_epochs <= 5 would give a modulo by zero
    print_every = max(1, round(max_epochs/10))

    #begin training
    for epoch in range(0,max_epochs):

        #training set probability predictions and loss calculation
        probs = predict_probability(X_train, theta)
        train_loss = loss_function(probs, y_train)
        train_losses.append(train_loss)

        #calculate gradient
        grad = np.matmul(X_train.T,(probs-y_train)) / len(X_train)

        #momentum
        v_t = gamma*v_t + lr*grad

        #update weights (in place)
        theta -= v_t

        #validation set probability predictions and loss calculation
        val_probs = predict_probability(X_val, theta)
        val_loss = loss_function(val_probs, y_val)
        val_losses.append(val_loss)

        #update best weights when validation loss improves
        if val_loss < model_dict['best_loss']:
            model_dict['best_loss'] = val_loss
            model_dict['best_epoch'] = epoch
            #theta is updated in place every epoch, so a snapshot is needed;
            #storing theta itself would always end up as the final weights
            model_dict['best_weights'] = theta.copy()

        if epoch%print_every==0:
            print("[Epoch: {}] \t Training loss: {:.6f} \t Validation loss: {:.6f}".format(epoch, train_loss, val_loss))

        #convergence criteria only kick in after epoch 20
        if epoch>20:
            #criteria (1): validation loss higher than it was 10 epochs ago
            if val_losses[-1] > val_losses[-11]:
                print('Convergence criteria (1) met: Increase in validation loss, as compared to 11 updates prior')
                return model_dict
            #criteria (2): validation loss decreased less than 0.01% from last epoch
            if ((val_losses[-2] - val_losses[-1]) / val_losses[-2]) < 0.0001:
                print('Convergence criteria (2) met: Decrease in validation loss less than 0.01%')
                return model_dict

    return model_dict

In [None]:
# Fit the logistic regression model on the training split, monitoring the
# validation split for best weights and early stopping.
model = train_LogReg(
    X_train,
    y_train,
    X_val,
    y_val,
    max_epochs=200,
    lr=0.01,
    gamma=0.9,
)

# Show how the training and validation losses evolved over the epochs.
plotLosses(model['train_losses'], model['val_losses'])