#06 - Training Loop

Once the model environment and data collection functions were set up, I implemented a loop to train my network on the normalized wine quality features. This involved cycling through the training data for multiple epochs and performing a gradient descent update at every step. In every iteration of the loop, I measured my desired metrics for both accuracy and loss curvature and logged these values to my data frame. This way, I was able to evaluate the progressive improvement of my model over time and see how each activation function affected training speed.

In [None]:
#Importing all existing network configurations
import sys, pathlib
sys.path.append(str(pathlib.Path.cwd().parent / "src"))
from utils import make_mlp, set_seed, plot_loss

In [None]:
def train_and_log(activation_name, activation_fn):
    """Train a fresh network with one activation and collect its metrics.

    Relies on names defined elsewhere in the notebook: ``num_epochs``,
    ``dataloader``, ``val_dataloader``, ``SimpleNeuralNetwork``,
    ``lipschitz_lower_bound``, ``lipschitz_upper_bound``,
    ``max_hessian_eigval`` and ``find_best_f1``.

    Args:
        activation_name: Label stored in the ``'activation'`` field of every
            log row and shown in the per-epoch printout.
        activation_fn: Callable passed to ``SimpleNeuralNetwork`` as its
            ``activation`` argument.

    Returns:
        A list of dicts. Each training step contributes one row holding the
        Lipschitz bounds and the largest Hessian eigenvalue (loss fields are
        NaN); each epoch contributes one further row holding the smoothed
        training loss, the validation loss and the best F1 score (step-level
        fields are NaN).
    """
    #Re-establishing key hyperparameters
    factor = 0.95  #Weight kept from the previous running training loss
    model = SimpleNeuralNetwork(activation=activation_fn)
    optimizer = optim.Adam(model.parameters(), lr=5e-4)
    criterion = nn.BCELoss()

    logs = []
    train_loss = None  #Running (exponentially smoothed) loss; carried across epochs

    #Training over multiple epochs
    for epoch in range(num_epochs):
        model.train()
        for step, (batch_x, batch_y) in enumerate(dataloader):
            #Calculating Lipschitz metrics at the start of each training step
            optimizer.zero_grad()

            lower = lipschitz_lower_bound(model, batch_x)
            #NOTE(review): 0.25 looks like an activation slope bound (the
            #sigmoid maximum); confirm it is also intended for the ReLU run
            upper = lipschitz_upper_bound(model, 0.25)

            #Clearing gradients again right before the forward pass so that
            #anything the metric helpers may have accumulated into .grad
            #cannot leak into this parameter update
            optimizer.zero_grad()

            #Calculating loss and performing backpropagation
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()

            optimizer.step()

            #Calculating largest Hessian eigenvalue (after the parameter update)
            lam_max = max_hessian_eigval(model, criterion, batch_x.detach(), batch_y.detach())

            #Updating the running training loss
            step_loss = loss.item()
            if train_loss is None:
                train_loss = step_loss
            else:
                train_loss = factor * train_loss + (1 - factor) * step_loss

            #Logging metrics per step on data frame
            logs.append({
                'epoch': epoch,
                'step': step,
                'activation': activation_name,
                'train_loss': np.nan,
                'val_loss': np.nan,
                'f1_score': np.nan,
                'lipschitz_upper': upper,
                'lipschitz_lower': lower,
                'max_hessian_eigval': lam_max
            })

        #Running model in evaluation mode against validation data
        model.eval()
        with torch.no_grad():
            val_labels = []
            val_outputs = []

            #Collecting model outputs and labels for the validation set
            for val_x, val_y in val_dataloader:
                val_outputs.append(model(val_x))
                val_labels.append(val_y)

            val_labels = torch.cat(val_labels, dim=0)
            val_outputs = torch.cat(val_outputs, dim=0)

            #Evaluating the loss once over the whole validation set: averaging
            #per-batch means would over-weight a smaller final batch
            val_loss = criterion(val_outputs, val_labels).item()

            best_f1, best_threshold = find_best_f1(val_outputs, val_labels)

        #Condensing step-level logs into an epoch-level log
        logs.append({
            'epoch': epoch,
            'step': np.nan,
            'activation': activation_name,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'f1_score': best_f1,
            'lipschitz_upper': np.nan,
            'lipschitz_lower': np.nan,
            'max_hessian_eigval': np.nan
        })

        print(f'Activation: {activation_name}, Epoch {epoch}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, val_f1={best_f1:.3f}, threshold={best_threshold}')

    return logs

#Running one training session per activation function and pooling the logs
activation_table = (
    ('relu', F.relu),
    ('sigmoid', torch.sigmoid),
)
all_logs = []

for act_name, act_fn in activation_table:
    logs = train_and_log(act_name, act_fn)
    all_logs += logs

#Building a single data frame from the rows of every run
df_all = pd.DataFrame(all_logs)

#Previewing the first rows of the combined frame
preview = df_all.head()
print(preview)

Activation: relu, Epoch 0: train_loss=0.5689, val_loss=0.5453, val_f1=0.346, threshold=0.3
Activation: relu, Epoch 1: train_loss=0.4739, val_loss=0.4890, val_f1=0.409, threshold=0.2
Activation: relu, Epoch 2: train_loss=0.4374, val_loss=0.4602, val_f1=0.456, threshold=0.2
Activation: relu, Epoch 3: train_loss=0.4166, val_loss=0.4408, val_f1=0.490, threshold=0.2
Activation: relu, Epoch 4: train_loss=0.4072, val_loss=0.4283, val_f1=0.518, threshold=0.2
Activation: relu, Epoch 5: train_loss=0.3954, val_loss=0.4182, val_f1=0.528, threshold=0.2
Activation: relu, Epoch 6: train_loss=0.3816, val_loss=0.4119, val_f1=0.531, threshold=0.2
Activation: relu, Epoch 7: train_loss=0.3727, val_loss=0.4087, val_f1=0.533, threshold=0.2
Activation: relu, Epoch 8: train_loss=0.3803, val_loss=0.4049, val_f1=0.536, threshold=0.2
Activation: relu, Epoch 9: train_loss=0.3861, val_loss=0.4039, val_f1=0.524, threshold=0.2
Activation: relu, Epoch 10: train_loss=0.3664, val_loss=0.4029, val_f1=0.519, threshold=0.