In [None]:
################################################################################
# Description: 
# Trainer for ArchOneCNN, performing k-fold validation and undersampling of the
# training set. 
#  
# Cell 1: 
# Set up of model hyperparameters and dataset. 
# Cell 2: 
# K-fold training procedure. 
# Cell 3: 
# Testing of model on partitioned test set.  
# Cell 4: 
# Evaluation of results. 
# 
################################################################################
from arch1_cnn import *
from arch1_cnn import ArchOneCNN

# Constants
CATEGORIES = 14                    # Number of categories in classification
MODEL_NAME = "Architecture-1 CNN"  # Model name for graphing and checkpoint files

# Setup: initialize the hyperparameters/variables
num_epochs = 5              # Number of full passes through the dataset
batch_size = 16             # Number of samples in each minibatch
learning_rate = 0.0001
# np.random.seed() returns None, so the integer seed is kept in its own
# variable; assigning the call's result would hand None to create_k_loaders.
seed = 1
np.random.seed(seed)        # Seed NumPy's global RNG for reproducibility
p_test = 0.2                # Fraction of the overall dataset to reserve for testing

# Training Variables
N = 140                     # Number of minibatches between metric snapshots / validation passes
early_stop = True           # Stop early when the validation loss keeps increasing
early_stop_evals = 7        # How many consecutive increases in loss before ending training
k = 2                       # Number of folds for validation

# Resize every image to 512x512 and convert it to a tensor; further
# transformations (e.g. scaling) can be appended to this pipeline later.
transform = transforms.Compose([transforms.Resize([512, 512]), transforms.ToTensor()])

# Check if your system supports CUDA
use_cuda = torch.cuda.is_available()

# Setup GPU optimization if CUDA is supported
if use_cuda:
    computing_device = torch.device("cuda")
    extras = {"num_workers": 1, "pin_memory": True}
    print("CUDA is supported")
else:  # Otherwise, train on the CPU
    computing_device = torch.device("cpu")
    # NOTE(review): presumably create_k_loaders treats a falsy `extras` as
    # "no extra DataLoader options" -- confirm against its implementation.
    extras = False
    print("CUDA NOT supported")

# Setup the k validation sets and the test set; per the file header the loader
# helper also addresses class imbalance by manipulating the training set.
k_loader, test_loader = create_k_loaders(batch_size, seed, transform=transform,
                                         k=k, p_test=p_test,
                                         shuffle=True, show_sample=False,
                                         extras=extras)

# Instantiate an ArchOneCNN to run on the GPU or CPU based on CUDA support
model = ArchOneCNN()
model = model.to(computing_device)
print("Model on CUDA?", next(model.parameters()).is_cuda)

# Define the loss criterion (loss criteria are defined in the torch.nn package)
criterion = nn.BCELoss()

# Adam optimizer over all model parameters (optimizers live in torch.optim)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training
#
# Runs the k-fold loop: for every fold, the fold's loader is used as the
# validation set and the remaining loaders as the training set. Every N
# minibatches the running training metrics are recorded and a full validation
# pass is performed. The model state with the lowest validation loss of each
# fold is checkpointed to disk, and after all folds the best checkpoint overall
# is loaded back into `model`.

# Holds training accuracy for k-sets, loss averaged over last N minibatches
training_acc = [[] for _ in range(k)]
training_loss = [[] for _ in range(k)]
# Hold validation accuracy, loss for total set after each N minibatches
validation_acc = [[] for _ in range(k)]
validation_loss = [[] for _ in range(k)]
# Holds number of data points collected
num_data_points = [0] * k

# NOTE(review): `model` and `optimizer` are created once in the setup cell and
# are not re-initialized here, so every fold after the first continues from
# weights that were already trained on that fold's validation data. Confirm
# whether this is intended before comparing folds.

# Use kset as validation for each k-fold
for kset in range(k):
    isDone = False     # Set by early stop
    loss_increase = 0  # Consecutive validations whose loss is above the fold's minimum
    # Checkpoint file holding this fold's lowest-validation-loss weights
    checkpoint_path = f'Lowest_Loss_{MODEL_NAME}_K{kset + 1}.pt'
    # Every loader except the validation fold is used for training
    train_loaders = k_loader[:kset] + k_loader[kset + 1:]
    model.train()

    # Begin training procedure
    for epoch in range(num_epochs):
        if isDone:
            break
        N_minibatch_acc = N_minibatch_loss = 0.0
        # Iterate over each loader in the kset, using as training set
        for t_number, train_loader in enumerate(train_loaders):
            # Get the next minibatch of images, labels for training
            for minibatch_count, (images, labels) in enumerate(train_loader):

                # Put the minibatch data in CUDA Tensors and run on the GPU if supported
                images, labels = images.to(computing_device), labels.to(computing_device)
                # Zero out the stored gradient (buffer) from the previous iteration
                optimizer.zero_grad()

                # Perform the forward pass through the network and compute the loss
                outputs = model(images)
                loss = criterion(outputs, labels)

                # Update accuracy and loss for this one minibatch. Divide by the
                # real element count so a short final batch is not under-reported.
                correct = ((outputs > 0.5) == labels.byte()).sum().item()
                N_minibatch_acc += correct / labels.numel()
                N_minibatch_loss += loss.item()

                # Minibatch index within the epoch, across all training loaders
                # (assumes the training loaders have equal length)
                epoch_minibatch = minibatch_count + (t_number * len(train_loader))

                # Calculate loss and accuracy over past N minibatches, then validate
                if epoch_minibatch % N == 0:
                    num_data_points[kset] += 1
                    # For the first minibatch, don't divide by N since it's one minibatch
                    if epoch_minibatch != 0:
                        N_minibatch_acc /= N
                        N_minibatch_loss /= N

                    # Add the averaged loss and accuracy for training set
                    training_acc[kset].append(N_minibatch_acc)
                    training_loss[kset].append(N_minibatch_loss)

                    print(f'K-Set {kset + 1}, Epoch {epoch + 1}: Minibatch ' +
                          f'{epoch_minibatch} processed. ' +
                          f'Accuracy, Loss: {round(N_minibatch_acc, 4)}, {round(N_minibatch_loss, 2)}')
                    # Reset metric for next batch of minibatches
                    N_minibatch_acc = N_minibatch_loss = 0.0

                    # Validation
                    val_loss = 0.0
                    val_correct = 0.0
                    val_total = 0  # Number of label elements seen in the validation set
                    # Evaluate in inference mode so layers such as dropout or
                    # batch norm (if the model has any) behave deterministically
                    model.eval()
                    # Sum total correct and loss over validation set
                    with torch.no_grad():
                        for val_images, val_labels in k_loader[kset]:
                            # Put the minibatch data in CUDA Tensors and run on the GPU if supported
                            val_images, val_labels = val_images.to(computing_device), val_labels.to(computing_device)
                            # Perform the forward pass through the network and compute the loss and correct
                            val_outputs = model(val_images)
                            val_loss += criterion(val_outputs, val_labels).item()
                            val_correct += ((val_outputs > 0.5) == val_labels.byte()).sum().item()
                            val_total += val_labels.numel()
                    model.train()
                    # Add metrics to list
                    validation_acc[kset].append(val_correct / val_total)
                    validation_loss[kset].append(val_loss / len(k_loader[kset]))
                    print(f'\tValidation Accuracy, Loss: {round(validation_acc[kset][-1], 4)}, {round(validation_loss[kset][-1], 5)}')

                    if min(validation_loss[kset]) < validation_loss[kset][-1]:
                        # Loss is above the best seen so far in this fold
                        loss_increase += 1
                    else:
                        # Current state is the best so far: checkpoint it. This is
                        # done regardless of `early_stop` so that a checkpoint
                        # always exists for the final model retrieval below.
                        torch.save(model.state_dict(), checkpoint_path)
                        loss_increase = 0

                    # Perform validation loss check to early stop
                    if early_stop and loss_increase >= early_stop_evals:
                        print(f'\nEarly Stop at Epoch {epoch + 1}: Minibatch {epoch_minibatch}')
                        print(f'\tBest Model Saved for K{kset + 1}, {early_stop_evals * N} Minibatches Prior\n')
                        isDone = True
                        break
                # End loss, accuracy, and validation of past N minibatches

                # Compute the gradients by backpropagating the loss through the network
                loss.backward()
                # Update the weights
                optimizer.step()
            # End iteration of minibatches of one training loader
            if isDone:
                break
        # End iteration of training loaders (1 epoch)
    # End processing of all epochs
# End processing of all ksets

# Retrieve the model with the lowest loss over ksets, keep only its information
lowest_loss = [min(fold_losses) for fold_losses in validation_loss]
best_k = lowest_loss.index(min(lowest_loss))

# Load the lowest-loss checkpoint, which exists whether or not early stopping fired
print(f'Retrieving K{best_k + 1} Model, Loss of: {min(lowest_loss)}')
model.load_state_dict(torch.load(f'Lowest_Loss_{MODEL_NAME}_K{best_k + 1}.pt'))
training_acc = training_acc[best_k]
training_loss = training_loss[best_k]
validation_acc = validation_acc[best_k]
validation_loss = validation_loss[best_k]
num_data_points = num_data_points[best_k]

In [None]:
# Testing over test set
#
# Runs the model over `test_loader`, thresholding its outputs at 0.5, and
# accumulates per-class true/false positives, false negatives, correct counts
# and a confusion matrix. Then derives per-class and class-averaged metrics.

# Class based performance, stores total values over test set
true_pos = torch.zeros(CATEGORIES, dtype=torch.int64, device=computing_device)
false_pos = true_pos.clone()
false_neg = true_pos.clone()
total_corr = true_pos.clone()
confusion_matrix = torch.zeros((CATEGORIES, CATEGORIES), dtype=torch.float64, device=computing_device)
# Used to prevent divide by zero
epsilon = 0.00000001
# Decimal Precision Desired
decimal_prec = 6
# Holds first 3 batches of classification
batch_classification = []
# Number of test samples actually seen (the last batch may be smaller than batch_size)
num_test_samples = 0

print("Testing Class Weighted Performance ...")
# Inference only: switch to eval mode and skip building autograd graphs
model.eval()
with torch.no_grad():
    for batch, (images, labels) in enumerate(test_loader):
        # Put the minibatch data in CUDA Tensors and run on the GPU if supported
        images, labels = images.to(computing_device), labels.to(computing_device)

        # Threshold the model outputs at 0.5 and cast for ease of calculations
        prediction = (model(images) > 0.5).type(torch.int8)

        # Save first 3 batches of classification
        if batch < 3:
            batch_classification.append(prediction.int().cpu().numpy().tolist())

        labels = labels.type(torch.int8)
        num_test_samples += labels.shape[0]

        # Calculate performance over batch, adding in place to totals
        batch_true_pos = torch.sum(prediction + labels == 2, dim=0)
        true_pos.add_(batch_true_pos)
        false_pos.add_(torch.sum(prediction - labels == 1, dim=0))
        false_neg.add_(torch.sum(prediction - labels == -1, dim=0))
        total_corr.add_(torch.sum(prediction == labels, dim=0))

        # Calculate confusion matrix
        # Add correctly activated elements to confusion matrix diagonal
        confusion_matrix.add_(torch.diag(batch_true_pos).double())
        # Add incorrectly activated elements: each missed label (row) is spread
        # over the classes predicted for that sample (columns), weighted by
        # 1 / (number of predicted classes). Summing the per-sample outer
        # products equals missed^T @ predicted_share.
        missed = (labels - prediction == 1).double()
        predicted = prediction.double()
        predicted_share = predicted / (predicted.sum(dim=1, keepdim=True) + epsilon)
        confusion_matrix.add_(torch.matmul(missed.t(), predicted_share))

# Convert tensors to cpu numpy arrays to access elements
true_pos = true_pos.cpu().numpy()
false_pos = false_pos.cpu().numpy()
false_neg = false_neg.cpu().numpy()
total_corr = total_corr.cpu().numpy()
confusion_matrix = confusion_matrix.cpu().numpy().tolist()

# Calculate performance metrics per class
performance = {'Accuracy': total_corr / num_test_samples,
               'Precision': true_pos / (false_pos + true_pos + epsilon),
               'Recall': true_pos / (true_pos + false_neg + epsilon)}
performance['BCR'] = (performance['Precision'] + performance['Recall']) / 2.0

# Calculate aggregated performance metrics (unweighted mean over classes).
# Loop names deliberately avoid `k`, which holds the number of folds.
agg_performance = {'Aggregated ' + name: sum(values) / CATEGORIES
                   for name, values in performance.items()}

# Convert to lists of plain floats and round
performance = {name: [round(float(value), decimal_prec) for value in values]
               for name, values in performance.items()}
agg_performance = {name: round(float(value), decimal_prec)
                   for name, value in agg_performance.items()}
confusion_matrix = [[round(cell, decimal_prec) for cell in row]
                    for row in confusion_matrix]

In [None]:
# Displaying graphs and metrics
#
# Shows a few learned convolution filters, plots the training/validation
# curves recorded for the selected fold, and prints the test-set metrics.

# Display filters from first 3 layers
all_filters = [model.conv1_1.weight.data.cpu().numpy(),
               model.conv2_1.weight.data.cpu().numpy(),
               model.conv3_1.weight.data.cpu().numpy()]
# For each layer
for layer_number, layer in enumerate(all_filters, 1):
    # Get two filters (second and third filter of layer)
    # PyTorch stores conv weights as (out_channels, in_channels, height, width),
    # so layer[f][0] is the kernel of filter f applied to the first input channel
    for filt_index in range(1, 3):
        _, ax = plt.subplots()
        # Get second and third filter, first input-channel kernel
        ax.imshow(layer[filt_index][0])
        ax.set_title(f'{MODEL_NAME} - Layer {layer_number}: Filter {filt_index}')
        plt.show()

# Plot accuracy for training and validation.
# The first data point is skipped: it is taken after a single minibatch.
batch_range = list(range(num_data_points))
line1, = plt.plot(batch_range[1:], validation_acc[1:], 'g', label="Validation")
line2, = plt.plot(batch_range[1:], training_acc[1:], 'b', label="Training")
plt.legend(handles=[line1, line2])
plt.xlabel(f'{N} Minibatches')
plt.ylabel('Percent Correctly Classified')
plt.title(f'{MODEL_NAME} - Average Accuracy over Past {N} Minibatches')
plt.show()
# Plot loss for training and validation
line1, = plt.plot(batch_range[1:], validation_loss[1:], 'g', label="Validation")
line2, = plt.plot(batch_range[1:], training_loss[1:], 'b', label="Training")
plt.legend(handles=[line1, line2])
plt.xlabel(f'{N} Minibatches')
plt.ylabel('Canonical Loss')
plt.title(f'{MODEL_NAME} - Loss over Past {N} Minibatches')
plt.show()

# Print final test data
print('RAW BY CLASS')
print(f'\tTrue Positives\n{true_pos}')
print(f'\tFalse Positives\n{false_pos}')
print(f'\tFalse Negatives\n{false_neg}')
print(f'\tTotal Correct\n{total_corr}')
print()

# Loop names below deliberately avoid `k`, which holds the number of folds
print('CALCULATED METRICS BY CLASS')
for metric_name, class_values in performance.items():
    print(f'\t{metric_name}')
    print(class_values)
print()

print('CALCULATED METRICS EVENLY WEIGHTED')
for metric_name, metric_value in agg_performance.items():
    # The shorter 'Aggregated BCR' label needs an extra tab to line up
    if metric_name == 'Aggregated BCR':
        print(f'{metric_name}:\t\t{metric_value}')
    else:
        print(f'{metric_name}:\t{metric_value}')
print()

print('CONFUSION MATRIX VALUES BY ROW')
for row in confusion_matrix:
    print(row)
print()

# Shows example classifications, this is what our model is putting out
print('FIRST 3 BATCHES OF CLASSIFICATIONS FROM TEST')
for batch_number, classifications in enumerate(batch_classification, 1):
    print(f'Classification #{batch_number}')
    for row in classifications:
        print(row)
