# **Import Libraries** #
Import any packages required for the project.

In [None]:
#For Import Dataset
from google.colab import drive

# For Datasets and Dataloader
import torch
import os
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, models, transforms
from torchvision.io import read_image

#For EfficientNet Model Architecture
! pip install efficientnet_pytorch
import pandas as pd
import json
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from efficientnet_pytorch import EfficientNet

#For ensemble models
import collections

#For Training & Testing
import time
import copy
import numpy as np
from sklearn.model_selection import StratifiedKFold

# **Directories, Folders, Paths**
Create directories & folders. Initialise file paths.

**Upload LeukemiaData.zip (found on GitHub) to the mounted drive first.**

In [None]:
#Mount Google Drive at /content/drive so the dataset archive stored there can be read.
drive.mount('/content/drive')

In [None]:
#Directory to store base EfficientNet model information
!mkdir /content/baseEfficientNet
!mkdir /content/baseEfficientNet/training_results
!mkdir /content/baseEfficientNet/training_results/weights #Weights
!mkdir /content/baseEfficientNet/training_results/stats #Training stats
!mkdir /content/baseEfficientNet/testing_results/baseStats #Testing stats for base models
!mkdir /content/baseEfficientNet/testing_results/ensembleStats #Testing stats for ensemble model

#Directory to store Noisy Student model information
!mkdir /content/noisyStudent
!mkdir /content/noisyStudent/training_results
!mkdir /content/noisyStudent/training_results/weights #Weights
!mkdir /content/noisyStudent/training_results/stats #Training stats
!mkdir /content/noisyStudent/testing_results/baseStats #Testing stats for base models
!mkdir /content/noisyStudent/testing_results/ensembleStats #Testing stats for ensemble model

In [6]:
#File paths for the dataset (relative to the folder the archive is unzipped into).
train_labels = "LeukemiaData/train_labels.csv"
train_images = "LeukemiaData/train_images"
test_labels = "LeukemiaData/testing/test_labels.csv"
test_images = "LeukemiaData/testing/test_images"

#EfficientNet model
#Absolute paths, matching the folders created under /content and the Noisy Student paths below,
#so that saving does not depend on the current working directory.
baseEN_weights = "/content/baseEfficientNet/training_results/weights"
baseEN_stats = "/content/baseEfficientNet/training_results/stats"
baseEN_test = "/content/baseEfficientNet/testing_results/baseStats"
ensembleEN_test = "/content/baseEfficientNet/testing_results/ensembleStats"

#Noisy Student model
baseNS_weights = "/content/noisyStudent/training_results/weights"
baseNS_stats = "/content/noisyStudent/training_results/stats"
baseNS_test = "/content/noisyStudent/testing_results/baseStats"
ensembleNS_test = "/content/noisyStudent/testing_results/ensembleStats"

# **Import Leukemia Images** # 

In [None]:
#Extract the leukemia image archive from the mounted drive into the current working directory.
!unzip /content/drive/MyDrive/LeukemiaData.zip -d .

# **Classes & Class Utils** #

##CellsDataset Class##
Custom class for image dataset, required for usage of dataloaders.

In [8]:
#Custom CellsDataset class for dataloader to process images.
class CellsDataset(Dataset):
    """Dataset pairing the image files in a folder with rows of a label table.

    The label table must contain a ``Patient_ID`` column holding image file
    names. ``__getitem__`` reads the file name from column position 1 and the
    label from column position 2, which matches the
    ``[Patient_no, Patient_ID, labels]`` layout used throughout this notebook.
    """

    def __init__(self, label_file, img_dir):
        """Load the labels and drop rows whose image is not present in ``img_dir``.

        Args:
            label_file: Path to a CSV file, or an already loaded DataFrame.
            img_dir: Directory containing the image files.

        Raises:
            FileNotFoundError: If ``label_file`` is a path that does not exist
                (previously this was swallowed and failed later with an
                unrelated AttributeError).
        """
        #Initialise image directory
        self.img_dir = img_dir

        #Initialise labels. Dispatch on the type instead of a bare ``except`` so
        #that genuine CSV problems are reported where they happen.
        if isinstance(label_file, pd.DataFrame):
            labels = label_file
        else:
            labels = pd.read_csv(label_file)

        #Remove rows without an image file (set gives O(1) membership tests).
        files = set(os.listdir(self.img_dir))
        self.img_labels = labels.loc[labels['Patient_ID'].isin(files)]

    def __len__(self):
        """Return the number of labelled rows that have an image on disk."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the row at position ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 1])

        #Normalises images with the channel mean/std used for the pretrained EfficientNet-B0.
        transform_data = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

        #Image.open is lazy; the context manager closes the file handle once
        #the pixels have been converted to a tensor.
        with Image.open(img_path) as raw_image:
            image = transform_data(raw_image)
        label = self.img_labels.iloc[idx, 2]

        return image, label

##EfficientNet Class##
Custom class for base EfficientNet model.

In [9]:
class BaseEfficientNet(nn.Module):
    """Pre-trained EfficientNet-B0 feature extractor with a two-output linear head."""

    def __init__(self):
        super().__init__()
        #Import pre-trained EfficientNet-B0 as the feature extractor.
        self.model = EfficientNet.from_pretrained('efficientnet-b0')

        #Head applied on top of the extracted feature map.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(1280, 2)

    def forward(self, x):
        """Return the two-class scores for the input batch ``x``."""
        features = self.model.extract_features(x.float())
        pooled = self.avgpool(features).flatten(start_dim=1)
        #Dropout randomly removes nodes to prevent overfitting.
        regularised = self.dropout(pooled)
        return self.fc(regularised)

## Ensemble Model Utils ##
Helper functions to create ensemble model and perform ensemble voting.

In [10]:
#Function to create ensemble model, returns a dictionary of {model name: model with loaded weights}.
def createEnsemble(model_class, weights_path):
    """Build one independent model per ``.pth`` weight file in ``weights_path``.

    Args:
        model_class: Either a model instance used as a template (it is
            deep-copied for every weight file and left untouched), or a
            zero-argument callable/class returning a new model.
        weights_path: Directory holding the saved state dicts.

    Returns:
        dict mapping ``'model1'``, ``'model2'``, ... to the loaded models,
        numbered in sorted file-name order.

    Raises:
        FileNotFoundError: If the directory contains no ``.pth`` file.
    """
    #Pull files containing base model weights (sorted so numbering is deterministic).
    weight_files = sorted(name for name in os.listdir(weights_path)
                          if name.endswith('.pth'))
    if not weight_files:
        raise FileNotFoundError(f"No .pth weight files found in {weights_path}")

    models = dict()
    for number, file_name in enumerate(weight_files, start=1):
        path = f"{weights_path}/{file_name}"

        #Every base model needs its own object: loading all weight files into
        #one shared instance would leave the ensemble with N references to the
        #last-loaded weights.
        if isinstance(model_class, torch.nn.Module):
            model = copy.deepcopy(model_class)
        else:
            model = model_class()

        #NOTE(review): strict=False silently ignores missing/unexpected keys;
        #kept for backward compatibility with existing weight files.
        model.load_state_dict(torch.load(path, map_location = torch.device('cpu')), strict = False)
        models[f'model{number}'] = model

    return models

In [11]:
#Function for ensemble prediction via majority voting classification.
def get_ensembleVoting(ensemble_model, test_input):
    """Return the per-sample majority-vote class over all base models.

    Args:
        ensemble_model: dict of ``{name: model}`` as returned by
            ``createEnsemble``; any number of base models is accepted.
        test_input: Batch tensor passed to every base model. The models are
            moved to this tensor's device before prediction.

    Returns:
        1-D tensor holding one predicted class index per sample.
    """
    target_device = test_input.device

    #Obtain base model predictions.
    preds = list()
    with torch.no_grad():
        for base_model_i in ensemble_model.values():
            base_model_i.to(target_device) #Send model to CPU/GPU
            base_model_i.eval() #Set model to evaluation mode
            output = base_model_i(test_input)
            preds.append(torch.argmax(output, dim=1))

    #Output final ensemble prediction.
    #Tensors hash by identity, so counting them with collections.Counter treats
    #every model's vote as unique; instead stack the votes as (models, batch)
    #and take the most frequent class for each sample.
    #With an odd number of binary voters there are no ties.
    stacked_votes = torch.stack(preds, dim=0)
    final_pred, _ = torch.mode(stacked_votes, dim=0)

    return final_pred

# **Training Base Models**

## **Initialise Training Parameters** ##

In [None]:
#Training hyperparameters.
learning_rate = 0.001
momentum = 0.9 # SGD momentum
step_size = 7 # scheduler step size
gamma = 0.1 # learning rate decay
seed = torch.manual_seed(42) # seeded before the model is built
batch_size = 5
num_workers = 5
CV_states = 42
num_fold = 5
epochs = 50
patience = 50 #Early stopping.

#Specify GPU/CPU usage.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Model, loss, optimiser and learning-rate schedule shared by the
#EfficientNet and Noisy Student training cells.
base_model = BaseEfficientNet()
criterion = nn.CrossEntropyLoss() #Loss function.
optimizer = optim.SGD(
    base_model.parameters(),
    lr=learning_rate,
    momentum=momentum,
) #Stochastic Gradient descent.
scheduler = StepLR(
    optimizer,
    step_size=step_size,
    gamma=gamma,
) #Decay learning rate.

base_model.to(device)
print(f"Device in use: {device}")

## **Initialise K-Fold CVs** ##

In [17]:
# Stratified K-fold splitter; shuffling uses a fixed random state.
strat_kfold = StratifiedKFold(
    n_splits=num_fold,
    shuffle=True,
    random_state=CV_states,
)

#Inputs for the split: y holds the class labels, X is a zero-filled
#placeholder of the same length (only indexes are needed from the split).
trainLabel_df = pd.read_csv(train_labels)
y = trainLabel_df['labels']
X = np.zeros(len(y))

## **Training base EfficientNet Model** ##



In [18]:
#Training function for EfficientNet model.
def efficientNet_training(model, criterion, optimizer, scheduler, num_epochs=epochs, fold=0): 
    """Train ``model`` for one fold, log the statistics and save the best weights.

    Reads the module-level ``dataloaders`` and ``dataset_sizes`` dictionaries
    (keys ``'training'`` and ``'validation'``) as well as ``device`` and
    ``patience``.

    Args:
        model: Network to train; it must return raw class scores (logits).
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimiser updating ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Maximum number of epochs.
        fold: Zero-based fold index; files are named ``fold{fold+1}_*``.

    Returns:
        ``model`` with the weights of the epoch that reached the best
        validation accuracy loaded (the same weights that are saved to disk).
    """
    # specify output files
    log_file = f"{baseEN_stats}/fold{fold+1}_log.log"
    weight_file = f"{baseEN_weights}/fold{fold+1}_weights.pth"

    since = time.time()

    #Initialise model.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    #Early-stopping state must live across epochs; resetting it inside the
    #phase loop (and starting from a lowest loss of 0.0) meant it never fired.
    lowest_loss = float('inf')
    p_count = 0
    stop_early = False

    #Epoch loop.
    for epoch in range(num_epochs):
        epoch_start = time.time()
        epoch_no = epoch+1
        print('Epoch {}/{}'.format(epoch_no, num_epochs))
        print('-' * 10)
        with open(log_file, 'a') as log:
            log.write('Epoch {}/{}, '.format(epoch_no, num_epochs))

        for phase in ['training', 'validation']:
            is_training = phase == 'training'
            if is_training:
                model.train() #Training mode.
            else:
                model.eval() #Validation mode.

            running_loss = 0.0
            running_corrects = 0

            # Iterate over images.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                #Set parameter gradients to 0.
                optimizer.zero_grad()

                #Iterate forward (gradients only tracked while training).
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs) #Model prediction (logits).
                    hard_preds = torch.argmax(outputs, dim = -1) #Hard labels.
                    #CrossEntropyLoss applies log-softmax itself, so it must be
                    #given the raw logits rather than softmax probabilities.
                    loss = criterion(outputs, labels)

                    #Backward propagate.
                    if is_training:
                        loss.backward()
                        optimizer.step()

                #Batch statistics (loss is a batch mean, so weight by batch size).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(hard_preds == labels).item()

            if is_training:
                scheduler.step()

            #Epoch statistics.
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            #Output training statistics.
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            with open(log_file, 'a') as log:
                log.write('{} Loss: {:.4f} Acc: {:.4f}, '.format(
                    phase, epoch_loss, epoch_acc))

            #Validation phase.
            if not is_training:
                with open(log_file, 'a') as log:
                    log.write('\n')

                #Save model weights if epoch gives best accuracy.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                #Patience counter for early stopping.
                if epoch_loss < lowest_loss:
                    lowest_loss = epoch_loss
                    p_count = 0
                else:
                    p_count += 1
                    stop_early = p_count > patience

        #Output epoch statistics.
        epoch_end = time.time()
        epoch_time = epoch_end - epoch_start
        print('Epoch Time {:.0f}m {:.0f}s\n'.format(
            epoch_time // 60, epoch_time % 60))
        with open(log_file, 'a') as log:
            log.write('Time {:.0f}m {:.0f}s\n'.format(
                epoch_time // 60, epoch_time % 60))
        print()

        #Leave the epoch loop (not just the phase loop) when patience runs out.
        if stop_early:
            print(f'val loss did not decrease after {patience} epochs')
            break

    #Output runtime.
    time_elapsed = time.time() - since
    print('Time {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    with open(log_file, 'a') as log:
        log.write('Time {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60))
        log.write('Best val Acc: {:.4f}'.format(best_acc))

    #Save weights of the best epoch model as file.
    torch.save(best_model_wts, weight_file)

    #Return the best model rather than the last-epoch one.
    model.load_state_dict(best_model_wts)
    return model

In [None]:
#Fold code for EfficientNet training.
label_columns = ["Patient_no", "Patient_ID", "labels"]
for fold, (train_idx, valid_idx) in enumerate(strat_kfold.split(X, y)):
    fold_no = fold+1
    print(f"FOLD {fold_no}") # Keep track of fold number 

    #Convert training and validation data to custom class.
    train_df = trainLabel_df.iloc[train_idx][label_columns]
    valid_df = trainLabel_df.iloc[valid_idx][label_columns]
    train_set = CellsDataset(train_df, train_images)
    valid_set = CellsDataset(valid_df, train_images)

    #Load training and validation sets to dataloader.
    valid_loader = DataLoader(valid_set, batch_size=batch_size, 
                              num_workers=num_workers, shuffle=True)
    train_loader = DataLoader(train_set, batch_size=batch_size, 
                              num_workers=num_workers, shuffle=True)
    #efficientNet_training reads these two module-level dictionaries.
    dataloaders = {'training': train_loader, 'validation': valid_loader}
    dataset_sizes = {'training': len(train_set), 'validation': len(valid_set)}
    class_names = [0, 1] #Binary class labels.

    #Start every fold from a fresh pre-trained model with its own optimiser and
    #scheduler. Reusing one model across folds lets it train on images that
    #later serve as validation data, and the shared StepLR would already have
    #decayed the learning rate to almost zero after the first fold.
    fold_model = BaseEfficientNet().to(device)
    fold_optimizer = optim.SGD(fold_model.parameters(),
                               lr=learning_rate,
                               momentum=momentum)
    fold_scheduler = StepLR(fold_optimizer,
                            step_size=step_size,
                            gamma=gamma)

    #Train EfficientNet model. Pass the current fold index (not num_fold) so
    #each fold writes its own log and weight file instead of overwriting one.
    efficientNet_training(fold_model, criterion, fold_optimizer, fold_scheduler, 
                          num_epochs = epochs, fold = fold)

## **Training Noisy Student** ##

In [23]:
#Training function for Noisy Student model.
def noisyStudentTraining(model, dataloader, dataset_sizes, criterion, optimizer, scheduler, num_epochs = 50, fold = 0):
    """Train a teacher/student ``model`` for one fold and save its best weights.

    Reads the module-level ``device`` and ``patience``.

    Args:
        model: Network to train; it must return raw class scores (logits).
        dataloader: dict with ``'training'`` and ``'validation'`` loaders.
        dataset_sizes: dict with the sample count of each phase.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimiser updating ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Maximum number of epochs.
        fold: Zero-based fold index; files are named ``fold{fold+1}_*``.

    Returns:
        ``model`` with the weights of the epoch that reached the best
        validation accuracy loaded (the same weights that are saved to disk).
    """
    # specify output files
    log_file = f"{baseNS_stats}/fold{fold+1}_log.log"
    weight_file = f"{baseNS_weights}/fold{fold+1}_weights.pth"

    since = time.time()

    #Initialise model.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    #Early-stopping state must live across epochs; resetting it inside the
    #phase loop (and starting from a lowest loss of 0.0) meant it never fired.
    lowest_loss = float('inf')
    p_count = 0
    stop_early = False

    #Epoch loop.
    for epoch in range(num_epochs):
        epoch_start = time.time()
        epoch_no = epoch+1
        print('Epoch {}/{}'.format(epoch_no, num_epochs))
        print('-' * 10)
        with open(log_file, 'a') as log:
            log.write('Epoch {}/{}, '.format(epoch_no, num_epochs))

        for phase in ['training', 'validation']:
            is_training = phase == 'training'
            if is_training:
                model.train()  #Training mode.
            else:
                model.eval()   #Validation mode.

            running_loss = 0.0
            running_corrects = 0

            #Iterate over images.
            for inputs, labels in dataloader[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                #Set parameter gradients to 0.
                optimizer.zero_grad()

                #Iterate forward (gradients only tracked while training).
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs) #Model prediction (logits).
                    hard_preds = torch.argmax(outputs, dim = -1) #Hard labels.
                    #CrossEntropyLoss applies log-softmax itself, so it must be
                    #given the raw logits rather than softmax probabilities.
                    loss = criterion(outputs, labels)

                    #Backward propagate.
                    if is_training:
                        loss.backward()
                        optimizer.step()

                #Batch statistics (loss is a batch mean, so weight by batch size).
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(hard_preds == labels).item()

            if is_training:
                scheduler.step()

            #Epoch statistics.
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            #Output training statistics.
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            with open(log_file, 'a') as log:
                log.write('{} Loss: {:.4f} Acc: {:.4f}, '.format(
                    phase, epoch_loss, epoch_acc))

            #Validation phase.
            if not is_training:
                with open(log_file, 'a') as log:
                    log.write('\n')

                #Save model weights if epoch gives best accuracy.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                #Patience counter for early stopping.
                if epoch_loss < lowest_loss:
                    lowest_loss = epoch_loss
                    p_count = 0
                else:
                    p_count += 1
                    stop_early = p_count > patience

        #Output epoch statistics.
        epoch_end = time.time()
        epoch_time = epoch_end - epoch_start
        print('Epoch Time {:.0f}m {:.0f}s\n'.format(
            epoch_time // 60, epoch_time % 60))
        with open(log_file, 'a') as log:
            log.write('Time {:.0f}m {:.0f}s\n'.format(
                epoch_time // 60, epoch_time % 60))
        print()

        #Leave the epoch loop (not just the phase loop) when patience runs out.
        if stop_early:
            print(f'val loss did not decrease after {patience} epochs')
            break

    #Output runtime.
    time_elapsed = time.time() - since
    print('Time {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    #Save accuracy and runtime as log file.
    with open(log_file, 'a') as log:
        log.write('Time {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60))
        log.write('Best val Acc: {:.4f}'.format(best_acc))

    #Save weights of the best epoch model as file.
    torch.save(best_model_wts, weight_file)

    #Return the best model rather than the last-epoch one.
    model.load_state_dict(best_model_wts)
    return model

In [None]:
#Fold code for Noisy Student training.
label_columns = ["Patient_no", "Patient_ID", "labels"]
NS_iterations = 3 #One teacher round followed by two student rounds.
for fold, (train_idx, valid_idx) in enumerate(strat_kfold.split(X, y)):
    fold_no = fold+1
    print("FOLD {}".format(fold_no))
    
    #Split training set into labelled & unlabelled sets by index parity.
    labelled_idx = [i for i in train_idx if i % 2 == 0]
    unlabelled_idx = [i for i in train_idx if i % 2 != 0]
    
    #Convert labelled, unlabelled and validation sets to custom class.
    labelled_df = trainLabel_df.iloc[labelled_idx][label_columns]
    unlabelled_df = trainLabel_df.iloc[unlabelled_idx][label_columns]
    valid_df = trainLabel_df.iloc[valid_idx][label_columns]
    valid_set = CellsDataset(valid_df, train_images)
    labelled_set = CellsDataset(labelled_df, train_images)
    unlabelled_set = CellsDataset(unlabelled_df, train_images)

    #Load labelled, unlabelled and validation sets to dataloader.
    valid_loader = DataLoader(valid_set, batch_size=batch_size, num_workers=num_workers, 
                              pin_memory=True, shuffle=True)
    label_loader = DataLoader(labelled_set, batch_size=batch_size, num_workers=num_workers, 
                              pin_memory=True, shuffle=True)
    #Not shuffled: the predictions are attached to the rows by position, so
    #they must come back in the dataset's row order.
    unlabel_loader = DataLoader(unlabelled_set, batch_size=batch_size, num_workers=num_workers, 
                                pin_memory=True, shuffle=False)
    label_dataloader = {'training': label_loader, 'validation': valid_loader}
    dataset_sizes_l = {'training': len(labelled_set), 'validation': len(valid_set)}
    class_names = [0, 1] #Binary class labels.

    #Fresh pre-trained model per fold so no fold trains on images that are
    #another fold's validation data.
    ns_model = BaseEfficientNet().to(device)

    #Noisy student loop.
    for iteration in range(NS_iterations):
      #New optimiser/scheduler each round: a StepLR shared across rounds has
      #already decayed the learning rate to almost zero after the first one.
      ns_optimizer = optim.SGD(ns_model.parameters(), lr=learning_rate, momentum=momentum)
      ns_scheduler = StepLR(ns_optimizer, step_size=step_size, gamma=gamma)

      if iteration == 0:
        print("TRAINING TEACHER:")
        noisyStudentTraining(ns_model, label_dataloader, dataset_sizes_l, 
                             criterion, ns_optimizer, ns_scheduler, 
                             num_epochs = epochs, fold = fold) #Train first teacher model on labelled images.

      else:
        ns_model.eval()

        #Predict pseudolabels from unlabelled images.
        infer_pseudolabels = []
        with torch.no_grad():
          for inputs, _ in unlabel_loader:
            inputs = inputs.to(device)
            output = ns_model(inputs) #Teacher model prediction.
            hard_preds = torch.argmax(output, dim = -1) #Hard labels.
            infer_pseudolabels.extend(hard_preds.cpu().numpy())

        #Attach pseudolabels with unlabelled images and send to dataloader.
        #Start from the dataset's own (file-filtered) rows so the lengths match,
        #and copy to avoid assigning into a slice of another DataFrame.
        pseudolabel_df = unlabelled_set.img_labels[["Patient_no", "Patient_ID"]].copy()
        pseudolabel_df['labels'] = infer_pseudolabels
        pseudolabel_set = CellsDataset(pseudolabel_df, train_images)
        pseudolabel_loader = torch.utils.data.DataLoader(pseudolabel_set, batch_size=batch_size, shuffle=True)
        pseudo_dataloader = {'training': pseudolabel_loader, 'validation': valid_loader}
        dataset_sizes_u = {'training': len(pseudolabel_set), 'validation': len(valid_set)}

        #NOTE(review): the student is trained on the pseudolabelled images only;
        #confirm whether the labelled images should be included as well.
        print(f"TRAINING STUDENT ITERATION: {iteration}")
        noisyStudentTraining(ns_model, pseudo_dataloader, dataset_sizes_u, 
                             criterion, ns_optimizer, ns_scheduler, 
                             num_epochs = epochs, fold = fold) #Train student model on pseudolabelled images.


## **Create Ensemble Models** ##
Combine base models after training to form ensemble models.

In [None]:
#Create ensemble models from base models.
#createEnsemble loads the weight files found in the given folder and returns
#a dictionary of base models keyed 'model1', 'model2', ...
ensembleEN_model = createEnsemble(BaseEfficientNet(), baseEN_weights)
ensembleNS_model = createEnsemble(BaseEfficientNet(), baseNS_weights)

# **Testing and Evaluation** #

## **Testing Function** ##

In [None]:
#Helper: percentage that is NaN (instead of an error) when the denominator is zero.
def _percentage(numerator, denominator):
    if denominator == 0:
        return float('nan')
    return 100 * numerator / denominator


#Testing function for base and ensemble models.
def test_model(model, stats_path, ensemble=False, fold = ''):
    """Evaluate a base model or an ensemble on ``test_dataloader`` and log the results.

    Reads the module-level ``test_dataloader`` and ``device``. Class 1 is
    treated as the positive class.

    Args:
        model: A single network, or (with ``ensemble=True``) the dictionary of
            base models passed on to ``get_ensembleVoting``.
        stats_path: Path prefix; results are appended to ``{stats_path}_stats.log``.
        ensemble: Whether ``model`` is an ensemble dictionary.
        fold: Header line written to the log for base models.
    """
    #Message to indicate base or ensemble model evaluation.
    if not ensemble:
        print("Evaluating base models...")
        #Evaluation mode, otherwise dropout stays active during testing.
        model.eval()
    else:
        print("Evaluating ensemble model...")

    #Create empty confusion matrix, laid out as [[TP, FN], [FP, TN]].
    confusion_matrix = np.zeros((2,2), dtype=int)

    with torch.no_grad():
        #Iterate over testing images.
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if ensemble:
                prediction = get_ensembleVoting(model, inputs) #Ensemble voting.
            else:
                prediction = torch.argmax(model(inputs), dim=1) #Hard labels.

            #Add predictions to confusion matrix.
            for predicted, actual in zip(prediction.tolist(), labels.tolist()):
                if predicted == 1 and actual == 1:
                    confusion_matrix[0][0] += 1 #TP
                elif predicted == 1 and actual == 0:
                    confusion_matrix[1][0] += 1 #FP
                elif predicted == 0 and actual == 1:
                    confusion_matrix[0][1] += 1 #FN
                elif predicted == 0 and actual == 0:
                    confusion_matrix[1][1] += 1 #TN

    #Obtain results from confusion matrix (once, after all batches are counted).
    TP = int(confusion_matrix[0][0])
    FP = int(confusion_matrix[1][0])
    FN = int(confusion_matrix[0][1])
    TN = int(confusion_matrix[1][1])

    #Test statistics.
    #Sensitivity/specificity are rates over the actual classes, PPV/NPV over
    #the predicted classes (the previous formulas had each pair swapped).
    accuracy = _percentage(TP + TN, TP + FP + TN + FN)
    sensitivity = _percentage(TP, TP + FN)
    specificity = _percentage(TN, TN + FP)
    PPV = _percentage(TP, TP + FP)
    NPV = _percentage(TN, TN + FN)
    if PPV + sensitivity == 0:
        F1 = float('nan')
    else:
        F1 = 2*(PPV*sensitivity)/(PPV+sensitivity)

    #Print test statistics.
    print('-----------------------')
    print('PREDICTION STATISTICS')
    print('-----------------------')
    print('True Positives: ' + str(TP))
    print('False Positives: ' + str(FP))
    print('False Negatives: ' + str(FN))
    print('True Negatives: ' + str(TN))

    print('-----------------------')
    print('EVALUATION STATISTICS')
    print('-----------------------')
    print('Accuracy: %f %%' % (accuracy))
    print('Sensitivity: %f %%' % (sensitivity))
    print('Specificity: %f %%' % (specificity))
    print('PPV: %f %%' % (PPV))
    print('NPV: %f %%' % (NPV))
    print('F1 Score: %f %%' % (F1))

    #Save test statistics as log files.
    stat_file = f"{stats_path}_stats.log"
    header = 'ENSEMBLE MODEL:' if ensemble else fold
    report_lines = [
        header,
        'True Positives: ' + str(TP),
        'False Positives: ' + str(FP),
        'False Negatives: ' + str(FN),
        'True Negatives: ' + str(TN),
        '',
        'Accuracy: %f %%' % (accuracy),
        'Sensitivity: %f %%' % (sensitivity),
        'Specificity: %f %%' % (specificity),
        'PPV: %f %%' % (PPV),
        'NPV: %f %%' % (NPV),
        'F1 Score: %f %%' % (F1),
    ]
    with open(stat_file, 'a') as log:
        log.write('\n'.join(report_lines) + '\n')

## Evaluating Models

In [None]:
#Convert testing set to custom class.
test_df = pd.read_csv(test_labels)[["Patient_no", "Patient_ID", "labels"]]
test_set = CellsDataset(test_df, test_images)

#Send testing set to dataloader (order is irrelevant for the statistics).
test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

#Evaluate base EfficientNet models.
#Only weight files are considered, in sorted order for reproducible logs.
EN_weights = sorted(w for w in os.listdir(baseEN_weights) if w.endswith('.pth'))
for weight in EN_weights:
  pth = baseEN_weights + '/'+ weight
  model = BaseEfficientNet()
  #map_location lets weights saved on a GPU load on a CPU-only runtime.
  model.load_state_dict(torch.load(pth, map_location=device))
  model.to(device) #load model to gpu

  #Test results go to the testing folder, not the training stats folder.
  name = weight.split('.')[0]
  path = baseEN_test + '/' + name
  test_model(model, stats_path=path, ensemble=False, fold=name)

#Evaluate base Noisy Student models.
NS_weights = sorted(w for w in os.listdir(baseNS_weights) if w.endswith('.pth'))
for weight in NS_weights:
  pth = baseNS_weights + '/'+ weight
  model = BaseEfficientNet()
  model.load_state_dict(torch.load(pth, map_location=device))
  model.to(device) #load model to gpu

  name = weight.split('.')[0]
  path = baseNS_test + '/' + name
  test_model(model, stats_path=path, ensemble=False, fold=name)

#Evaluate ensemble models.
#test_model appends "_stats.log" to the prefix, so give it a file prefix inside
#the ensemble folder rather than the folder path itself.
test_model(ensembleEN_model, stats_path=ensembleEN_test + '/ensemble', ensemble=True)
test_model(ensembleNS_model, stats_path=ensembleNS_test + '/ensemble', ensemble=True)