In [35]:
# ! pip install kornia
# ! pip install kornia[x]

In [1]:
# Importing stock ml libraries
import os
import time
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from kornia.losses import BinaryFocalLossWithLogits

In [2]:
# Pick the compute device: 'cuda' when a CUDA device is usable, otherwise 'cpu'.
from torch import cuda
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [3]:
# Root anomaly labels (source = ASRS coding forms), ordered by descending frequency.
# load_data() treats each entry as a prefix: a report gets the label when any of its
# ';'-separated 'Anomaly' items starts with that string.
ANOMALY_LABELS = [
    'Deviation / Discrepancy - Procedural',
    'Aircraft Equipment',
    'Conflict',
    'Inflight Event / Encounter',
    'ATC Issue',
    'Deviation - Altitude',
    'Deviation - Track / Heading',
    'Ground Event / Encounter',
    'Flight Deck / Cabin / Aircraft Event',
    'Ground Incursion',
    'Airspace Violation',
    'Deviation - Speed',
    'Ground Excursion',
    'No Specific Anomaly Occurred',
]

In [4]:
def load_data(path, labels, add_other=False, pp_path=None):
    """Load a pickled report table and attach a multi-hot label vector to each row.

    The object unpickled from ``path`` is indexed with ``[0]`` to obtain a
    DataFrame that must contain an ``'Anomaly'`` column (and a ``'Narrative'``
    column when ``pp_path`` is not given). Rows whose ``'Anomaly'`` value is
    missing are dropped; the original index is kept.

    Args:
        path: Path of the pickle holding the raw table.
        labels: Iterable of label prefixes. A row gets a 1 for a prefix when
            any of its ';'-separated, stripped ``'Anomaly'`` items starts with
            that prefix.
        add_other: When True, append an ``'Other'`` slot that is 1 for rows
            matching none of ``labels``.
        pp_path: Optional path of a second pickle whose content is assigned to
            the ``'text'`` column (pandas aligns it on the index when it is a
            Series). When None, ``'Narrative'`` is used as the text.

    Returns:
        A DataFrame with the columns ``'text'`` and ``'labels'``; each
        ``'labels'`` entry is a list of 0/1 ints ordered like ``labels``
        (plus the trailing ``'Other'`` slot when requested).
    """
    # NOTE(review): unpickling can execute arbitrary code - only load trusted files.
    # .copy() gives an independent frame, so the column assignments below never
    # write into (or warn about) a view of the unpickled object.
    loaded_data = pd.read_pickle(path)[0].dropna(subset=['Anomaly']).copy()

    # Split each 'Anomaly' string into its individual, whitespace-stripped items.
    anomaly_list = loaded_data['Anomaly'].str.split(';').apply(
        lambda items: [item.strip() for item in items]
    )

    # One 0/1 column per label prefix, aligned on the index of the kept rows.
    anomaly_df = pd.DataFrame(index=loaded_data.index)
    for prefix in labels:
        anomaly_df[prefix] = anomaly_list.apply(
            # Bind the prefix eagerly so the lambda never depends on loop state.
            lambda anomalies, prefix=prefix: any(anomaly.startswith(prefix) for anomaly in anomalies)
        ).astype(int)

    # Add the 'Other' category for rows that matched no prefix.
    if add_other:
        anomaly_df['Other'] = (anomaly_df.sum(axis=1) == 0).astype(int)

    # Convert the whole 0/1 matrix to per-row lists in one step instead of a
    # Python-level row-wise DataFrame.apply.
    loaded_data['labels'] = pd.Series(anomaly_df.to_numpy().tolist(), index=anomaly_df.index)

    if pp_path is None:
        loaded_data['text'] = loaded_data["Narrative"]
    else:
        loaded_data['text'] = pd.read_pickle(pp_path)

    return loaded_data[['text', 'labels']]

In [5]:
# from google.colab import drive
# drive.mount('/content/drive')

In [9]:
# Key settings used by the training cells below.

# Base name for checkpoint files; None makes save/load fall back to model.model_name.
MODEL_NAME = None
MODEL_DIRECTORY = "model_save"


# True -> the 'text' column is read from the separately pre-processed pickle (pp_path).
ABBREVIATION = True
# ABBREVIATION = False


LOSS_TYPE = 'BCE'
# LOSS_TYPE = 'BinaryFocal'

# True -> per-label pos_weight is computed from the training targets.
# BALANCED = False
BALANCED = True

# Encoder layer indices left trainable (classifier is always trainable).
# LAYERS_TO_UNFREEZE = None
LAYERS_TO_UNFREEZE = [8, 9, 10, 11]

# ENCODER_NAME = 'bert-base-uncased'
ENCODER_NAME = 'NASA-AIML/MIKA_SafeAeroBERT'
# ENCODER_NAME = 'allenai/longformer-base-4096'

MAX_LEN = 512
# MAX_LEN = 1024

TRAIN_EFFECTIVE_BATCH_SIZE = 32 # 32 Effective size for NASA
TRAIN_BATCH_SIZE = 32
# A step count must be a whole number: fail early on a non-divisible pair and use
# integer division so the result is an int rather than a float such as 1.0.
assert TRAIN_EFFECTIVE_BATCH_SIZE % TRAIN_BATCH_SIZE == 0, \
    "TRAIN_EFFECTIVE_BATCH_SIZE must be a multiple of TRAIN_BATCH_SIZE"
ACCUMULATION_STEPS = TRAIN_EFFECTIVE_BATCH_SIZE // TRAIN_BATCH_SIZE
VALID_BATCH_SIZE = TRAIN_BATCH_SIZE
EPOCHS = 5 # 5 Epochs for NASA
LEARNING_RATE = 1e-05 * 2 # 0.00002 Rate for NASA

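**TODO:** decide whether rows with a missing (`NaN`) `Anomaly` value should be dropped; `load_data` currently drops them with `dropna(subset=['Anomaly'])`.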

In [6]:
# With ABBREVIATION on, the text comes from the pre-processed pickle; otherwise
# pp_path stays None and load_data falls back to the raw 'Narrative' column.
train_pp_path = "./data/train_data_processed2.pkl" if ABBREVIATION else None
train_df = load_data("./data/train_data_final.pkl", ANOMALY_LABELS, pp_path=train_pp_path)
train_df

Unnamed: 0_level_0,text,labels
ACN,Unnamed: 1_level_1,Unnamed: 2_level_1
1163382,I pilot flying performing takeoff . During tak...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
893734,We 6 shipments dry ice flight ; cooling fresh ...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
991883,I seen lot mistakes every flight since changed...,"[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]"
1590076,It first time flying KEUG I pilot flying . The...,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1715282,I writing report bring attention second depart...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...
622204,WE WERE En Route IN Lateral Navigation AT FL31...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
622205,CLRED BY Tower Control TO CROSS Runway 8R/26L ...,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
661202,WHILE WORKING NUMEROUS CVG AND CMH DEPS AT A C...,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
733747,ON MIDNIGHT SHIFT ; Approximately XA00 Local T...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [7]:
# Same switch as for the training split: pre-processed text when ABBREVIATION is on,
# raw 'Narrative' text (pp_path=None) otherwise.
test_pp_path = "./data/test_data_processed2.pkl" if ABBREVIATION else None
test_df = load_data("./data/test_data_final.pkl", ANOMALY_LABELS, pp_path=test_pp_path)

test_df

Unnamed: 0_level_0,text,labels
ACN,Unnamed: 1_level_1,Unnamed: 2_level_1
1014798,Flying SLC DELTA THREE Area Navigation arrival...,"[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1806744,ORD busy east flow arrival push . The weather ...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1044902,B737-800 vectored Instrument Landing System Ru...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1764093,We 6 mile final tower cleared Cirrus land fron...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
1786435,During Climb Leveled 17 ; 000 departure switch...,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...
1310569,FO flying visual approach runway 26 ZZZ . Wind...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1482118,While assembling GE C2 transfer gearbox ; I no...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1565471,Nearing end hot ; bumpy four-hour Instrument F...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
980316,On approach gear went noticed yellow hash mark...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [10]:
class CustomDataset(Dataset):
    """Dataset over a DataFrame with ``text`` and ``labels`` columns.

    Each item is tokenized on access. ``tokenizer`` is called as
    ``tokenizer(text, None, ...)`` and must return a mapping providing
    ``'input_ids'``, ``'attention_mask'`` and ``'token_type_ids'``.
    """

    def __init__(self, dataframe, tokenizer, max_len):
        self.data = dataframe
        self.text = dataframe.text
        self.targets = dataframe.labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        """Return the tensors for the row at *position* ``index`` (``iloc`` lookup)."""
        # Collapse every run of whitespace into a single space before tokenizing.
        raw_text = str(self.text.iloc[index])
        normalized_text = " ".join(raw_text.split())

        # Pad/truncate to exactly max_len so samples can be stacked into a batch.
        encoded = self.tokenizer(
            normalized_text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )

        # (output key, tokenizer key) pairs, kept in the order the batches expose them.
        key_pairs = (
            ('ids', 'input_ids'),
            ('mask', 'attention_mask'),
            ('token_type_ids', 'token_type_ids'),
        )
        sample = {
            out_key: torch.tensor(encoded[source_key], dtype=torch.long)
            for out_key, source_key in key_pairs
        }
        sample['targets'] = torch.tensor(self.targets.iloc[index], dtype=torch.float)
        return sample


In [11]:
class SequenceClassificationModel(torch.nn.Module):
    """Thin wrapper around ``AutoModelForSequenceClassification`` that returns raw logits.

    Attributes:
        original_name: The identifier passed to ``from_pretrained``.
        model_name: File-system friendly run name ('/' replaced by '_'). Other
            cells append suffixes to it and use it as the checkpoint base name.
        l1: The wrapped Hugging Face sequence-classification model.
    """

    def __init__(self, model_name='bert-base-uncased', num_labels=15):
        super(SequenceClassificationModel, self).__init__()
        self.original_name = model_name
        self.model_name = model_name.replace("/", "_")
        self.l1 = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    def forward(self, ids, mask, token_type_ids):
        """Run the wrapped model and return only its ``logits``."""
        output = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        return output.logits

    def tokenizer(self):
        """Load and return the tokenizer for ``original_name`` (loaded anew on every call)."""
        return AutoTokenizer.from_pretrained(self.original_name)

    def _set_layer_trainable(self, layer, trainable):
        """Set ``requires_grad`` on every parameter of ``layer``."""
        for param in layer.parameters():
            param.requires_grad = trainable

    def _find_and_set_encoder_layers(self, module, layer_nums, trainable):
        """Depth-first search for the first sub-module exposing ``.encoder``.

        On that module, ``encoder.layer[i]`` is switched for every ``i`` in
        ``layer_nums``; out-of-range indices are reported and skipped.

        Returns:
            True when a module with an ``encoder`` attribute was found.
        """
        if hasattr(module, 'encoder'):
            for layer_num in layer_nums:
                try:
                    self._set_layer_trainable(module.encoder.layer[layer_num], trainable)
                except IndexError:
                    print(f"Layer {layer_num} not found in the encoder.")
            return True
        else:
            for child in module.children():
                if self._find_and_set_encoder_layers(child, layer_nums, trainable):
                    return True
        return False

    def set_trainable_layers(self, layer_nums=None):
        """Freeze the model, then unfreeze the task head and the chosen encoder layers.

        Args:
            layer_nums: Encoder layer indices to leave trainable. ``None`` (or
                an empty list) keeps the whole encoder frozen.

        Side effects:
            Appends ``_Unfrozen{layer_nums}`` to ``model_name`` once per
            distinct ``layer_nums`` value.
        """
        if layer_nums is not None:
            suffix = f'_Unfrozen{layer_nums}'
            # Only tag the name once: a repeated call must not change the
            # checkpoint file name that is derived from model_name.
            if suffix not in self.model_name:
                self.model_name = f'{self.model_name}{suffix}'

        # Freeze all parameters first
        for param in self.parameters():
            param.requires_grad = False

        # Unfreeze classifier layers
        if hasattr(self.l1, 'classifier'):
            self._set_layer_trainable(self.l1.classifier, True)

        # Unfreeze any pooler as well. When the checkpoint ships without pooler
        # weights they are randomly initialised on load; leaving them frozen would
        # put a fixed random projection between the encoder and the classifier.
        for module_path, module in self.l1.named_modules():
            if module_path.rsplit('.', 1)[-1] == 'pooler':
                self._set_layer_trainable(module, True)

        # Attempt to find and unfreeze encoder layers
        if not self._find_and_set_encoder_layers(self.l1, layer_nums or [], True):
            print("Encoder layers not found in the model.")



In [15]:
# The classifier width equals the length of one encoded label vector.
num_labels = len(test_df.labels.iloc[0])
model = SequenceClassificationModel(ENCODER_NAME, num_labels=num_labels)

# Tag the run name (used for checkpoint files) when the pre-processed text is in use.
if ABBREVIATION:
    model.model_name = f"{model.model_name}_Abbreviated"

model.set_trainable_layers(LAYERS_TO_UNFREEZE)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at NASA-AIML/MIKA_SafeAeroBERT and are newly initialized: ['bert.pooler.dense.bias', 'classifier.bias', 'bert.pooler.dense.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SequenceClassificationModel(
  (l1): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_fea

In [16]:
# Wrap both splits in CustomDataset objects that share the model's tokenizer.
print(f"TRAIN Dataset: {train_df.shape}")
print(f"TEST Dataset: {test_df.shape}")

tokenizer = model.tokenizer()
training_set, testing_set = (
    CustomDataset(split_df, tokenizer, MAX_LEN) for split_df in (train_df, test_df)
)

TRAIN Dataset: (96986, 2)
TEST Dataset: (10805, 2)


In [17]:
# Loader settings: only the training loader shuffles; both use two worker processes.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=2)
test_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=2)

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [18]:
# Ask PyTorch's CUDA caching allocator to release the cached memory it is not using.
torch.cuda.empty_cache()

In [19]:
def binary_accuracy_per_label(y_true, y_pred):
    """Return, for each column, the fraction of rows where prediction equals target.

    Both arguments are compared element-wise and averaged over axis 0, so the
    result has one entry per label column.
    """
    matches = (y_pred == y_true).float()
    return matches.mean(axis=0)

def binary_accuracy_averaged(y_true, y_pred):
    """Return the mean of the per-label binary accuracies (unweighted over labels)."""
    return binary_accuracy_per_label(y_true, y_pred).mean()

def custom_classification_report(y_true, y_pred):
    """Build sklearn's classification report and add a ``binary_accuracy`` entry per row.

    Rows for the individual labels receive their own per-label accuracy. The
    remaining rows of the report (the averages sklearn appends after the labels)
    all receive the mean of the per-label accuracies.

    Returns:
        dict mapping each report row name to a dict that starts with
        ``'binary_accuracy'`` followed by the metrics sklearn reported.
    """
    report = metrics.classification_report(
        y_true,
        y_pred,
        target_names=ANOMALY_LABELS,
        zero_division=0,
        output_dict=True,
    )
    accuracy = binary_accuracy_per_label(y_true, y_pred)

    # Pad the accuracy vector with its mean so it lines up with every report row.
    summary_row_count = len(report) - len(accuracy)
    extended_accuracy_new = np.append(accuracy, [accuracy.mean()] * summary_row_count)

    return {
        row_name: {'binary_accuracy': row_accuracy, **report[row_name]}
        for row_name, row_accuracy in zip(report.keys(), extended_accuracy_new)
    }


In [21]:
def loss(model, name='BCE', balance=False, dataset=None, dataloader=None, device='cpu'):
    """Create the training criterion and record the choice in ``model.model_name``.

    Args:
        model: Object with a ``model_name`` string attribute; ``_{name}`` (and
            ``-Balanced`` when ``balance`` is set) is appended to it.
        name: ``'BCE'`` for ``BCEWithLogitsLoss`` or ``'BinaryFocal'`` for
            ``BinaryFocalLossWithLogits``.
        balance: When True, a per-label ``pos_weight`` of
            ``(#negatives / #positives)`` is computed from the data.
        dataset: Indexable dataset; ``dataset[0]['targets']`` gives the label count.
        dataloader: Loader yielding dicts with a ``'targets'`` batch; its
            ``.dataset`` length is used as the number of observations.
        device: Device on which the positive counts are accumulated.

    Returns:
        The configured loss module.

    Raises:
        ValueError: If ``name`` is unknown, or ``balance`` is requested without data.
    """
    # Validate before touching model.model_name so a failed call leaves the
    # checkpoint name untouched.
    if name not in ('BCE', 'BinaryFocal'):
        raise ValueError("loss not known")

    pos_weight = None
    if balance:
        if dataset is None or dataloader is None:
            raise ValueError("balance is set to True, but the data is not given")

        # Count the positives of every label over one full pass of the loader.
        # NOTE(review): this pass also runs whatever per-item work the dataset does
        # (tokenization for CustomDataset) although only 'targets' is needed.
        num_labels = len(dataset[0]['targets'])
        pos_num = torch.zeros(num_labels, device=device)
        for data in dataloader:
            pos_num += data['targets'].to(device).sum(dim=0)
        nobs = len(dataloader.dataset)

        # A label without any positive would divide by zero (inf weight, NaN loss).
        # Clamping the denominator keeps the weight finite; the value is irrelevant
        # for such a label because the weight only scales positive targets.
        pos_weight = (nobs - pos_num) / pos_num.clamp(min=1)

    if name == 'BCE':
        criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    else:
        criterion = BinaryFocalLossWithLogits(pos_weight=pos_weight, gamma=0.5, alpha=1, reduction='mean')

    model.model_name += f'_{name}'
    if balance:
        model.model_name += "-Balanced"
    return criterion
    


In [22]:
# Criterion, optimizer and the table of metrics reported after every pass.
loss_fn = loss(
    model,
    name=LOSS_TYPE,
    balance=BALANCED,
    dataset=training_set,
    dataloader=training_loader,
    device=device,
)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
metrics_dict = {
    "Custom Classifcation Report": lambda targets, predictions: custom_classification_report(targets, predictions)
    # "Binary Accuracy Macro": lambda outputs, targets: binary_accuracy_averaged(targets, outputs, threshold=0.5),
    # "Binary Accuracy per Class": binary_accuracy_per_label,
    # "F1 Score Micro": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='micro', zero_division=1),
    # "F1 Score Macro": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='macro', zero_division=1),
    # "F1 Scores per Class": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average=None, zero_division=1)
}


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [23]:
def save_model(model, epoch, directory='model_save', model_name=None):
    """
    Saves the model's ``state_dict`` to ``{directory}/{model_name}_epoch_{epoch}.pth``.

    Args:
    model (torch.nn.Module): The model to save.
    epoch (int): The current epoch number; becomes part of the file name.
    directory (str): Directory to save into; created if it does not exist.
    model_name (str | None): Base file name. Defaults to ``model.model_name``.
    """
    if model_name is None:
        model_name = model.model_name

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(directory, exist_ok=True)

    file_path = os.path.join(directory, f"{model_name}_epoch_{epoch}.pth")

    torch.save(model.state_dict(), file_path)
    print(f'Model saved at {file_path}')

In [24]:
def load_model(model, directory='model_save', model_name=None, epoch=None):
    """
    Loads a saved ``state_dict`` into ``model`` and moves the model to ``device``.

    The file read is ``{directory}/{model_name}_epoch_{epoch}.pth``. When nothing
    can be loaded a message is printed and the model is left untouched.

    Args:
    model (torch.nn.Module): The model to load state into.
    directory (str): Directory containing the checkpoints.
    model_name (str | None): Base file name. Defaults to ``model.model_name``.
    epoch (int | None): Epoch to load. Defaults to the highest saved epoch.
    """
    if model_name is None:
        model_name = model.model_name

    if epoch is None:
        epoch = find_last_saved_epoch(directory, model_name)
        if epoch == -1:
            print("No saved model found.")
            return

    file_path = os.path.join(directory, f"{model_name}_epoch_{epoch}.pth")
    if not os.path.exists(file_path):
        print(f"No model file found at {file_path}")
        return

    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU can also be restored on a CPU-only machine.
    model.load_state_dict(torch.load(file_path, map_location=device))
    model.to(device)
    print(f'Model loaded from {file_path}')

In [60]:
def find_last_saved_epoch(directory='model_save', model_name=None):
    """
    Finds the highest epoch number among ``{model_name}_epoch_{N}.*`` files.

    Directory entries that do not follow this pattern (other models' files,
    notebook checkpoint folders, notes, non-numeric epochs) are ignored.

    Args:
    directory (str): The directory where models are saved.
    model_name (str | None): Base file name to look for. Defaults to the
        ``model_name`` of the notebook-level ``model`` object.

    Returns:
    int: The last saved epoch number. Returns -1 if the directory does not
    exist or holds no matching checkpoint.
    """
    if model_name is None:
        model_name = model.model_name

    # A missing directory simply means nothing has been saved yet.
    if not os.path.isdir(directory):
        return -1

    prefix = f"{model_name}_epoch_"
    saved_epochs = []
    for filename in os.listdir(directory):
        if not filename.startswith(prefix):
            continue
        # Text between the prefix and the first '.' is the epoch number.
        epoch_token = filename[len(prefix):].split('.')[0]
        if epoch_token.isdigit():
            saved_epochs.append(int(epoch_token))

    return max(saved_epochs, default=-1)

In [26]:
def process_batch(model, batch_data, device, loss_fn, mode, optimizer=None, accumulate_gradients=False):
    """Run the model on one batch and, in 'train' mode, back-propagate.

    Args:
        model: Callable as ``model(ids, mask, token_type_ids)`` returning logits.
        batch_data: Mapping with 'ids', 'mask', 'token_type_ids' and 'targets'.
        device: Device the batch tensors are moved to.
        loss_fn: Callable ``loss_fn(outputs, targets)``.
        mode: 'train' enables the backward pass; any other value runs under
            ``torch.no_grad()``.
        optimizer: Stepped and zeroed after the backward pass unless
            ``accumulate_gradients`` is set.
        accumulate_gradients: When True, gradients are left in place for the caller.

    Returns:
        Tuple ``(outputs, targets, loss)``.
    """
    ids, mask, token_type_ids = (
        batch_data[key].to(device, dtype=torch.long)
        for key in ('ids', 'mask', 'token_type_ids')
    )
    targets = batch_data['targets'].to(device, dtype=torch.float)

    # Inference path: no autograd graph is built.
    if mode != 'train':
        with torch.no_grad():
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
        return outputs, targets, loss

    # Training path: forward, backward and (unless accumulating) parameter update.
    outputs = model(ids, mask, token_type_ids)
    loss = loss_fn(outputs, targets)
    loss.backward()
    if not accumulate_gradients:
        optimizer.step()
        optimizer.zero_grad()

    return outputs, targets, loss


In [27]:
def calculate_metrics(metrics_dict, targets, outputs, is_logit=True, thresholds=0.5, percentile=None):
    """Binarize model outputs and evaluate every metric in ``metrics_dict``.

    Args:
        metrics_dict: Mapping of metric name to ``fn(targets, predictions)``.
        targets: Tensor of ground-truth labels.
        outputs: Tensor of model outputs, indexed as ``outputs[:, label]``.
        is_logit: When True, ``outputs`` go through a sigmoid first.
        thresholds: Scalar or per-label sequence; ``None`` means 0.5.
        percentile: When given, overrides ``thresholds`` with the per-label
            ``np.percentile`` of the scores.

    Returns:
        dict of results. Metrics named "F1 Scores per Class" or
        "Binary Accuracy per Class" are expanded into one
        ``"{metric} - {label}"`` entry per score.
    """
    per_class_metric_names = ("F1 Scores per Class", "Binary Accuracy per Class")
    labels = ANOMALY_LABELS

    scores = torch.sigmoid(outputs) if is_logit else outputs

    if thresholds is None:
        thresholds = 0.5
    if percentile is not None:
        # One cut-off per label column, taken from that column's score distribution.
        thresholds = np.array([
            np.percentile(scores[:, column].detach().cpu().numpy(), percentile)
            for column in range(scores.shape[1])
        ])

    # Scores at or above the threshold count as positive predictions.
    predictions = (scores >= torch.tensor(thresholds, device=scores.device)).float()

    results = {}
    for metric_name, metric_fn in metrics_dict.items():
        metric_value = metric_fn(targets.cpu(), predictions.cpu())
        if metric_name not in per_class_metric_names:
            results[metric_name] = metric_value
            continue
        for i, score in enumerate(metric_value):
            label = labels[i] if i < len(labels) else f"Class {i}"
            results[f"{metric_name} - {label}"] = score

    return results


In [28]:
def format_value(val):
    """Return floats (Python, NumPy or floating-point tensors) as a 4-decimal string.

    Any other value is returned unchanged.
    """
    float_types = (float, np.float16, np.float32, np.float64)
    float_tensor_dtypes = [torch.float16, torch.float32, torch.float64]

    if isinstance(val, float_types):
        return f"{val:.4f}"
    if isinstance(val, torch.Tensor) and val.dtype in float_tensor_dtypes:
        return f"{val.item():.4f}"
    return val

def print_metrics_results(metrics_results):
    """Print a metrics dict: scalars and flat dicts first, nested dicts afterwards.

    A value counts as nested when it is a dict with at least one dict among its
    values; those are printed as an indented block with aligned row names.
    """
    def has_nested_dict(value):
        return isinstance(value, dict) and any(isinstance(v, dict) for v in value.values())

    def render_pairs(mapping):
        return ", ".join([f"{k}: {format_value(v)}" for k, v in mapping.items()])

    # First pass: print everything that fits on one line, remember the rest.
    nested_metrics = {}
    for metric, value in metrics_results.items():
        if has_nested_dict(value):
            nested_metrics[metric] = value
        elif isinstance(value, dict):
            print(f"{metric}: {render_pairs(value)}")
        else:
            print(f"{metric}: {format_value(value)}")

    # Second pass: one block per nested dict, row names padded to a common width.
    for metric, value in nested_metrics.items():
        print(f"\n{metric}:")
        max_key_length = max(len(str(k)) for k in value.keys())
        for sub_key, sub_dict in value.items():
            formatted_key = f"{sub_key}:".ljust(max_key_length + 2)
            print(f"  {formatted_key} {render_pairs(sub_dict)}")


In [29]:
def print_batch_results(mode, epoch, batch, dataset_size, loss, start_time, batch_start_time, batch_size):
    """Overwrite the current console line with the progress of one batch.

    The sample counter is ``(batch + 1) * batch_size``, i.e. it assumes every
    batch so far had ``batch_size`` samples.
    """
    now = time.time()
    step_number = batch + 1
    samples_done = step_number * batch_size
    epoch_prefix = "" if epoch is None else f"Epoch: {epoch+1}, "

    progress_part = (
        f"\r{mode.capitalize()} - {epoch_prefix}Batch: {step_number} "
        f"[{samples_done:>5d}/{dataset_size:>5d}], "
    )
    timing_part = (
        f"Time: {now - start_time:.0f}s {(now - batch_start_time) * 1000:.0f}ms/step, "
        f"Loss: {loss:>7f}"
    )
    # '\r' plus end="" keeps all updates on a single line.
    print(progress_part + timing_part, end="")


In [30]:
def process_batches(mode, model, loader, device, loss_fn, optimizer=None, epoch=None, accumulation_steps=None):
    """Run one pass over ``loader`` and collect logits, targets and the mean loss.

    Args:
        mode: 'train' to update parameters; any other value only evaluates.
        model, device, loss_fn, optimizer: Forwarded to ``process_batch``.
        loader: Iterable of batches with ``len()`` and a ``.dataset`` attribute.
        epoch: Zero-based epoch index for the progress line (None hides it).
        accumulation_steps: Number of batches whose gradients are summed before
            one optimizer step. ``None``, 0 or 1 means a step after every batch.

    Returns:
        Tuple ``(avg_loss, all_outputs, all_targets)``: the mean of the
        per-batch losses and the concatenated, detached logits and targets.
    """
    steps = int(accumulation_steps) if accumulation_steps else 1
    # Only accumulate when it actually spans several batches. In that case
    # process_batch must NOT step the optimizer itself, otherwise every batch
    # triggers an update and nothing is ever accumulated.
    accumulate = mode == 'train' and optimizer is not None and steps > 1

    if accumulate:
        # Scale each micro-batch loss so the summed gradient equals the gradient
        # of the mean loss over the whole accumulated group.
        def batch_loss_fn(outputs, targets):
            return loss_fn(outputs, targets) / steps
    else:
        batch_loss_fn = loss_fn

    total_loss = 0.0
    all_targets = []
    all_outputs = []
    pending_batches = 0  # batches back-propagated since the last optimizer step
    start_time = time.time()

    for batch, data in enumerate(loader, 0):
        batch_start_time = time.time()

        logits, targets, loss = process_batch(
            model, data, device, batch_loss_fn, mode, optimizer, accumulate_gradients=accumulate
        )
        # Undo the accumulation scaling so reported losses stay comparable.
        batch_loss = loss.item() * steps if accumulate else loss.item()
        total_loss += batch_loss

        if accumulate:
            pending_batches += 1
            if pending_batches == steps:
                optimizer.step()
                optimizer.zero_grad()
                pending_batches = 0

        # Detach from the (gradient) computation graph to save on memory
        all_outputs.append(logits.detach())
        all_targets.append(targets.detach())

        batch_size = targets.shape[0]
        print_batch_results(mode, epoch, batch, len(loader.dataset), batch_loss, start_time, batch_start_time, batch_size)

    if accumulate and pending_batches:
        # Apply the gradients of a trailing, incomplete accumulation group.
        optimizer.step()
        optimizer.zero_grad()

    all_outputs = torch.cat(all_outputs, dim=0)
    all_targets = torch.cat(all_targets, dim=0)

    print()
    avg_loss = total_loss / len(loader)
    return avg_loss, all_outputs, all_targets

In [31]:
def evaluate(model, validation_loader, loss_fn, metrics_dict, device, hyperparameters=None):
    """Evaluate ``model`` on ``validation_loader`` and print the results.

    Args:
        hyperparameters: Optional mapping; its "thresholds" and "percentile"
            entries (when present) are forwarded to ``calculate_metrics``.

    Returns:
        Tuple ``(avg_val_loss, metrics_results)``.
    """
    model.eval()
    avg_val_loss, val_outputs, val_targets = process_batches(
        'evaluate', model, validation_loader, device, loss_fn
    )

    # Missing or empty hyperparameters leave both settings at None.
    settings = hyperparameters if hyperparameters else {}
    thresholds = settings.get("thresholds", None)
    percentile = settings.get("percentile", None)

    metrics_results = calculate_metrics(
        metrics_dict, val_targets, val_outputs, thresholds=thresholds, percentile=percentile
    )

    print("Evaluation Results:")
    print(f"Average Loss: {avg_val_loss:.4f}")
    print_metrics_results(metrics_results)

    return avg_val_loss, metrics_results


In [32]:
def train(model, epoch, training_loader, validation_loader, optimizer, loss_fn, metrics_dict, device, accumulation_steps=1):
    """Train for one epoch, print the training metrics and optionally validate.

    Args:
        epoch: Zero-based epoch index (printed one-based).
        validation_loader: When None, validation is skipped.
        accumulation_steps: Forwarded to ``process_batches``.

    Returns:
        Tuple ``(avg_train_loss, avg_val_loss, val_metrics_results)``; without a
        validation loader the last two are ``None`` and ``{}``.
    """
    epoch_number = epoch + 1
    print(f"Training Epoch {epoch_number}")

    # Training phase: start from clean gradients.
    model.train()
    if optimizer is not None:
        optimizer.zero_grad()

    avg_train_loss, train_outputs, train_targets = process_batches(
        'train', model, training_loader, device, loss_fn, optimizer, epoch, accumulation_steps
    )

    # These metrics use the outputs collected while the model was in train mode.
    metrics_results = calculate_metrics(metrics_dict, train_targets, train_outputs)

    print("Train Results:")
    print(f"Average Training Loss for Epoch {epoch_number}: {avg_train_loss:.4f}")
    print_metrics_results(metrics_results)

    # Validation phase
    if validation_loader is None:
        return avg_train_loss, None, {}

    avg_val_loss, val_metrics_results = evaluate(model, validation_loader, loss_fn, metrics_dict, device)
    return avg_train_loss, avg_val_loss, val_metrics_results


In [49]:
# Show the run name built up so far; save_model/load_model use it as the checkpoint base name.
model.model_name


'NASA-AIML_MIKA_SafeAeroBERT_Unfrozen[8, 9, 10, 11]_BCE-Balanced'

In [61]:
# Look for the newest checkpoint of this run and continue right after it.
last_saved_epoch = find_last_saved_epoch(directory=MODEL_DIRECTORY, model_name=MODEL_NAME)
checkpoint_found = last_saved_epoch != -1

# -1 means "nothing saved", so a fresh run starts at epoch 0.
start_epoch = last_saved_epoch + 1
if not checkpoint_found:
    print("No saved model found.")
else:
    load_model(model, directory=MODEL_DIRECTORY, model_name=MODEL_NAME, epoch=last_saved_epoch)
    print(f"Loaded model training from epoch {start_epoch}")

Model loaded from model_save/NASA-AIML_MIKA_SafeAeroBERT_Unfrozen[8, 9, 10, 11]_BCE-Balanced_epoch_4.pth
Loaded model training from epoch 5


In [62]:
if start_epoch < EPOCHS:
    print(f"Resuming training from epoch {start_epoch + 1}")

# NOTE(review): testing_loader doubles as the validation set here, and only the model
# weights are checkpointed - the optimizer state starts fresh after a resume.
for epoch in range(start_epoch, EPOCHS):
    train_loss, val_loss, val_metrics = train(
        model, epoch, training_loader, testing_loader, optimizer, loss_fn, metrics_dict, device,
        # Take the step count from the config cell instead of a hard-coded 8, so the
        # effective batch size matches TRAIN_EFFECTIVE_BATCH_SIZE.
        accumulation_steps=int(ACCUMULATION_STEPS),
    )
    save_model(model, epoch, directory=MODEL_DIRECTORY, model_name=MODEL_NAME)
    # Additional epoch-level processing if needed

# Testing phase
avg_test_loss, test_metrics_results = evaluate(model, testing_loader, loss_fn, metrics_dict, device)
print(f"Test Results:")
print(f"Average Loss: {avg_test_loss:.4f}")
print_metrics_results(test_metrics_results)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Evaluate - Batch: 338 [ 7098/10805], Time: 108s 207ms/step, Loss: 0.359812
Evaluation Results:
Average Loss: 0.4948

Custom Classifcation Report:
  Deviation / Discrepancy - Procedural:  binary_accuracy: 0.7577, precision: 0.8409, recall: 0.7243, f1-score: 0.7782, support: 6343.0000
  Aircraft Equipment:                    binary_accuracy: 0.9036, precision: 0.8701, recall: 0.8940, f1-score: 0.8819, support: 4351.0000
  Conflict:                              binary_accuracy: 0.9259, precision: 0.7195, recall: 0.9404, f1-score: 0.8152, support: 1879.0000
  Inflight Event / Encounter:            binary_accuracy: 0.8724, precision: 0.6680, recall: 0.8568, f1-score: 0.7507, support: 2423.0000
  ATC Issue:                             binary_accuracy: 0.8822, precision: 0.6891, recall: 0.9025, f1-score: 0.7815, support: 2522.0000
  Deviation - Altitude:                  binary_accuracy: 0.8909, precision: 0.4457, recall: 0.9252, f1-score: 0.6016, support: 962.0000
  Deviation - Track / Headi

In [63]:
def optimize_thresholds(logits, targets, metrics_dict, num_labels):
    """Greedy, label-by-label search for decision thresholds.

    Starting from 0.5 for every label, each label in turn is swept over 101
    evenly spaced thresholds in [0, 1]; a candidate is kept whenever it improves
    the value of ``metrics_dict["Optimization Metric"]``.

    Args:
        logits: Model outputs passed to ``calculate_metrics`` (treated as logits).
        targets: Ground-truth labels.
        metrics_dict: Metric functions; must contain "Optimization Metric".
        num_labels: Number of label columns.

    Returns:
        Tuple ``(best_thresholds, metrics_results)`` where ``metrics_results``
        holds every metric of ``metrics_dict`` evaluated at the best thresholds.

    NOTE(review): thresholds tuned on a split should be reported on a different
    split; tuning and reporting on the same data gives optimistic scores.
    """
    # Only the optimisation metric is read during the search, so the other
    # (potentially expensive) metrics are computed once at the end, not on
    # each of the 101 * num_labels candidates.
    search_metrics = {"Optimization Metric": metrics_dict["Optimization Metric"]}

    best_global_metric = -np.inf
    best_thresholds = [0.5] * num_labels
    candidate_thresholds = np.linspace(0, 1, 101)

    # Iterate over a range of thresholds for each label
    for label in range(num_labels):
        for threshold in candidate_thresholds:
            temp_thresholds = best_thresholds.copy()
            temp_thresholds[label] = threshold
            search_results = calculate_metrics(search_metrics, targets, logits, thresholds=temp_thresholds)
            current_metric = search_results["Optimization Metric"]

            if current_metric > best_global_metric:
                best_global_metric = current_metric
                best_thresholds = temp_thresholds

    metrics_results = calculate_metrics(metrics_dict, targets, logits, thresholds=best_thresholds)
    return best_thresholds, metrics_results


In [64]:
# Run the model to get logits.
# Switch to eval mode explicitly so this cell does not depend on an earlier
# evaluate() call having left the model with dropout disabled.
model.eval()
_, logits, targets = process_batches('evaluate', model, testing_loader, device, loss_fn)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Evaluate - Batch: 338 [ 7098/10805], Time: 108s 201ms/step, Loss: 0.359812


In [65]:
# Search objective (macro F1) first, followed by the regular report metrics so the
# final result also carries them.
opt_metrics_dict = {
    "Optimization Metric": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='macro', zero_division=0),
    **metrics_dict,
}

# Optimize thresholds
best_thresholds, metrics_results = optimize_thresholds(
    logits, targets, opt_metrics_dict, num_labels=len(ANOMALY_LABELS)
)

print("Optimized Thresholds:", best_thresholds)
print_metrics_results(metrics_results)

# # Use these thresholds in your evaluation (evaluate() reads them from the "thresholds" key)
# avg_test_loss, test_metrics_results = evaluate(model, testing_loader, loss_fn, metrics_dict, device, hyperparameters={"thresholds": best_thresholds})
# print("Test Results with Optimized Thresholds:")
# print_metrics_results(test_metrics_results)