In [3]:
# Importing stock ml libraries
import os
import time
import numpy as np
import pandas as pd
from sklearn import metrics
import transformers
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig, BertForSequenceClassification
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification


In [4]:
# Setting up the device for GPU usage
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
device

'cuda'

In [5]:
# Root label (source = ASRS coding forms) : order = by descending frequency
ANOMALY_LABELS = ['Deviation / Discrepancy - Procedural',
                    'Aircraft Equipment',
                    'Conflict',
                    'Inflight Event / Encounter',
                    'ATC Issue',
                    'Deviation - Altitude',
                    'Deviation - Track / Heading',
                    'Ground Event / Encounter',
                    'Flight Deck / Cabin / Aircraft Event',
                    'Ground Incursion',
                    'Airspace Violation',
                    'Deviation - Speed',
                    'Ground Excursion',
                    'No Specific Anomaly Occurred']

In [6]:
def load_data(path, labels, add_other=False, pp_path=None):
    loaded_data = pd.read_pickle(path)[0]

    # Drop Anomaly NaN's
    loaded_data = loaded_data.dropna(subset=['Anomaly'])#.reset_index(drop=True)

    # Convert the 'Anomaly' column to a list of lists
    anomaly_series = loaded_data['Anomaly']
    anomaly_list = anomaly_series.str.split(';').apply(lambda x: [item.strip() for item in x])

    # Initialize a DataFrame to hold the one-hot-encoded anomalies
    anomaly_df = pd.DataFrame(index=loaded_data.index)

    # Populate the DataFrame with one-hot-encoded columns for each prefix
    for prefix in labels:
        anomaly_df[prefix] = anomaly_list.apply(lambda anomalies: any(anomaly.startswith(prefix) for anomaly in anomalies)).astype(int)

    # Add the 'Other' category
    if add_other:
        anomaly_df['Other'] = (anomaly_df.sum(axis=1) == 0).astype(int)

    # Assign the one-hot-encoded anomalies as a new column 'labels' to 'loaded_data'
    loaded_data['labels'] = anomaly_df.apply(lambda row: row.tolist(), axis=1)

    # Now, 'loaded_data' is a DataFrame that includes both the 'text' and 'labels' columns
    if pp_path is None:
        loaded_data['text'] = loaded_data["Narrative"]
    else:
        loaded_data['text'] = pd.read_pickle(pp_path)

    # If you want to create a new DataFrame with just 'text' and 'labels':
    final_df = loaded_data[['text', 'labels']]
    return final_df

In [7]:
# from google.colab import drive
# drive.mount('/content/drive')

drop the NaN values in Anomaly?

In [6]:
train_df = load_data("./data/train_data_final.pkl", ANOMALY_LABELS, pp_path="./data/train_data_processed2.pkl")
train_df

Unnamed: 0_level_0,text,labels
ACN,Unnamed: 1_level_1,Unnamed: 2_level_1
1163382,I pilot flying performing takeoff . During tak...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
893734,We 6 shipments dry ice flight ; cooling fresh ...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
991883,I seen lot mistakes every flight since changed...,"[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]"
1590076,It first time flying KEUG I pilot flying . The...,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1715282,I writing report bring attention second depart...,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...
622204,WE WERE En Route IN Lateral Navigation AT FL31...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
622205,CLRED BY Tower Control TO CROSS Runway 8R/26L ...,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
661202,WHILE WORKING NUMEROUS CVG AND CMH DEPS AT A C...,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
733747,ON MIDNIGHT SHIFT ; Approximately XA00 Local T...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [7]:
test_df = load_data("./data/test_data_final.pkl", ANOMALY_LABELS, pp_path="./data/test_data_processed2.pkl")
test_df

Unnamed: 0_level_0,text,labels
ACN,Unnamed: 1_level_1,Unnamed: 2_level_1
1014798,Flying SLC DELTA THREE Area Navigation arrival...,"[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1806744,ORD busy east flow arrival push . The weather ...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1044902,B737-800 vectored Instrument Landing System Ru...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1764093,We 6 mile final tower cleared Cirrus land fron...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
1786435,During Climb Leveled 17 ; 000 departure switch...,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...
1310569,FO flying visual approach runway 26 ZZZ . Wind...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1482118,While assembling GE C2 transfer gearbox ; I no...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1565471,Nearing end hot ; bumpy four-hour Instrument F...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
980316,On approach gear went noticed yellow hash mark...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [8]:

train_df = load_data("./data/train_data_final.pkl", ANOMALY_LABELS)
train_df

In [9]:
test_df = load_data("./data/test_data_final.pkl", ANOMALY_LABELS)
test_df

Unnamed: 0,text,labels
0,Flying into SLC on the DELTA THREE RNAV arriva...,"[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
1,ORD was on a very busy east flow arrival push....,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
2,B737-800 was vectored to an ILS Runway 16L app...,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
3,We were on a 6 mile final when tower cleared a...,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]"
4,During Climb we Leveled at 17;000 departure sw...,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]"
...,...,...
10800,FO was flying a visual approach to runway 26 i...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
10801,While assembling a GE C2 transfer gearbox; I n...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
10802,Nearing the end of a hot; bumpy four-hour IFR ...,"[1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
10803,On approach gear went down and noticed yellow ...,"[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [10]:
MODEL_NAME = None
MODEL_DIRECTORY = "model_save"


# Sections of configBertTokenizer
# Defining some key variables that will be used later on in the training
BALANCED = False
LAYERS_TO_UNFREEZE = None
# LAYERS_TO_UNFREEZE = [8, 9, 10, 11]

MAX_LEN = 512
# MAX_LEN = 1024

TRAIN_EFFECTIVE_BATCH_SIZE = 32 # 32 Effective size for NASA
TRAIN_BATCH_SIZE = 32
ACCUMULATION_STEPS = TRAIN_EFFECTIVE_BATCH_SIZE / TRAIN_BATCH_SIZE
VALID_BATCH_SIZE = TRAIN_BATCH_SIZE
EPOCHS = 5 # 5 Epochs for NASA
LEARNING_RATE = 1e-05 * 2 # 0.00002 Rate for NASA


A Dataframe with 10824 entries has been loaded


Unnamed: 0_level_0,Date,Local Time Of Day,Locale Reference,State Reference,Relative Position.Angle.Radial,Relative Position.Distance.Nautical Miles,Altitude.AGL.Single Value,Altitude.MSL.Single Value,Flight Conditions,Weather Elements / Visibility,...,Result,Contributing Factors / Situations,Primary Problem,Narrative,Callback,Narrative.1,Callback.1,Synopsis,Year,anomaly_encoding
ACN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1014798,201206,0601-1200,SLC.Airport,UT,,,,11300.0,VMC,,...,General None Reported / Taken,Aircraft; Human Factors,Aircraft,Flying into SLC on the DELTA THREE RNAV arriva...,The Reporter stated that his aircraft is equip...,,,A CE750 Captain noted that his aircraft's FMS ...,2012,"[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1806744,202105,1201-1800,ORD.Airport,IL,,,,3900.0,,,...,Flight Crew FLC complied w / Automation / Advi...,Human Factors; Procedure; Airspace Structure; ...,Airspace Structure,ORD was on a very busy east flow arrival push....,,,,C90TRACON Controller reported they did not not...,2021,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1044902,201210,0001-0600,S46.TRACON,WA,,,,,,,...,General None Reported / Taken,ATC Equipment / Nav Facility / Buildings,ATC Equipment / Nav Facility / Buildings,B737-800 was vectored to an ILS Runway 16L app...,,,,S46 Controller expressed concern regarding the...,2012,"[1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
1764093,202009,0601-1200,ZZZ.Tower,US,,,400.0,,VMC,,...,Flight Crew Executed Go Around / Missed Approach,Human Factors; Procedure,Human Factors,We were on a 6 mile final when tower cleared a...,,While on about a six mile final tower cleared ...,,CRJ-200 flight crew reported failing to retrac...,2020,"[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]"
1786435,202102,1201-1800,ZZZ.ARTCC,US,,,,17000.0,,,...,Air Traffic Control Issued New Clearance; Flig...,Environment - Non Weather Related; Human Facto...,Environment - Non Weather Related,During Climb we Leveled at 17;000 departure sw...,,after copilot (pf) leved at 17000'; dfw depart...,,Air carrier First Officer reported an altitude...,2021,"[1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]"


In [17]:
#
# MODEL_NAME = "model"
MODEL_NAME = None
MODEL_DIRECTORY = "model_save"


# Sections of configBertTokenizer
# Defining some key variables that will be used later on in the training
BALANCED = True
# LAYERS_TO_UNFREEZE = None
LAYERS_TO_UNFREEZE = [8, 9, 10, 11]

MAX_LEN = 512
TRAIN_BATCH_SIZE = 32 # 32 Size for NASA
VALID_BATCH_SIZE = 32
EPOCHS = 5 # 5 Epochs for NASA
LEARNING_RATE = 1e-05 * 2 # 0.00002 Rate for NASA
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained("NASA-AIML/MIKA_SafeAeroBERT")

In [12]:
class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.text = dataframe.text
        self.targets = self.data.labels
        self.max_len = max_len

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text.iloc[index])
        text = " ".join(text.split())

        inputs = self.tokenizer(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets.iloc[index], dtype=torch.float)
        }


In [13]:
num_labels = len(test_df.labels[0])

model = SequenceClassificationModel('bert-base-uncased', num_labels=num_labels)
# model = SequenceClassificationModel('NASA-AIML/MIKA_SafeAeroBERT', num_labels=num_labels)
# model = SequenceClassificationModel('allenai/longformer-base-4096', num_labels=num_labels)

model.set_trainable_layers(LAYERS_TO_UNFREEZE)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SequenceClassificationModel(
  (l1): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_fea

In [12]:
# Creating the dataset and dataloader for the neural network
print("TRAIN Dataset: {}".format(train_df.shape))
print("TEST Dataset: {}".format(test_df.shape))

tokenizer = model.tokenizer()
training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
testing_set = CustomDataset(test_df, tokenizer, MAX_LEN)

TRAIN Dataset: (96986, 2)
TEST Dataset: (10805, 2)


In [14]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 2
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 2
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [15]:
torch.cuda.empty_cache()

In [18]:
num_labels = len(test_df.labels.iloc[0])

# model = SequenceClassificationModel('bert-base-uncased', num_labels=num_labels)
model = SequenceClassificationModel('NASA-AIML/MIKA_SafeAeroBERT', num_labels=num_labels)
# model = SequenceClassificationModel('allenai/longformer-base-4096', num_labels=num_labels)

model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at NASA-AIML/MIKA_SafeAeroBERT and are newly initialized: ['classifier.weight', 'bert.pooler.dense.weight', 'classifier.bias', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BERTClass(
  (l1): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=Tr

In [19]:
class SafeAeroBERTClass(torch.nn.Module):
    def __init__(self, num_labels=15):
        super(SafeAeroBERTClass, self).__init__()
        self.l1 = AutoModelForSequenceClassification.from_pretrained("NASA-AIML/MIKA_SafeAeroBERT", num_labels=num_labels,)

    def forward(self, ids, mask, token_type_ids):
        output = self.l1(ids, attention_mask=mask, token_type_ids=token_type_ids)
        return output.logits

model = SafeAeroBERTClass()

# Freeze all layers in the model
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the classifier and pooler layers
for param in model.l1.classifier.parameters():
    param.requires_grad = True

for param in model.l1.bert.pooler.parameters():
    param.requires_grad = True

model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at NASA-AIML/MIKA_SafeAeroBERT and are newly initialized: ['classifier.bias', 'bert.pooler.dense.weight', 'classifier.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


SafeAeroBERTClass(
  (l1): BertForSequenceClassification(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768,

In [20]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 2
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': False,
                'num_workers': 2
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [14]:
torch.cuda.empty_cache()

In [15]:
def binary_accuracy_per_label(y_true, y_pred):
    correct = y_pred == y_true
    accuracy_per_label = correct.float().mean(axis=0)
    return accuracy_per_label

def binary_accuracy_averaged(y_true, y_pred):
    accuracy_per_label = binary_accuracy_per_label(y_true, y_pred)
    accuracy_averaged = accuracy_per_label.mean()
    return accuracy_averaged

def custom_classification_report(y_true, y_pred):
    report = metrics.classification_report(y_true, y_pred, output_dict=True, target_names=ANOMALY_LABELS, zero_division=0)
    accuracy = binary_accuracy_per_label(y_true, y_pred)
    extended_accuracy_new = np.append(accuracy, [accuracy.mean()] * (len(report) - len(accuracy)))

    updated_report = {}
    for i, class_label in enumerate(report.keys()):
        # Create a new dictionary for the class with binary accuracy
        class_dict = {'binary_accuracy': extended_accuracy_new[i]}
        
        # Merge this dictionary with the existing metrics for the class
        class_dict.update(report[class_label])

        # Update the main report dictionary
        updated_report[class_label] = class_dict

    return updated_report


In [16]:
pos_weight = None
if BALANCED:
    # Compute weights for loss function
    num_labels = len(training_set[0]['targets'])
    pos_num = torch.zeros(num_labels).to(device)
    for _, data in enumerate(training_loader, 0):
        targets = data['targets'].to(device)
        pos_num += torch.sum(targets, axis=0)
    nobs = len(training_loader.dataset)
    pos_weight = (nobs - pos_num) / pos_num

    model.model_name += "_BCE-Balanced"

loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)  # compute weighted loss for unbalanced dataset
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)
metrics_dict = {
    "Accuracy": metrics.accuracy_score,
    "F1 Micro Score": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='micro', zero_division=0),
    "F1 Macro Score": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='macro', zero_division=0)
}

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [21]:
pos_weight = None
if BALANCED:
    # Compute weights for loss function
    num_labels = len(training_set[0]['targets'])
    pos_num = torch.zeros(num_labels).to(device)
    for _, data in enumerate(training_loader, 0):
        targets = data['targets'].to(device)
        pos_num += torch.sum(targets, axis=0)
    nobs = len(training_loader.dataset)
    pos_weight = (nobs - pos_num) / pos_num

    model.model_name += "_BCE-Balanced"

loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)  # compute weighted loss for unbalanced dataset
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)
metrics_dict = {"Custom Classifcation Report": lambda y_true, y_pred: custom_classification_report(y_true, y_pred)
    # "Binary Accuracy Macro": lambda outputs, targets: binary_accuracy_averaged(targets, outputs, threshold=0.5),
    # "Binary Accuracy per Class": binary_accuracy_per_label,
    # "F1 Score Micro": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='micro', zero_division=1),
    # "F1 Score Macro": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='macro', zero_division=1),
    # "F1 Scores per Class": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average=None, zero_division=1)
}


In [17]:
def save_model(model, epoch, directory='model_save', model_name=None):
    """
    Saves the model state.

    Args:
    model (torch.nn.Module): The model to save.
    epoch (int): The current epoch number.
    file_path (str): Base directory to save the models.
    """
    if model_name is None:
        model_name = model.model_name

    if not os.path.exists(directory):
        os.makedirs(directory)
    
    file_path = os.path.join(directory, f"{model_name}_epoch_{epoch}.pth")

    torch.save(model.state_dict(), file_path)
    print(f'Model saved at {file_path}')


In [22]:
def load_model(model, directory='model_save', model_name=None, epoch=None):
    """
    Loads the model state.

    Args:
    model (torch.nn.Module): The model to load state into.
    file_path (str): Path to the saved model file.
    """
    if model_name is None:
        model_name = model.model_name

    if epoch is None:
        epoch = find_last_saved_epoch(directory, model_name)
        if epoch == -1:
            print("No saved model found.")
            return
    
    file_path = os.path.join(directory, f"{model_name}_epoch_{epoch}.pth")
    if not os.path.exists(file_path):
        print(f"No model file found at {file_path}")
        return

    model.load_state_dict(torch.load(file_path))
    model.to(device)
    print(f'Model loaded from {file_path}')

In [19]:
def find_last_saved_epoch(directory='model_save', model_name=None):
    """
    Finds the last saved epoch number in the specified directory.

    Args:
    file_path (str): The directory where models are saved.

    Returns:
    int: The last saved epoch number. Returns -1 if no saved model is found.
    """
    if model_name is None:
        model_name = model.model_name

    # Check if the directory exists, and create it if it doesn't
    if not os.path.exists(directory):
        return -1

    saved_epochs = []
    for filename in os.listdir(directory):
        if model_name is None or filename == model_name:
            parts = filename.replace('.pth', '').split('_')
            if parts[-2] == 'epoch':
                try:
                    saved_epochs.append(int(parts[-1]))
                except ValueError:
                    pass
    
    return max(saved_epochs, default=-1)

In [24]:
def process_batch(model, batch_data, device, loss_fn, mode, optimizer=None, accumulate_gradients=False):
    ids = batch_data['ids'].to(device, dtype=torch.long)
    mask = batch_data['mask'].to(device, dtype=torch.long)
    token_type_ids = batch_data['token_type_ids'].to(device, dtype=torch.long)
    targets = batch_data['targets'].to(device, dtype=torch.float)

    if mode == 'train':
        outputs = model(ids, mask, token_type_ids)
        loss = loss_fn(outputs, targets)
        loss.backward()
        if not accumulate_gradients:
            optimizer.step()
            optimizer.zero_grad()
    else:
        with torch.no_grad():
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)

    return outputs, targets, loss


In [25]:
def calculate_metrics(metrics_dict, targets, outputs):
    return {metric_name: metric_fn(targets, outputs) for metric_name, metric_fn in metrics_dict.items()}


In [26]:
def calculate_metrics(metrics_dict, targets, outputs, is_logit=True, thresholds=0.5, percentile=None):
    results = {}
    labels = ANOMALY_LABELS
    if is_logit:
        outputs = torch.sigmoid(outputs)

    if thresholds is None:
        thresholds = 0.5
    # Calculate percentile is specified
    if percentile is not None:
        thresholds = []
        for i in range(outputs.shape[1]):  # Iterate over each label
            label_scores = outputs[:, i].detach().cpu().numpy()
            threshold = np.percentile(label_scores, percentile)
            thresholds.append(threshold)
        thresholds = np.array(thresholds)

    # Apply thresholds to outputs
    outputs = (outputs >= torch.tensor(thresholds, device=outputs.device)).float()

    for metric_name, metric_fn in metrics_dict.items():
        if metric_name in ["F1 Scores per Class", "Binary Accuracy per Class"]:
            metric_scores = metric_fn(targets.cpu(), outputs.cpu())  # Assuming targets and outputs are tensors
            for i, score in enumerate(metric_scores):
                label = labels[i] if i < len(labels) else f"Class {i}"
                results[f"{metric_name} - {label}"] = score
        else:
            results[metric_name] = metric_fn(targets.cpu(), outputs.cpu())

    return results


In [27]:
def format_value(val):
    """Helper function to format the value for printing."""
    if isinstance(val, (float, np.float16, np.float32, np.float64)):
        return f"{val:.4f}"
    elif isinstance(val, torch.Tensor) and val.dtype in [torch.float16, torch.float32, torch.float64]:
        return f"{val.item():.4f}"
    else:
        return val

def print_metrics_results(metrics_results):
    # First, print scalar values and simple dictionaries
    for metric, value in metrics_results.items():
        if isinstance(value, dict) and not any(isinstance(v, dict) for v in value.values()):
            # Print simple dictionaries on a single line
            dict_values = ", ".join([f"{k}: {format_value(v)}" for k, v in value.items()])
            print(f"{metric}: {dict_values}")
        elif not isinstance(value, dict):
            # Print scalar values
            print(f"{metric}: {format_value(value)}")

    # Then, print nested dictionaries
    for metric, value in metrics_results.items():
        if isinstance(value, dict) and any(isinstance(v, dict) for v in value.values()):
            # Print nested dictionaries
            print(f"\n{metric}:")
            # Find the longest key length for formatting
            max_key_length = max(len(str(k)) for k in value.keys())
            for sub_key, sub_dict in value.items():
                formatted_key = f"{sub_key}:".ljust(max_key_length + 2)
                dict_values = ", ".join([f"{k}: {format_value(v)}" for k, v in sub_dict.items()])
                print(f"  {formatted_key} {dict_values}")


In [28]:
def print_batch_results(mode, epoch, batch, dataset_size, loss, start_time, batch_start_time, batch_size):
    current_time = time.time()
    elapsed_time = current_time - start_time
    batch_time_ms = (current_time - batch_start_time) * 1000

    current = (batch + 1) * batch_size
    epoch_str = f"Epoch: {epoch+1}, " if epoch is not None else ""
    
    print(f"\r{mode.capitalize()} - {epoch_str}Batch: {batch+1} [{current:>5d}/{dataset_size:>5d}], "
          f"Time: {elapsed_time:.0f}s {batch_time_ms:.0f}ms/step, Loss: {loss:>7f}", end="")


In [29]:
def process_batches(mode, model, loader, device, loss_fn, optimizer=None, epoch=None, accumulation_steps=None):
    total_loss = 0.0
    all_targets = []
    all_outputs = []
    start_time = time.time()

    for batch, data in enumerate(loader, 0):
        batch_start_time = time.time()
        
        logits, targets, loss = process_batch(model, data, device, loss_fn, mode, optimizer)
        total_loss += loss.item()

        if mode == 'train':
            if accumulation_steps is not None and (batch + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

        # Detach from the (gradient) computation graph to save on memory
        all_outputs.append(logits.detach())
        all_targets.append(targets.detach())

        batch_size = targets.shape[0]
        print_batch_results(mode, epoch, batch, len(loader.dataset), loss.item(), start_time, batch_start_time, batch_size)

    if mode == 'train' and optimizer is not None and accumulation_steps is not None:
        # Ensure any remaining gradients are applied
        optimizer.step()
        optimizer.zero_grad()
    
    all_outputs = torch.cat(all_outputs, dim=0)
    all_targets = torch.cat(all_targets, dim=0)

    print()
    avg_loss = total_loss / len(loader)
    return avg_loss, all_outputs, all_targets

In [25]:
def evaluate(model, validation_loader, loss_fn, metrics_dict, device, hyperparameters=None):
    model.eval()
    avg_val_loss, val_outputs, val_targets = process_batches('evaluate', model, validation_loader, device, loss_fn)

    # Set default values
    thresholds = None
    percentile = None

    # Update values based on hyperparameters if provided
    if hyperparameters:
        thresholds = hyperparameters.get("thresholds", thresholds)
        percentile = hyperparameters.get("percentile", percentile)

    metrics_results = calculate_metrics(metrics_dict, val_targets, val_outputs, thresholds=thresholds, percentile=percentile)

    print(f"Evaluation Results:")
    print(f"Average Loss: {avg_val_loss:.4f}")
    print_metrics_results(metrics_results)

    return avg_val_loss, metrics_results


In [26]:
def train(model, epoch, training_loader, validation_loader, optimizer, loss_fn, metrics_dict, device, accumulation_steps=1):
    print(f"Training Epoch {epoch + 1}")

    # Training phase
    model.train()
    if optimizer is not None:
        optimizer.zero_grad()

    avg_train_loss, train_outputs, train_targets = process_batches('train', model, training_loader, device, loss_fn, optimizer, epoch, accumulation_steps)

    metrics_results = calculate_metrics(metrics_dict, train_targets, train_outputs)

    print(f"Train Results:")
    print(f"Average Training Loss for Epoch {epoch + 1}: {avg_train_loss:.4f}")
    print_metrics_results(metrics_results)

    # Validation phase
    if validation_loader is not None:
        avg_val_loss, val_metrics_results = evaluate(model, validation_loader, loss_fn, metrics_dict, device)
    else:
        avg_val_loss = None
        val_metrics_results = {}

    return avg_train_loss, avg_val_loss, val_metrics_results


In [27]:
model.model_name


'NASA-AIML_MIKA_SafeAeroBERT_Unfrozen[8, 9, 10, 11]_BCE-Balanced'

In [28]:
last_saved_epoch = find_last_saved_epoch(directory=MODEL_DIRECTORY, model_name=MODEL_NAME)

start_epoch = last_saved_epoch + 1 if last_saved_epoch != -1 else 0
if last_saved_epoch != -1:
    load_model(model, directory=MODEL_DIRECTORY, model_name=MODEL_NAME, epoch=last_saved_epoch)
    print(f"Loaded model training from epoch {start_epoch}")
else:
    print("No saved model found.")

No saved model found. Starting training from the beginning.
Training Epoch 1
Train - Epoch: 1, Batch: 20 [  640/97417], Time: 112s 5555ms/step, Loss: 0.560862, Accuracy: 0.0625, F1 Micro Score: 0.4000, F1 Macro Score: 0.0490

KeyboardInterrupt: 

In [29]:
if start_epoch < EPOCHS:
    print(f"Resuming training from epoch {start_epoch + 1}")
else:
    print("No saved model found.")

Resuming training from epoch 1
Training Epoch 1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Train - Epoch: 1, Batch: 3031 [78806/96986], Time: 1546s 404ms/step, Loss: 0.599502
Train Results:
Average Training Loss for Epoch 1: 0.5714

Custom Classifcation Report:
  Deviation / Discrepancy - Procedural:  binary_accuracy: 0.7017, precision: 0.7904, recall: 0.6860, f1-score: 0.7345, support: 58337.0000
  Aircraft Equipment:                    binary_accuracy: 0.8489, precision: 0.7715, recall: 0.8718, f1-score: 0.8186, support: 37932.0000
  Conflict:                              binary_accuracy: 0.8692, precision: 0.5867, recall: 0.9095, f1-score: 0.7133, support: 17342.0000
  Inflight Event / Encounter:            binary_accuracy: 0.8129, precision: 0.4833, recall: 0.7895, f1-score: 0.5996, support: 17203.0000
  ATC Issue:                             binary_accuracy: 0.8069, precision: 0.4528, recall: 0.9072, f1-score: 0.6041, support: 15751.0000
  Deviation - Altitude:                  binary_accuracy: 0.8486, precision: 0.3683, recall: 0.9151, f1-score: 0.5252, support: 8876.0

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Evaluate - Batch: 338 [ 7098/10805], Time: 100s 191ms/step, Loss: 0.416931
Evaluation Results:
Average Loss: 0.5172

Custom Classifcation Report:
  Deviation / Discrepancy - Procedural:  binary_accuracy: 0.7338, precision: 0.8097, recall: 0.7145, f1-score: 0.7591, support: 6343.0000
  Aircraft Equipment:                    binary_accuracy: 0.8634, precision: 0.7830, recall: 0.9140, f1-score: 0.8435, support: 4351.0000
  Conflict:                              binary_accuracy: 0.9162, precision: 0.6939, recall: 0.9276, f1-score: 0.7939, support: 1879.0000
  Inflight Event / Encounter:            binary_accuracy: 0.8571, precision: 0.6349, recall: 0.8535, f1-score: 0.7282, support: 2423.0000
  ATC Issue:                             binary_accuracy: 0.8566, precision: 0.6379, recall: 0.8921, f1-score: 0.7439, support: 2522.0000
  Deviation - Altitude:                  binary_accuracy: 0.8529, precision: 0.3726, recall: 0.9532, f1-score: 0.5358, support: 962.0000
  Deviation - Track / Headi

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Train - Epoch: 2, Batch: 2710 [86720/96986], Time: 1404s 546ms/step, Loss: 0.413910

In [33]:
if start_epoch < EPOCHS:
    print(f"Resuming training from epoch {start_epoch + 1}")

for epoch in range(start_epoch, EPOCHS):
    train_loss, val_loss, val_metrics = train(model, epoch, training_loader, testing_loader, optimizer, loss_fn, metrics_dict, device, accumulation_steps=8)
    save_model(model, epoch, directory=MODEL_DIRECTORY, model_name=MODEL_NAME)
    # Additional epoch-level processing if needed

# Testing phase
avg_test_loss, test_metrics_results = evaluate(model, testing_loader, loss_fn, metrics_dict, device)
print(f"Test Results:")
print(f"Average Loss: {avg_test_loss:.4f}")
print_metrics_results(test_metrics_results)


Resuming training from epoch 1
Training Epoch 1
Train - Epoch: 1, Batch: 61 [ 1952/96986], Time: 83s 959ms/step, Loss: 0.6018489

In [None]:
def optimize_thresholds(logits, targets, metrics_dict, num_labels):
    best_global_metric = -np.inf
    best_thresholds = [0.5] * num_labels

    # Iterate over a range of thresholds for each label
    for label in range(num_labels):
        for threshold in np.linspace(0, 1, 110):  # Example range and step size
            temp_thresholds = best_thresholds.copy()
            temp_thresholds[label] = threshold
            metrics_results = calculate_metrics(metrics_dict, targets, logits, thresholds=temp_thresholds)
            current_metric = metrics_results["Optimization Metric"]

            if current_metric > best_global_metric:
                best_global_metric = current_metric
                best_thresholds = temp_thresholds

    metrics_results = calculate_metrics(metrics_dict, targets, logits, thresholds=best_thresholds)
    return best_thresholds, metrics_results


In [None]:
# Run the model to get logits
_, logits, targets = process_batches('evaluate', model, testing_loader, device, loss_fn)


Evaluate - Batch: 5 [  160/10805], Time: 2s 293ms/step, Loss: 0.190116

Evaluate - Batch: 338 [ 7098/10805], Time: 100s 191ms/step, Loss: 0.131393


In [None]:
opt_metrics_dict = {
    "Optimization Metric": lambda y_true, y_pred: metrics.f1_score(y_true, y_pred, average='macro', zero_division=0)
}
opt_metrics_dict.update(metrics_dict)

# Optimize thresholds
best_thresholds, metrics_results = optimize_thresholds(logits, targets, opt_metrics_dict, num_labels=len(ANOMALY_LABELS))

print("Optimized Thresholds:", best_thresholds)
print_metrics_results(metrics_results)

# # Use these thresholds in your evaluation
# avg_test_loss, test_metrics_results = evaluate(model, testing_loader, loss_fn, metrics_dict, device, hyperparameters=best_thresholds)
# print("Test Results with Optimized Thresholds:")
# print_metrics_results(test_metrics_results)

Optimized Thresholds: [0.28440366972477066, 0.44036697247706424, 0.6880733944954129, 0.3577981651376147, 0.5, 0.47706422018348627, 0.48623853211009177, 0.3853211009174312, 0.41284403669724773, 0.5321100917431193, 0.3119266055045872, 0.3853211009174312, 0.3944954128440367, 0.06422018348623854]
Optimization Metric: 0.7312

Custom Classifcation Report:
  Deviation / Discrepancy - Procedural:  binary_accuracy: 0.7810, precision: 0.7721, recall: 0.8895, f1-score: 0.8267, support: 6343.0000
  Aircraft Equipment:                    binary_accuracy: 0.9139, precision: 0.8901, recall: 0.8970, f1-score: 0.8935, support: 4351.0000
  Conflict:                              binary_accuracy: 0.9526, precision: 0.8641, recall: 0.8632, f1-score: 0.8637, support: 1879.0000
  Inflight Event / Encounter:            binary_accuracy: 0.9001, precision: 0.7684, recall: 0.7941, f1-score: 0.7810, support: 2423.0000
  ATC Issue:                             binary_accuracy: 0.9079, precision: 0.7726, recall: 0.8