In [1]:
from transformers import BertTokenizer, BertModel
import torch
from torch import nn

In [3]:
class MyBert(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    The head maps BERT's pooled output to ``num_labels`` raw logits. No
    activation is applied here; callers apply the sigmoid themselves.

    Args:
        num_labels: Width of the output layer. Defaults to 34.
        dropout: Dropout probability applied to the pooled output.
            Defaults to 0.2.
        pretrained_name: Name or path handed to ``BertModel.from_pretrained``.
            Defaults to ``'bert-base-uncased'``.
    """

    def __init__(self, num_labels=34, dropout=0.2, pretrained_name='bert-base-uncased'):
        super().__init__()
        # The attribute names below are part of the state_dict keys, so they
        # are kept unchanged to stay compatible with existing checkpoints.
        self.bert = BertModel.from_pretrained(pretrained_name)
        self.Dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        """Compute classification logits for a batch of token ids.

        Args:
            input_ids: Token-id tensor forwarded to the BERT encoder.
            attention_mask: Optional mask forwarded to the encoder.
            token_type_ids: Optional segment ids forwarded to the encoder.

        Returns:
            The output of the linear head applied to the (dropped-out)
            pooled encoder output.
        """
        # Keyword arguments keep the call independent of the encoder's
        # positional parameter order.
        encoded = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        pooled = self.Dropout(encoded.pooler_output)
        return self.classifier(pooled)

In [4]:
def load_ckp(model, optimizer, checkpoint_path):
    """Restore model (and optionally optimizer) state from a checkpoint file.

    The checkpoint is read as a dict with the keys ``'state_dict'``,
    ``'optimizer'``, ``'epoch'`` and ``'valid_loss_min'``.

    Args:
        model: Object whose ``load_state_dict`` receives
            ``checkpoint['state_dict']``.
        optimizer: Optimizer to restore from ``checkpoint['optimizer']``, or
            ``None`` to skip the optimizer state (e.g. inference-only use).
        checkpoint_path: Path of the checkpoint file.

    Returns:
        Tuple ``(model, optimizer, epoch, loss)`` where ``epoch`` and ``loss``
        are ``checkpoint['epoch']`` and ``checkpoint['valid_loss_min']``.
    """
    # NOTE(review): depending on the installed PyTorch version's
    # `weights_only` default, torch.load may unpickle arbitrary objects —
    # only load checkpoints from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['epoch'], checkpoint['valid_loss_min']

In [5]:
# Build the tokenizer and classifier, then restore the saved state from disk.
checkpoint_path = './/best_model.pt'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = MyBert()
# load_ckp also restores optimizer state, so an optimizer is created for it.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-05)
model, optimizer, epoch, loss = load_ckp(model, optimizer, checkpoint_path)
# Switch to evaluation mode; as the last expression this also shows the repr.
model.eval()

  checkpoint = torch.load(checkpoint_path, map_location='cpu')


MyBert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

In [6]:
import numpy as np
# Occurrence-category codes; predict() indexes this list by classifier output position.
labels = [
    'ADRM', 'AMAN', 'ARC', 'ATM', 'BIRD', 'CABIN', 'CFIT',
    'CTOL', 'EVAC', 'EXTL', 'F-NI', 'F-POST', 'FUEL', 'GCOL',
    'GTOW', 'ICE', 'LALT', 'LOC-G', 'LOC-I', 'LOLI', 'MAC',
    'OTHERS', 'RAMP', 'RE', 'RI', 'SCF-NP', 'SCF-PP', 'SEC',
    'TURB', 'UIMC', 'UNK', 'USOS', 'WILD', 'WSTRW',
]
def predict(text, threshold=0.7):
    """Return the occurrence-category labels the model assigns to ``text``.

    Every label whose sigmoid probability exceeds ``threshold`` is returned.
    If none does, the single most probable label is returned instead, so the
    result always contains at least one label.

    Args:
        text: Narrative to classify; tokenised with truncation at 512 tokens.
        threshold: Probability a label must exceed to be selected.
            Defaults to 0.7.

    Returns:
        list[str]: Entries of the module-level ``labels`` list, in list order.
    """
    # Calling the tokenizer directly replaces the deprecated encode_plus.
    inputs = tokenizer(text=text,
                       add_special_tokens=True,
                       max_length=512,
                       truncation=True,
                       padding=True,
                       return_token_type_ids=True,
                       return_attention_mask=True,
                       return_tensors='pt')
    with torch.no_grad():
        logits = model(inputs['input_ids'],
                       inputs['attention_mask'],
                       inputs['token_type_ids'])
        # Only the first row is read; plain Python floats keep the threshold
        # comparison in double precision.
        probs = torch.sigmoid(logits)[0].tolist()

    selected = [i for i, p in enumerate(probs) if p > threshold]
    if not selected:
        # Nothing cleared the threshold: fall back to the first maximum.
        selected = [max(range(len(probs)), key=probs.__getitem__)]
    return [labels[i] for i in selected]

   

In [7]:
import pandas as pd
# Forward-slash relative path so the workbook resolves on Windows and POSIX alike.
df = pd.read_excel('./Accidents_ASN.xlsx')

In [8]:
# Run predict on every narrative and store its return value in a new column.
df['OCCURENCE_CATEGORY'] = df['Narrative'].apply(predict)

In [9]:
# index is documented as a bool; False omits the row index from the sheet.
df.to_excel('ASN_add_occurence.xlsx', index=False)

In [3]:
# Rebuild the tokenizer and classifier, then restore the weights stored in the checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = MyBert()
checkpoint = torch.load('.//best_model.pt', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
# Switch to evaluation mode; as the last expression this also shows the repr.
model.eval()

  checkpoint = torch.load('.//best_model.pt', map_location='cpu')


MyBert(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

In [30]:
import torch
def inference(text):
    """Return the single most probable occurrence-category label for ``text``.

    Args:
        text: Narrative to classify; tokenised with truncation at 512 tokens.

    Returns:
        str: The entry of the label list at the position of the highest
        model output in the first row of logits.
    """
    labels = ['ADRM', 'AMAN', 'ARC', 'ATM', 'BIRD', 'CABIN', 'CFIT', 'CTOL', 'EVAC', 'EXTL', 'F-NI', 'F-POST', 'FUEL',
              'GCOL', 'GTOW', 'ICE', 'LALT', 'LOC-G', 'LOC-I', 'LOLI', 'MAC', 'OTHERS', 'RAMP', 'RE',
              'RI', 'SCF-NP', 'SCF-PP', 'SEC', 'TURB', 'UIMC', 'UNK', 'USOS', 'WILD', 'WSTRW']
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )
    with torch.no_grad():
        logits = model(**inputs)

    # Rank the scores themselves. Taking argmax of a thresholded 0/1 mask
    # would return the first label above the threshold in list order, and
    # index 0 ('ADRM') whenever no label clears it. Sigmoid is monotonic, so
    # the argmax of the logits is the argmax of the probabilities.
    best_index = torch.argmax(logits[0]).item()
    return labels[best_index]

In [33]:
import pandas as pd
# Forward-slash relative path so the workbook resolves on Windows and POSIX alike.
df = pd.read_excel('./ASN.xlsx')

In [35]:
# Run inference on every narrative and store its return value in a new column.
df['OCCURENCE_CATEGORY'] = df['Narrative'].apply(inference)

In [37]:
# index is documented as a bool; False omits the row index from the sheet.
df.to_excel('ASN_occurence.xlsx', index=False)