In [None]:
# install NeMo
BRANCH = 'main'
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]

In [None]:
!pip install editdistance
!pip install --upgrade onnxruntime

In [7]:
import torch
import numpy as np
from nemo.collections import nlp as nemo_nlp
from nemo.utils.exp_manager import exp_manager
from nemo.utils import logging
import os
import wget
import pytorch_lightning as pl
from omegaconf import OmegaConf
from nlp_engine.data.data_utils import DataUtils, create_train_test_data
import time
import onnxruntime

[NeMo W 2023-05-31 05:54:47 optimizers:66] Could not import distributed_fused_adam optimizer from Apex
[NeMo W 2023-05-31 05:54:48 experimental:27] Module <class 'nemo.collections.nlp.models.text_normalization_as_tagging.thutmose_tagger.ThutmoseTaggerModel'> is experimental, not ready for production and is not fully supported. Use at your own risk.
    


In [8]:
from nemo.utils import logging
from nemo.collections.nlp.parts.utils_funcs import tensor2list
from nemo.collections.nlp.models.text_classification import TextClassificationModel
from nemo.collections.nlp.data.text_classification import TextClassificationDataset

In [73]:
class NemoTrainer(object):
    def __init__(self,
                 config_file="/config/intent_slot_classification_config.yaml",
                 data_dir='/nemo_format/',
                 max_epochs=5):
        """
        Load the training configuration and build the Lightning trainer.

        Parameters:
          config_file (string) -- Path of the YAML file loaded with OmegaConf.
          data_dir (string) -- Value written to `config.model.data_dir`.
          max_epochs (int) -- Value written to `config.trainer.max_epochs`.

        The defaults reproduce the values that used to be hard-coded here, so
        `NemoTrainer()` behaves as before.
        """
        self.config = OmegaConf.load(config_file)
        # The configuration is printed as loaded, before any override below.
        print(OmegaConf.to_yaml(self.config))
        self.config.model.data_dir = data_dir

        # Use a single GPU with 16-bit precision when CUDA is available,
        # otherwise fall back to CPU with 32-bit precision.
        use_gpu = torch.cuda.is_available()
        self.config.trainer.devices = 1
        self.config.trainer.accelerator = 'gpu' if use_gpu else 'cpu'
        self.config.trainer.precision = 16 if use_gpu else 32

        # for mixed precision training, uncomment the line below (precision should be set to 16 and amp_level to O1):
        # self.config.trainer.amp_level = 'O1'

        # remove distributed training flags
        self.config.trainer.strategy = None

        # small number of epochs by default, for demonstration purposes
        self.config.trainer.max_epochs = max_epochs

        self.trainer = pl.Trainer(**self.config.trainer)

        # Keep the experiment directory on the instance so later steps can
        # locate the outputs of this run instead of hard-coding a path.
        self.exp_dir = exp_manager(self.trainer, self.config.get("exp_manager", None))
        print(str(self.exp_dir))
     
    def train(self):
        """Build an IntentSlotClassificationModel from `self.config.model` and fit it with `self.trainer`."""
        intent_slot_model = nemo_nlp.models.IntentSlotClassificationModel(
            self.config.model,
            trainer=self.trainer,
        )
        self.trainer.fit(intent_slot_model)

    def test(self, checkpoint_path=None, test_generation_file="intent_detection_test_sentences.txt"):
        """
        Restore a saved model and log its predictions for a file of sentences.

        Parameters:
          checkpoint_path (string) -- Path of the .nemo file to restore. When
                                      omitted, the previously hard-coded
                                      checkpoint location is used.
          test_generation_file (string) -- UTF-8 text file with one query per line.
        """
        if checkpoint_path is None:
            checkpoint_path = os.path.join(
                "/nemo_experiments/IntentSlot/2023-05-22_11-13-36/checkpoints/", "IntentSlot.nemo"
            )

        # load the model from this checkpoint
        eval_model = nemo_nlp.models.IntentSlotClassificationModel.restore_from(checkpoint_path)

        # Strip the line terminator from every query and skip blank lines so
        # that neither reaches the model or the log output.
        with open(test_generation_file, "r", encoding="utf-8") as reader:
            queries = [line.strip() for line in reader if line.strip()]

        # No threshold optimisation is run here; predictions use the restored model as-is.
        pred_intents, pred_slots = eval_model.predict_from_examples(queries, self.config.model.test_ds)
        logging.info('The prediction results of some sample queries with the trained model:')

        for query, intent, slots in zip(queries, pred_intents, pred_slots):
            logging.info(f'Query : {query}')
            logging.info(f'Predicted Intents: {intent}')
            logging.info(f'Predicted Slots: {slots}')
            
    def export_model(self, nemo_checkpoint_path, onnx_filename):
        """
        Restore `IntentSlot.nemo` from `nemo_checkpoint_path` and export it to `onnx_filename`.

        The model is switched to eval mode before the export, which is done
        with ONNX opset version 14.
        """
        nemo_file = os.path.join(nemo_checkpoint_path, "IntentSlot.nemo")
        restored_model = nemo_nlp.models.IntentSlotClassificationModel.restore_from(nemo_file)
        restored_model.eval()
        restored_model.export(output=onnx_filename, onnx_opset_version=14)
        
    
    def to_numpy(self, tensor):
        return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

    def create_infer_dataloader(self, model, queries):
        """
        Build a DataLoader over `queries`, tokenized with `model.tokenizer`.

        The batch size equals the number of queries and nothing is shuffled
        or dropped.
        """
        infer_dataset = TextClassificationDataset(
            tokenizer=model.tokenizer,
            queries=queries,
            max_seq_length=50,
        )
        loader_options = dict(
            dataset=infer_dataset,
            batch_size=len(queries),
            shuffle=False,
            num_workers=2,
            pin_memory=True,
            drop_last=False,
            collate_fn=infer_dataset.collate_fn,
        )
        return torch.utils.data.DataLoader(**loader_options)
    def postprocessing(self, intent_preds, slot_preds, intent_label_map, slot_label_map):
        processed_results = []
        for intent_pred, slot_pred in zip(intent_preds, slot_preds):
            intent_label = intent_label_map[intent_pred]
            slot_labels = [slot_label_map[slot] for slot in slot_pred]
            processed_results.append((intent_label, slot_labels))
        return processed_results
        
    def inference(self, queries, nemo_checkpoint_path, onnx_model, intent_slot_label_path):
        """
        Run the exported ONNX model on `queries` with ONNX Runtime and log the predictions.

        Parameters:
          queries (list[string]) -- Queries to classify.
          nemo_checkpoint_path (string) -- Directory containing `IntentSlot.nemo`; the
                                           restored model supplies the tokenizer used
                                           to build the input batch.
          onnx_model (string) -- Path of the ONNX file passed to `InferenceSession`.
          intent_slot_label_path (string) -- Directory containing `dict.intents.csv`
                                             and `dict.slots.csv` (one label per line,
                                             line number == class id).

        Prints the elapsed time, measured from just after the model is restored.
        """
        checkpoint_path = os.path.join(nemo_checkpoint_path, "IntentSlot.nemo")
        # load the model from this checkpoint
        eval_model = nemo_nlp.models.IntentSlotClassificationModel.restore_from(checkpoint_path)
        eval_model.eval()

        start_time = time.time()
        infer_datalayer = self.create_infer_dataloader(eval_model, queries)

        ort_session = onnxruntime.InferenceSession(onnx_model)
        # Session inputs are bound by position: ids, mask, token type ids.
        input_names = [model_input.name for model_input in ort_session.get_inputs()]

        def read_label_map(file_name):
            # Labels may contain non-ASCII characters, so decode explicitly as UTF-8.
            label_file = os.path.join(intent_slot_label_path, file_name)
            with open(label_file, 'r', encoding='utf-8') as file:
                return {index: line.strip() for index, line in enumerate(file)}

        # The label maps do not depend on the batch, so they are read once.
        intent_label_map = read_label_map('dict.intents.csv')
        slot_label_map = read_label_map('dict.slots.csv')

        for batch in infer_datalayer:
            # The fourth element of the batch is not needed to run the model.
            input_ids, input_type_ids, input_mask, _unused = batch
            ort_inputs = {
                input_names[0]: self.to_numpy(input_ids),
                input_names[1]: self.to_numpy(input_mask),
                input_names[2]: self.to_numpy(input_type_ids),
            }

            intent_logits, slot_logits = ort_session.run(None, ort_inputs)

            intent_preds = tensor2list(torch.argmax(torch.from_numpy(intent_logits), dim=-1))
            slot_preds = tensor2list(torch.argmax(torch.from_numpy(slot_logits), dim=-1))

            processed_results = self.postprocessing(intent_preds, slot_preds, intent_label_map, slot_label_map)

            logging.info('The prediction results of some sample queries with the trained model:')
            # The dataloader puts every query in one batch, so results line up with `queries`.
            for query, (intent_result, slot_result) in zip(queries, processed_results):
                logging.info(f'Query: {query}')
                logging.info(f'Predicted intent: {intent_result}')
                logging.info(f'Predicted slots: {slot_result}')

        print("Inference time:", time.time() - start_time)

# Train model

In [None]:
# Build the trainer from the YAML configuration, then fit the model.
nemo_model = NemoTrainer()
nemo_model.train()

# Evaluate

In [None]:
# Restore the saved .nemo checkpoint and log predictions for the test sentences.
nemo_model = NemoTrainer()
nemo_model.test()

# Export model

In [None]:
# References for the export step and for ONNX Runtime version compatibility:
# https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/core/export.html
# https://onnxruntime.ai/docs/reference/compatibility.html
nemo_model = NemoTrainer()
# Directory of the training run whose IntentSlot.nemo file is exported.
nemo_checkpoint_path = "/nemo_experiments/IntentSlot/2023-05-29_10-51-01/checkpoints/"
# Output file name for the exported ONNX model.
onnx_filename = "turkish_isc.onnx"
nemo_model.export_model(nemo_checkpoint_path, onnx_filename)

# ONNX Runtime Inference

In [None]:
# Run the exported ONNX model on a sample query and log the predicted intent and slots.
nemo_model = NemoTrainer()
# Directory holding dict.intents.csv and dict.slots.csv.
intent_slot_label_path = "/nemo_format/"
# Directory holding IntentSlot.nemo (restored inside `inference`).
nemo_checkpoint_path= "/nemo_experiments/IntentSlot/2023-05-29_10-51-01/checkpoints/"
onnx_model = "turkish_isc.onnx"

query = ['kahve pişirmeyi başlat']

nemo_model.inference(query, nemo_checkpoint_path, onnx_model, intent_slot_label_path)

In [10]:
# Show whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

True

# TensorRT Inference

In [None]:
!pip install pycuda

In [None]:
from jetson_voice_utils.trt_model import TRTModel
from transformers import AutoTokenizer

class IntentSlotClassificationTRTInference(object):
    def __init__(self,
                 config_file="/intent_slot_classification_config.yaml",
                 model_path='turkish_isc.onnx',
                 intent_slot_label_path="/nemo_format/",
                 nlp_dynamic_shapes=False):
        """
        Load the configuration, the TRT model and the tokenizer.

        Parameters:
          config_file (string) -- Path of the YAML file loaded with OmegaConf.
          model_path (string) -- Value written to `config.model_path`.
          intent_slot_label_path (string) -- Directory containing `dict.intents.csv`
                                             and `dict.slots.csv`.
          nlp_dynamic_shapes (bool) -- When True, a minimum shape of (1, 1) is added
                                       to the shapes passed to `TRTModel`, and
                                       `inference` pads to the longest query instead
                                       of `max_seq_length`.

        The defaults reproduce the values that used to be hard-coded here.
        """
        self.nlp_dynamic_shapes = nlp_dynamic_shapes
        self.intent_slot_label_path = intent_slot_label_path

        # Placeholder so the attribute always exists; `inference` replaces it
        # with the last label of the slot label file.
        self.null_slot = None

        self.config = OmegaConf.load(config_file)
        self.config.model_path = model_path
        print(OmegaConf.to_yaml(self.config))

        # load model
        max_seq_length = self.config.model['language_model']['max_seq_length']
        dynamic_shapes = {'max': (1, max_seq_length)}  # (batch_size, sequence_length)

        if self.nlp_dynamic_shapes:
            dynamic_shapes['min'] = (1, 1)

        self.model = TRTModel(self.config, dynamic_shapes)

        # create tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.model['tokenizer']['tokenizer_name'])
             
        
    def normalize_logits(self, logits):
        """
        Normalize logits such that they are distributed between [0,1]
        """
        return np.exp(logits - np.log(np.sum(np.exp(logits), axis=-1, keepdims=True)))
    
    def intent_labels(self):
        """
        List of the intent class labels.
        """
        # Define intent label map
        intent_labels = []
        with open(self.intent_slot_label_path + 'dict.intents.csv', 'r') as file:
            for intent in file:
                intent_labels.append(intent.strip())     
               
        return intent_labels


    def intent_label(self, index):
        """
        Return an intent label by index (with bounds checking) 
        """
        return self.intent_labels()[int(index)] if index < len(self.intent_labels()) else 'Unknown_Intent'
    
    def slot_labels(self):
        """
        List of the slot class labels.
        """
        # Define slot label map
        slot_labels = []
        with open(self.intent_slot_label_path + 'dict.slots.csv', 'r') as file:
            for slot in file:
                slot_labels.append(slot.strip())
  
        return slot_labels 
    
    def slot_label(self, index):
        """
        Return a slot label by index (with bounds checking)
        """
        return self.slot_labels()[int(index)] if index < len(self.slot_labels()) else self.null_slot
   
        
    def find_subtokens(self, encodings, method='char_span'):
        """
        Compute the subtoken mask, where each token is marked as True if it's a subtoken or False otherwise.
        Longer words/acronyms may be tokenized into mulitple word pieces (called subtokens), for example:

            'Yosemite' -> ['yo', '##se', '##mite']
            'U.S.' -> ['u', '.', 's', '.']

        Parameters:
          encodings (BatchEncoding) -- Output from tokenizer

          method (string) -- If 'char_span', the subtoken mask will be determined by looking at the character
                             indices.  Tokens that map to characters that are side-by-side are flagged as subtokens.

                             If 'subtoken_delimiters', subtokens will be identified by looking for '##' symbols.
                             However this can miss punctuated subtokens, such as 'U.S.'

        Returns boolean subtoken mask array with shape (num_queries, num_tokens)
        """
        num_queries = encodings['input_ids'].shape[0]
        subtoken_mask = []

        if method == 'char_span':
            for query_idx in range(num_queries):
                mask = []
                last_char = -1
                tokens = encodings.tokens(query_idx)

                for token_idx, word_id in enumerate(encodings.word_ids(query_idx)):
                    if word_id is None:  # skip special tokens
                        mask.append(False)
                        continue

                    chars = encodings.token_to_chars(query_idx, token_idx)

                    if chars[0] == last_char:
                        mask.append(True)
                    else:
                        mask.append(False)

                    last_char = chars[1]

                subtoken_mask.append(mask)

        elif method == 'subtoken_delimiters':
            for query_idx in range(num_queries):
                subtoken_mask.append([token.startswith('##') for token in encodings.tokens(query_idx)])
        else:
            raise ValueError(f"invalid method ('{method}')")

        return np.asarray(subtoken_mask)
        
       
     
    def inference(self, queries):
        start_time = time.time()
        
        """
        Perform intent/slot classification on the input query.
        
        Parameters:
          query (string) -- The text query, for example:
                             'What is the weather in San Francisco tomorrow?'

        Returns a dict with the following keys:
             'intent' (string) -- the classified intent label
             'score' (float) -- the intent probability [0,1]
             'slots' (list[dict]) -- a list of dicts, where each dict has the following keys:
                  'slot' (string) -- the slot label
                  'text' (string) -- the slot text from the query
                  'score' (float) -- the slot probability [0,1]
        """

        queries = ['kahve pişirmeyi başlat']
        
        self.null_slot = self.slot_labels()[-1]  # 'O' in assistant dataset - always the last label?
        
        encodings = self.tokenizer(
            text=query,
            padding='longest' if self.nlp_dynamic_shapes else 'max_length',
            truncation=True,
            max_length=self.config.model['language_model']['max_seq_length'],
            return_tensors='np',
            return_token_type_ids=True,
            return_overflowing_tokens=True,
            return_offsets_mapping=True,
            return_special_tokens_mask=True,
        )

        # during slot classification, we want to ignore slots from subtokens and special tokens 
        subtoken_mask = self.find_subtokens(encodings, method='subtoken_delimiters')
        ignore_mask = subtoken_mask | encodings['special_tokens_mask']
        
        
        # retrieve the inputs from the encoded tokens
        inputs = {}
        
        for input in self.model.inputs:
            if input.name not in encodings:
                raise ValueError(f"the encoded inputs from the tokenizer doesn't contain '{input.name}'")

            inputs[input.name] = encodings[input.name]
                    
        # run the model
        intent_logits, slot_logits = self.model.execute(inputs)
        
        intent_logits = self.normalize_logits(intent_logits)
        slot_logits = self.normalize_logits(slot_logits)

        intent_preds = np.argmax(intent_logits, axis=-1)
        slot_preds = np.argmax(slot_logits, axis=-1)

        # convert numerical outputs to intent/slot labels
        results = []

        for query_idx, intent_id in enumerate(intent_preds):
            results.append({
                'intent' : self.intent_label(intent_id),
                'score' : intent_logits[query_idx][intent_id],
                'slots' : []
            })
            
        for query_idx, slots in enumerate(slot_preds):
            query_slots = [self.slot_label(slot) for slot in slots]

            for token_idx, slot in enumerate(query_slots):
                # ignore unclassified slots or masked tokens
                if slot == self.null_slot or ignore_mask[query_idx][token_idx]:
                    continue
                    
                # convert from token index back to the query string
                chars = encodings.token_to_chars(query_idx, token_idx)
                text = query[chars[0]:chars[1]]      # queries[query_idx]
                
                # append subtokens from the query to the text
                for subtoken_idx in range(token_idx+1, len(query_slots)):
                    if subtoken_mask[query_idx][subtoken_idx]:
                        subtoken_chars = encodings.token_to_chars(query_idx, subtoken_idx)
                        text += query[subtoken_chars[0]:subtoken_chars[1]]
                    else:
                        break
                        
                results[query_idx]['slots'].append({
                    'slot' : slot,
                    'text' : text,
                    'score' : slot_logits[query_idx][token_idx][slots[token_idx]]
                })
        print("TRT inference time:", time.time() - start_time)

        if len(results) == 1:
            return results[0]
        else:
            return results
        
# Build the classifier once; the constructor loads the config, the TRT model and the tokenizer.
trt_nemo = IntentSlotClassificationTRTInference()

In [None]:
# Classify a sample Turkish query with the TRT-backed classifier built above.
query = ['iki kişilik türk kahvesi yap']
trt_nemo.inference(query)