# DM pipeline evaluation
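This notebook is specifically designed to evaluate the DM (dialogue manager) component of the HMD project. It feeds injected dialogue states (simulated NLU outputs) together with the DM prompt to the model and scores the predicted next-best actions.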

## Setup model

In [None]:
%env HF_HOME=/kaggle/working #for Kaggle

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the HF_TOKEN
hf_token = os.getenv('HF_TOKEN')

# Never echo the secret itself: cell outputs are stored inside the notebook
# file and are easily shared or committed. Report only whether it was found.
print(f"HuggingFace token loaded: {hf_token is not None}")

In [2]:
# Short alias -> model identifier handed to `from_pretrained`.
MODELS = dict(
    llama3="meta-llama/Meta-Llama-3-8B-Instruct",
)

# Short alias -> chat template with two positional "{}" placeholders:
# the system prompt first, the user message second.
# NOTE(review): the template ends right after the assistant header and also
# spells out <|begin_of_text|>; confirm both match the intended Llama 3
# prompt format and the tokenizer's own special-token handling.
TEMPLATES = dict(
    llama3=(
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>"
    ),
)

In [3]:
# Flip to True on a first run to pre-fetch every checkpoint listed in MODELS.
FIRST_TIME = False

if FIRST_TIME:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def download_models(models):
        """Instantiate each model and its tokenizer once, which triggers their download.

        Args:
            models (dict): Mapping whose values are model identifiers.
        """
        for repo_id in models.values():
            # The returned objects are discarded; only the load itself matters.
            AutoModelForCausalLM.from_pretrained(
                repo_id, device_map="auto", torch_dtype=torch.float16
            )
            AutoTokenizer.from_pretrained(repo_id)

    download_models(MODELS)

In [4]:
import torch
import json

from typing import Tuple
from transformers import AutoModelForCausalLM, AutoTokenizer, BatchEncoding, PreTrainedTokenizer, PreTrainedModel

def load_model(model_name: str, dtype) -> Tuple[PreTrainedModel, PreTrainedTokenizer]:
    """Load a causal language model and its tokenizer.

    Args:
        model_name: Identifier passed to both `from_pretrained` calls.
        dtype: "bf16" selects torch.bfloat16; every other value falls back to
            torch.float32.

    Returns:
        The (model, tokenizer) pair, model first.
    """
    weights_dtype = torch.bfloat16 if dtype == "bf16" else torch.float32
    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_name, device_map="auto", torch_dtype=weights_dtype
    )
    return loaded_model, AutoTokenizer.from_pretrained(model_name)

def generate(
    model: PreTrainedModel,
    inputs: BatchEncoding,
    tokenizer: PreTrainedTokenizer,
    max_seq_length: int,
) -> str:
    """Run generation for one encoded prompt and return only the newly generated text.

    Args:
        model: Model whose `generate` method is called.
        inputs: Encoded prompt exposing `input_ids` and `attention_mask`.
        tokenizer: Supplies the pad token id (its EOS id) and decodes the result.
        max_seq_length: Forwarded to `model.generate` as `max_length`.

    Returns:
        The decoded continuation of the first sequence, special tokens removed.
    """
    # NOTE(review): no decoding options are passed here, so the strategy comes
    # from the model's own generation settings; if those enable sampling the
    # evaluation is not repeatable between runs — confirm.
    prompt_ids = inputs.input_ids
    sequences = model.generate(
        prompt_ids,
        attention_mask=inputs.attention_mask,
        max_length=max_seq_length,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Slice off the echoed prompt so only the continuation is decoded.
    prompt_length = len(prompt_ids[0])
    continuation = sequences[0][prompt_length:]
    return tokenizer.decode(continuation, skip_special_tokens=True)

In [None]:
# Run configuration
dtype = "bf16"
max_seq_length = 2048  # 1024 was the earlier setting

# `model_name` starts as the short alias and is rebound to the full model id
# once the matching chat template has been looked up.
model_name = "llama3"
chat_template, model_name = TEMPLATES[model_name], MODELS[model_name]

model, tokenizer = load_model(model_name, dtype)

## DM prompt

In [7]:
# Few-shot system prompt for the dialogue manager.
# Every confirmation example must answer with the *intent* name, because the
# evaluation cells build their ground truth as f"confirmation({intent})".
DM_PROMPT = """
You are a Dialogue Manager of a human dialogue system.
Given the outputs of the NLU component which is in a JSON format, you should only generate the next best actions from this list:
- request_details(slot), if a slot value is missing (None) by substituting slot with the missing slot name. The slot value is missing when None is the value.
- confirmation(intent), if all the values in the slots are filled. The slot is also filled when 'none' is the value.
- fallback_policy, if intent is out_of_domain.
- provide_information(entity), if the intent is request_information(entity).
- error_handling, if intent is set to error_handling.
Pay close attention that 'none' is not the same as None. 'none' is a filled field. None is an missing slot value.
Do not generate any other text.

For example:
INPUT: {'intent': 'chicken_ordering', 'slots': {'chicken_size': 'small', 'chicken_bones': 'yes', 'sauce_type': 'mushroom', 'side_dish': 'carrots', 'chicken_type': None}}
OUTPUT: request_details(chicken_type)

INPUT: {'intent': 'drink_ordering', 'slots': {'drink_size': 'small', 'drink_ice': 'no', 'drink_type': None}}
OUTPUT: request_details(drink_type)

INPUT: {'intent': 'dessert_ordering', 'slots': {'dessert_type': None, 'extra_whipped_cream': 'yes'}}
OUTPUT: request_details(dessert_type)

INPUT: {'intent': 'appetizer_ordering', 'slots': {'appetizer_moment': '2', 'appetizer_type': None}}
OUTPUT: request_details(appetizer_type)


INPUT: {'intent': 'chicken_ordering', 'slots': {'chicken_type': 'grilled', 'chicken_size': 'small', 'chicken_bones': 'yes', 'sauce_type': 'mushroom', 'side_dish': 'carrots'}}
OUTPUT: confirmation(chicken_ordering)

INPUT: {'intent': 'chicken_ordering', 'slots': {'chicken_type': 'roasted', 'chicken_size': 'medium', 'chicken_bones': 'no', 'sauce_type': 'none', 'side_dish': 'smashed potatoes'}}
OUTPUT: confirmation(chicken_ordering)

INPUT: {'intent': 'chicken_ordering', 'slots': {'chicken_type': 'grilled', 'chicken_size': 'small', 'chicken_bones': 'yes', 'sauce_type': 'pesto', 'side_dish': 'none'}}
OUTPUT: confirmation(chicken_ordering)

INPUT: {'intent': 'drink_ordering', 'slots': {'drink_size': 'large', 'drink_ice': 'yes', 'drink_type': 'no'}}
OUTPUT: confirmation(drink_ordering)

INPUT: {'intent': 'dessert_ordering', 'slots': {'dessert_type': 'smoothie', 'extra_whipped_cream': 'yes'}}
OUTPUT: confirmation(dessert_ordering)

INPUT: {'intent': 'appetizer_ordering', 'slots': {'appetizer_moment': '2', 'appetizer_type': 'crisps'}}
OUTPUT: confirmation(appetizer_ordering)
"""

## Evaluation methods
Use Accuracy for the next-best actions.

In [8]:
def evaluate_decision_accuracy(ground_truth, predicted):
    """
    Counts how many predicted actions exactly match their ground truth.

    Despite the name, this returns the number of matches, not a ratio:
    the calling cells divide by the number of attempted examples themselves.

    Args:
        ground_truth (list): Expected next-best actions.
        predicted (list): Predicted next-best actions, aligned by position.

    Returns:
        int: Number of positions where both entries are equal. Pairs are
        formed with zip, so surplus items of the longer list are ignored.
    """
    return sum(1 for gt, pred in zip(ground_truth, predicted) if gt == pred)

In [9]:
def output_analysis(gt, pred, sent):
    """Print each input next to its expected and predicted value.

    Args:
        gt: Ground-truth entries; their count decides how many rows are printed.
        pred: Predictions, aligned with `gt` by position.
        sent: Inputs, aligned with `gt` by position.
    """
    for position, expected in enumerate(gt):
        print(sent[position])
        print("GT:", expected)
        print("Prediction", pred[position])
        print()

## All predictions

Accumulators needed to calculate the overall accuracy across all intents.

In [10]:
# Running totals filled by the "request details" cells below.
predictions, ground_truths = [], []
global_exception_counter = 0

In [11]:
from itertools import product
import copy
from tqdm import tqdm

## Helper method

In [12]:
def generate_dialogue_states(options, intent):
    """Build every dialogue state in which exactly one slot is missing.

    Each slot in turn is set to None while the remaining slots take every
    combination of their listed values.

    Args:
        options (dict): Slot name -> list of possible values.
        intent (str): Intent stored in every generated state.

    Returns:
        list of (dict, str): The state and the name of its missing slot.
        Inside "slots" the filled slots come first and the missing one last.
    """
    slot_names = list(options)
    states = []
    for missing in slot_names:
        filled = [name for name in slot_names if name != missing]
        for combo in product(*(options[name] for name in filled)):
            slots = dict(zip(filled, combo))
            slots[missing] = None
            states.append(({"intent": intent, "slots": slots}, missing))
    return states

In [13]:
def generate_all_dialogue_states(options, intent):
    """Build every fully filled dialogue state for an intent.

    Args:
        options (dict): Slot name -> list of possible values.
        intent (str): Intent stored in every generated state.

    Returns:
        list of dict: One state per combination of slot values, with the
        slots in the same order as in `options`.
    """
    slot_names = list(options)
    return [
        {"intent": intent, "slots": dict(zip(slot_names, combo))}
        for combo in product(*(options[name] for name in slot_names))
    ]

In [14]:
def use_LLM(prompt, inp):
    """Fill the chat template, run the model and return the generated text.

    Relies on the notebook-level `chat_template`, `tokenizer`, `model` and
    `max_seq_length`.

    Args:
        prompt: Text placed in the template's first placeholder.
        inp: Value placed in the second placeholder (formatted via str.format).

    Returns:
        Whatever `generate` returns for the encoded prompt.
    """
    # NOTE(review): the tokenizer is called with default settings; if it adds
    # its own begin-of-text token the template's explicit one is duplicated —
    # confirm.
    filled_prompt = chat_template.format(prompt, inp)
    encoded = tokenizer(filled_prompt, return_tensors="pt").to(model.device)
    return generate(model, encoded, tokenizer, max_seq_length)

## Request details

### Intent 1 - Chicken ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "chicken_type": ["grilled", "roasted"],
    "chicken_size": ["small", "medium", "large"],
    "chicken_bones": ["yes", "no"],
    "sauce_type": ["mushroom", "pesto", "none"],
    "side_dish": ["carrots", "smashed potatoes", "none"]
}

# Generate all dialogue states in which exactly one slot is missing
dialogue_states = generate_dialogue_states(options, "chicken_ordering")
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, (nlu_out, skipped_slot) in enumerate(tqdm(dialogue_states)):
    gt = f"request_details({skipped_slot})"
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions.append(dm_out)
    ground_truths.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus) #no problems

### Intent 2 - drink ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "drink_type": ["Coca Cola", "Fanta", "Sprite", "Water"],
    "drink_size": ["small", "medium", "large"],
    "drink_ice": ["yes", "no"]
}

# Generate all dialogue states in which exactly one slot is missing
dialogue_states = generate_dialogue_states(options, "drink_ordering")
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, (nlu_out, skipped_slot) in enumerate(tqdm(dialogue_states)):
    gt = f"request_details({skipped_slot})"
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions.append(dm_out)
    ground_truths.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus) #no problems

### Intent 3 - dessert ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "dessert_type": ["tiramisu", "ice cream", "apple crumble pie", "waffles", "smoothie"],
    "extra_whipped_cream": ["yes", "no"]
}

# Generate all dialogue states in which exactly one slot is missing
dialogue_states = generate_dialogue_states(options, "dessert_ordering")
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, (nlu_out, skipped_slot) in enumerate(tqdm(dialogue_states)):
    gt = f"request_details({skipped_slot})"
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions.append(dm_out)
    ground_truths.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus) #no problems

### Intent 4 - table reservation

In [None]:
# Possible values for every slot of this intent
options = {
    "table_type": ["normal", "business", "romantic"],
    "table_size": ["2", "3", "20", "6", "5", "8", "9"],
    "sitting_equipment": ["chair(s)", "bench(es)", "mixed chairs and benches", "does not matter"],
    "birthday_surprise": ["yes", "no"]
}

# Generate all dialogue states in which exactly one slot is missing
dialogue_states = generate_dialogue_states(options, "table_reservation")
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, (nlu_out, skipped_slot) in enumerate(tqdm(dialogue_states)):
    gt = f"request_details({skipped_slot})"
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions.append(dm_out)
    ground_truths.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus) # numbering issue observed; should be fixed in a rerun

### Intent 5 - appetizer ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "appetizer_type": ["crisps", "simple salad", "deluxe salad", "tomato soup", "onion soup"],
    "appetizer_moment": ["2", "3", "20", "6", "5", "8", "9"]
}


# Generate all dialogue states in which exactly one slot is missing
dialogue_states = generate_dialogue_states(options, "appetizer_ordering")
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, (nlu_out, skipped_slot) in enumerate(tqdm(dialogue_states)):
    gt = f"request_details({skipped_slot})"
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions.append(dm_out)
    ground_truths.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

## Overall scores before we do the others

In [25]:
# Post-processing for the accumulated outputs: drop leading/trailing newlines,
# then remove every single and double quote.
predictions = [
    raw.strip("\n").replace("'", "").replace('"', "")
    for raw in predictions
]

In [None]:
# Examples that raised have no ground-truth entry, so they are added back to
# the denominator and therefore count as wrong.
attempted = len(ground_truths) + global_exception_counter
print("Grand total", len(ground_truths))
print("Attempted", attempted)

correct = evaluate_decision_accuracy(ground_truths, predictions)
acc = correct / attempted
print(f"OVERALL intent Accuracy: {acc:.2f}")

## Confirmation

In [27]:
# Running totals filled by the "confirmation" cells below.
predictions2, ground_truths2 = [], []
global_exception_counter2 = 0

### Intent 1 - chicken ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "chicken_type": ["grilled", "roasted"],
    "chicken_size": ["small", "medium", "large"],
    "chicken_bones": ["yes", "no"],
    "sauce_type": ["mushroom", "pesto", "none"],
    "side_dish": ["carrots", "smashed potatoes", "none"]
}

# Generate all fully filled dialogue states
intent = "chicken_ordering"
dialogue_states = generate_all_dialogue_states(options, intent)
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Every state here is complete, so the expected action is always the same.
gt = f"confirmation({intent})"

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, nlu_out in enumerate(tqdm(dialogue_states)):
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter2 += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions2.append(dm_out)
    ground_truths2.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

### Intent 2 - drink ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "drink_type": ["Coca Cola", "Fanta", "Sprite", "Water"],
    "drink_size": ["small", "medium", "large"],
    "drink_ice": ["yes", "no"]
}

# Generate all fully filled dialogue states
intent = "drink_ordering"
dialogue_states = generate_all_dialogue_states(options, intent)
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Every state here is complete, so the expected action is always the same.
gt = f"confirmation({intent})"

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, nlu_out in enumerate(tqdm(dialogue_states)):
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter2 += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions2.append(dm_out)
    ground_truths2.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

### Intent 3 - dessert ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "dessert_type": ["tiramisu", "ice cream", "apple crumble pie", "waffles", "smoothie"],
    "extra_whipped_cream": ["yes", "no"]
}

# Generate all fully filled dialogue states
intent = "dessert_ordering"
dialogue_states = generate_all_dialogue_states(options, intent)
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Every state here is complete, so the expected action is always the same.
gt = f"confirmation({intent})"

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, nlu_out in enumerate(tqdm(dialogue_states)):
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter2 += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions2.append(dm_out)
    ground_truths2.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

### Intent 4 - table reservation

In [None]:
# Possible values for every slot of this intent
options = {
    "table_type": ["normal", "business", "romantic"],
    "table_size": ["2", "3", "20", "6", "5", "8", "9"],
    "sitting_equipment": ["chair(s)", "bench(es)", "mixed chairs and benches", "does not matter"],
    "birthday_surprise": ["yes", "no"]
}

# Generate all fully filled dialogue states
intent = "table_reservation"
dialogue_states = generate_all_dialogue_states(options, intent)
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Every state here is complete, so the expected action is always the same.
gt = f"confirmation({intent})"

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, nlu_out in enumerate(tqdm(dialogue_states)):
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter2 += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions2.append(dm_out)
    ground_truths2.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

### Intent 5 - appetizer ordering

In [None]:
# Possible values for every slot of this intent
options = {
    "appetizer_type": ["crisps", "simple salad", "deluxe salad", "tomato soup", "onion soup"],
    "appetizer_moment": ["2", "3", "20", "6", "5", "8", "9"]
}

# Generate all fully filled dialogue states
intent = "appetizer_ordering"
dialogue_states = generate_all_dialogue_states(options, intent)
print("Total:", len(dialogue_states))

# The names np / ngt / nlus are kept because the inspection cell reads them.
np = []    # DM outputs for this intent
ngt = []   # expected next-best actions
nlus = []  # injected NLU outputs
exception_counter = 0

# Every state here is complete, so the expected action is always the same.
gt = f"confirmation({intent})"

# Wrap the list itself (not the enumerate object) so tqdm knows the total.
for idx, nlu_out in enumerate(tqdm(dialogue_states)):
    try:
        dm_out = use_LLM(DM_PROMPT, nlu_out)
    except Exception as e:
        # A failed generation is counted as attempted-but-wrong below.
        # Only names that exist in this cell are reported here; the previous
        # handler referenced undefined variables and raised NameError itself.
        exception_counter += 1
        global_exception_counter2 += 1
        print(f"An unexpected error occurred at iteration {idx}: {e}")
        print("NLU output:", nlu_out)
        print("Expected action:", gt)
        print()
        continue

    np.append(dm_out)
    ngt.append(gt)
    nlus.append(nlu_out)

    predictions2.append(dm_out)
    ground_truths2.append(gt)

# Post-processing: drop leading/trailing newlines and all quote characters
np = [pred.strip("\n").replace("'", "").replace('"', "") for pred in np]
correct = evaluate_decision_accuracy(ngt, np)
acc = correct/(len(ngt)+exception_counter)
print(f"Intent Accuracy: {acc:.2f}")

In [None]:
# Print every injected state with its expected and predicted action for manual review.
output_analysis(ngt, np, nlus)

In [38]:
# Post-processing for the accumulated outputs: drop leading/trailing newlines,
# then remove every single and double quote.
predictions2 = [
    raw.strip("\n").replace("'", "").replace('"', "")
    for raw in predictions2
]

In [None]:
# Examples that raised have no ground-truth entry, so they are added back to
# the denominator and therefore count as wrong.
attempted = len(ground_truths2) + global_exception_counter2
print("Grand total", len(ground_truths2))
print("Attempted", attempted)

correct = evaluate_decision_accuracy(ground_truths2, predictions2)
acc = correct / attempted
print(f"OVERALL intent Accuracy: {acc:.2f}")

### Tutti!!!

In [40]:
# Merge both experiments, "request details" results first, then "confirmation".
pred_tutti = [*predictions, *predictions2]
gt_tutti = [*ground_truths, *ground_truths2]

In [None]:
# Failures from both experiments are added back to the denominator and
# therefore count as wrong.
attempted = len(gt_tutti) + global_exception_counter + global_exception_counter2
print("Grand total TUTTI", len(gt_tutti))
print("Attempted", attempted)

correct = evaluate_decision_accuracy(gt_tutti, pred_tutti)
acc = correct / attempted
print(f"OVERALL intent Accuracy: {acc:.2f}")