In [1]:
# Load the environment variables
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
%load_ext autoreload
%autoreload 2
from llm_utils import limp_llm_plan, build_intent_dict #, transform_data



In [3]:
import json 



def extract_prosody_vectors(json_file_path, file_name):
    # read the data from the json object
    with open(json_file_path, 'r') as input_file:
        data = json.load(input_file) 

    file_contents = data[file_name]

    # select the words within the file_contents
    words_list = file_contents["words"]
    prosody_features = file_contents["features"]

    instruction = ' '.join(words_list) 
    extracted_prosody_vectors = transform_data(prosody_features)


    return extracted_prosody_vectors





In [4]:
## Test instructions:
"Pick up the book on the table with the red cover." #003
"Place the vase near the flowers on the table." #014
"Place the coke can beside the pringles on the counter." #017
"Bring the book and the magazine on the nightstand." #013
"Bring the mug from the table near the sink." #032

# Evaluation protocol:
# 1. For each instruction, we are running experiments for both interpretations:
#   for each interpretation, we are running experiments for all the speakers
#   The results that we are computing include:
#   * Detailed referrent resolution word error rate
#   * Goal referrent resolution word error rate
#   We will average the score obtained for these experiments across all of the speakers.

#2. For each instruction, measure the accuracy of the generated plan. 
# Did it get the plan right? True or False. (i.e Qualitative analysis)
# The final value generated should be a percentage measure.

'Bring the mug from the table near the sink.'

In [5]:

import json


def generate_task_plans(test_file_path, file_id):
    #read the test file
    with open(test_file_path, 'r') as input_file:
        file_content = json.load(input_file)
    
    #use the file id to find the plans
    desired_file_details = file_content[file_id]['Interpretations'] 
    task_plans = {'Task_plan_a': desired_file_details[0]['TaskPlan'], 'Task_plan_b': desired_file_details[1]['TaskPlan']}

    return task_plans


In [9]:
import json
import ast

def generate_robot_plan_v2(baseline_choice, input_file_path):
    file_list = ['014', '017', '032']

    with open(input_file_path, 'r') as input_json_data:
        input_data = json.load(input_json_data)

    #get all of the recording for each speaker for the given file_id
    speaker_recordings_content = {}

    for file_id in file_list:
        speaker_recordings_content[file_id] = []

        for input_data_element in input_data:
            #get the audio file name
            audio_file_name = input_data_element["audio_file"]

            #split the audio file name
            split_audio_file_name = audio_file_name.split('_')

            #get the file id for the current audio
            current_file_id = split_audio_file_name[2]

            #check to see if the current file id is one we are interested in
            if current_file_id == file_id:
                #get the speaker id
                speaker_id = split_audio_file_name[1]

                #get the interpretation for the text in the current audio file
                interpretation_id = split_audio_file_name[3]

                #get the intent dict
                intent_dict = input_data_element['intent_dict']

                print("Here is the intent dict", intent_dict)

                #get the instruction
                instruction = input_data_element['sentence']

                #get the possible task plans for the utterance in the audio
                task_plans = generate_task_plans('./llm_generated_plans/test_instruction_task_plans.json', current_file_id) #new_constructed_test_instruction_task_plans.json

                #construct the path to the right prompt
                baseline_prompt_path = f"prompts/{baseline_choice}_prompt.txt"
                
                #read the contents of the prompt file
                limp_llmplaner_incontext_prompt = open(baseline_prompt_path, "r").read()

                #get the output of the llm
                if baseline_choice == "choice_intent_label":
                    llm_output = limp_llm_plan(limp_llmplaner_incontext_prompt, instruction, task_plans, intent_dict=intent_dict, baseline=baseline_choice)
                elif baseline_choice == "choice_asr":
                    #obtain the output for the llm 
                    llm_output = limp_llm_plan(limp_llmplaner_incontext_prompt,instruction, task_plans, baseline=baseline_choice)
                else:
                    #generate the prosody vectors for the file
                    extracted_prosody_vectors = extract_prosody_vectors(input_file_path, input_data_element)
                    
                    #obtain the output for the llm
                    llm_output = limp_llm_plan(limp_llmplaner_incontext_prompt,instruction, task_plans, prosody_values=extracted_prosody_vectors, baseline=baseline_choice)
                
                llm_output_generated_for_speaker = {
                                                    'speaker': speaker_id, 
                                                    'instruction': instruction,
                                                    'interpretation_id': interpretation_id,
                                                    'baseline_choice':baseline_choice,
                                                    'llm_output': llm_output
                                                    }
                
                speaker_recordings_content[file_id].append(llm_output_generated_for_speaker)
    

    #write the generated llm output into an output json file
    baseline_output_llm_json = input_file_path.split('.json')[0] + '_' + baseline_choice + '_llm_output.json'

    with open(baseline_output_llm_json, 'w') as output_file:
        json.dump(speaker_recordings_content, output_file, indent=4)

    return baseline_output_llm_json




def evaluate_llm_output(llm_output_path):
    '''
    Takes in the path to the json file containing the llm outputs and 
    then checks to see the level of accuracy of the llm outputs

    Params:
        llm_output_path (str): This is the path to the generated llm output
    
    Returns:
        results (dict): Dict containing the evaluation results
    '''
    #read the json file
    with open(llm_output_path, 'r') as input_path:
        file_contents = json.load(input_path)
    
   
    results = {}

    #iterate through the read json file and count the number of correctly predicted plans
    for file_id in file_contents:
        num_corrects = 0
        num_wrongs = 0
        file_speaker_count = 0

        #get the generated results for each speaker for the corresponding file id
        file_id_generated_outputs = file_contents[file_id]

        for file_id_generated_output in file_id_generated_outputs:
            interpretation_id = file_id_generated_output["interpretation_id"]
            llm_output = file_id_generated_output["llm_output"]
            if llm_output != "":
                if llm_output.startswith("```json"):
                    if "\"\n}\n```" not in llm_output:
                        llm_output += "\"\n}\n```"

                    llm_output = llm_output.strip("```json").strip("```").strip()
            
                formatted_llm_output = llm_output.replace('\\"', '"').replace('"\n', '",\n')

                llm_output_dict = ast.literal_eval(formatted_llm_output)
                task_plan_choice = llm_output_dict["Task_plan_choice"]

                if interpretation_id == "i1" and task_plan_choice == "Task_plan_a":
                    num_corrects += 1
                    file_id_generated_output["score"] = 1
                elif interpretation_id == "i2" and task_plan_choice == "Task_plan_b":
                    num_corrects += 1
                    file_id_generated_output["score"] = 1
                else:
                    file_id_generated_output["score"] = 0
                    num_wrongs += 1
                
                #keep track of the file speaker count
                file_speaker_count += 1
        
        results[file_id] = {'num_corrects': num_corrects, 'num_wrongs': num_wrongs, 'total_file_count': file_speaker_count}

    

    #compute the percentage score
    num_corrects = 0
    total = 0
    num_wrongs = 0

    for file_number in results:
        results_dict = results[file_number]

        num_corrects += results_dict['num_corrects']

        total += results_dict['total_file_count']
    

    percentage_accuracy = (num_corrects/total) * 100
    print(results)

    

       
    return percentage_accuracy


def run_average_experiments(number_of_experiments, prompt_text_file):
    #"./new_data_json_files/filtered_prosody_transformer_multiclass_results.json", 
    # path_to_files = ["./new_data_json_files/corrected_filtered_prosody_bilstm_multiclass_results.json"]
    path_to_files = ["./new_json_for_eval/filtered_prosody_bilstm_multiclass_results.json"]

    for path_to_file in path_to_files:
        computed_averages = []

        for i in range(number_of_experiments):
            generated_output_path = generate_robot_plan_v2(prompt_text_file, path_to_file)
            computed_averages.append(evaluate_llm_output(generated_output_path))
        
        average_results = sum(computed_averages)/len(computed_averages)

        print('Average experiment results for '+ prompt_text_file + ' after '+ str(number_of_experiments) + ' experiments for ' + path_to_file + ' is ' + str(average_results) + '%')
        print('Total range of results for ' + prompt_text_file + ' are ', computed_averages)


    return


run_average_experiments(1, "choice_intent_label")
#run_average_experiments(1, "choice_asr")




Here is the intent dict {'Goal_intent': ['vase', 'table'], 'Avoidance_intent': [], 'Detail_intent': ['flowers']}
Executing Baseline: choice_intent_label
		Model: o1 || Deterministic?: True
Here is the intent dict {'Goal_intent': ['vase'], 'Avoidance_intent': [], 'Detail_intent': ['flowers', 'table']}
Executing Baseline: choice_intent_label
		Model: o1 || Deterministic?: True
Here is the intent dict {'Goal_intent': ['vase', 'table'], 'Avoidance_intent': [], 'Detail_intent': ['flowers']}
Executing Baseline: choice_intent_label
		Model: o1 || Deterministic?: True
Here is the intent dict {'Goal_intent': ['vase'], 'Avoidance_intent': [], 'Detail_intent': ['flowers', 'table']}
Executing Baseline: choice_intent_label
		Model: o1 || Deterministic?: True
Here is the intent dict {'Goal_intent': ['vase', 'table'], 'Avoidance_intent': [], 'Detail_intent': ['flowers']}
Executing Baseline: choice_intent_label
		Model: o1 || Deterministic?: True
Here is the intent dict {'Goal_intent': ['vase'], 'Avoi

SyntaxError: '{' was never closed (<unknown>, line 1)

In [None]:
def generate_robot_plan(baseline_choice, input_file_path):
    #construct a list holding the test ids of interest
    file_list = ['014', '017', '032']


    #read from the json file containing the prosody vectors
    #file_path = '../prosody/data/ambiguous_prosody_multi_label_features_eval_with_prosody_vectors.json'
    file_path = input_file_path

    with open(file_path, 'r') as input_json_data:
        input_data = json.load(input_json_data)

    #get all of the recordings for each speaker for the given file_id
    speaker_recordings_content = {}

    #iterate through file_list 
    for file_id in file_list:
        #place the key of the file id in the speaker_recordings_content dict
        speaker_recordings_content[file_id] = []


        for input_data_json_element in input_data:
            if 'choice_intent_label' in baseline_choice:
                json_element_key_list = input_data_json_element['audio_file'].split('_')

                #construct an intent_dict for the current input data json element
                words_list = input_data_json_element['words']
                instruction = ' '.join(words_list)
                labels_list = input_data_json_element['predicted_labels']
                constructed_intent_dict = {}
                for word_index in range(len(words_list)):
                    constructed_intent_dict[words_list[word_index]] = labels_list[word_index]
                constructed_passedin_intent_dict = build_intent_dict(constructed_intent_dict)

            else:
                json_element_key_list = input_data_json_element.split('_')
                #construct an intent_dict for the current input data json element
                words_list = input_data[input_data_json_element]['words']
                instruction = ' '.join(words_list)
 

            if file_id in json_element_key_list:
                #obtain the speaker id
                speaker_id = json_element_key_list[1]

                #get the interpretation id
                interpretation_id = json_element_key_list[3]


                #obtain the corresponding task plans for each of the instructions
                task_plans = generate_task_plans('new_constructed_test_instruction_task_plans.json', file_id)

            
                #construct the path to the right prompt
                baseline_prompt_path = f"prompts/{baseline_choice}_prompt.txt"

                #read the contents of the prompt file
                limp_llmplaner_incontext_prompt = open(baseline_prompt_path, "r").read()

                if baseline_choice == "asr":
                    #obtain the output for the llm 
                    limp_planner_output = limp_llm_plan(limp_llmplaner_incontext_prompt,instruction, task_plans, baseline=baseline_choice, verbose=False)

                elif baseline_choice == "choice_intent_label":
                    limp_planner_output = limp_llm_plan(limp_llmplaner_incontext_prompt,instruction, task_plans, intent_dict=constructed_passedin_intent_dict, baseline=baseline_choice, verbose=False)
                
                else:
                    #generate the prosody vectors for the file
                    extracted_prosody_vectors = extract_prosody_vectors(file_path, input_data_json_element)

                    #obtain the output for the llm
                    limp_planner_output = limp_llm_plan(limp_llmplaner_incontext_prompt,instruction, task_plans, prosody_values=extracted_prosody_vectors, baseline=baseline_choice, verbose=False)
                
                #construct json for the speaker that would be placed in the speaker_recordings_content
                llm_output_generated_for_speaker = {
                                                    'speaker': speaker_id, 
                                                    'instruction': instruction,
                                                    'interpretation_id': interpretation_id,
                                                    'baseline_choice':baseline_choice,
                                                    'llm_output': limp_planner_output
                                                    }
                
                speaker_recordings_content[file_id].append(llm_output_generated_for_speaker)
        


                
    #print(speaker_recordings_content)

    #write the generated llm output into an output json file
    baseline_output_llm_json = input_file_path.split('.')[0] + '_' + 'llm_output.json'
    with open(baseline_output_llm_json, 'w') as output_file:
        json.dump(speaker_recordings_content, output_file, indent=4)

    return


generate_robot_plan("choice_intent_label", "prosody_transformer_multiclass_results.json")
#generate_robot_plan("choice_prosody_feature", "ambiguous_prosody_multi_label_features_eval_with_prosody_vectors.json")

            
             
            




            
            

  

In [21]:
import json



def preprocess_asr_json(asr_json_path, new_asr_json_path):
    #read the json file
    with open(asr_json_path, 'r') as input_file:
        asr_json_obj = json.load(input_file)

    #initialise a new dict for the files
    new_output_json = {}
    
    #iterate through the json file and for each object, obtain the section
    #that has to do with the llm_output
    for json_element_file in asr_json_obj:
        new_output_json[json_element_file] = []

        for file_info in asr_json_obj[json_element_file]:
            llm_output = file_info['llm_output']

            #check to see if 'Explanation' is in the llm_output
            if 'Explanation:' in llm_output:
                #split the llm_output according to 'Explanation'
                llm_output = llm_output.split('Explanation:')[0]


            #take the first item of the resulting array and split it based on 'Task plan:'
            llm_output = llm_output.split('Task plan:')[1]

            #split the task plan based on new line characters
            llm_task_plan_list = llm_output.split('\n')

            #place the llm_output in the json with the new task list
            file_info['llm_output'] = llm_task_plan_list

            new_output_json[json_element_file].append(file_info)

    

    #write the new asr json to the output path
    with open(new_asr_json_path, 'w') as output_file:
        json.dump(new_output_json, output_file, indent=4)


    return


preprocess_asr_json('asr_llm_output.json', 'new_asr_llm_output.json')

In [24]:
import json



def preprocess_prosody_json(prosody_json_path, new_prosody_json_path):
    #read the json file
    with open(prosody_json_path, 'r') as input_file:
        asr_json_obj = json.load(input_file)

    #initialise a new dict for the files
    new_output_json = {}
    
    #iterate through the json file and for each object, obtain the section
    #that has to do with the llm_output
    for json_element_file in asr_json_obj:
        new_output_json[json_element_file] = []

        for file_info in asr_json_obj[json_element_file]:
            llm_output = file_info['llm_output']

      

            #take the first item of the resulting array and split it based on 'Task plan:'
            llm_output = llm_output.split('Task plan:')[1]

            #split the task plan based on new line characters
            llm_task_plan_list = llm_output.split('\n')

            #place the llm_output in the json with the new task list
            file_info['llm_output'] = llm_task_plan_list

            new_output_json[json_element_file].append(file_info)

    

    #write the new asr json to the output path
    with open(new_prosody_json_path, 'w') as output_file:
        json.dump(new_output_json, output_file, indent=4)


    return


preprocess_prosody_json('prosody_vector_only_llm_output.json', 'new_prosody_vector_only_llm_output.json')

In [14]:



evaluate_llm_output('./new_data_json_files/filtered_prosody_transformer_multiclass_results_llm_output.json')




{'014': {'num_corrects': 30, 'num_wrongs': 14, 'total_file_count': 44}, '017': {'num_corrects': 22, 'num_wrongs': 22, 'total_file_count': 44}, '032': {'num_corrects': 21, 'num_wrongs': 23, 'total_file_count': 44}}


55.3030303030303