## Notebook: Analyse LLM Synthesis Retries

## Packages

In [27]:
import numpy as np
import json

## Constants

In [28]:
LLMS = ["Llama70B", "GPT-3"] # "Llama70B", "GPT-3"
FEW_SHOT_CONDITIONS = ["fixed", "random"] # "fixed", "random"
N_SPLITS = 5

## Code

### Retries

In [29]:
language_statistics = {}

In [30]:
for llm in LLMS:
    language_statistics[llm] = {}
    for condition in FEW_SHOT_CONDITIONS:
        language_statistics[llm][condition] = {
            "n_examples": 0,
            "n_retries": 0,
            "more_than_25_retries": 0,
            "invalid_xml_schema": 0,
            "invalid_xml_tags": 0,
            "aspect_polarity_in_text_but_not_in_label": 0,
            "more_than_one_sentences": 0,
            "llm_empty_aspect_term": 0
        }

        for split in range(N_SPLITS):
            with open(f"../07 train models/synth/{llm}/{condition}/split_{split}.json", 'r') as file:
                synth_data_split = json.load(file)

            language_statistics[llm][condition]["n_examples"] += len(synth_data_split)
            language_statistics[llm][condition]["n_retries"] += np.sum(len(example["llm_retry_statistic"]) for example in synth_data_split)
            language_statistics[llm][condition]["more_than_25_retries"] += len([ex for ex in (len(example["llm_retry_statistic"]) for example in synth_data_split) if ex > 25])
            language_statistics[llm][condition]["invalid_xml_schema"] += sum(example["llm_invalid_xml_schema"] for example in synth_data_split)
            language_statistics[llm][condition]["invalid_xml_tags"] += sum(example["llm_invalid_xml_tags"] for example in synth_data_split)
            language_statistics[llm][condition]["aspect_polarity_in_text_but_not_in_label"] += sum(example["llm_aspect_polarity_in_text_but_not_in_label"] for example in synth_data_split)
            language_statistics[llm][condition]["more_than_one_sentences"] += sum(example["llm_more_than_one_sentences"] for example in synth_data_split)
            language_statistics[llm][condition]["empty_aspect_term"] += sum(example["llm_llm_empty_aspect_term"] for example in synth_data_split)
            language_statistics[llm][condition]["invalid_single_word_aspect_term_pos_tag"] += sum(example["llm_invalid_single_word_aspect_term_pos_tag"] for example in synth_data_split)



  language_statistics[llm][condition]["n_retries"] += np.sum(len(example["llm_retry_statistic"]) for example in synth_data_split)


In [31]:
language_statistics

{'Llama70B': {'fixed': {'n_examples': 7500,
   'n_retries': 985,
   'more_than_25_retries': 5,
   'invalid_xml_schema': 5,
   'invalid_xml_tags': 0,
   'aspect_polarity_in_text_but_not_in_label': 200,
   'more_than_one_sentences': 670,
   'no_german_language_total': 110},
  'random': {'n_examples': 7500,
   'n_retries': 985,
   'more_than_25_retries': 5,
   'invalid_xml_schema': 5,
   'invalid_xml_tags': 0,
   'aspect_polarity_in_text_but_not_in_label': 200,
   'more_than_one_sentences': 670,
   'no_german_language_total': 110}},
 'GPT-3': {'fixed': {'n_examples': 7500,
   'n_retries': 985,
   'more_than_25_retries': 5,
   'invalid_xml_schema': 5,
   'invalid_xml_tags': 0,
   'aspect_polarity_in_text_but_not_in_label': 200,
   'more_than_one_sentences': 670,
   'no_german_language_total': 110},
  'random': {'n_examples': 7500,
   'n_retries': 985,
   'more_than_25_retries': 5,
   'invalid_xml_schema': 5,
   'invalid_xml_tags': 0,
   'aspect_polarity_in_text_but_not_in_label': 200,
   '

### Duration

In [42]:
def convert_seconds_to_time(seconds):
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    
    time_string = "{:02}:{:02}:{:.2f}".format(int(hours), int(minutes), seconds)
    return time_string

In [47]:
duration_statistics = {}

In [48]:
for llm in LLMS:
    duration_statistics[llm] = {}
    for condition in FEW_SHOT_CONDITIONS:
        duration_statistics[llm][condition] = {}
        duration_statistics[llm][condition]["time_no_retries"] = []
        duration_statistics[llm][condition]["time_with_retries"] = []
        duration_statistics[llm][condition]["avg_gen_time_no_retries"] = []
        duration_statistics[llm][condition]["avg_gen_time_with_retries"] = []
        for split in range(N_SPLITS):
            with open(f"../07 train models/synth/{llm}/{condition}/split_{split}.json", 'r') as file:
                synth_data_split = json.load(file)
            duration_statistics[llm][condition]["time_no_retries"] += [example["llm_prediction_duration"] for example in synth_data_split]
            duration_statistics[llm][condition]["time_with_retries"] += [example["llm_prediction_duration"] for example in synth_data_split]
            duration_statistics[llm][condition]["time_with_retries"] += [example["llm_prediction_duration"] for main_example in synth_data_split for example in main_example["llm_retry_statistic"]]
  

        duration_statistics[llm][condition]["avg_gen_time_no_retries"] = convert_seconds_to_time(np.mean(duration_statistics[llm][condition]["time_no_retries"]))
        duration_statistics[llm][condition]["avg_gen_time_with_retries"] = convert_seconds_to_time(np.mean(duration_statistics[llm][condition]["time_with_retries"]))
        duration_statistics[llm][condition]["time_no_retries"] = convert_seconds_to_time(np.sum(duration_statistics[llm][condition]["time_no_retries"]))
        duration_statistics[llm][condition]["time_with_retries"] = convert_seconds_to_time(np.sum(duration_statistics[llm][condition]["time_with_retries"]))

In [49]:
duration_statistics

{'Llama70B': {'fixed': {'time_no_retries': '01:03:20.73',
   'time_with_retries': '01:03:20.73',
   'avg_gen_time_no_retries': '00:00:0.51',
   'avg_gen_time_with_retries': '00:00:0.51'},
  'random': {'time_no_retries': '01:02:20.77',
   'time_with_retries': '01:02:20.77',
   'avg_gen_time_no_retries': '00:00:0.50',
   'avg_gen_time_with_retries': '00:00:0.50'}},
 'GPT-3': {'fixed': {'time_no_retries': '01:02:16.22',
   'time_with_retries': '01:02:16.22',
   'avg_gen_time_no_retries': '00:00:0.50',
   'avg_gen_time_with_retries': '00:00:0.50'},
  'random': {'time_no_retries': '01:02:30.06',
   'time_with_retries': '01:02:30.06',
   'avg_gen_time_no_retries': '00:00:0.50',
   'avg_gen_time_with_retries': '00:00:0.50'}}}