# Notebook: Convert Annotations to Formatted JSON and Compare LLM Labels with Human Annotations

## Packages

In [180]:
from sklearn.metrics import accuracy_score, multilabel_confusion_matrix
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.path.abspath('../03 dataset split/'))
from format_labelstudio_json import format_json

In [181]:
sys.path.append(os.path.abspath('../07 train classifier/'))
from TASD.evaluation import calculate_metrics_for_examples
import constants

In [182]:
import pandas as pd
import json

## Settings

In [183]:
# Identifies the model whose labels are evaluated; interpolated into the annotation
# file name and into the split-file directory path below.
MODEL = "Llama13B"
# Presumably the few-shot example selection strategy (TODO confirm); interpolated into
# the annotation file name and into the split-file directory path below.
FEW_SHOT = "random"
# Presumably the number of annotated examples (TODO confirm); interpolated into the
# annotation file name only.
N_EXAMPLES = 25

## Code

In [184]:
# Load the annotation file selected by MODEL / FEW_SHOT / N_EXAMPLES.
# The file is opened explicitly as UTF-8: JSON interchange is UTF-8, and without an
# explicit encoding Python falls back to the platform default, which can garble
# non-ASCII text (e.g. German umlauts) on non-UTF-8 locales.
annotation_path = f"annotation_datasets/synth_annotation_labelstudio_output/annotation_{MODEL}_{FEW_SHOT}_{N_EXAMPLES}.json"
with open(annotation_path, 'r', encoding="utf-8") as json_file:
    synthetic_data = json.load(json_file)

In [185]:
# Collect the LLM-labelled examples of all split files into one flat list.
synth_llm_labels = []
for split_id in range(5):  # reads split_0.json ... split_4.json
    split_path = f"../07 train classifier/synth/{MODEL}/{FEW_SHOT}/split_{split_id}.json"
    # Explicit UTF-8 so decoding does not depend on the platform's default encoding.
    with open(split_path, 'r', encoding="utf-8") as json_file:
        synthetic_data_split = json.load(json_file)
    # Same result as appending element by element, without the inner loop.
    synth_llm_labels.extend(synthetic_data_split)

In [186]:
# ground truth - annotations
synth_annotated = format_json(synthetic_data)
synth_annotated_ids = [example["id"] for example in synth_annotated]

# Map every annotated id to its first position so that filtering and sorting use
# constant-time lookups instead of repeated list scans. `setdefault` keeps the first
# occurrence, matching what `list.index` would return.
synth_annotated_position = {}
for position, annotated_id in enumerate(synth_annotated_ids):
    synth_annotated_position.setdefault(annotated_id, position)


def get_index(example):
    """Return the position of `example` among the annotated examples, matched by "id"."""
    return synth_annotated_position[example["id"]]


# label from llm: keep only examples that were annotated, ordered like the annotations
synth_llm_labels = sorted(
    (example for example in synth_llm_labels if example["id"] in synth_annotated_position),
    key=get_index,
)

# The cells below compare both lists position by position, so they must line up
# one-to-one. Fail here with a clear message rather than later inside a metric call.
assert len(synth_llm_labels) == len(synth_annotated), (
    f"LLM labels ({len(synth_llm_labels)}) and annotations ({len(synth_annotated)}) "
    "are not aligned one-to-one; check for missing or duplicated ids"
)

In [187]:
# Number of LLM-labelled examples left after restricting to the annotated ids.
len(synth_llm_labels)

25

In [188]:
# Spot check: tags and text of the second annotated example.
synth_annotated[1]["tags"], synth_annotated[1]["text"]

([], 'Ja, das ist sicher eine Nostalgie für uns.')

In [189]:
# Spot check: tags and text of the LLM-labelled example at the same position.
synth_llm_labels[1]["tags"], synth_llm_labels[1]["text"]

([{'text': 'Nostalgie',
   'start': 24,
   'end': 33,
   'tag_with_polarity': 'GENERAL-IMPRESSION-POSITIVE',
   'tag_with_polarity_and_type': 'GENERAL-IMPRESSION-POSITIVE-explicit',
   'type': 'label-explicit',
   'label': 'GENERAL-IMPRESSION',
   'polarity': 'POSITIVE'}],
 'Ja, das ist sicher eine Nostalgie für uns.')

In [190]:
# Reduce every example to its list of (category, polarity, term) triplet dicts.
# NOTE: both names are rebound from "list of example dicts" to "list of triplet lists",
# so this cell cannot be re-run without first re-running the cells above.
synth_llm_labels = [
    [
        dict(
            aspect_category=tag["label"],
            aspect_polarity=tag["polarity"],
            aspect_term=tag["text"],
        )
        for tag in example["tags"]
    ]
    for example in synth_llm_labels
]

# Only the annotations map the 'NULL' placeholder term to None.
# NOTE(review): the LLM labels above are not normalized the same way - confirm that
# their terms never contain the literal 'NULL'.
synth_annotated = [
    [
        dict(
            aspect_category=tag["label"],
            aspect_polarity=tag["polarity"],
            aspect_term=None if tag["text"] == 'NULL' else tag["text"],
        )
        for tag in example["tags"]
    ]
    for example in synth_annotated
]

## Test: Check how many Triplets could be identified

In [191]:
# Triplet-level metrics: annotations are passed first, LLM labels second
# (presumably ground truth, then predictions - confirm against TASD.evaluation).
calculate_metrics_for_examples(synth_annotated, synth_llm_labels)

{'f1': 0.4193548387096774,
 'recall': 0.5416666666666666,
 'precision': 0.34210526315789475,
 'accuracy': 0.2653061224489796,
 'tp': 13,
 'tn': 0,
 'fp': 25,
 'fn': 11}

## Test: Compare aspect category annotations

In [192]:
def category_list_to_label(cat_list, categories=None):
    """Encode a list of category names as a multi-hot vector.

    Args:
        cat_list: Category names present in one example; duplicates are allowed.
        categories: Ordered label vocabulary defining the vector slots.
            Defaults to ``constants.ASPECT_CATEGORIES``.

    Returns:
        A list with one int per entry of ``categories``: 1 if that category occurs
        in ``cat_list``, otherwise 0. Names not in ``categories`` are ignored.
    """
    if categories is None:
        # Resolved at call time so the default always reflects the project constants.
        categories = constants.ASPECT_CATEGORIES
    return [1 if cat in cat_list else 0 for cat in categories]

In [193]:
# Multi-hot category vector per example, one for the annotations and one for the LLM labels.
synth_annotated_categories = [
    category_list_to_label([triplet["aspect_category"] for triplet in triplets])
    for triplets in synth_annotated
]
synth_llm_labels_categories = [
    category_list_to_label([triplet["aspect_category"] for triplet in triplets])
    for triplets in synth_llm_labels
]

In [194]:
# Annotations serve as ground truth, LLM labels as predictions.
true_classes, predicted_classes = synth_annotated_categories, synth_llm_labels_categories

# An example only counts as correct when all five category labels are predicted correctly.
accuracy = accuracy_score(true_classes, predicted_classes)
print(f'Accuracy: {accuracy}')

# One confusion matrix per category, used for the per-category report below.
confusion = multilabel_confusion_matrix(true_classes, predicted_classes)


Accuracy: 0.56


In [195]:
# Print confusion counts and derived metrics for every aspect category.
for i, category in enumerate(constants.ASPECT_CATEGORIES):
    # Each per-category matrix is indexed as [[tn, fp], [fn, tp]].
    (tn, fp), (fn, tp) = confusion[i]

    predicted_positive = tp + fp
    actual_positive = tp + fn

    # Fall back to 0.0 whenever a denominator would be zero.
    precision = tp / predicted_positive if predicted_positive > 0 else 0.0
    recall = tp / actual_positive if actual_positive > 0 else 0.0
    precision_plus_recall = precision + recall
    f1_score = 2 * (precision * recall) / precision_plus_recall if precision_plus_recall > 0 else 0.0
    acc = (tp + tn) / (tp + tn + fp + fn)

    # Round to three decimal places (display only; done after all metrics are computed).
    precision, recall, f1_score, acc = (
        round(value, 3) for value in (precision, recall, f1_score, acc)
    )

    print(category, "\n| True Positives: ", tp, "| True Negatives: ", tn, "| False Positives: ", fp, "| False Negatives: ", fn, "| Accuracy: ", acc, "| Precision: ", precision, "| Recall: ", recall, "| F1-Score: ", f1_score, "|")

GENERAL-IMPRESSION 
| True Positives:  2 | True Negatives:  18 | False Positives:  4 | False Negatives:  1 | Accuracy:  0.8 | Precision:  0.333 | Recall:  0.667 | F1-Score:  0.444 |
FOOD 
| True Positives:  4 | True Negatives:  17 | False Positives:  3 | False Negatives:  1 | Accuracy:  0.84 | Precision:  0.571 | Recall:  0.8 | F1-Score:  0.667 |
SERVICE 
| True Positives:  2 | True Negatives:  20 | False Positives:  2 | False Negatives:  1 | Accuracy:  0.88 | Precision:  0.5 | Recall:  0.667 | F1-Score:  0.571 |
AMBIENCE 
| True Positives:  8 | True Negatives:  15 | False Positives:  2 | False Negatives:  0 | Accuracy:  0.92 | Precision:  0.8 | Recall:  1.0 | F1-Score:  0.889 |
PRICE 
| True Positives:  5 | True Negatives:  17 | False Positives:  3 | False Negatives:  0 | Accuracy:  0.88 | Precision:  0.625 | Recall:  1.0 | F1-Score:  0.769 |


## Test: Check whether category and polarity were detected

In [196]:
# All "<category>_<polarity>" labels, ordered by category first, then by polarity.
AC_POLARITY_COMBINATIONS = [
    "_".join((cat, polarity))
    for cat in constants.ASPECT_CATEGORIES
    for polarity in constants.POLARITIES
]

In [197]:
def category_polarity_list_to_label(cat_pol_list, combinations=None):
    """Encode a list of "<category>_<polarity>" strings as a multi-hot vector.

    Args:
        cat_pol_list: Category/polarity strings present in one example; duplicates
            are allowed.
        combinations: Ordered label vocabulary defining the vector slots.
            Defaults to ``AC_POLARITY_COMBINATIONS``.

    Returns:
        A list with one int per entry of ``combinations``: 1 if that combination
        occurs in ``cat_pol_list``, otherwise 0. Strings not in ``combinations``
        are ignored.
    """
    if combinations is None:
        # Resolved at call time so the default follows the notebook-level constant.
        combinations = AC_POLARITY_COMBINATIONS
    return [1 if ac_pol in cat_pol_list else 0 for ac_pol in combinations]

In [198]:
# Multi-hot "<category>_<polarity>" vector per example.
# NOTE: this rebinds the two names that held the category-only vectors in the previous section.
synth_annotated_categories = [
    category_polarity_list_to_label(
        ["_".join((triplet["aspect_category"], triplet["aspect_polarity"])) for triplet in triplets]
    )
    for triplets in synth_annotated
]
synth_llm_labels_categories = [
    category_polarity_list_to_label(
        ["_".join((triplet["aspect_category"], triplet["aspect_polarity"])) for triplet in triplets]
    )
    for triplets in synth_llm_labels
]

In [199]:
# Annotations serve as ground truth, LLM labels as predictions.
true_classes, predicted_classes = synth_annotated_categories, synth_llm_labels_categories

# An example only counts as correct when every category/polarity label
# (one per entry of AC_POLARITY_COMBINATIONS) is predicted correctly.
accuracy = accuracy_score(true_classes, predicted_classes)
print(f'Accuracy: {accuracy}')

# One confusion matrix per category/polarity combination, used for the report below.
confusion = multilabel_confusion_matrix(true_classes, predicted_classes)

Accuracy: 0.32


In [200]:
# Print confusion counts and derived metrics for every category/polarity combination.
for i in range(len(AC_POLARITY_COMBINATIONS)):
    # Each per-label matrix is indexed as [[tn, fp], [fn, tp]].
    tp, tn, fp, fn = confusion[i][1][1], confusion[i][0][0], confusion[i][0][1], confusion[i][1][0]

    # Compute every metric from unrounded values. Rounding precision and recall first
    # would feed rounding error into the F1 score.
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    acc = (tp + tn) / (tp + tn + fp + fn)

    # Round to three decimal places for display only.
    precision = round(precision, 3)
    recall = round(recall, 3)
    f1_score = round(f1_score, 3)
    acc = round(acc, 3)

    print(AC_POLARITY_COMBINATIONS[i], "\n| True Positives: ", tp, "| True Negatives: ", tn, "| False Positives: ", fp, "| False Negatives: ", fn, "| Accuracy: ", acc, "| Precision: ", precision, "| Recall: ", recall, "| F1-Score: ", f1_score, "|")


GENERAL-IMPRESSION_POSITIVE 
| True Positives:  0 | True Negatives:  23 | False Positives:  2 | False Negatives:  0 | Accuracy:  0.92 | Precision:  0.0 | Recall:  0.0 | F1-Score:  0.0 |
GENERAL-IMPRESSION_NEUTRAL 
| True Positives:  0 | True Negatives:  23 | False Positives:  2 | False Negatives:  0 | Accuracy:  0.92 | Precision:  0.0 | Recall:  0.0 | F1-Score:  0.0 |
GENERAL-IMPRESSION_NEGATIVE 
| True Positives:  2 | True Negatives:  22 | False Positives:  0 | False Negatives:  1 | Accuracy:  0.96 | Precision:  1.0 | Recall:  0.667 | F1-Score:  0.8 |
FOOD_POSITIVE 
| True Positives:  1 | True Negatives:  22 | False Positives:  1 | False Negatives:  1 | Accuracy:  0.92 | Precision:  0.5 | Recall:  0.5 | F1-Score:  0.5 |
FOOD_NEUTRAL 
| True Positives:  0 | True Negatives:  23 | False Positives:  2 | False Negatives:  0 | Accuracy:  0.92 | Precision:  0.0 | Recall:  0.0 | F1-Score:  0.0 |
FOOD_NEGATIVE 
| True Positives:  2 | True Negatives:  22 | False Positives:  1 | False Negatives: