# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

# In [None]:
import pandas as pd

# Load the results workbook into the module-level frame that
# generate_task_message reads its rows from.
RESULTS_WORKBOOK = 'results/df_all.xlsx'
df_all = pd.read_excel(RESULTS_WORKBOOK)


# Helpers and entry point for building the per-task feedback message of one
# annotator, compared against a ground-truth annotator.
def _parse_collection_cell(cell):
    """Return the collection stored in a results cell as a list.

    Cells are expected to hold the textual form of a Python collection
    (for example ``"[...]"``, ``"{...}"`` or ``"set()"``). Blank cells, which
    pandas reads back as NaN/None, and empty strings yield an empty list
    instead of crashing.
    """
    if isinstance(cell, str):
        text = cell.strip()
        if not text:
            return []
        # SECURITY NOTE(review): eval executes arbitrary code found in the
        # spreadsheet. It is kept because the exact cell format cannot be
        # confirmed from here; if the cells only ever contain plain literals,
        # switch to ast.literal_eval. Only run this on trusted result files.
        return list(eval(text))
    if isinstance(cell, (list, tuple, set, frozenset)):
        return list(cell)
    # NaN / None / anything else: treat as "no entries".
    return []


def _format_percent(value):
    """Format *value* as a percentage with two decimals, or "N/A".

    "N/A" is returned when the value is missing (NaN/None) or is not a
    number, e.g. the 'N/A' placeholder used for absent columns.
    """
    try:
        if pd.isna(value):
            return "N/A"
        return f"{value:.2%}"
    except (TypeError, ValueError):
        return "N/A"


def _detection_section(row, task_prefix, number, title, stem):
    """Build the TP/FP/FN + metrics section for one term-detection task.

    ``stem`` is the column-name part after the task prefix, e.g.
    ``'english_term_detection'``. FP/FN set entries are unpacked as
    ``(word, _, _, sentence_index)``.
    """
    key = f'{task_prefix}_{stem}'
    parts = [
        f"{number}. {title}:\n",
        f"    - Number of TP: {row.get(key + '_tp', 'N/A')}\n",
        f"    - Number of FP: {row.get(key + '_fp', 'N/A')}\n",
    ]
    parts.extend(
        f'    \t * "{word}" in {sentence_index} is incorrectly labeled as a term.\n'
        for word, _, _, sentence_index
        in _parse_collection_cell(row.get(key + '_fp_set', '[]'))
    )
    parts.append(f"    - Number of FN: {row.get(key + '_fn', 'N/A')}\n")
    parts.extend(
        f'    \t * "{word}" is a term in {sentence_index} but you missed it.\n'
        for word, _, _, sentence_index
        in _parse_collection_cell(row.get(key + '_fn_set', '[]'))
    )
    for label, suffix in (
        ("Precision", "_precision"),
        ("Recall", "_recall"),
        ("F1 Score", "_f1_score"),
        ("Accuracy", "_accuracy"),
    ):
        parts.append(
            f"    - {label}: {_format_percent(row.get(key + suffix, 'N/A'))}\n"
        )
    return ''.join(parts)


def _mismatch_lines(difference_entries, reference_entries, verb):
    """Describe entries whose value differs from the reference on the same span.

    Both inputs hold 5-tuples unpacked as
    ``(word, start, end, sentence_index, value)``. An entry is paired with the
    first reference entry that has the same ``(start, end)``; a line is
    produced only when the two values differ. ``verb`` is the phrase used in
    the message ("labeled", "rectified", "linked").
    """
    # NOTE(review): spans are paired on (start, end) only, exactly as before;
    # if offsets are sentence-relative, sentence_index should probably be part
    # of the key as well - confirm against the code that produces these sets.
    reference_by_span = {}
    for _, start_gt, end_gt, _, value_gt in reference_entries:
        # setdefault keeps the first reference entry for a span.
        reference_by_span.setdefault((start_gt, end_gt), value_gt)

    lines = []
    for word, start, end, sentence_index, value in difference_entries:
        span = (start, end)
        if span in reference_by_span and value != reference_by_span[span]:
            lines.append(
                f'    \t * "{word}" is {verb} as "{value}" in {sentence_index} '
                f'but it should be "{reference_by_span[span]}".\n'
            )
    return ''.join(lines)


def generate_task_message(annotator_mail, task_number, ground_truth_mail, df=None):
    """Build the textual feedback for one annotator on one task.

    Args:
        annotator_mail: Value of the ``mail`` column identifying the annotator.
        task_number: Task number; columns named ``task_<number>_...`` are used.
        ground_truth_mail: Value of the ``mail`` column identifying the row
            used as ground truth for label/correction/linking comparisons.
        df: Results frame to read from. Defaults to the module-level
            ``df_all`` when omitted, which is the original behavior.

    Returns:
        The multi-line message, or a short "No data found ..." /
        "No ground truth data found ..." string when the respective row
        (or any column for the task) is missing. Missing or non-numeric
        metrics are rendered as "N/A". If several rows share a mail, the
        first one is used.
    """
    if df is None:
        df = df_all

    task_prefix = f'task_{task_number}'
    # The trailing underscore stops task_1 from also selecting task_10, task_11, ...
    column_prefix = f'{task_prefix}_'
    task_columns = [col for col in df.columns if col.startswith(column_prefix)]

    annotator_rows = df.loc[df['mail'] == annotator_mail, ['mail'] + task_columns]
    ground_truth_rows = df.loc[df['mail'] == ground_truth_mail, task_columns]

    if annotator_rows.empty:
        return f"No data found for {annotator_mail} on Task {task_number}."

    if ground_truth_rows.empty:
        return f"No ground truth data found for {ground_truth_mail} on Task {task_number}."

    # Take the first matching row explicitly; squeeze() changes its return
    # type (scalar / Series / DataFrame) with the number of matches.
    annotator_data = annotator_rows.iloc[0]
    ground_truth_data = ground_truth_rows.iloc[0]

    def count(name):
        return annotator_data.get(f'{task_prefix}_{name}', 'N/A')

    def percent(name):
        return _format_percent(annotator_data.get(f'{task_prefix}_{name}', 'N/A'))

    def mismatches(stem, verb):
        differences = _parse_collection_cell(
            annotator_data.get(f'{task_prefix}_{stem}_difference_set', '[]')
        )
        references = _parse_collection_cell(
            ground_truth_data.get(f'{task_prefix}_{stem}_intersection_set', '[]')
        )
        return _mismatch_lines(differences, references, verb)

    parts = [f"Task {task_number} results are as follows:\n"]

    # 1. and 2. Term detection (English, Turkish)
    parts.append(_detection_section(
        annotator_data, task_prefix, 1, "English Term Detection",
        "english_term_detection",
    ))
    parts.append(_detection_section(
        annotator_data, task_prefix, 2, "Turkish Term Detection",
        "turkish_term_detection",
    ))

    # 3. Turkish Translation Labels (exact match is printed before the details)
    parts.append(
        "3. Turkish Translation Labels:\n"
        f"    - Number of Intersection: {count('turkish_labels_intersection_num')}\n"
        f"    - Number of Difference: {count('turkish_labels_difference_num')}\n"
        f"    - Exact Match: {percent('turkish_labels_exact_match')}\n"
    )
    parts.append(mismatches('turkish_labels', 'labeled'))

    # 4. Turkish Translation Corrections
    parts.append(
        "4. Turkish Translation Corrections:\n"
        f"    - Number of Intersection: {count('turkish_corrections_intersection_num')}\n"
        f"    - Number of Difference: {count('turkish_corrections_difference_num')}\n"
    )
    parts.append(mismatches('turkish_corrections', 'rectified'))
    parts.append(f"    - Exact Match: {percent('turkish_corrections_exact_match')}\n")

    # 5. English Term Linking
    parts.append(
        "5. English Term Linking:\n"
        f"    - Number of TP: {count('english_term_linking_intersection_num')}\n"
        f"    - Number of Difference: {count('english_term_linking_difference_num')}\n"
    )
    parts.append(mismatches('english_term_linking', 'linked'))
    parts.append(f"    - Exact Match: {percent('english_term_linking_exact_match')}\n")

    return ''.join(parts)


# Example usage: fill in the annotator and ground-truth mails, then print
# the feedback message for the chosen task.
annotator_mail, task_number, GROUND_TRUTH_MAIL = '', 10, ''
task_report = generate_task_message(annotator_mail, task_number, GROUND_TRUTH_MAIL)
print(task_report)
