In [None]:
import pandas as pd
from pathlib import Path
import collections
import numpy as np

# Parent of the resolved current working directory; the CSV files loaded in the
# cells below (data.csv, error_data.csv, error_data_mtl.csv) are read from here.
current_path = Path().resolve().parent
print(current_path)

In [2]:
def parse_file(file_path):
    """Read a whitespace-separated ``token label prediction`` file.

    Every non-blank line must contain exactly three whitespace-separated
    fields. A blank line closes the current instance.

    Fix over the previous version: the last instance is no longer lost when
    the file does not end with a blank line.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``(tokens, labels, predictions)`` tuples, one per instance,
        each element being a list of strings. If the file is missing the list
        is empty; if a line is malformed, only the instances completed before
        that line are returned. In both cases a message is printed instead of
        raising.
    """
    instances = []
    tokens, labels, predictions = [], [], []
    try:
        with open(file_path, encoding='utf-8') as nf:
            for line in nf:
                if line.strip():
                    # Raises ValueError when the line does not have exactly three fields.
                    tk, lb, pr = line.split()
                    tokens.append(tk)
                    labels.append(lb)
                    predictions.append(pr)
                else:
                    instances.append((tokens, labels, predictions))
                    tokens, labels, predictions = [], [], []
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except ValueError:
        print(f"Invalid file format: {file_path}")
    else:
        # Flush the trailing instance when the file has no final blank line.
        if tokens:
            instances.append((tokens, labels, predictions))

    return instances

def _collect_spans(label_sequence):
    """Return the ``(start, end)`` spans found in one label sequence.

    A ``'1'`` label sets the span start, a ``'0'`` label closes an open span
    (``end`` is the index of that ``'0'``), and any other label leaves the
    state untouched. A span still open after the last label is closed with
    ``end = len(label_sequence)``.

    NOTE(review): a ``'1'`` seen while a span is already open overwrites the
    start without emitting the earlier span. Whether that is intended depends
    on the labelling scheme, which is not visible here -- confirm.
    """
    spans = []
    open_at = None
    for position, label in enumerate(label_sequence):
        if label == '1':
            open_at = position
        elif label == '0' and open_at is not None:
            spans.append((open_at, position))
            open_at = None

    # Equivalent to the "last index" check done inside the loop originally.
    if open_at is not None:
        spans.append((open_at, len(label_sequence)))
    return spans


def get_indices_from_sequence(sequence_gold, sequence_prediction):
    """Extract the spans of the gold and predicted label sequences.

    Args:
        sequence_gold: Sequence of gold labels (strings).
        sequence_prediction: Sequence of predicted labels (strings).

    Returns:
        ``(gold_indices, pred_indices)``: two lists of ``(start, end)`` tuples
        with an exclusive ``end``.
    """
    pred_indices = _collect_spans(sequence_prediction)
    gold_indices = _collect_spans(sequence_gold)
    return gold_indices, pred_indices

def calculate_R_index(indices_gold, indices_prediction):
    """Build the overlap-ratio matrix between predicted and gold spans.

    For every (predicted, gold) pair the ratio is the length of the
    intersection of the two ``(start, end)`` intervals divided by the length
    of the longer interval, rounded to three decimals.

    Args:
        indices_gold: List of ``(start, end)`` gold spans.
        indices_prediction: List of ``(start, end)`` predicted spans.

    Returns:
        ``(r_indices, predicted_arguments_names, gold_arguments_names)`` where
        ``r_indices`` has one row per predicted span and one column per gold
        span, and the name lists are ``['P0', 'P1', ...]`` and
        ``['A0', 'A1', ...]``.
    """
    gold_arguments_names = [f'A{i}' for i, _ in enumerate(indices_gold)]
    predicted_arguments_names = [f'P{i}' for i, _ in enumerate(indices_prediction)]

    def overlap_ratio(pred_span, gold_span):
        pred_start, pred_end = pred_span
        gold_start, gold_end = gold_span
        # Disjoint intervals give a negative difference, clamped to zero.
        shared = max(0, min(pred_end, gold_end) - max(pred_start, gold_start))
        longest = max(gold_end - gold_start, pred_end - pred_start)
        return round(shared / longest, 3)

    r_indices = [
        [overlap_ratio(pred_span, gold_span) for gold_span in indices_gold]
        for pred_span in indices_prediction
    ]

    return r_indices, predicted_arguments_names, gold_arguments_names

def get_asociations(r_matrix, pred_arguments, gold_arguments):
    """Link every predicted argument to the gold arguments it overlaps.

    Args:
        r_matrix: Rows of overlap ratios, one row per predicted argument and
            one column per gold argument.
        pred_arguments: Names of the predicted arguments, aligned with the rows.
        gold_arguments: Names of the gold arguments, aligned with the columns.

    Returns:
        ``(output, golds_unrecognized)``: ``output`` maps each predicted name
        to a list of ``(gold_name, ratio)`` pairs with ``ratio > 0``;
        ``golds_unrecognized`` is the set of gold names that no predicted
        argument overlaps.
    """
    associations = {}
    matched_gold_names = set()

    for r_row, pred_name in zip(r_matrix, pred_arguments):
        overlaps = [
            (gold_arguments[column], ratio)
            for column, ratio in enumerate(r_row)
            if ratio > 0
        ]
        associations[pred_name] = overlaps
        matched_gold_names.update(gold_name for gold_name, _ in overlaps)

    return associations, set(gold_arguments).difference(matched_gold_names)

def get_brother_list(Pname, relations, asignacion, beta):
    """Find the 'brothers' of a predicted argument.

    For every gold argument that ``Pname`` overlaps with ratio ``>= beta``,
    collect the other predicted arguments that are currently categorised as
    ``'MISP'`` and overlap that same gold argument with ratio ``>= beta``.

    Args:
        Pname: Name of the predicted argument being examined.
        relations: Mapping ``predicted name -> [(gold name, ratio), ...]``.
        asignacion: Mapping ``predicted name -> category``.
        beta: Minimum ratio for an overlap to be considered.

    Returns:
        A list with one entry per gold argument that has at least one brother.
        Each entry is a list of ``(predicted name, gold name, ratio)`` tuples:
        the brothers first, then the tuple for ``Pname`` itself.
    """
    brothers = []

    for gold_name, own_ratio in relations[Pname]:
        if own_ratio >= beta:
            siblings = [
                (other_name, gold_name, other_ratio)
                for other_name, other_links in relations.items()
                if other_name != Pname and asignacion[other_name] == 'MISP'
                for linked_gold, other_ratio in other_links
                if gold_name == linked_gold and other_ratio >= beta
            ]
            if siblings:
                brothers.append(siblings + [(Pname, gold_name, own_ratio)])

    return brothers

def asign_category_to_predicted_arguments(predicted_arguments_relations, alpha = 0.7, beta = 0.35):
    """Assign a category label to every predicted argument.

    First pass, based on the ``(gold name, ratio)`` list of each prediction:

    * no related gold argument -> ``'MU'``
    * exactly one related gold argument with ratio ``v``:
      ``v == 1`` -> ``'PM'``; ``alpha <= v < 1`` -> ``'IMP'``;
      ``0 < v < alpha`` -> ``'MISP'``; anything else prints ``'Error'`` and
      leaves the prediction without a category.
    * several related gold arguments: ``'MISP'`` when the ratios sum to less
      than ``alpha``; otherwise ``'Mg'`` when every ratio is ``>= beta``,
      ``'IMP'`` when at least one ratio is ``>= alpha``, else ``'MISP'``.

    Second pass: for every prediction still labelled ``'MISP'``, each brother
    group returned by ``get_brother_list`` whose ratios sum to at least
    ``alpha`` has all of its members relabelled ``'SP'``. Relabelling happens
    while iterating, so later predictions see the updated categories.

    Args:
        predicted_arguments_relations: Mapping
            ``predicted name -> [(gold name, ratio), ...]``.
        alpha: Upper threshold used by the rules above.
        beta: Lower threshold used by the rules above.

    Returns:
        Dict mapping each predicted name to its category string.
    """
    category_predicted_arguments = {}

    # ---- First pass: categorise each prediction on its own. ----
    for Pi, Li in predicted_arguments_relations.items():
        related_count = len(Li)

        if related_count == 0:
            category_predicted_arguments[Pi] = 'MU'
            continue

        if related_count == 1:
            _, v = Li[0]
            if v == 1:
                category_predicted_arguments[Pi] = 'PM'
            elif alpha <= v < 1:
                category_predicted_arguments[Pi] = 'IMP'
            elif 0 < v < alpha:
                category_predicted_arguments[Pi] = 'MISP'
            else:
                print('Error')
            continue

        # Several gold arguments are related to this prediction.
        r_values = [float(pair[1]) for pair in Li]
        every_at_least_beta = all(r >= beta for r in r_values)
        some_at_least_alpha = any(r >= alpha for r in r_values)

        if sum(r_values) < alpha:
            label = 'MISP'
        elif every_at_least_beta:
            label = 'Mg'
        elif some_at_least_alpha:
            label = 'IMP'
        else:
            label = 'MISP'
        category_predicted_arguments[Pi] = label

    # ---- Second pass: promote groups of MISP brothers to SP. ----
    # Only values of existing keys are replaced, so iterating the live dict is safe.
    for Pi, cat in category_predicted_arguments.items():
        if cat != 'MISP':
            continue

        for hList in get_brother_list(Pi, predicted_arguments_relations, category_predicted_arguments, beta):
            members = [entry[0] for entry in hList]
            group_total = sum([float(entry[2]) for entry in hList])
            if group_total >= alpha:
                for member in members:
                    category_predicted_arguments[member] = 'SP'

    return category_predicted_arguments

def get_best_run_number(row):
    """Return the index of the run with the highest value in ``row``.

    Args:
        row: A pandas row (Series) containing the columns ``run0`` .. ``run9``.

    Returns:
        The position (0-9) of the maximum among those ten columns, as returned
        by ``np.argmax``.
    """
    run_columns = [f'run{run_id}' for run_id in range(10)]
    run_scores = row[run_columns].values
    best_position = np.argmax(run_scores)
    return best_position

def r_average(counter, total_gold_argument_units):
    """Average the second element of every entry in ``counter``.

    Args:
        counter: Sized collection of entries whose element at index 1 is numeric.
        total_gold_argument_units: Denominator for the second average.

    Returns:
        ``(sum / len(counter), sum / total_gold_argument_units, len(counter))``
        with both averages rounded to two decimals.
    """
    accumulated = sum(entry[1] for entry in counter)
    entry_count = len(counter)
    mean_over_entries = round(accumulated / entry_count, 2)
    mean_over_gold = round(accumulated / total_gold_argument_units, 2)
    return mean_over_entries, mean_over_gold, entry_count

def show_results(train, test, counter, tot_pred, tot_pred_plus_unr, total_gold):
    """Print the category counts and their percentages as '&'-separated rows.

    Output, in order: a ``train - test`` header, the three totals, one row of
    raw counts, one row of percentages relative to ``total_gold`` (rounded to
    one decimal and terminated by two backslashes), and an empty line.

    Args:
        train: Label of the training set, printed in the header.
        test: Label of the test set, printed in the header.
        counter: Mapping with the keys ``PM``, ``IMP``, ``MISP``, ``SP``,
            ``Mg``, ``MU`` and ``UNR``.
        tot_pred: Printed as the first total.
        tot_pred_plus_unr: Printed as the second total.
        total_gold: Printed as the third total; also the percentage divisor.
    """
    categories = ('PM', 'IMP', 'MISP', 'SP', 'Mg', 'MU', 'UNR')
    divisor = total_gold

    print(train, '-', test)
    print(tot_pred, tot_pred_plus_unr, total_gold)

    counts_row = " & ".join(f"{counter[name]}" for name in categories)
    print(f"& {counts_row}")

    percent_row = " & ".join(
        f"{round((counter[name] / divisor)*100, 1)}" for name in categories
    )
    print(f"& {percent_row} \\\\")
    print()

In [None]:
# Load the per-run scores and record, for every row, which of run0..run9 scored highest.
f1_results = pd.read_csv(current_path / 'data.csv')
f1_results = f1_results.assign(
    best_run=lambda frame: frame.apply(get_best_run_number, axis=1)
)

print(f1_results.shape)
f1_results.head()

In [None]:
# Load the error-category table and append DISP = MISP + IMP as its last column.
error_df = pd.read_csv(current_path / 'error_data.csv').assign(
    DISP=lambda frame: frame['MISP'] + frame['IMP']
)

error_df

In [None]:
# For each training set, print one " & "-separated row of percentages for the
# `bert_crf` model evaluated on the `abam` test set.
test = 'abam'

train_set = ['we', 'abam']
model = 'bert_crf'

for train in train_set:
    print(train, '-', test, '-', model)
    # First row of error_df matching (train, test, model), as a plain list.
    # Raises IndexError when no row matches.
    row = error_df[(error_df['train'] == train) & (error_df['test'] == test) & (error_df['model'] == model)].values[0].tolist()
    # Columns are addressed by position; presumably 3 is the gold-argument count
    # and 4 the predicted-argument count -- TODO confirm against the CSV header.
    number_gold_arguments = row[3]
    number_pred_arguments = row[4]
    output_string = []
    # Six values: row[5], the last column (row[-1]) and row[8] .. row[11].
    values_to_show = [row[5], row[-1]] + row[8:12]
    for i, val in enumerate(values_to_show):
        # The sixth value (i == 5) is expressed as a percentage of the gold count,
        # every other value as a percentage of the predicted count.
        new_val = round((val / number_pred_arguments)*100, 1) if i != 5 else round((val / number_gold_arguments)*100, 1)
        output_string.append(new_val)

    # Insert the complement to 100 of the last percentage right before it.
    output_string = output_string[:-1] + [round(100-output_string[-1], 1), output_string[-1]]
    print(" & ".join([str(x) for x in output_string]))
    print()

In [None]:
# Collect the percentages of every model in `models` (trained on `mix1`, tested
# on `pe`) into one flat list, one run of six values per model.
test = 'pe'

train = 'mix1'
# NOTE(review): both entries are the same model, so the two halves compared
# afterwards are identical -- confirm whether a second model was intended.
models = ['bert_crf', 'bert_crf']

# Shared across loop iterations on purpose: values of all models are appended to it.
output_string = []

for m in models:
    print(train, '-', test, '-', m)
    # First row of error_df matching (train, test, m), as a plain list.
    # Raises IndexError when no row matches.
    row = error_df[(error_df['train'] == train) & (error_df['test'] == test) & (error_df['model'] == m)].values[0].tolist()
    # Columns are addressed by position; presumably 3 is the gold-argument count
    # and 4 the predicted-argument count -- TODO confirm against the CSV header.
    number_gold_arguments = row[3]
    number_pred_arguments = row[4]
    # Six values: row[5], the last column (row[-1]) and row[8] .. row[11].
    values_to_show = [row[5], row[-1]] + row[8:12]
    for i, val in enumerate(values_to_show):
        # The fifth value (i == 4) is expressed as a percentage of the predicted
        # count, every other value as a percentage of the gold count.
        new_val = round((val / number_gold_arguments)*100, 1) if i != 4 else round((val / number_pred_arguments)*100, 1)
        output_string.append(new_val)


def get_string_for_latex(values):
    """Format two concatenated result rows, marking the better cell of each pair.

    ``values`` is split in the middle; element ``i`` of the first half is
    compared with element ``i`` of the second half. For position 0 the larger
    value is the better one, for every other position the smaller value is.
    The better value is wrapped in ``\\better{...}``; when neither value wins
    (e.g. a tie) both are wrapped. With an odd number of values, the trailing
    element of the second half has no partner and is left out.

    Args:
        values: Flat list holding the first row followed by the second row.

    Returns:
        The formatted cells of the first half followed by those of the second
        half, joined with ``" & "``.
    """
    def highlighted(value):
        return f"\\better{{{value}}}"

    def plain(value):
        return f"{value}"

    middle = len(values) // 2
    left_values, right_values = values[:middle], values[middle:]

    left_cells, right_cells = [], []
    for position, (left, right) in enumerate(zip(left_values, right_values)):
        higher_is_better = position == 0
        left_wins = left > right if higher_is_better else left < right
        right_wins = left < right if higher_is_better else left > right

        if left_wins:
            left_cells.append(highlighted(left))
            right_cells.append(plain(right))
        elif right_wins:
            left_cells.append(plain(left))
            right_cells.append(highlighted(right))
        else:
            # Neither side wins: mark both.
            left_cells.append(highlighted(left))
            right_cells.append(highlighted(right))

    return " & ".join(left_cells + right_cells)


# `output_string` holds the values of all models back to back, so the two halves
# compared by get_string_for_latex correspond to the two entries of `models`.
print(get_string_for_latex(output_string))

In [None]:
# For each training set, print one " & "-separated row of percentages for the
# `bert_crf` model evaluated on the `abam` test set.
test = 'abam'

# train_set = ['pe', 'we', 'abam', 'mix1']
train_set = ['mix1']
model = 'bert_crf'

for train in train_set:
    print(train, '-', test, '-', model)
    # First row of error_df matching (train, test, model), as a plain list.
    # Raises IndexError when no row matches.
    row = error_df[(error_df['train'] == train) & (error_df['test'] == test) & (error_df['model'] == model)].values[0].tolist()
    # Columns are addressed by position; presumably 3 is the gold-argument count
    # and 4 the predicted-argument count -- TODO confirm against the CSV header.
    number_gold_arguments = row[3]
    number_pred_arguments = row[4]
    output_string = []
    # Six values: row[5], the last column (row[-1]) and row[8] .. row[11].
    values_to_show = [row[5], row[-1]] + row[8:12]
    for i, val in enumerate(values_to_show):
        # The fifth value (i == 4) is expressed as a percentage of the predicted
        # count, every other value as a percentage of the gold count.
        new_val = round((val / number_gold_arguments)*100, 1) if i != 4 else round((val / number_pred_arguments)*100, 1)
        output_string.append(new_val)

    # output_string = output_string[:-1] + [round(100-output_string[-1], 1), output_string[-1]]
    print(" & ".join([str(x) for x in output_string]))
    print()

# MTL

In [None]:
# Load the multi-task error table and append DISP = MISP + IMP as its last column.
mtl_error_df = pd.read_csv(current_path / 'error_data_mtl.csv').assign(
    DISP=lambda frame: frame['MISP'] + frame['IMP']
)

mtl_error_df

In [None]:
# For each test set, print one " & "-separated row of percentages for the
# `equal` weighting of the multi-task table.
test_set = ['pe', 'we', 'abam']
weighting = 'equal'

for test in test_set:
    # NOTE(review): `model` is not assigned in this cell; the value left by an
    # earlier cell is printed, although the row filter below does not use it.
    print(weighting, '-', test, '-', model)

    # First row of mtl_error_df matching (test, weighting), as a plain list.
    # Raises IndexError when no row matches.
    row = mtl_error_df[(mtl_error_df['test'] == test) & (mtl_error_df['weighting'] == weighting)].values[0].tolist()

    # Columns are addressed by position; presumably 3 is the gold-argument count
    # and 4 the predicted-argument count -- TODO confirm against the CSV header.
    number_gold_arguments = row[3] 
    number_pred_arguments = row[4]

    output_string = []
    # Six values: row[5], the last column (row[-1]) and row[8] .. row[11].
    values_to_show = [row[5], row[-1]] + row[8:12]
    for i, val in enumerate(values_to_show):
        # The sixth value (i == 5) is expressed as a percentage of the gold count,
        # every other value as a percentage of the predicted count.
        new_val = round((val / number_pred_arguments)*100, 1) if i != 5 else round((val / number_gold_arguments)*100, 1)
        # new_val = round((val / number_gold_arguments)*100, 1) if i != 5 else round((val / number_pred_arguments)*100, 1)
        output_string.append(new_val)

    # Insert the complement to 100 of the last percentage right before it.
    output_string = output_string[:-1] + [round(100-output_string[-1], 1), output_string[-1]]
    print(" & ".join([str(x) for x in output_string]))
    print()

In [None]:
# For each test set, print one " & "-separated row of percentages for the
# `dwa` weighting of the multi-task table.
test_set = ['pe', 'we', 'abam']
weighting = 'dwa'

for test in test_set:
    # NOTE(review): `model` is not assigned in this cell; the value left by an
    # earlier cell is printed, although the row filter below does not use it.
    print(weighting, '-', test, '-', model)

    # First row of mtl_error_df matching (test, weighting), as a plain list.
    # Raises IndexError when no row matches.
    row = mtl_error_df[(mtl_error_df['test'] == test) & (mtl_error_df['weighting'] == weighting)].values[0].tolist()

    # Columns are addressed by position; presumably 3 is the gold-argument count
    # and 4 the predicted-argument count -- TODO confirm against the CSV header.
    number_gold_arguments = row[3] 
    number_pred_arguments = row[4]

    output_string = []
    # Six values: row[5], the last column (row[-1]) and row[8] .. row[11].
    values_to_show = [row[5], row[-1]] + row[8:12]
    for i, val in enumerate(values_to_show):
        # new_val = round((val / number_pred_arguments)*100, 1) if i != 5 else round((val / number_gold_arguments)*100, 1)
        # The fifth value (i == 4) is expressed as a percentage of the predicted
        # count, every other value as a percentage of the gold count.
        new_val = round((val / number_gold_arguments)*100, 1) if i != 4 else round((val / number_pred_arguments)*100, 1)
        output_string.append(new_val)

    # output_string = output_string[:-1] + [round(100-output_string[-1], 1), output_string[-1]]
    print(" & ".join([str(x) for x in output_string]))
    print()