In [None]:
from sklearn.metrics import f1_score, classification_report
import pathlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import collections
import numpy as np
import pandas as pd
import importlib
import math
from sentence_transformers import SentenceTransformer

import utils
import categorization
import scoring

# Re-import the local helper modules so that edits made on disk are picked up
# without restarting the kernel. Note that `utils` is not reloaded here.
importlib.reload(categorization)
importlib.reload(scoring)

# The star imports run after the reloads, so the notebook namespace is bound
# to the reloaded definitions of `categorization` and `scoring`.
from utils import *
from categorization import *
from scoring import get_N_score

# Parent of the current working directory; the results, figures and
# similarity-model paths used below are all built relative to it.
current_path = pathlib.Path().resolve().parent
print(current_path)

## Categorization of predicted arguments


In [None]:
# Coefficients of the kind later passed to get_N_score as
# lambda_PM / lambda_MU / lambda_UNR.
# NOTE(review): presumably the weights applied to partial-match, made-up and
# unrecognized arguments — confirm against scoring.get_N_score.
# get_line_of_info does not currently compute the N-score, so these values are
# not read by the function defined in this cell.
lambda_PM = 1
lambda_MU = 1
lambda_UNR = 1

def get_line_of_info(dataname, sts_type, tanda, criterion = 'best_f1', num_runs = 10):
    """Build one ``&``-separated table row with the argument-category counts of a setup.

    For each run ``i`` in ``range(num_runs)`` the prediction file
    ``results-{tanda}/{dataname}_{sts_type}/test_{dataname}_{i}.txt`` (relative
    to ``current_path``) is parsed, its macro F1 is computed, and its predicted
    arguments are counted per category. The gold-argument count and the
    partial-match values were only consumed by a disabled N-score computation,
    so they are no longer computed here.

    Args:
        dataname: dataset key used in the directory and file names.
        sts_type: STS variant key used in the directory name.
        tanda: suffix of the ``results-*`` directory.
        criterion: kept for backward compatibility; it is not used — the best
            run is always the one with the highest macro F1.
        num_runs: number of run files to read (previously hard-coded to 10).

    Returns:
        A string made of four `` & best (avg)`` cells, in the order match,
        partial match, made-up, unrecognized. ``best`` is the count of the
        best run; ``avg`` is the mean over all runs rounded to an integer.
        The format looks intended for pasting into a LaTeX table.

    Raises:
        ValueError: if ``num_runs`` is smaller than 1.
        AssertionError: if the per-category counts of a run do not add up to
            its number of predicted arguments.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    best_f1 = 0
    best_model = 0
    counts_per_run = []

    for i in range(num_runs):
        dirpath = str(current_path / f'results-{tanda}' / f'{dataname}_{sts_type}' / f'test_{dataname}_{i}.txt')

        # Macro F1 over the parsed labels selects the best run; the strict '>'
        # keeps the earliest run when several runs tie.
        _, labels, predictions = parse_file(dirpath)
        run_f1 = f1_score(labels, predictions, average='macro')
        if run_f1 > best_f1:
            best_model = i
            best_f1 = run_f1

        instances = parse_file_for_arg_level(dirpath)
        id_values_pairs = get_values_for_predicted_arguments(instances)

        number_match, number_partial_match, number_made_ups = get_number_predicted_arguments_per_category(id_values_pairs)
        number_unrecognized = get_unrecognized_arguments(instances)
        number_preds = get_pred_arguments(instances)

        # Every predicted argument must fall into exactly one category.
        assert (number_match + number_partial_match + number_made_ups) == number_preds

        counts_per_run.append([number_match, number_partial_match, number_made_ups, number_unrecognized])

    stats_of_best_model = counts_per_run[best_model]

    # Column-wise mean over the runs, rounded to the nearest integer.
    averages = [round(np.mean(column)) for column in zip(*counts_per_run)]

    return "".join(f" & {best} ({avg})" for best, avg in zip(stats_of_best_model, averages))

# , round(N_score, 3)

In [None]:
# Table rows for the 'no_sts' configuration, read from the 'simple' results.
sts_type = 'no_sts'
for dataname in ('ugen', 'pe', 'webis'):
    print(dataname, '-', sts_type)
    info = get_line_of_info(dataname, sts_type, 'simple')
    print(info)

In [None]:
# One table row per dataset and STS variant, read from the 'tanda-2' results.
for dataname in ('ugen', 'pe', 'webis'):
    for sts_type in ('sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft'):
        print(dataname, '-', sts_type)
        info = get_line_of_info(dataname, sts_type, 'tanda-2')
        print(info)

    # blank line between datasets
    print()

## N-score (syntactic)

In [None]:
# Compute the N-score of each of the 10 runs of one configuration, using the
# values returned by get_values_for_predicted_arguments, then report the score
# of the best-F1 run and the mean over all runs.
dataname = 'ugen'
tanda = 'simple'
sts_type = 'no_sts'
best_f1 = 0
best_model = 0

# Coefficients passed through to get_N_score unchanged.
lambda_PM = 1
lambda_MU = 1
lambda_UNR = 1

# One N-score per run, indexed by run number.
N_scores = []

for i in range(10):
    dirpath = str(current_path / f'results-{tanda}' / f'{dataname}_{sts_type}' / f'test_{dataname}_{i}.txt')
    # Macro F1 over the parsed labels picks the best run; the strict '>' keeps
    # the earliest run on ties.
    tokens, labels, predictions = parse_file(dirpath)
    run_f1 = f1_score(labels, predictions, average='macro')
    if (run_f1 > best_f1):
        best_model = i
        best_f1 = run_f1

    instances = parse_file_for_arg_level(dirpath)
    id_values_pairs = get_values_for_predicted_arguments(instances)
    number_match, number_partial_match, number_made_ups = get_number_predicted_arguments_per_category(id_values_pairs)
    number_unrecognized = get_unrecognized_arguments(instances)
        
    number_golds = get_gold_arguments(instances)
    number_preds = get_pred_arguments(instances)
        
    # Every predicted argument must fall into exactly one of the three categories.
    assert (number_match + number_partial_match + number_made_ups) == number_preds

    r_values_for_no_match = get_partial_match_values(id_values_pairs)
    
    N_score = get_N_score(number_match, number_partial_match, number_made_ups, number_unrecognized, r_values_for_no_match, 
                          number_golds, number_preds, 
                          lambda_PM, lambda_MU, lambda_UNR)

    N_scores.append(N_score)


# N-score of the run with the highest macro F1 (not the highest N-score).
print("BEST score: ", round(N_scores[best_model], 3))

print("AVG score: ", round(np.mean(N_scores), 3))

### Plotting syntactic similarity

In [None]:
def get_data_for_ploting(dataname, sts_types, tandas):
    """Collect the partial-match values of the best run of every setup.

    ``sts_types`` and ``tandas`` are walked in lockstep; each pair names one
    results directory ``results-{tanda}/{dataname}_{sts_type}`` under
    ``current_path``. Among runs 0-9 of a setup, the run with the highest
    macro F1 over the labels returned by ``parse_file`` is selected (the
    earliest run wins ties), and the output of ``get_partial_match_values``
    for that run is kept.

    Returns:
        A list with one entry per (sts_type, tanda) pair, in input order.
    """
    collected = []

    for variant, stage in zip(sts_types, tandas):
        run_dir = current_path / f'results-{stage}' / f'{dataname}_{variant}'

        # Locate the run with the highest macro F1.
        top_run, top_f1 = 0, 0
        for run in range(10):
            _, gold, pred = parse_file(str(run_dir / f'test_{dataname}_{run}.txt'))
            score = f1_score(gold, pred, average='macro')
            if score > top_f1:
                top_run, top_f1 = run, score

        parsed = parse_file_for_arg_level(str(run_dir / f'test_{dataname}_{top_run}.txt'))
        scored_pairs = get_values_for_predicted_arguments(parsed)

        n_match, n_partial, n_made_up = get_number_predicted_arguments_per_category(scored_pairs)
        # The next two results are not used here; the calls are kept so the
        # function executes exactly the same steps as before.
        get_unrecognized_arguments(parsed)
        get_gold_arguments(parsed)
        n_pred = get_pred_arguments(parsed)

        # Every predicted argument must fall into exactly one category.
        assert (n_match + n_partial + n_made_up) == n_pred

        collected.append(get_partial_match_values(scored_pairs))

    return collected

def get_data_for_ploting_all_models(dataname, sts_types, tandas):
    """Collect the partial-match values of all ten runs of every setup.

    ``sts_types`` and ``tandas`` are walked in lockstep. For each pair, the
    outputs of ``get_partial_match_values`` for runs 0-9 are concatenated, in
    run order, into a single list.

    Returns:
        A list with one pooled list per (sts_type, tanda) pair, in input order.
    """
    collected = []

    for variant, stage in zip(sts_types, tandas):
        run_dir = current_path / f'results-{stage}' / f'{dataname}_{variant}'
        pooled = []

        for run in range(10):
            parsed = parse_file_for_arg_level(str(run_dir / f'test_{dataname}_{run}.txt'))
            scored_pairs = get_values_for_predicted_arguments(parsed)

            n_match, n_partial, n_made_up = get_number_predicted_arguments_per_category(scored_pairs)
            # The next two results are not used here; the calls are kept so
            # the function executes exactly the same steps as before.
            get_unrecognized_arguments(parsed)
            get_gold_arguments(parsed)
            n_pred = get_pred_arguments(parsed)

            # Every predicted argument must fall into exactly one category.
            assert (n_match + n_partial + n_made_up) == n_pred

            pooled.extend(get_partial_match_values(scored_pairs))

        collected.append(pooled)

    return collected

def get_percentages(data_to_plot, numbins):
    """Turn each group of values into per-bin percentages over [0, 1].

    The interval [0, 1] is cut into ``numbins`` equal-width bins and every
    group in ``data_to_plot`` is histogrammed against those edges.

    Caveats inherited from ``np.histogram`` and the normalisation used:
      * values outside [0, 1] land in no bin but still count towards the
        group size, so such a row sums to less than 100;
      * an empty group is divided by zero.

    Returns:
        A list with one float array of length ``numbins`` per group.
    """
    edges = np.linspace(0, 1, numbins + 1)

    def _share_per_bin(sample):
        # counts per bin, normalised by the full group size
        hist, _ = np.histogram(sample, bins=edges)
        return (hist / len(sample)) * 100

    return [_share_per_bin(sample) for sample in data_to_plot]

In [None]:
# Draw one heatmap per dataset showing how the partial-match values of each
# model's best run are distributed over four equal-width bins of [0, 1].
# A later cell redefines get_data_for_ploting for semantic similarity; this
# cell is meant to use the three-argument version defined above.
datanames = ['ugen', 'pe', 'webis']
map_datanames = {'ugen': 'UGEN', 'webis':'WE', 'pe': 'AAEC'}

# sts_types and tandas are consumed pairwise: ('no_sts', 'simple') followed by
# the three STS variants, each paired with 'tanda-2'.
sts_types = ['no_sts'] + ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft'] #  ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft']:
tandas = ['simple'] + ['tanda-2']*3

# One row of three panels (one per dataset) sharing the y axis.
fig, axis = plt.subplots(1, 3, figsize=(16, 4), sharey=True)

for index_axis, dataname in enumerate(datanames):

    data_to_plot = get_data_for_ploting(dataname, sts_types, tandas)
    # data_to_plot = get_data_for_ploting_all_models(dataname, sts_types, tandas)
    
    percentages = get_percentages(data_to_plot, numbins = 4)

    rounded_percentages = [np.array([round(x) for x in p]) for p in percentages]
    # print(percentages)
    # print(rounded_percentages)

    # Rounding each bin independently can leave a row that does not total 100.
    # The missing/excess points are added to/removed from the bins one at a
    # time, cycling from the first bin, until the row sums to exactly 100.
    # The arrays are modified in place.
    for row_percentajes in rounded_percentages:
        difference = 100 - sum(row_percentajes)
        
        for i in range(abs(difference)):
            index = i % len(row_percentajes)
            if difference > 0:
                row_percentajes[index] += 1
            elif difference < 0:
                row_percentajes[index] -= 1

    # print(rounded_percentages)
    percentages = rounded_percentages

    # Exactly four rows are expected, one per entry of sts_types.
    data = np.array([percentages[0], percentages[1], percentages[2], percentages[3]])
    
    xtickslabels = ['0 - 0.25', '0.25 - 0.5', '0.5 - 0.75', '0.75 - 1']
    yticklabels=["Simple", "SBERT", "SBERT"+"\n"+"[STSb+BWS]", "argueBERT"+"\n"+"[STSb+BWS]"]
    
    # Renders a number as a whole percentage; `pos` is accepted but not used.
    def fmt(x, pos):
        return f'{round(x)}%'
    
    ax = axis[index_axis]
    sns.heatmap(data, annot=True, fmt="", cbar=False,
                cmap="Blues", xticklabels=xtickslabels, 
                yticklabels=yticklabels, ax=ax, annot_kws={"size": 18})
    
    # Rewrite each annotation drawn by seaborn: parse its text back to a
    # number and re-render it with a percentage symbol.
    for text in ax.texts:
        text.set_text(fmt(float(text.get_text()), None))
    
    # Keep the y-tick labels horizontal
    # ax.set_yticks(yticklabels)
    ax.set_yticklabels(yticklabels, rotation=0, fontsize = 11)
        
    # Add labels
    ax.set_xlabel('R', fontsize = 11)
    # ax.set_ylabel('Modelos')
    
    ax.set_title(map_datanames[dataname])
    
# Save and display the figure. The PDF is written under current_path / 'figures'.
# NOTE(review): the 'figures' directory is not created here — confirm it exists.
plt.tight_layout()
# plt.savefig(current_path / 'figures' / 'syntactic_simil_all_models.pdf')
plt.savefig(current_path / 'figures' / 'syntactic_simil_best_models.pdf')
plt.show()


## N-score (semantic)

In [None]:
# Compute the N-score of each of the 10 runs of one configuration, using the
# values returned by get_semantic_values_for_predicted_arguments, then report
# the score of the best-F1 run and the mean over all runs.
dataname = 'pe'
# tanda = 'simple'
tanda = 'tanda-2'
# sts_type = 'no_sts'
# sts_type = 'sts_sbert'
# sts_type = 'sts_sbert_ft'
sts_type = 'sts_arguebert_ft'
best_f1 = 0
best_model = 0

# Coefficients passed through to get_N_score unchanged.
lambda_PM = 1
lambda_MU = 1
lambda_UNR = 1

# One N-score per run, indexed by run number.
N_scores = []

# Sentence-embedding model handed to get_semantic_values_for_predicted_arguments.
# NOTE(review): loaded by model name, so it is presumably fetched from the
# model hub or a local cache — confirm availability offline.
semModel = SentenceTransformer("all-MiniLM-L6-v2")

for i in range(10):

    dirpath = str(current_path / f'results-{tanda}' / f'{dataname}_{sts_type}' / f'test_{dataname}_{i}.txt')
    # Macro F1 over the parsed labels picks the best run; the strict '>' keeps
    # the earliest run on ties.
    tokens, labels, predictions = parse_file(dirpath)
    run_f1 = f1_score(labels, predictions, average='macro')
    if (run_f1 > best_f1):
        best_model = i
        best_f1 = run_f1

    instances = parse_file_for_arg_level(dirpath)
    # id_values_pairs = get_values_for_predicted_arguments(instances)
    id_values_pairs = get_semantic_values_for_predicted_arguments(instances, semModel)

    # print(len(id_values_pairs), id_values_pairs)
    number_match, number_partial_match, number_made_ups = get_number_predicted_arguments_per_category(id_values_pairs)
    number_unrecognized = get_unrecognized_arguments(instances)
        
    number_golds = get_gold_arguments(instances)
    number_preds = get_pred_arguments(instances)
        
    # Every predicted argument must fall into exactly one of the three categories.
    assert (number_match + number_partial_match + number_made_ups) == number_preds

    # r_values_for_no_match = get_partial_match_values(id_values_pairs)

    # print(len(r_values_for_no_match), r_values_for_no_match)
    semantic_similarity_values_for_no_match = get_partial_match_values(id_values_pairs)
    
    N_score = get_N_score(number_match, number_partial_match, number_made_ups, number_unrecognized, semantic_similarity_values_for_no_match, 
                          number_golds, number_preds, 
                          lambda_PM, lambda_MU, lambda_UNR)

    # Per-run score, printed as it is computed.
    print(N_score)
    # break

    N_scores.append(N_score)


# N-score of the run with the highest macro F1 (not the highest N-score).
print("BEST score: ", round(N_scores[best_model], 3))

print("AVG score: ", round(np.mean(N_scores), 3))

### Plotting semantic similarity

In [None]:
def get_data_for_ploting(dataname, sts_types, tandas, semModel=None):
    """Collect the partial-match values of the best run of every setup.

    ``sts_types`` and ``tandas`` are walked in lockstep; each pair names one
    results directory ``results-{tanda}/{dataname}_{sts_type}`` under
    ``current_path``. Among runs 0-9 of a setup, the run with the highest
    macro F1 over the labels returned by ``parse_file`` is selected (the
    earliest run wins ties), and the output of ``get_partial_match_values``
    for that run is kept.

    Args:
        dataname: dataset key used in the directory and file names.
        sts_types: STS variant keys, one per setup.
        tandas: ``results-*`` directory suffixes, aligned with ``sts_types``.
        semModel: model forwarded to
            ``get_semantic_values_for_predicted_arguments``. When ``None``,
            ``get_values_for_predicted_arguments`` is used instead, so the
            three-argument call used by the syntactic plotting cell keeps
            working after this definition has replaced the earlier one of the
            same name.

    Returns:
        A list with one entry per (sts_type, tanda) pair, in input order.

    Raises:
        AssertionError: if the per-category counts of the best run do not add
            up to its number of predicted arguments.
    """
    data_to_plot = []

    for sts_type, tanda in zip(sts_types, tandas):
        run_dir = current_path / f'results-{tanda}' / f'{dataname}_{sts_type}'

        # Locate the run with the highest macro F1.
        best_f1 = 0
        best_model = 0
        for i in range(10):
            _, labels, predictions = parse_file(str(run_dir / f'test_{dataname}_{i}.txt'))
            run_f1 = f1_score(labels, predictions, average='macro')
            if run_f1 > best_f1:
                best_model = i
                best_f1 = run_f1

        instances = parse_file_for_arg_level(str(run_dir / f'test_{dataname}_{best_model}.txt'))
        if semModel is None:
            id_values_pairs = get_values_for_predicted_arguments(instances)
        else:
            id_values_pairs = get_semantic_values_for_predicted_arguments(instances, semModel)

        number_match, number_partial_match, number_made_ups = get_number_predicted_arguments_per_category(id_values_pairs)
        number_preds = get_pred_arguments(instances)

        # Every predicted argument must fall into exactly one category.
        assert (number_match + number_partial_match + number_made_ups) == number_preds

        data_to_plot.append(get_partial_match_values(id_values_pairs))

    return data_to_plot


def get_percentages(data_to_plot, numbins):
    """Turn each group of values into per-bin percentages over [0, 1].

    The interval [0, 1] is cut into ``numbins`` equal-width bins and every
    group in ``data_to_plot`` is histogrammed against those edges.

    Caveats inherited from ``np.histogram`` and the normalisation used:
      * values outside [0, 1] land in no bin but still count towards the
        group size, so such a row sums to less than 100;
      * an empty group is divided by zero.

    Returns:
        A list with one float array of length ``numbins`` per group.
    """
    edges = np.linspace(0, 1, numbins + 1)

    def _share_per_bin(sample):
        # counts per bin, normalised by the full group size
        hist, _ = np.histogram(sample, bins=edges)
        return (hist / len(sample)) * 100

    return [_share_per_bin(sample) for sample in data_to_plot]

In [None]:
# Draw one heatmap per dataset showing how the partial-match values of each
# model's best run are distributed over four equal-width bins of [0, 1], with
# the values coming from the sentence-embedding model loaded below.
datanames = ['ugen', 'pe', 'webis']
map_datanames = {'ugen': 'UGEN', 'webis':'WE', 'pe': 'AAEC'}

# sts_types and tandas are consumed pairwise: ('no_sts', 'simple') followed by
# the three STS variants, each paired with 'tanda-2'.
sts_types = ['no_sts'] + ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft'] #  ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft']:
tandas = ['simple'] + ['tanda-2']*3

# Similarity model used for this figure; the commented lines are the
# alternatives. The active one is loaded from a directory under
# current_path / 'similarity_models'.
# semModel = SentenceTransformer("all-MiniLM-L6-v2")
# semModel = SentenceTransformer(str(current_path / 'similarity_models' / 'sbert-all-MiniLM-L6-v2-sts-bws' / 'final'))
semModel = SentenceTransformer(str(current_path / 'similarity_models' / 'argueBert_base_similar-sts-bws' / 'final'))

# One row of three panels (one per dataset) sharing the y axis.
fig, axis = plt.subplots(1, 3, figsize=(16, 4), sharey=True)

for index_axis, dataname in enumerate(datanames):

    data_to_plot = get_data_for_ploting(dataname, sts_types, tandas, semModel)
    
    percentages = get_percentages(data_to_plot, numbins = 4)

    rounded_percentages = [np.array([round(x) for x in p]) for p in percentages]

    # Rounding each bin independently can leave a row that does not total 100.
    # The missing/excess points are added to/removed from the bins one at a
    # time, cycling from the first bin, until the row sums to exactly 100.
    # The arrays are modified in place.
    for row_percentajes in rounded_percentages:
        difference = 100 - sum(row_percentajes)
        
        for i in range(abs(difference)):
            index = i % len(row_percentajes)
            if difference > 0:
                row_percentajes[index] += 1
            elif difference < 0:
                row_percentajes[index] -= 1

    percentages = rounded_percentages

    # Exactly four rows are expected, one per entry of sts_types.
    data = np.array([percentages[0], percentages[1], percentages[2], percentages[3]])
    
    xtickslabels = ['0 - 0.25', '0.25 - 0.5', '0.5 - 0.75', '0.75 - 1']
    yticklabels=["Simple", "SBERT", "SBERT"+"\n"+"[STSb+BWS]", "argueBERT"+"\n"+"[STSb+BWS]"]
    
    # Renders a number as a whole percentage; `pos` is accepted but not used.
    def fmt(x, pos):
        return f'{round(x)}%'
    
    ax = axis[index_axis]
    sns.heatmap(data, annot=True, fmt="", cbar=False,
                cmap="Blues", xticklabels=xtickslabels, 
                yticklabels=yticklabels, ax=ax, annot_kws={"size": 18})
    
    # Rewrite each annotation drawn by seaborn: parse its text back to a
    # number and re-render it with a percentage symbol.
    for text in ax.texts:
        text.set_text(fmt(float(text.get_text()), None))
    
    # Keep the y-tick labels horizontal
    # ax.set_yticks(yticklabels)
    ax.set_yticklabels(yticklabels, rotation=0)
    
    # Add labels
    ax.set_xlabel('R-index')
    # ax.set_ylabel('Modelos')
    
    ax.set_title(map_datanames[dataname])
    
# Save and display the figure. The PDF is written under current_path / 'figures'.
# NOTE(review): the 'figures' directory is not created here — confirm it exists.
plt.tight_layout()
plt.savefig(current_path / 'figures' / 'semantic_simil_best_models_arguebertft.pdf')
plt.show()


In [None]:
# Draw the same three-panel heatmap layout as the previous cells, but from
# percentages typed directly into this cell instead of values computed from
# the result files.
datanames = ['ugen', 'pe', 'webis']
map_datanames = {'ugen': 'UGEN', 'webis':'WE', 'pe': 'AAEC'}

# Not read anywhere in this cell; the plotted values are hard-coded below.
sts_types = ['no_sts'] + ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft'] #  ['sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft']:
tandas = ['simple'] + ['tanda-2']*3

fig, axis = plt.subplots(1, 3, figsize=(16, 4), sharey=True)


# Alternative set of values, currently disabled.
# rounded_percentages = []

# rounded_percentages.append([
#     np.array([37, 11, 28, 24]),
#     np.array([1, 18, 23, 58]),
#     np.array([1, 21, 22, 56]),
#     np.array([1, 20, 22, 57])
# ])

# rounded_percentages.append([
#     np.array([11, 5, 26, 58]),
#     np.array([0, 7, 10, 83]),
#     np.array([0, 3, 10, 87]),
#     np.array([0, 1, 16, 83])
# ])

# rounded_percentages.append([
#     np.array([5, 7, 33, 55]),
#     np.array([1, 2, 12, 85]),
#     np.array([0, 5, 13, 82]),
#     np.array([0, 5, 14, 81])
# ])


# One appended block per dataset, in `datanames` order. Each block has four
# rows (matched to `yticklabels` below) and four columns (matched to
# `xtickslabels`).
# NOTE(review): presumably copied from the output of an earlier run — the
# provenance of these numbers is not visible here; confirm.
rounded_percentages = []

rounded_percentages.append([
    np.array([24, 23, 34, 19]),
    np.array([6, 13, 12, 69]),
    np.array([7, 13, 14, 66]),
    np.array([4, 14, 14, 68])
])

rounded_percentages.append([
    np.array([6, 10, 32, 52]),
    np.array([2, 6, 13, 79]),
    np.array([0, 2, 13, 85]),
    np.array([0, 2, 14, 84])
])

rounded_percentages.append([
    np.array([2, 8, 38, 52]),
    np.array([1, 1, 11, 87]),
    np.array([0, 4, 15, 81]),
    np.array([0, 3, 15, 82])
])




for index_axis, dataname in enumerate(datanames):
    # print(rounded_percentages)
    percentages = rounded_percentages[index_axis]
    
    data = np.array([percentages[0], percentages[1], percentages[2], percentages[3]])
        
    xtickslabels = ['0 - 0.25', '0.25 - 0.5', '0.5 - 0.75', '0.75 - 1']
    yticklabels=["Simple", "SBERT", "SBERT"+"\n"+"[STSb+BWS]", "argueBERT"+"\n"+"[STSb+BWS]"]
        
    # Renders a number as a whole percentage; `pos` is accepted but not used.
    def fmt(x, pos):
        return f'{round(x)}%'
        
    ax = axis[index_axis]
    sns.heatmap(data, annot=True, fmt="", cbar=False,
                    cmap="Blues", xticklabels=xtickslabels, 
                    yticklabels=yticklabels, ax=ax, annot_kws={"size": 18})
        
    # Rewrite each annotation drawn by seaborn: parse its text back to a
    # number and re-render it with a percentage symbol.
    for text in ax.texts:
        text.set_text(fmt(float(text.get_text()), None))
        
    # Keep the y-tick labels horizontal
    # ax.set_yticks(yticklabels)
    ax.set_yticklabels(yticklabels, rotation=0, fontsize = 11)
        
    # Add labels
    ax.set_xlabel('Argue-BERT [STSb+BWS]', fontsize = 11)
    # ax.set_ylabel('Modelos')
        
    ax.set_title(map_datanames[dataname])
    
    
# Save and display the figure.
# NOTE(review): this writes to the same file name as the previous plotting
# cell ('semantic_simil_best_models_arguebertft.pdf'), so running both cells
# leaves only this cell's figure on disk — confirm that is intended.
plt.tight_layout()
plt.savefig(current_path / 'figures' / 'semantic_simil_best_models_arguebertft.pdf')
plt.show()
