In [None]:
from sklearn.metrics import f1_score, classification_report, precision_score
import pathlib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import collections
import numpy as np
import pandas as pd

from utils import parse_file

# Parent of the resolved current working directory. The result folders read
# further down (`results-<tanda>` and `results`) are looked up relative to it.
current_path = pathlib.Path().resolve().parent
print(current_path)

In [None]:
def get_values_of_metrics(dataname, sts_type, tanda, n_runs=10):
    """Collect F1 scores over the repeated test runs of one experiment.

    Reads ``current_path / results-<tanda> / <dataname>_<sts_type> /
    test_<dataname>_<i>.txt`` for ``i`` in ``range(n_runs)``, builds a
    classification report for every run, and prints:

    * the best macro-average F1 together with the index of that run,
    * the per-run and mean precision of the ``'B'`` report entry,
    * the per-run and mean recall of the ``'B'`` report entry.

    Args:
        dataname: Dataset name used in the folder and file names.
        sts_type: Suffix of the experiment folder (``<dataname>_<sts_type>``).
        tanda: Suffix of the top-level ``results-<tanda>`` folder.
        n_runs: Number of run files to read. Defaults to 10, the count that
            used to be hard-coded.

    Returns:
        A pair ``(t1, t2)`` of 4-element lists ordered
        ``[O, I, B, macro avg]`` holding F1 scores rounded to 3 decimals.
        ``t1`` belongs to the run with the highest macro-average F1,
        ``t2`` is the mean over all runs.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be at least 1, got {n_runs}")

    report_keys = ['O', 'I', 'B', 'macro avg']

    metrics_results = []
    precision_results = []
    recall_results = []

    for i in range(n_runs):
        filepath = str(current_path / f'results-{tanda}' / f'{dataname}_{sts_type}' / f'test_{dataname}_{i}.txt')
        _, labels, predictions = parse_file(filepath)
        # NOTE(review): target_names are matched by position to the sorted
        # label values found in the data -- confirm that parse_file's label
        # encoding sorts as O, B, I.
        report = classification_report(labels, predictions, target_names=['O', 'B', 'I'], output_dict=True)

        metrics_results.append([report[key]['f1-score'] for key in report_keys])
        precision_results.append(report['B']['precision'])
        recall_results.append(report['B']['recall'])

    # Highest macro F1 (last column) wins; on a tie the later run is kept,
    # which matches the former `>=` scan.
    best_model_nr, best_model_results = max(
        enumerate(metrics_results),
        key=lambda item: (item[1][-1], item[0]),
    )
    best_f1 = best_model_results[-1]

    print(best_f1, best_model_nr)

    t1 = [round(x, 3) for x in best_model_results]

    # Column-wise mean over the runs, one 1-D mean per report entry.
    t2 = [
        round(np.mean([run[col] for run in metrics_results]), 3)
        for col in range(len(report_keys))
    ]

    print(precision_results)
    print('PREC Bs', round(np.mean(precision_results), 3))
    print(recall_results)
    print('REC Bs', round(np.mean(recall_results), 3))

    return t1, t2

def get_line_of_info(t1, t2):
    """Print one row of `` & <t1 value> (<t2 value>)`` cells.

    ``t1`` and ``t2`` are paired element-wise; the pairs at positions
    2, 1, 0 and -1 are emitted in that order on a single printed line.
    Nothing is returned.
    """
    pairs = list(zip(t1, t2))
    selected = [pairs[idx] for idx in (2, 1, 0, -1)]
    print("".join(f" & {first} ({second})" for first, second in selected))

In [None]:
# Report the 'no_sts' runs stored under `results-simple`, one block per dataset.
sts_type = 'no_sts'
for dataname in ('ugen', 'webis', 'pe'):
    print(dataname, '-', sts_type)
    t1, t2 = get_values_of_metrics(dataname, sts_type, 'simple')
    get_line_of_info(t1, t2)
    print()

In [None]:
tanda = 'tanda-2'

# For every dataset, report each STS variant read from `results-<tanda>`;
# the blank line is printed once per dataset, after all of its variants.
for dataname in ('ugen', 'webis', 'pe'):
    for sts_type in ('sts_sbert', 'sts_sbert_ft', 'sts_arguebert_ft'):
        print(dataname, '-', sts_type)
        t1, t2 = get_values_of_metrics(dataname, sts_type, tanda)
        get_line_of_info(t1, t2)

    print()


In [None]:
# Reset so a matrix left over from another cell is never mistaken for this one.
cm = None
best_f1 = 0
best_model = 0

dataname = 'ugen'
# Alternative configuration: sts_type = 'no_sts'
sts_type = 'sts_sbert'

# All run files of this experiment live in one folder; build the path once so
# the loop and the best-run reload below cannot drift apart.
results_dir = current_path / 'results' / f'{dataname}_{sts_type}'

# Per-run F1 scores, keyed by report entry.
averages = {
    'O': [], 'I': [], 'B': [], 'macro': []
}

for i in range(10):
    dirpath = str(results_dir / f'test_{dataname}_{i}.txt')
    tokens, labels, predictions = parse_file(dirpath)
    # NOTE(review): target_names are matched by position to the sorted label
    # values found in the data -- confirm that parse_file's label encoding
    # sorts as O, B, I.
    report = classification_report(labels, predictions, target_names=['O', 'B', 'I'], output_dict = True)
    for lb in ['O', 'I', 'B']:
        averages[lb].append(report[lb]['f1-score'])
    averages['macro'].append(report['macro avg']['f1-score'])

    run_f1 = f1_score(labels, predictions, average='macro')
    # Strict '>' keeps the earliest run when several share the best macro F1.
    if run_f1 > best_f1:
        best_model = i
        best_f1 = run_f1

print(best_model, best_f1)

# Reload the selected run and derive its confusion matrix and F1 scores from
# the same data. Computing `cm` here (instead of inside the loop) means it is
# always set, even when no run scored above the initial best_f1 of 0.
dirpath = str(results_dir / f'test_{dataname}_{best_model}.txt')
tokens, labels, predictions = parse_file(dirpath)
cm = confusion_matrix(labels, predictions)
macro_f1_score = round(f1_score(labels, predictions, average = 'macro'), 3)
f1_scores = [round(x, 3) for x in f1_score(labels, predictions, average = None)]
print(f1_scores, macro_f1_score)

# Mean and sample standard deviation (ddof=1) of every score across the runs.
for k, v in averages.items():
    mean = round(np.mean(v), 3)
    std_dev = round(np.std(v, ddof=1), 3)
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    print(k, ":", mean, r"$\pm$", std_dev)

In [None]:
# Reset so a matrix left over from another cell is never plotted by mistake.
cm = None
best_f1 = 0
best_model = 0

dataname = 'ugen'
sts_type = 'no_sts'

# All run files of this experiment live in one folder; build the path once so
# the loop and the best-run reload below always read from the same place.
results_dir = current_path / 'results' / f'{dataname}_{sts_type}'

# Per-run F1 scores, keyed by report entry.
averages = {
    'O': [], 'I': [], 'B': [], 'macro': []
}

for i in range(10):
    dirpath = str(results_dir / f'test_{dataname}_{i}.txt')
    tokens, labels, predictions = parse_file(dirpath)
    # NOTE(review): target_names are matched by position to the sorted label
    # values found in the data -- confirm that parse_file's label encoding
    # sorts as O, B, I.
    report = classification_report(labels, predictions, target_names=['O', 'B', 'I'], output_dict = True)
    for lb in ['O', 'I', 'B']:
        averages[lb].append(report[lb]['f1-score'])
    averages['macro'].append(report['macro avg']['f1-score'])

    run_f1 = f1_score(labels, predictions, average='macro')
    # Strict '>' keeps the earliest run when several share the best macro F1.
    if run_f1 > best_f1:
        best_model = i
        best_f1 = run_f1

print(best_model, best_f1)

# Reload the selected run from the folder named by `sts_type` (previously the
# folder suffix was hard-coded here) and derive the confusion matrix from the
# same data, so `cm` is always set before it is plotted.
dirpath = str(results_dir / f'test_{dataname}_{best_model}.txt')
tokens, labels, predictions = parse_file(dirpath)
cm = confusion_matrix(labels, predictions)
macro_f1_score = round(f1_score(labels, predictions, average = 'macro'), 3)
f1_scores = [round(x, 3) for x in f1_score(labels, predictions, average = None)]
print(f1_scores, macro_f1_score)

# Mean and sample standard deviation (ddof=1) of every score across the runs.
for k, v in averages.items():
    mean = round(np.mean(v), 3)
    std_dev = round(np.std(v, ddof=1), 3)
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    print(k, ":", mean, r"$\pm$", std_dev)

# Confusion matrix of the selected run.
# NOTE(review): confusion_matrix orders rows/columns by sorted label value;
# the O, B, I tick labels assume that order -- confirm against parse_file.
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, cbar = False, cmap='Blues', fmt='d', xticklabels=['O', 'B', 'I'], yticklabels=['O', 'B', 'I'], annot_kws={"fontsize":16})
plt.title('Modelo simple')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()