In [7]:
from itertools import product

import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.metrics import multilabel_confusion_matrix

In [8]:
import os
import sys

# Location of the project checkout whose package is imported below.
# The FULL_TEMPORAL_RELATION_PATH environment variable overrides the location so the
# notebook can run on other machines; the original hard-coded path stays the default.
PROJECT_PATH = os.environ.get('FULL_TEMPORAL_RELATION_PATH', '/home/adiel/full-temporal-relation')

# Guarded so that re-running the cell does not append the same entry again.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from full_temporal_relation.visualization.data import plot_relation_bars
from full_temporal_relation.data.preprocessing import load_data
from full_temporal_relation.metrics import summary_results

# Data

In [9]:
# Root data directory (a relative path) and the TRC results folder nested inside it.
DATA_PATH = Path('../data')
TRC_RESULTS_PATH = DATA_PATH.joinpath('TRC', 'results')

In [10]:
# Read the MATRES platinum file through the project loader and display the result.
gold_df = load_data(DATA_PATH.joinpath('MATRES', 'platinum.txt'))
gold_df

Unnamed: 0,docid,verb1,verb2,eiid1,eiid2,relation,label,unique_id
0,WSJ_20130322_159,apologized,happened,e1,e5,VAGUE,VAGUE,e1-e5
1,WSJ_20130322_159,apologized,wrapped,e1,e6,BEFORE,BEFORE,e1-e6
2,WSJ_20130322_159,apologized,seemed,e1,e10,BEFORE,BEFORE,e1-e10
3,WSJ_20130322_159,apologized,yield,e1,e11,VAGUE,VAGUE,e1-e11
4,WSJ_20130322_159,happened,wrapped,e5,e6,BEFORE,BEFORE,e5-e6
...,...,...,...,...,...,...,...,...
832,CNN_20130322_248,sparing,begin,e3,e6,BEFORE,BEFORE,e3-e6
833,CNN_20130322_248,sparing,said,e3,e7,EQUAL,EQUAL,e3-e7
834,CNN_20130322_248,expected,begin,e4,e6,BEFORE,BEFORE,e4-e6
835,CNN_20130322_248,expected,said,e4,e7,BEFORE,BEFORE,e4-e7


In [5]:
import logging
from itertools import product

# Experiment grid: every (method, suffix, model) combination is looked up on disk.
methods = ['zero-shot', 'few-shot']

# None stands for result files whose name carries no suffix part.
suffixs_path = [None, 'completion', 'completion-explanation']

model_names = ['gemini-1.5-pro', 'gemini-1.5-flash', "Meta-Llama-3.1-8B-Instruct-Turbo"]

# One summary frame per combination whose results file could be read.
summary_results_lst = []
for method, suffix_path, model_name in product(methods, suffixs_path, model_names):
    # Falsy parts (the None suffix) are skipped, so the name never gets a dangling '-'.
    result_file_suffix = '-'.join(s for s in [model_name, method, suffix_path] if s)
    results_file_name = f'platinum-results-{result_file_suffix}.csv'

    # Only the summary call is guarded, so the handlers below cannot mask
    # unrelated errors raised while tagging the frame.
    try:
        # The label now matches the file that is actually read. It used to be built
        # without the suffix, so the 'completion' variants shared the plain run's label.
        # NOTE(review): how summary_results uses model_name is not visible here - confirm.
        df = summary_results(TRC_RESULTS_PATH / method / results_file_name,
                             gold_df,
                             model_name=results_file_name)
    except KeyError as e:
        logging.warning(f'No results for {result_file_suffix}, probably old format, error: {e}')
    except FileNotFoundError:
        logging.warning(f'No results for {result_file_suffix}, experiment results not been found')
    else:
        # Tag the frame with the grid coordinates so runs stay distinguishable once stacked.
        df['method'] = method
        df['suffix_path'] = suffix_path
        df['model_name'] = model_name
        summary_results_lst.append(df)

  return pd.DataFrame(columns=[col[1] for col in df.columns][:4], data=[labels_values / sum_per_label])
  return pd.DataFrame(columns=[col[1] for col in df.columns][:4], data=[labels_values / sum_per_label])
  return pd.DataFrame(columns=[col[1] for col in df.columns][:4], data=[labels_values / sum_per_label])
  return pd.DataFrame(columns=[col[1] for col in df.columns][:4], data=[labels_values / sum_per_label])


In [6]:
# Stack the collected per-run summaries row-wise into one table and display it.
# Each frame keeps its own index labels, so labels repeat across rows.
summary_results_df = pd.concat(summary_results_lst, axis=0)
summary_results_df

relation,VAGUE,VAGUE,VAGUE,BEFORE,BEFORE,BEFORE,AFTER,AFTER,AFTER,EQUAL,EQUAL,EQUAL,micro-f1,relax-micro-f1,cycles,coverage,method,suffix_path,model_name
metric,precision,recall,f1,precision,recall,f1,precision,recall,f1,precision,recall,f1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,0.177694,0.221698,0.197272,0.2,0.003717,0.007299,0.03125,0.032258,0.031746,0.0,0.0,0.0,0.007659,0.007659,0.0 / 20,0.221027,zero-shot,,gemini-1.5-pro
0,0.077156,0.120283,0.094009,0.222222,0.007435,0.014388,0.0,0.0,0.0,0.006923,0.079646,0.012739,0.016272,0.008729,1.0 / 20,0.210275,zero-shot,,Meta-Llama-3.1-8B-Instruct-Turbo
0,0.570934,0.389151,0.462833,0.625,0.055762,0.102389,0.125,0.096774,0.109091,0.176471,0.026549,0.046154,0.035527,0.046398,0.0 / 16,0.415771,zero-shot,completion,gemini-1.5-pro
0,0.52907,0.214623,0.305369,0.217391,0.074349,0.110803,0.02381,0.032258,0.027397,0.167883,0.40708,0.237726,-0.013243,0.003323,2.0 / 20,0.692951,zero-shot,completion,Meta-Llama-3.1-8B-Instruct-Turbo
0,0.575,0.650943,0.610619,0.657143,0.171004,0.271386,0.181818,0.129032,0.150943,0.236111,0.150442,0.183784,0.103299,0.132796,2.0 / 20,0.769415,zero-shot,completion-explanation,gemini-1.5-pro
0,0.458333,0.181604,0.260135,0.25,0.02974,0.053156,0.0,0.0,0.0,0.14455,0.539823,0.228037,0.363234,0.037009,3.0 / 20,0.732378,zero-shot,completion-explanation,Meta-Llama-3.1-8B-Instruct-Turbo
0,0.2,0.356132,0.256149,0.111111,0.007435,0.013937,0.285714,0.064516,0.105263,0.090909,0.026549,0.041096,0.046826,0.056548,1.0 / 20,0.35006,few-shot,,gemini-1.5-pro
0,0.071429,0.117925,0.088968,0.015385,0.007435,0.010025,0.0,0.0,0.0,0.013699,0.053097,0.021779,0.007138,0.004131,3.0 / 20,0.222222,few-shot,,Meta-Llama-3.1-8B-Instruct-Turbo
0,0.52766,0.877358,0.65899,0.666667,0.081784,0.145695,0.0,0.0,0.0,0.214286,0.106195,0.142012,0.285241,0.412102,0.0 / 20,0.956989,few-shot,completion,gemini-1.5-pro
0,0.449102,0.176887,0.253807,0.44898,0.081784,0.138365,0.0,0.0,0.0,0.122288,0.548673,0.2,0.0968,0.059398,1.0 / 20,0.868578,few-shot,completion,Meta-Llama-3.1-8B-Instruct-Turbo
