In [1]:
import os
import pickle
from tqdm import tqdm
import numpy as np
import pandas as pd
from evaluation.utils import sort_constraints

In [2]:
MODEL_NAME = 'label2constraint'
dataset='sap_sam_2022/filtered'
path_to_true_constraints = f'data/{dataset}/constraints_to_log_labels/'
path_to_pred_constraints = f'data/evaluation/{dataset}/validation/new/google/flan-t5-small_checkpoint-118800/'
constraint_type='DECLARE'
constraints_dir = f'data/{dataset}/constraints'
path_to_all_constraint_types_file = os.path.join(constraints_dir,f'ALL_CONSTRAINT_TYPES.{constraint_type}.pkl')
#evaluation_output_file_name = f'evaluation_sap_sam_2022_test_{MODEL_NAME}_{anaysis_tape}.pkl'
tresholds = np.arange(0.5,.975,.01) #[.3, .9]

In [3]:
#with open('../../ml-semantic-anomaly-dection/evaluation_sap_sam_2022_test_case_names.pkl', 'rb') as f:
#    model_case_names = pickle.load(f)
model_case_names = os.listdir('data/evaluation/sap_sam_2022/filtered/validation/new/google/flan-t5-small_checkpoint-118800/')
model_case_names = [i.split('.')[0] for i in model_case_names]
#get all possible constraint types
with open(path_to_all_constraint_types_file,'rb') as f:
    all_constraint_types = pickle.load(f)

In [4]:
def calculate_precision_recall_f1(true_list, prediction_list):
    intersection_num = len(list(set(true_list).intersection(set(prediction_list))))
    recall = intersection_num/len(true_list)
    if len(prediction_list)!=0:
        precision = intersection_num/len(prediction_list)
        if (precision+recall)!= 0:
            f1 = (2*precision*recall)/(precision+recall)
            return precision, recall, f1
        return precision, recall, 0
    else:
        precision = 0
        if (precision+recall)!= 0:
            f1 = (2*precision*recall)/(precision+recall)
            return precision, recall, f1
        return precision, recall, 0

In [5]:
anaysis_tape='for_comparison_set'
evaluation_output_file_name = f'evaluation_sap_sam_2022_validation_{MODEL_NAME}_{anaysis_tape}_new.pkl'
CONSTRAINT_TYPE=['Response', 'Precedence','Succession','Alternate Succession', 'Alternate Precedence','Alternate Response','Choice','Co-Existence']
evaluation_results = []

path_to_all_constraint_types_file = os.path.join(constraints_dir,f'ALL_CONSTRAINT_TYPES.{constraint_type}.pkl')
with open(path_to_all_constraint_types_file,'rb') as f:
    all_constraint_types = pickle.load(f)

for model_case_name in tqdm(model_case_names,desc='calculate prec, rec. and f1'):
    with open(f'{path_to_true_constraints}{model_case_name}.CONSTRAINTS.pkl','rb') as f:
        true_constraints = sort_constraints(pickle.load(f), remove_duplicates=True)
        all_constraint_types_in_model = list(set([i.split('[')[0] for i in true_constraints]))

    with open(f'{path_to_pred_constraints}{model_case_name}.pkl','rb') as f:
        pred_file = pickle.load(f)
    pred_pairs_file=[]
    for i in pred_file:
        pred_pairs_file+=i[1]
    
    for treshold in tresholds:
        pred_pairs_temp = [i for i in pred_pairs_file if i[1]>treshold]
        
        true_pairs=[]
        pred_pairs=[]
        for c in list(all_constraint_types):
            if c in CONSTRAINT_TYPE:
                true_pairs_ = [i.split('[')[1][:-1] for i in true_constraints if i.startswith(c+ '[') ]
                true_pairs+=true_pairs_
                pred_pairs_ = [i[0].split('[')[1][:-1] for i in pred_pairs_temp if i[0].startswith(c+ '[')] 
                pred_pairs+=pred_pairs_
        if len(true_pairs)>0:
            precision, recall, f1 = calculate_precision_recall_f1(true_list=list(set(true_pairs)), prediction_list=list(set(pred_pairs)))
            evaluation_results.append({'constraint_type':anaysis_tape, 'model':MODEL_NAME,'threshold':treshold, 'precision':precision,'recall':recall,'f1':f1, 'case_name':model_case_name})
                   
df = pd.DataFrame(evaluation_results)
df.to_pickle(evaluation_output_file_name)   

calculate prec, rec. and f1: 100%|██████████| 6069/6069 [00:34<00:00, 176.76it/s]


In [6]:
len(df.case_name.unique())

5806

In [7]:
anaysis_tape='seperated'
evaluation_output_file_name = f'evaluation_sap_sam_2022_validation_{MODEL_NAME}_{anaysis_tape}_new.pkl'
CONSTRAINT_TYPE=['Response', 'Precedence','Succession','Alternate Succession', 'Alternate Precedence','Alternate Response','Choice','Co-Existence']
evaluation_results = []


path_to_all_constraint_types_file = os.path.join(constraints_dir,f'ALL_CONSTRAINT_TYPES.{constraint_type}.pkl')
with open(path_to_all_constraint_types_file,'rb') as f:
    all_constraint_types = pickle.load(f)

for model_case_name in tqdm(model_case_names,desc='make predictions'):
    with open(f'{path_to_true_constraints}{model_case_name}.CONSTRAINTS.pkl','rb') as f:
        true_constraints = sort_constraints(pickle.load(f), remove_duplicates=True)
        all_constraint_types_in_model = list(set([i.split('[')[0] for i in true_constraints]))

    with open(f'{path_to_pred_constraints}{model_case_name}.pkl','rb') as f:
        pred_file = pickle.load(f)
    pred_pairs_file=[]
    for i in pred_file:
        pred_pairs_file+=i[1]
        
    for c in list(all_constraint_types):
        if c in all_constraint_types_in_model:
            true_pairs = [i.split('[')[1][:-1] for i in true_constraints if i.startswith(c+ '[') ]
            pred_pairs_temp = [i for i in pred_pairs_file if i[0].startswith(c+ '[')] 
            for treshold in tresholds:
                pred_pairs = [i[0].split('[')[1][:-1] for i in pred_pairs_temp if i[1]>treshold]
                if len(true_pairs)>0:
                    precision, recall,f1 = calculate_precision_recall_f1(true_list=true_pairs, prediction_list=pred_pairs)
                    evaluation_results.append({'constraint_type':c, 'model':MODEL_NAME,'threshold':treshold, 'precision':precision,'recall':recall, 'f1':f1, 'case_name':model_case_name})
                   
df = pd.DataFrame(evaluation_results)
df.to_pickle(evaluation_output_file_name)   

make predictions: 100%|██████████| 6069/6069 [00:11<00:00, 542.03it/s]


In [8]:
#get the best threshold for each constraint 
import pickle
d = df[['constraint_type', 'threshold', 'precision', 'recall','f1']].groupby(['threshold','constraint_type']).mean().reset_index()
thresholds_constraints = {}
for constraint in d.constraint_type.unique():
    #print(constraint)
    #print(d.iloc[d[d['constraint_type']==constraint]['f1'].idxmax()]['threshold'])
    thresholds_constraints[constraint]=d.iloc[d[d['constraint_type']==constraint]['f1'].idxmax()]['threshold']
print(thresholds_constraints)
with open('evaluation_sap_sam_2022_validation_opt_thresholds.pkl', 'wb') as f:
    pickle.dump(thresholds_constraints, f)

{'Alternate Precedence': 0.8300000000000003, 'Alternate Response': 0.7800000000000002, 'Alternate Succession': 0.7600000000000002, 'Choice': 0.7100000000000002, 'Co-Existence': 0.7500000000000002, 'End': 0.7000000000000002, 'Exclusive Choice': 0.7700000000000002, 'Init': 0.7700000000000002, 'Precedence': 0.8000000000000003, 'Response': 0.7400000000000002, 'Succession': 0.7100000000000002}


In [9]:
anaysis_tape='for_comparison_overall_performance'
evaluation_output_file_name = f'evaluation_sap_sam_2022_validation_{MODEL_NAME}_{anaysis_tape}_new.pkl'
CONSTRAINT_TYPE=['Alternate Precedence',
 'Alternate Response',
 'Alternate Succession',
 'Choice',
 'Co-Existence',
 'End',
 'Exclusive Choice',
 'Init',
 'Precedence',
 'Response',
 'Succession']
evaluation_results = []


path_to_all_constraint_types_file = os.path.join(constraints_dir,f'ALL_CONSTRAINT_TYPES.{constraint_type}.pkl')
with open(path_to_all_constraint_types_file,'rb') as f:
    all_constraint_types = pickle.load(f)

for model_case_name in tqdm(model_case_names,desc='make predictions'):
    with open(f'{path_to_true_constraints}{model_case_name}.CONSTRAINTS.pkl','rb') as f:
        true_constraints = sort_constraints(pickle.load(f), remove_duplicates=True)
        all_constraint_types_in_model = list(set([i.split('[')[0] for i in true_constraints]))

    with open(f'{path_to_pred_constraints}{model_case_name}.pkl','rb') as f:
        pred_file = pickle.load(f)
    pred_pairs_file=[]
    for i in pred_file:
        pred_pairs_file+=i[1]
    
    for treshold in tresholds:
        pred_pairs_temp = [i for i in pred_pairs_file if i[1]>treshold]
        
        true_pairs=[]
        pred_pairs=[]
        for c in list(all_constraint_types):
            if c in CONSTRAINT_TYPE:
                true_pairs_ = [i for i in true_constraints if i.startswith(c+ '[') ]
                true_pairs+=true_pairs_
                pred_pairs_ = [i[0] for i in pred_pairs_temp if i[0].startswith(c+ '[')] 
                pred_pairs+=pred_pairs_
        if len(true_pairs)>0:
            precision, recall,f1 = calculate_precision_recall_f1(true_list=true_pairs, prediction_list=pred_pairs)
            evaluation_results.append({'constraint_type':anaysis_tape, 'model':MODEL_NAME,'threshold':treshold, 'precision':precision,'recall':recall,'f1':f1, 'case_name':model_case_name})
                   
df = pd.DataFrame(evaluation_results)
df.to_pickle(evaluation_output_file_name)   

make predictions: 100%|██████████| 6069/6069 [00:36<00:00, 166.50it/s]
