In [None]:
import pandas as pd
from utils import get_reverse_dicts, get_possible_outputs
import os
import sys
# Add the parent directory to the system path
current_dir = os.getcwd()  # Get the current working directory
parent_dir = os.path.dirname(current_dir)  # Get the parent directory
sys.path.append(parent_dir)  # Add the parent directory to the path

In [28]:
from Evaluation.HierarchicalEvaluator import HierarchicalEvaluator

In [17]:
data = pd.read_csv('results_menda.csv')

In [22]:
def post_process_pred(pred, conver_dict, level="fallacy"):
    splits = pred.split('. ')
    answer = splits[-1:]
    answer = [a.lower() for a in answer]
    answer = answer[0]
    print(f'Answer: {answer}')
    if level == "fallacy":
        if answer == "yes":
            return 1
        elif answer == "no":
            return 0
        else:
            return -1
    elif level == "category":
        try:
            return conver_dict[answer]
        except KeyError:
            print(f'Unknown category: {answer}')
            return -1
    elif level == "class":
        try:
            return conver_dict[answer]
        except KeyError:
            print(f'Unknown class: {answer}')
            return -1


In [23]:
ntclass, ntcat, ntfallacy = get_reverse_dicts()
processed_df = data.copy()
for index, row in data.iterrows():
    print(f'Processing row {index}')
    detection = row['pred_detection']
    category = row['pred_categories']
    spec_class = row['pred_classes']
    det_pred = post_process_pred(detection, ntfallacy, level="fallacy")
    print(f'Fallacy prediction: {det_pred}')
    cat_pred = post_process_pred(category, ntcat, level="category")
    print(f'Category prediction: {cat_pred}')
    class_pred = post_process_pred(spec_class, ntclass, level="class")
    print(f'Class prediction: {class_pred}')

    # apply processing to processed_df
    processed_df.at[index, 'pred_detection'] = det_pred
    processed_df.at[index, 'pred_categories'] = cat_pred
    processed_df.at[index, 'pred_classes'] = class_pred

Processing row 0
Answer: no
Fallacy prediction: 0
Answer: none
Unknown category: none
Category prediction: -1
Answer: none
Unknown class: none
Class prediction: -1
Processing row 1
Answer: yes
Fallacy prediction: 1
Answer: fallacy of emotion
Category prediction: 0
Answer: appeal to emotion
Class prediction: 0
Processing row 2
Answer: no
Fallacy prediction: 0
Answer: none
Unknown category: none
Category prediction: -1
Answer: none
Unknown class: none
Class prediction: -1
Processing row 3
Answer: no
Fallacy prediction: 0
Answer: fallacy of logic
Category prediction: 2
Answer: false cause
Class prediction: 3
Processing row 4
Answer: no
Fallacy prediction: 0
Answer: fallacy of emotion
Category prediction: 0
Answer: appeal to emotion
Class prediction: 0


In [24]:
# make "gt_detection" into ints instead of floats
processed_df['gt_detection'] = processed_df['gt_detection'].astype(int)
processed_df

Unnamed: 0,index,statement,pred_detection,pred_categories,pred_classes,gt_detection,gt_categories,gt_classes
0,0,I also want to see more companies do profit-sh...,0,-1,-1,0,0,0
1,1,"The best way to defeat them is to never waver,...",1,0,0,0,0,0
2,2,We turn it over to the United Nations.,0,-1,-1,0,0,0
3,3,"Crime has come down the last 2 years, for the ...",0,2,3,1,2,3
4,4,I'm proud of the fact that America is stronger...,0,0,0,1,0,0


In [32]:
detection_preds = processed_df['pred_detection'].tolist()
group_preds = processed_df['pred_categories'].tolist()
classify_preds = processed_df['pred_classes'].tolist()
detection_gt = processed_df['gt_detection'].tolist()
group_gt = processed_df['gt_categories'].tolist()
classify_gt = processed_df['gt_classes'].tolist()

evaluator = HierarchicalEvaluator(num_classes=7, head_type='STL')

print(detection_preds)
print(detection_gt)

# Add predictions and labels to evaluator
for det_p, grp_p, cls_p, det_g, grp_g, cls_g in zip(detection_preds, group_preds, classify_preds, detection_gt, group_gt, classify_gt):
    evaluator.add(
        predictions=(det_p, grp_p, cls_p),
        ground_truth=(det_g, grp_g, cls_g)
    )

print(evaluator)

[0, 1, 0, 0, 0]
[0, 0, 0, 1, 1]
Detection Metrics:
  Accuracy          : 0.4000
  Precision         : 0.0000
  Recall            : 0.0000
  F1                : 0.0000

Category Metrics:
  Overall accuracy  : 0.0000
  Overall Accuracy: 0.0000

      Metric     Class 0     Class 1     Class 2      |  Avg
    Accuracy      0.0000      0.0000      0.0000   |  0.0000
   Precision      0.0000      0.0000      0.0000   |  0.0000
      Recall      0.0000      0.0000      0.0000   |  0.0000
          F1      0.0000      0.0000      0.0000   |  0.0000

Class Metrics:
  Overall accuracy  : 0.0000
  Overall Accuracy: 0.0000

      Metric     Class 0     Class 1     Class 2     Class 3     Class 4     Class 5     Class 6      |  Avg
    Accuracy      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000   |  0.0000
   Precision      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000      0.0000   |  0.0000
      Recall      0.0000      0.0000      0.0000    