In [None]:
import numpy as np
import pandas as pd

def compute_iou(span1, span2):
    """Return the Intersection over Union (IoU) of two ``(start, end)`` spans.

    Span length is measured as ``end - start``. The union is taken as the
    extent from the smaller start to the larger end; for spans that do not
    overlap this extent includes the gap between them, but the intersection
    is 0 in that case so the result is still 0.

    Args:
        span1: First span as a ``(start, end)`` pair.
        span2: Second span as a ``(start, end)`` pair.

    Returns:
        The ratio of overlap length to union extent, or ``0`` when the
        union extent is zero.
    """
    (a_lo, a_hi), (b_lo, b_hi) = span1, span2

    # Overlap is clamped at zero so disjoint spans do not yield a negative length.
    overlap = max(0, min(a_hi, b_hi) - max(a_lo, b_lo))
    extent = max(a_hi, b_hi) - min(a_lo, b_lo)

    if extent == 0:
        # Both spans are empty and sit at the same position; avoid dividing by zero.
        return 0
    return overlap / extent

def extract_spans(labels):
    """Collect the runs in ``labels`` that are opened by a 1 and closed by a 0.

    A span opens at the first position whose label equals 1 while no span is
    open, and closes at the next position whose label equals 0. Labels other
    than 0 and 1 neither open nor close a span.

    Args:
        labels: Sequence of per-position labels.

    Returns:
        A list of ``(start, end)`` index pairs with ``end`` exclusive. A span
        still open at the end of the sequence is closed at ``len(labels)``.
    """
    found = []
    open_at = None  # Index where the currently open span began, if any.

    for idx, value in enumerate(labels):
        if open_at is None:
            if value == 1:
                open_at = idx
        elif value == 0:
            found.append((open_at, idx))
            open_at = None

    # A run that reaches the end of the sequence has no closing 0.
    if open_at is not None:
        found.append((open_at, len(labels)))

    return found

def compute_text_iou_f1(pred_labels, true_labels, iou_threshold=0.5):
    """Count span-level true/false positives and false negatives for one instance.

    Both label sequences are converted to spans with ``extract_spans`` and
    every predicted span is compared with every true span via ``compute_iou``.
    A predicted span and a true span match when their IoU is strictly greater
    than ``iou_threshold``.

    Args:
        pred_labels: Predicted per-position labels.
        true_labels: Ground-truth per-position labels.
        iou_threshold: IoU value a pair must exceed to count as a match.

    Returns:
        A tuple ``(tp, fp, fn)`` where

        * ``tp`` is an integer array with one entry per predicted span:
          1 if that span matches at least one true span, else 0;
        * ``fp`` is an integer array with one entry per predicted span:
          1 if that span matches no true span, else 0;
        * ``fn`` is the number of true spans matched by no predicted span.
    """
    pred_spans = extract_spans(pred_labels)
    true_spans = extract_spans(true_labels)
    n_pred = len(pred_spans)

    if not pred_spans or not true_spans:
        # With one side empty nothing can match: every predicted span is a
        # false positive and every true span is a false negative.
        return np.zeros(n_pred, dtype=int), np.ones(n_pred, dtype=int), len(true_spans)

    # ious[i, j] is the IoU of predicted span i with true span j.
    ious = np.array(
        [[compute_iou(pred, true) for true in true_spans] for pred in pred_spans]
    )
    matched = ious > iou_threshold

    # Score each prediction once. Counting every non-matching (pred, true)
    # pair as a false positive would penalise a correct prediction for each
    # other true span in the instance and drag precision down.
    pred_has_match = matched.any(axis=1)
    tp = pred_has_match.astype(int)
    fp = (~pred_has_match).astype(int)

    # A true span is missed when no prediction matches it.
    fn = int(np.sum(~matched.any(axis=0)))

    return tp, fp, fn

def compute_overall_f1(predicted_column, ground_truth_column, iou_threshold=0.5):
    """Pool span-level counts over all instances and derive precision, recall and F1.

    Each pair of predicted and ground-truth label sequences is scored with
    ``compute_text_iou_f1``; the resulting counts are summed over all pairs
    and the summed totals are printed before the metrics are computed.

    Args:
        predicted_column: Iterable of predicted label sequences.
        ground_truth_column: Iterable of ground-truth label sequences, paired
            with ``predicted_column`` by position (pairing stops at the
            shorter of the two).
        iou_threshold: Threshold forwarded to ``compute_text_iou_f1``.

    Returns:
        A tuple ``(precision, recall, f1)``. Each value is ``0`` when its
        denominator is not positive.
    """
    tp_sum = 0
    fp_sum = 0
    fn_sum = 0

    for predicted, gold in zip(predicted_column, ground_truth_column):
        hits, false_alarms, misses = compute_text_iou_f1(predicted, gold, iou_threshold)
        # tp/fp may come back as arrays, so collapse them before accumulating.
        tp_sum += np.sum(hits)
        fp_sum += np.sum(false_alarms)
        fn_sum += misses

    print(tp_sum, fp_sum, fn_sum)

    n_predicted = tp_sum + fp_sum
    if n_predicted > 0:
        precision = tp_sum / n_predicted
    else:
        precision = 0

    n_actual = tp_sum + fn_sum
    if n_actual > 0:
        recall = tp_sum / n_actual
    else:
        recall = 0

    pr_total = precision + recall
    if pr_total > 0:
        f1 = 2 * (precision * recall) / pr_total
    else:
        f1 = 0

    return precision, recall, f1

# Load the evaluation spreadsheet. The path string looks like a placeholder
# that must be replaced with the real file location before running.
df = pd.read_excel('read file here')

# Keep only the rows whose 'label' value equals 1.
df = df[df['label'] == 1]

# NOTE(review): `eval` executes whatever expression each cell contains. If the
# cells only hold plain list literals, `ast.literal_eval` would parse them
# without running arbitrary code — confirm the cell format before switching.
# NOTE(review): predictions are read from 'label', the same column that was
# just filtered to equal 1. `eval` only accepts str/bytes/code objects, so this
# raises TypeError if those values are integers — presumably a separate
# prediction column was intended; confirm against the spreadsheet schema.
predicted_column = df['label'].apply(eval).tolist()  # Convert string representation of lists to actual lists
ground_truth_column = df['rationale'].apply(eval).tolist()  # Convert string representation of lists to actual lists

# Calculate overall F1 score
# (computed by compute_overall_f1 from counts summed over all rows, despite the `avg_` names).
avg_precision, avg_recall, avg_f1 = compute_overall_f1(predicted_column, ground_truth_column)

# Report the three metrics rounded to four decimal places.
print(f"Overall Precision: {avg_precision:.4f}")
print(f"Overall Recall: {avg_recall:.4f}")
print(f"Overall F1 Score: {avg_f1:.4f}")