In [1]:
import pandas as pd
import pandas.api.types
from sklearn.metrics import f1_score
import ast


class ParticipantVisibleError(Exception):
    """Custom exception for participant-visible errors."""
    pass


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """
    Compute span-level F1 score based on overlap.

    Parameters:
    - solution (pd.DataFrame): Ground truth DataFrame with row ID and token labels.
    - submission (pd.DataFrame): Submission DataFrame with row ID and token labels.
    - row_id_column_name (str): Column name for the row identifier.

    Returns:
    - float: The token-level weighted F1 score.

    Example:
    >>> solution = pd.DataFrame({
    ...     "id": [1, 2, 3],
    ...     "trigger_words": [[(612, 622), (725, 831)], [(300, 312)], []]
    ... })
    >>> submission = pd.DataFrame({
    ...     "id": [1, 2, 3],
    ...     "trigger_words": [[(612, 622), (700, 720)], [(300, 312)], [(100, 200)]]
    ... })
    >>> score(solution, submission, "id")
    0.16296296296296295
    """
    if not all(col in solution.columns for col in ["id", "trigger_words"]):
        raise ValueError("Solution DataFrame must contain 'id' and 'trigger_words' columns.")
    if not all(col in submission.columns for col in ["id", "trigger_words"]):
        raise ValueError("Submission DataFrame must contain 'id' and 'trigger_words' columns.")
    
    def safe_parse_spans(trigger_words):
        if isinstance(trigger_words, str):
            try:
                return ast.literal_eval(trigger_words)
            except (ValueError, SyntaxError):
                return []
        if isinstance(trigger_words, (list, tuple)):
            return trigger_words
        return []

    def extract_tokens_from_spans(spans):
        tokens = set()
        for start, end in spans:
            tokens.update(range(start, end))
        return tokens
    
    solution = solution.copy()
    submission = submission.copy()

    solution["trigger_words"] = solution["trigger_words"].apply(safe_parse_spans)
    submission["trigger_words"] = submission["trigger_words"].apply(safe_parse_spans)

    merged = pd.merge(
        solution,
        submission,
        on="id",
        suffixes=("_solution", "_submission")
    )

    total_true_tokens = 0
    total_pred_tokens = 0
    overlapping_tokens = 0

    for _, row in merged.iterrows():
        true_spans = row["trigger_words_solution"]
        pred_spans = row["trigger_words_submission"]

        true_tokens = extract_tokens_from_spans(true_spans)
        pred_tokens = extract_tokens_from_spans(pred_spans)

        total_true_tokens += len(true_tokens)
        total_pred_tokens += len(pred_tokens)
        overlapping_tokens += len(true_tokens & pred_tokens)

    precision = overlapping_tokens / total_pred_tokens if total_pred_tokens > 0 else 0
    recall = overlapping_tokens / total_true_tokens if total_true_tokens > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return f1

In [2]:
import pandas as pd

df = pd.read_parquet('train.parquet')

ssubmission = pd.read_csv('sample_submission.csv')
test = pd.read_csv('test.csv')

In [6]:
score(solution=ssubmission, submission=ssubmission, row_id_column_name='id')

1.0

In [9]:
score(solution=df, submission=df, row_id_column_name='id')

0

In [18]:
from copy import deepcopy

df_ss = deepcopy(df)

In [22]:
def safe_string(row):
    if row is None:
        return '[]'
    else:
        return str([(s[0], s[1]) for s in row])

df_ss['trigger_words'] = df_ss.trigger_words.apply(safe_string)

In [26]:
score(solution=df_ss, submission=df_ss, row_id_column_name='id')

1.0

In [37]:
dfnotna = df.dropna(subset=['trigger_words'])

In [43]:
total_len = dfnotna.content.str.len()

In [44]:
used_len = dfnotna.trigger_words.apply(lambda x: sum([y[1] - y[0] for y in x]))

In [46]:
(used_len/total_len).mean()

0.41976577139166515

In [54]:
from copy import deepcopy

full_len_sub = deepcopy(df_ss)
full_len_sub['trigger_words'] = full_len_sub.content.str.len().apply(lambda x: str([(0, x)]))

In [55]:
score(solution=df_ss, submission=full_len_sub, row_id_column_name='id')

0.3959288009776071

In [68]:
test = pd.read_csv('test.csv')

In [69]:
full_len_sub = deepcopy(test)
full_len_sub['trigger_words'] = full_len_sub.content.str.len().apply(lambda x: str([(0, x)]))

In [73]:
full_len_sub[['id', 'trigger_words']].to_csv('submissions/naive_baseline_full_length_cv0.396.csv', index=False)

In [57]:
from copy import deepcopy

half_len_sub = deepcopy(df_ss)
half_len_sub['trigger_words'] = half_len_sub.content.str.len().apply(lambda x: str([(int(0.3*x), int(x - 0.3*x))]))

In [59]:
score(solution=df_ss, submission=half_len_sub, row_id_column_name='id')

0.2887426263234015