In [1]:
# Standard-library imports
import os

In [2]:
def create_label_dict(label_path):
    """
    Create a dictionary of labeled data from a tab-separated file.

    Every non-blank line is split on tab characters. The first field is used
    as the ID, the third and fourth fields are converted to ``int``, and all
    fields after the ID are stored together as one label entry. Blank lines
    are skipped, so an empty file produces an empty dictionary.

    Parameters:
    - label_path (str): The path to the file containing labeled data. The file
      is decoded as UTF-8 and a leading byte-order mark, if any, is dropped.

    Returns:
    - dict: A dictionary where each key is an ID and the corresponding value is
      a list of label entries (one list of fields per line) in file order.

    Raises:
    - IndexError: If a non-blank line has fewer than four tab-separated fields.
    - ValueError: If the third or fourth field of a line is not an integer.
    """
    label_dict = {}
    with open(label_path, "r", encoding="utf-8-sig") as f:
        # Leading/trailing whitespace of the whole file is discarded up front.
        file_text = f.read().strip()

    for line in file_text.split("\n"):
        # An empty file still yields one empty "line"; skip it (and any blank
        # separator lines) instead of failing on the missing fields.
        if not line.strip():
            continue

        sample_id, *fields = line.split("\t")
        # fields[1] and fields[2] are the third and fourth columns of the line.
        fields[1], fields[2] = int(fields[1]), int(fields[2])

        label_dict.setdefault(sample_id, []).append(fields)

    return label_dict


## Load predictions

In [3]:
# Prediction files to combine, listed in the order they are loaded.
prediction_paths = (
    "./submission/answer42.txt",
    "./submission/answer43.txt",
    "./submission/answer44.txt",
    "./submission/answer45.txt",
    "./submission/answer46.txt",
)

# Parse each file into its own {id: [label entries]} dictionary.
ans42, ans43, ans44, ans45, ans46 = [
    create_label_dict(path) for path in prediction_paths
]

# Leave `val_label_path` bound to the last file that was loaded.
val_label_path = prediction_paths[-1]

In [4]:
# Gather the loaded prediction dictionaries into one list, in load order.
all_predictions = [
    ans42,
    ans43,
    ans44,
    ans45,
    ans46,
]

## Combine all predictions into a single prediction file

In [5]:
from collections import defaultdict, Counter

def vote_predictions(prediction_dicts):
    """
    Combine multiple prediction dictionaries using a voting mechanism.

    Entities from all dictionaries are pooled per document ID. Every distinct
    (label, start, end) span that appears in at least one dictionary is kept;
    voting only decides which remaining fields (the predicted value plus any
    additional trailing fields) are attached to that span. When several
    payloads are equally frequent, the one encountered first wins.

    Parameters:
    - prediction_dicts (list): A list of dictionaries mapping a document ID to
      a list of entities. Each entity is a sequence of at least four items:
      label, start, end, predicted value, followed by optional extra fields.
      All items after ``end`` must be hashable.

    Returns:
    - dict: A dictionary mapping each document ID to a list of entities, one
      per distinct span, in the order the spans were first seen. Each entity
      is ``[label, start, end, pred_value, *extra_fields]``.

    Raises:
    - ValueError: If an entity has fewer than four items.
    """
    pooled = defaultdict(list)

    # Pool the entities of every prediction dictionary by document ID.
    for prediction_dict in prediction_dicts:
        for doc_id, entities in prediction_dict.items():
            pooled[doc_id].extend(entities)

    final_predictions = {}

    for doc_id, entities in pooled.items():
        span_votes = defaultdict(Counter)

        # Count how often each payload was predicted for each span. The extra
        # fields are voted on together with the value so that entities with
        # more than four items are handled instead of failing to unpack.
        for label, start, end, pred_value, *extra in entities:
            span_votes[(label, start, end)][(pred_value, *extra)] += 1

        # Keep the most frequent payload of every span.
        final_predictions[doc_id] = [
            [*span, *counter.most_common(1)[0][0]]
            for span, counter in span_votes.items()
        ]

    return final_predictions

In [6]:
# Merge the individual prediction dictionaries into one voted result.
final_predictions = vote_predictions(prediction_dicts=all_predictions)

In [7]:
def write_processed_labels_to_file(processed_label_dict, output_path):
    """
    Write processed labels from a dictionary to a tab-separated text file.

    One line is written per label entry: the sample ID followed by the first
    four items of the entry, plus the fifth item when the entry has one.
    Lines are emitted in the iteration order of the dictionary and of each
    label list. An existing file at ``output_path`` is overwritten.

    Parameters:
    - processed_label_dict (dict): A dictionary where each key is a sample ID,
      and the corresponding value is a list of processed labels.
    - output_path (str): The path to the output file (written as UTF-8).

    Returns:
    - None
    """
    with open(output_path, "w", encoding="utf-8") as out_file:
        for sample_id, entries in processed_label_dict.items():
            for entry in entries:
                # The ID and the four mandatory fields always form the row.
                columns = [sample_id, entry[0], entry[1], entry[2], entry[3]]

                # A fifth field is optional and appended only when present.
                if len(entry) > 4:
                    columns.append(entry[4])

                # format(value) renders a field the same way f-string
                # interpolation does.
                out_file.write("\t".join(map(format, columns)) + "\n")

In [8]:

# Destination file for the combined predictions.
output_path = "./submission/vote_answer.txt"

write_processed_labels_to_file(
    processed_label_dict=final_predictions,
    output_path=output_path,
)