In [None]:
# Import required libraries
import pandas as pd

In [None]:
# Define the function to blend submissions
def blend_submissions(sub1_path, sub2_path, sub3_path, weights, output_path, normalize=True):
    """Blend three prediction files into a single weighted submission.

    Each input is read as a headerless, tab-separated file with three columns
    (sequence_id, go_term, score). Rows are matched on the
    (sequence_id, go_term) pair using inner joins, so a pair is kept only when
    it appears in all three files.

    Parameters
    ----------
    sub1_path, sub2_path, sub3_path : str or path-like
        Locations of the three submissions to blend.
    weights : sequence of three numbers
        Weights applied to the scores of submission 1, 2 and 3 respectively.
    output_path : str or path-like
        Destination of the blended file (tab-separated, no header, no index).
    normalize : bool, default True
        When True, the weighted sum is divided by the sum of the weights, so
        the result is a weighted average; with non-negative weights it stays
        inside the range spanned by the input scores even when the weights do
        not sum to 1. Pass False to write the raw weighted sum.

    Raises
    ------
    ValueError
        If ``weights`` does not hold exactly three values, or if ``normalize``
        is True and the weights do not sum to a positive number.
    """
    columns = ["sequence_id", "go_term", "score"]
    keys = ["sequence_id", "go_term"]

    # Validate the weights before touching the file system so a bad call fails fast.
    w1, w2, w3 = weights  # unpacking raises ValueError unless there are exactly three
    total_weight = w1 + w2 + w3
    if normalize and not total_weight > 0:
        raise ValueError(
            f"weights must sum to a positive number to be normalized, got {total_weight!r}"
        )

    # Load the three submission files
    sub1, sub2, sub3 = (
        pd.read_csv(path, sep="\t", header=None, names=columns)
        for path in (sub1_path, sub2_path, sub3_path)
    )

    # Give every score column its final name up front; this avoids merge
    # suffixes and an in-place rename afterwards.
    merged = (
        sub1.rename(columns={"score": "score_1"})
        .merge(sub2.rename(columns={"score": "score_2"}), on=keys)
        .merge(sub3.rename(columns={"score": "score_3"}), on=keys)
    )

    # Compute the blended score
    blended = (
        w1 * merged["score_1"] +
        w2 * merged["score_2"] +
        w3 * merged["score_3"]
    )
    if normalize:
        # Turn the weighted sum into a weighted average.
        blended = blended / total_weight

    # Select final columns
    final = merged[keys].assign(blended_score=blended)

    # Save the blended output
    final.to_csv(output_path, sep="\t", index=False, header=False)

In [None]:
# Define the main function
def main():
    """Blend the three stored submissions into ``submission.tsv``.

    Gathers the input locations and blending weights, then hands them to
    ``blend_submissions`` in submission order (1, 2, 3).
    """
    # Input submissions, listed in the order their weights are given below
    input_paths = (
        "/kaggle/input/cafa-6-protein-function-vault/submission (1).tsv",
        "/kaggle/input/cafa-6-protein-function-vault/submission (2).tsv",
        "/kaggle/input/cafa-6-protein-function-vault/submission (3).tsv",
    )

    # Relative blending weights for submissions 1, 2 and 3 (they need not sum to 1)
    blend_weights = (1.2, 0.9, 0.7)

    # Blend and write the result to the output file
    blend_submissions(*input_paths, blend_weights, "submission.tsv")

In [None]:
# Run the blend only when this code executes as the top-level module
# (e.g. run as a script or as a notebook cell), not when it is imported.
if __name__ == "__main__":
    main()