In [None]:
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd

In [None]:
# Step the process working directory one level up and show the resulting path.
# Presumably this is done so that the relative "experiment_results/..." paths
# used by later cells resolve — TODO confirm against the repository layout.
# NOTE(review): os.chdir("..") is not idempotent; re-running this cell without
# restarting the kernel moves up one more level each time.
os.getcwd()
os.chdir("..")
os.getcwd()

In [None]:
def get_results_ablation(result_df):
    """Summarise the validation scores of an ablation run.

    The rows of ``result_df`` are runs of a prompt-tuned model with different
    random seeds; the ablation study is evaluated on the validation set, so
    only the ``"val"`` column is read.

    Returns:
        tuple: ``(mean, std)`` of the ``"val"`` column.
    """
    val_scores = result_df["val"]
    return val_scores.mean(), val_scores.std()

In [None]:
def get_results_trainvaltest(result_df):
    """Summarise train, validation and test scores across random-seed runs.

    Reads the ``"train"``, ``"val"`` and ``"test"`` columns of ``result_df``
    (in that order) and aggregates each one over its rows.

    Returns:
        tuple: ``(train_mean, train_std, val_mean, val_std, test_mean, test_std)``.
    """
    stats = []
    for split in ("train", "val", "test"):
        split_scores = result_df[split]
        stats.extend((split_scores.mean(), split_scores.std()))
    return tuple(stats)

In [None]:
def get_results_comparison_to_basemodel(result_df):
    """Summarise prompt-tuned scores and pick the base-model reference score.

    The ``"prompt_tuned"`` column is aggregated over runs with different
    random seeds. The base model wasn't affected by the random seeds, so the
    first entry of the ``"base"`` column is used as its score.

    Returns:
        tuple: ``(prompt_tuned_mean, prompt_tuned_std, base_score)``.

    Raises:
        IndexError: if ``result_df`` has no rows.
    """
    prompt_tuned_scores = result_df["prompt_tuned"]
    # Take the first row by position. A plain ``[0]`` is a label lookup and
    # fails when the index does not contain the label 0 (e.g. a filtered or
    # re-indexed frame).
    base_score = result_df["base"].iloc[0]
    return prompt_tuned_scores.mean(), prompt_tuned_scores.std(), base_score

In [None]:
def get_results_protcnn_family_percentage_single_df(result_df):
    """Return the percentage of proteins that were classified as the family.

    Computed as the sum of the ``"is_family"`` column divided by its length,
    scaled to a percentage.
    """
    family_flags = result_df["is_family"]
    family_fraction = family_flags.sum() / len(family_flags)
    return family_fraction * 100

In [None]:
def get_results_protcnn_family_percentage_aggregated(result_dfs):
    """Aggregate the family percentage over several result frames.

    Each frame in ``result_dfs`` is reduced to one percentage via
    ``get_results_protcnn_family_percentage_single_df``.

    Returns:
        tuple: ``(mean, std)`` of those percentages. ``np.std`` is called with
        its default ``ddof=0``, i.e. the population standard deviation.
    """
    per_frame_percentages = list(
        map(get_results_protcnn_family_percentage_single_df, result_dfs)
    )
    return np.mean(per_frame_percentages), np.std(per_frame_percentages)

In [None]:
def generate_results_table_protcnn_comparison(column_names, row_names, result_dfs_protcnn_models, result_df_protcnn_testdata):
    """Build the table of protCNN family percentages.

    Args:
        column_names: list of column labels, one per entry of
            ``result_dfs_protcnn_models``.
        row_names: index labels of the table. Every column is filled from a
            one-element list, so exactly one row label is expected.
        result_dfs_protcnn_models: list of lists of result frames; each inner
            list is aggregated into one "mean ± std" cell (one decimal).
        result_df_protcnn_testdata: result frame for the test data, reported
            in the leading ``"Test Dataset"`` column.

    Returns:
        pandas.DataFrame: the formatted table.
    """
    all_columns = ["Test Dataset"] + column_names
    table = pd.DataFrame(columns=all_columns, index=row_names)

    for position, model_result_dfs in enumerate(result_dfs_protcnn_models):
        mean_pct, std_pct = get_results_protcnn_family_percentage_aggregated(model_result_dfs)
        cell = f"{np.round(mean_pct, decimals=1)} ± {np.round(std_pct, decimals=1)}"
        table[column_names[position]] = [cell]

    testdata_pct = get_results_protcnn_family_percentage_single_df(result_df_protcnn_testdata)
    table["Test Dataset"] = [f"{np.round(testdata_pct, decimals=1)}"]
    return table

In [None]:
def generate_results_table_for_clustering_comparison(column_names, result_dfs_clusterings):
    """Build a Train/Validation/Test table with one column per result frame.

    Args:
        column_names: column labels, one per entry of ``result_dfs_clusterings``.
        result_dfs_clusterings: result frames, each summarised with
            ``get_results_trainvaltest``.

    Returns:
        pandas.DataFrame: rows ``"Train"``, ``"Validation"`` and ``"Test"``;
        each cell is a "mean ± std" string rounded to two decimals.
    """
    def _format_cell(mean, std):
        # One table cell: mean and standard deviation, two decimals each.
        return f"{np.round(mean, decimals=2)} ± {np.round(std, decimals=2)}"

    table = pd.DataFrame(columns=column_names, index=["Train", "Validation", "Test"])
    for position, split_results in enumerate(result_dfs_clusterings):
        (train_mean, train_std,
         val_mean, val_std,
         test_mean, test_std) = get_results_trainvaltest(split_results)
        table[column_names[position]] = [
            _format_cell(train_mean, train_std),
            _format_cell(val_mean, val_std),
            _format_cell(test_mean, test_std),
        ]
    return table

In [None]:
def generate_results_table_for_ablation_study(column_names, row_names, result_dfs_ablations):
    """Build the ablation-study table of validation results.

    Args:
        column_names: column labels; entry ``i`` labels the ``i``-th frame of
            every inner list.
        row_names: row labels; entry ``j`` labels the ``j``-th inner list.
        result_dfs_ablations: nested list of result frames, one inner list per
            row, e.g. ``[[s, m, l], [s, m, l]]``.

    Returns:
        pandas.DataFrame: each cell is a "mean ± std" string (two decimals)
        computed by ``get_results_ablation``.
    """
    table = pd.DataFrame(columns=column_names, index=row_names)
    for j, result_dfs_for_ablation in enumerate(result_dfs_ablations):
        for i, df in enumerate(result_dfs_for_ablation):
            r_mean, r_std = get_results_ablation(df)
            # Write through a single .loc call. Chained indexing
            # (table[col][row] = ...) assigns into a temporary under pandas
            # Copy-on-Write and would leave the table unfilled.
            table.loc[row_names[j], column_names[i]] = (
                f"{np.round(r_mean, decimals=2)} ± {np.round(r_std, decimals=2)}"
            )
    return table

In [None]:
def generate_results_table_for_comparison_to_basemodel(column_names, result_dfs):
    """Build the prompt-tuned vs. base-model comparison table.

    Args:
        column_names: column labels, one per entry of ``result_dfs``.
        result_dfs: result frames, each summarised with
            ``get_results_comparison_to_basemodel``.

    Returns:
        pandas.DataFrame: row ``"Prompt-tuned model (ours)"`` holds a
        "mean ± std" string, row ``"Base model"`` holds the rounded base-model
        score (both rounded to two decimals).
    """
    table = pd.DataFrame(columns=column_names, index=["Prompt-tuned model (ours)", "Base model"])
    for position, comparison_df in enumerate(result_dfs):
        tuned_mean, tuned_std, base_score = get_results_comparison_to_basemodel(comparison_df)
        tuned_cell = f"{np.round(tuned_mean, decimals=2)} ± {np.round(tuned_std, decimals=2)}"
        base_cell = np.round(base_score, decimals=2)
        table[column_names[position]] = [tuned_cell, base_cell]
    return table

# Results of prompt tuning (with defaults) for all model sizes, comparison to base model

In [None]:
# Test perplexity comparison files for the clustered-100 / fromvocab-True runs,
# one per RITA model size (s, m, l).
test_results_defaults_S = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-test_perplexity_comparison.csv"
)
test_results_defaults_M = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_m-fromvocab-True-test_perplexity_comparison.csv"
)
test_results_defaults_L = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_l-fromvocab-True-test_perplexity_comparison.csv"
)

In [None]:
# Prompt-tuned vs. base model, one column per model size; written to CSV and displayed.
table_defaults_all_sizes = generate_results_table_for_comparison_to_basemodel(
    ["S", "M", "L"],
    [test_results_defaults_S, test_results_defaults_M, test_results_defaults_L],
)
table_defaults_all_sizes.to_csv(
    "experiment_results/final_tables/results_perplexity_model_sizes.csv"
)
table_defaults_all_sizes

# Results for the ablation study: random uniform initialization vs. initialization from vocab

In [None]:
# Train/val/test comparison files for the fromvocab-True runs (RITA s and m).
trainvaltest_results_fromvocab_S = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-trainvaltest_comparison.csv"
)
trainvaltest_results_fromvocab_M = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_m-fromvocab-True-trainvaltest_comparison.csv"
)

In [None]:
# Train/val/test comparison files for the random-init (fromvocab-False) runs (RITA s and m).
trainvaltest_results_randomuniform_S = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100_s_random_init-RITA_s-fromvocab-False-trainvaltest_comparison.csv"
)
trainvaltest_results_randomuniform_M = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100_m_random_init-RITA_m-fromvocab-False-trainvaltest_comparison.csv"
)

In [None]:
# Ablation table: one row per initialization scheme, one column per model size;
# written to CSV and displayed.
table_ablation_study = generate_results_table_for_ablation_study(
    ["S", "M"],
    ["Sampled Vocab", "Random Uniform"],
    [
        [trainvaltest_results_fromvocab_S, trainvaltest_results_fromvocab_M],
        [trainvaltest_results_randomuniform_S, trainvaltest_results_randomuniform_M],
    ],
)
table_ablation_study.to_csv(
    "experiment_results/final_tables/results_perplexity_ablation_study.csv"
)
table_ablation_study

# Results for different clustering sequence similarity thresholds

In [None]:
# Train/val/test comparison files for RITA_s, one per clustering threshold (100, 95, 65, 35).
trainvaltest_results_clustering_100 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-trainvaltest_comparison.csv"
)
trainvaltest_results_clustering_95 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-95-RITA_s-fromvocab-True-trainvaltest_comparison.csv"
)
trainvaltest_results_clustering_65 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-65-RITA_s-fromvocab-True-trainvaltest_comparison.csv"
)
trainvaltest_results_clustering_35 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-35-RITA_s-fromvocab-True-trainvaltest_comparison.csv"
)

In [None]:
# Train/Validation/Test table with one column per clustering threshold;
# written to CSV and displayed.
table_trainvaltest_clustering = generate_results_table_for_clustering_comparison(
    [100, 95, 65, 35],
    [
        trainvaltest_results_clustering_100,
        trainvaltest_results_clustering_95,
        trainvaltest_results_clustering_65,
        trainvaltest_results_clustering_35,
    ],
)
table_trainvaltest_clustering.to_csv(
    "experiment_results/final_tables/results_perplexity_clustering_trainvaltest.csv"
)
table_trainvaltest_clustering

In [None]:
# Test perplexity comparison files for RITA_s, one per clustering threshold (100, 95, 65, 35).
basemodel_results_clustering_100 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-test_perplexity_comparison.csv"
)
basemodel_results_clustering_95 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-95-RITA_s-fromvocab-True-test_perplexity_comparison.csv"
)
basemodel_results_clustering_65 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-65-RITA_s-fromvocab-True-test_perplexity_comparison.csv"
)
basemodel_results_clustering_35 = pd.read_csv(
    "experiment_results/prompt-tuning-clustered-35-RITA_s-fromvocab-True-test_perplexity_comparison.csv"
)

In [None]:
# Prompt-tuned vs. base model, one column per clustering threshold;
# written to CSV and displayed.
table_basemodelcomparison_clustering = generate_results_table_for_comparison_to_basemodel(
    [100, 95, 65, 35],
    [
        basemodel_results_clustering_100,
        basemodel_results_clustering_95,
        basemodel_results_clustering_65,
        basemodel_results_clustering_35,
    ],
)
table_basemodelcomparison_clustering.to_csv(
    "experiment_results/final_tables/results_perplexity_clustering_basemodelcomparison.csv"
)
table_basemodelcomparison_clustering

# Results for protCNN predictions

In [None]:
# protCNN result files for RITA_s, one list per clustering threshold; each list
# holds the frames for seeds 0, 1 and 2.
protcnn_results_clustering_100 = [pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv") for i in range(3)]
protcnn_results_clustering_95 = [pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-95-RITA_s-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv") for i in range(3)]
protcnn_results_clustering_65 = [pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-65-RITA_s-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv") for i in range(3)]
protcnn_results_clustering_35 = [pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-35-RITA_s-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv") for i in range(3)]
# The base-model path has no seed component, so the same file is loaded three
# times and the list holds three identical frames. The path is a plain string
# (no placeholders) and the loop variable is unused.
protcnn_results_basemodel = [pd.read_csv("experiment_results/protcnn_results/basemodel-RITA_s-generated.fasta_protcnn_results.csv") for _ in range(3)]

In [None]:
# protCNN results for the prepared test FASTA file.
protcnn_results_testdata = pd.read_csv(
    "experiment_results/protcnn_results/InterProUniprotPF03272prepared_test.fasta_protcnn_results.csv"
)

In [None]:
# protCNN family-percentage table, one column per clustering threshold plus the
# test-data column; written to CSV and displayed.
table_protcnn_comparison_clustering = generate_results_table_protcnn_comparison(
    [100, 95, 65, 35],
    ["Prompt-tuned model (ours)"],
    [
        protcnn_results_clustering_100,
        protcnn_results_clustering_95,
        protcnn_results_clustering_65,
        protcnn_results_clustering_35,
    ],
    protcnn_results_testdata,
)
table_protcnn_comparison_clustering.to_csv(
    "experiment_results/final_tables/results_protcnn_clustering.csv"
)
table_protcnn_comparison_clustering

In [None]:
# protCNN result files for the clustered-100 runs, one list per RITA model size;
# each list holds the frames for seeds 0, 1 and 2.
protcnn_results_S = [
    pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-100-RITA_s-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv")
    for i in range(3)
]
protcnn_results_M = [
    pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-100-RITA_m-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv")
    for i in range(3)
]
protcnn_results_L = [
    pd.read_csv(f"experiment_results/protcnn_results/prompt-tuning-clustered-100-RITA_l-fromvocab-True-seed-{i}-generated.fasta_protcnn_results.csv")
    for i in range(3)
]

In [None]:
# protCNN result files for the base models, one single-frame list per RITA
# model size. The paths contain no placeholders, so plain string literals are
# used instead of f-strings.
protcnn_results_SBase = [pd.read_csv("experiment_results/protcnn_results/basemodel-RITA_s-generated.fasta_protcnn_results.csv")]
protcnn_results_MBase = [pd.read_csv("experiment_results/protcnn_results/basemodel-RITA_m-generated.fasta_protcnn_results.csv")]
protcnn_results_LBase = [pd.read_csv("experiment_results/protcnn_results/basemodel-RITA_l-generated.fasta_protcnn_results.csv")]

In [None]:
# One single-row protCNN table for the prompt-tuned models and one for the base
# models, both with one column per model size.
table_protcnn_comparison_model_sizes = generate_results_table_protcnn_comparison(
    ["S", "M", "L"],
    ["Prompt-tuned model (ours)"],
    [protcnn_results_S, protcnn_results_M, protcnn_results_L],
    protcnn_results_testdata,
)
table_protcnn_comparison_model_sizes_base = generate_results_table_protcnn_comparison(
    ["S", "M", "L"],
    ["Base model"],
    [protcnn_results_SBase, protcnn_results_MBase, protcnn_results_LBase],
    protcnn_results_testdata,
)


In [None]:
# Stack the prompt-tuned and base-model rows into one table; written to CSV and displayed.
table_protcnn_comparison_model_sizes_all = pd.concat(
    [
        table_protcnn_comparison_model_sizes,
        table_protcnn_comparison_model_sizes_base,
    ]
)
table_protcnn_comparison_model_sizes_all.to_csv(
    "experiment_results/final_tables/results_protcnn_model_sizes.csv"
)
table_protcnn_comparison_model_sizes_all