In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.pyplot import figure

from make_benchmarks import *

Combine cafa3_1 and cafa3_2 results, and cafa2_1 and cafa2_2 results

In [None]:
# Root folder that holds every collected evaluation run
eval_path = '/data/rashika/CAFA4/head2head/collected_eval/'

# CAFA2: the two partial evaluation folders and the folder for their combined results
cafa2_1_path = f"{eval_path}eval_cafa2_1/"
cafa2_2_path = f"{eval_path}eval_cafa2_2/"
eval_cafa2 = f"{eval_path}eval_cafa2/"

# CAFA3: same layout as CAFA2
cafa3_1_path = f"{eval_path}eval_cafa3_1/"
cafa3_2_path = f"{eval_path}eval_cafa3_2/"
eval_cafa3 = f"{eval_path}eval_cafa3/"

In [None]:
def _list_common(paths, want_dirs):
    """Return the sorted entry names present in every directory of ``paths``.

    Only sub-directories are kept when ``want_dirs`` is true, only regular
    files otherwise, so stray entries of the wrong kind are never picked up.
    """
    keep = os.path.isdir if want_dirs else os.path.isfile
    per_path = [
        {name for name in os.listdir(path) if keep(os.path.join(path, name))}
        for path in paths
    ]
    return sorted(set.intersection(*per_path))


# Combines the results in the results_paths_list and outputs them in combined_results_path
def combine_results(results_paths_list, combined_results_path, add_result_path = False):
    """Concatenate matching result tables from several evaluation folders.

    Each path in ``results_paths_list`` is expected to contain one sub-folder
    per benchmark, each holding tab-separated result files. For every benchmark
    folder present in all paths, and every result file present in all of those
    benchmark folders, the tables are concatenated row-wise and written to
    ``combined_results_path/<benchmark>/<result_file>``.

    Parameters
    ----------
    results_paths_list : list of str
        Evaluation folders to combine (with or without a trailing slash).
    combined_results_path : str
        Output root; it and the benchmark sub-folders are created if missing.
    add_result_path : bool, default False
        When true, three columns are added to every table before concatenation:
        ``Result Path`` (last component of the source folder), ``cafa`` (second
        ``_``-separated token of that component, e.g. ``cafa2`` for
        ``eval_cafa2``) and ``cafa_filename`` (``cafa`` + ``_`` + ``filename``).
        This requires a ``filename`` column and a folder name containing ``_``.

    Raises
    ------
    ValueError
        If ``results_paths_list`` is empty.
    """
    if not results_paths_list:
        raise ValueError("results_paths_list must contain at least one result path")

    # Benchmark folders shared by every result path (plain files are ignored)
    benchmarks = _list_common(results_paths_list, want_dirs=True)
    print("Common benchmarks = ", benchmarks)

    for bm in benchmarks:
        out_dir = os.path.join(combined_results_path, bm)
        # makedirs also creates combined_results_path itself when it is missing
        os.makedirs(out_dir, exist_ok=True)

        # Result files shared by this benchmark in every result path
        bm_dirs = [os.path.join(result_path, bm) for result_path in results_paths_list]
        result_files = _list_common(bm_dirs, want_dirs=False)
        print("Common Result files = ", result_files)

        for result_file in result_files:
            dfs = []
            for result_path in results_paths_list:
                df = pd.read_csv(os.path.join(result_path, bm, result_file), sep='\t')
                if add_result_path:
                    # normpath drops a trailing slash so basename is the folder name
                    run_name = os.path.basename(os.path.normpath(result_path))
                    df["Result Path"] = run_name
                    df["cafa"] = run_name.split("_")[1]
                    df["cafa_filename"] = df["cafa"] + "_" + df["filename"]
                dfs.append(df)

            df = pd.concat(dfs, axis=0)
            # Preview only; the full table is in the file written below
            display(df.head())
            df.to_csv(os.path.join(out_dir, result_file), sep="\t", index=False, header=True)
    

In [None]:
# Combine CAFA2_1 and CAFA2_2 results
#combine_results([cafa2_1_path, cafa2_2_path], eval_cafa2, add_result_path = False)

# Combine CAFA3_1 and CAFA3_2 results
#combine_results([cafa3_1_path, cafa3_2_path], eval_cafa3, add_result_path = False)

Combine CAFA2, CAFA3, CAFA4 results

In [None]:
# CAFA4 evaluation folder and the output folder for the cross-CAFA combination
combined_results = f"{eval_path}combined_results/"
eval_cafa4 = f"{eval_path}eval_cafa4/"

# Combine the three CAFA folders, tagging every row with the folder it came from
combine_results(
    results_paths_list=[eval_cafa2, eval_cafa3, eval_cafa4],
    combined_results_path=combined_results,
    add_result_path=True,
)

### Distribution of f_max_micro and S_min for each CAFA

In [None]:
## Manually observed y-limits for the S_min distributions, keyed by benchmark folder name
S_min_y_lim = {
    'bpo_all_type1': [15, 40],
    'bpo_all_type2': [15, 40],
    'bpo_all_type3': [4, 120],
    'bpo_all_type12': [15, 40],
    'cco_all_type1': [3, 15],
    'cco_all_type2': [4, 12],
    'cco_all_type3': [4, 25],
    'cco_all_type12': [5, 12],
    'mfo_all_type1': [0, 12],
    'mfo_all_type2': [0, 15],
    'mfo_all_type3': [0, 25],
    'mfo_all_type12': [0, 15],
}

# Only sub-directories are benchmarks: combined_results also holds loose files
# (Combined_register.tab is written there), which must not be opened as folders.
benchmarks = sorted(
    entry for entry in os.listdir(combined_results)
    if os.path.isdir(os.path.join(combined_results, entry))
)
for bm in benchmarks:
    bm_dir = os.path.join(combined_results, bm)
    f_micro_df = pd.read_csv(os.path.join(bm_dir, "evaluation_best_f_micro.tsv"), sep = "\t", header = 0)
    S_min_df = pd.read_csv(os.path.join(bm_dir, "evaluation_best_s.tsv"), sep = "\t", header = 0)

    # One figure per benchmark: f distribution on the left, s distribution on the right
    fig, axes = plt.subplots(1, 2, figsize=(10, 2.5))

    sns.boxplot(x='Result Path', y='f', data = f_micro_df.loc[:, ["f", "Result Path"]], ax=axes[0])
    axes[0].set_title(bm)
    axes[0].set_xlabel("f_max")

    sns.boxplot(x='Result Path', y='s', data = S_min_df.loc[:, ["s", "Result Path"]], ax=axes[1])
    axes[1].set_xlabel("S_min")
    # Benchmarks without a hand-picked range keep matplotlib's automatic limits
    if bm in S_min_y_lim:
        axes[1].set_ylim(S_min_y_lim[bm])
    axes[1].set_title(bm)

Combine the register files

In [None]:
# Register tables for CAFA2, CAFA3 and CAFA4, all located directly under eval_path
cafa2_register, cafa3_register, cafa4_register = (
    f"{eval_path}cafa{edition}_register.tab" for edition in (2, 3, 4)
)

In [None]:
registers = [cafa2_register, cafa3_register, cafa4_register]

# Every register is relabelled with the same nine column names so the tables stack cleanly
register_columns = [
    '#INTERNAL_ID', 'EXTERNAL_ID', 'TEAM_NAME', 'TYPE', 'DISPLAY_NAME',
    'DUMP_NAME', 'PI', 'KEYWORDS', 'COLOR (RGB in HEX)',
]

dfs = []
for register_file in registers:
    register_df = pd.read_csv(register_file, sep="\t", header=0)
    register_df.columns = register_columns
    # The file name starts with the challenge tag, e.g. "cafa2_register.tab" -> "cafa2"
    file_name = register_file.split("/")[-1]
    register_df["CAFA"] = file_name.split("_")[0]
    dfs.append(register_df)

combined_register = pd.concat(dfs, axis=0)

In [None]:
# Store the merged register as a tab-separated file inside combined_results
combined_register.to_csv(
    f"{combined_results}Combined_register.tab",
    index=False,
    sep="\t",
)

In [None]:
def create_plots_h2_h(results_path, metric, cols,out_path='/home/rashika/CAFA4/eval/plots/', n_curves = None, names_file = None, S_min_coord = None):
    """Plot the best-scoring curve of every method group, per benchmark and namespace.

    For each sub-directory of ``results_path`` the table ``evaluation_all.tsv``
    is read, rows with ``cov`` below 0.3 are dropped, and for every
    (``group_unique``, ``ns``) pair the row with the best ``metric`` is selected.
    The curves of the selected methods are written to
    ``<out_path>/<benchmark>/fig_<metric>.tsv`` and one figure per ``ns`` value
    is saved as ``<out_path>/<benchmark>/fig_<metric>_<ns>.png``.

    Parameters
    ----------
    results_path : str
        Folder whose sub-directories each contain ``evaluation_all.tsv``.
        Plain files in this folder are ignored.
    metric : str
        Column to optimise. ``f``, ``f_w``, ``f_micro`` and ``f_micro_w`` are
        maximised; any other metric is minimised.
    cols : list of str
        Two column names ``[x, y]`` to plot; the last one is made monotonic.
    out_path : str
        Output root; created (including parents) when missing.
    n_curves : int or None
        Maximum number of top-ranked methods to draw. ``None`` draws all of
        them. Methods whose legend label contains ``BLAST`` or ``Naive`` are
        always drawn, regardless of rank.
    names_file : str or None
        Optional tab-separated file merged on ``cafa_filename``; it must
        provide ``group`` and ``label`` and may provide ``group_unique``,
        ``is_baseline`` and ``colors``. Without it, ``cafa_filename`` is used
        for grouping and labelling.
    S_min_coord : dict or None
        Optional ``{benchmark: [[xmin, xmax], [ymin, ymax]]}`` axis limits,
        applied only when ``metric`` starts with ``s``.
    """
    cumulate = True
    coverage_threshold = 0.3
    f_metrics = ['f', 'f_w', 'f_micro', 'f_micro_w']
    axis_title_dict = {'pr': 'Precision', 'rc': 'Recall', 'f': 'F-score', 'pr_w': 'Weighted Precision', 'rc_w': 'Weighted Recall', 'f_w': 'Weighted F-score', 'mi': 'Misinformation (Unweighted)', 'ru': 'Remaining Uncertainty (Unweighted)', 'mi_w': 'Misinformation', 'ru_w': 'Remaining Uncertainty', 's': 'S-score', 'pr_micro': 'Precision (Micro)', 'rc_micro': 'Recall (Micro)', 'f_micro': 'F-score (Micro)', 'pr_micro_w': 'Weighted Precision (Micro)', 'rc_micro_w': 'Weighted Recall (Micro)', 'f_micro_w': 'Weighted F-score (Micro)'}

    # Only sub-directories are benchmarks; loose files in results_path are skipped
    dir_list = sorted(
        entry for entry in os.listdir(results_path)
        if os.path.isdir(os.path.join(results_path, entry))
    )

    # makedirs also creates missing parent folders of out_path
    os.makedirs(out_path, exist_ok=True)

    # The names file does not depend on the benchmark, so it is read once
    methods = None if names_file is None else pd.read_csv(names_file, sep = "\t", header=0)

    # F-score contour lines (identical for every figure)
    x = np.arange(0.01, 1, 0.01)
    y = np.arange(0.01, 1, 0.01)
    X, Y = np.meshgrid(x, y)
    Z = 2 * X * Y / (X + Y)

    for file in dir_list:
        df_file = os.path.join(results_path, file, "evaluation_all.tsv")
        out_folder = os.path.join(out_path, file)
        os.makedirs(out_folder, exist_ok=True)

        df = pd.read_csv(df_file, sep="\t")

        # Set method information (optional)
        if methods is None:
            df['group'] = df['cafa_filename']
            df['label'] = df['cafa_filename']
            df['is_baseline'] = False
        else:
            df = pd.merge(df, methods, on='cafa_filename', how='left')
            # Assign the filled column back: a chained inplace fillna does not
            # reliably modify the frame (it is a no-op under Copy-on-Write)
            df['group'] = df['group'].fillna(df['cafa_filename'])
            df['label'] = df['label'].fillna(df['cafa_filename'])
            if 'is_baseline' not in df:
                df['is_baseline'] = False
            else:
                df['is_baseline'] = df['is_baseline'].fillna(False)

        # The index below needs group_unique; fall back to group when the
        # names file (or its absence) did not supply it
        if 'group_unique' not in df:
            df['group_unique'] = df['group']

        df = df.set_index(['group_unique', 'label', 'ns', 'cafa_filename','tau'])

        # Filter by coverage
        df = df[df['cov'] >= coverage_threshold].copy()

        # Assign colors based on group
        if 'colors' not in df.columns:
            cmap = plt.get_cmap('tab20')
            df['colors'] = df.index.get_level_values('group_unique')
            df['colors'] = pd.factorize(df['colors'])[0]
            df['colors'] = df['colors'].apply(lambda x: cmap.colors[x % len(cmap.colors)])

        # Best row per (group_unique, ns): max for F-like metrics, min otherwise
        metric_by_group = df.groupby(level=['group_unique', 'ns'])[metric]
        index_best = metric_by_group.idxmax() if metric in f_metrics else metric_by_group.idxmin()

        # Filter the dataframe for the best methods (all thresholds of each best method)
        df_methods = df.reset_index('tau').loc[[ele[:-1] for ele in index_best], ['tau', 'cov', 'colors'] + cols + [metric]].sort_index()

        # Makes the curves monotonic. Cumulative max/min on the last column of the cols variable, e.g. "pr" --> precision
        if cumulate:
            if metric in f_metrics:
                df_methods[cols[-1]] = df_methods.groupby(level=['label', 'ns'])[cols[-1]].cummax()
            else:
                df_methods[cols[-1]] = df_methods.groupby(level=['label', 'ns'])[cols[-1]].cummin()

        # Save to file
        df_methods.drop(columns=['colors']).to_csv(os.path.join(out_folder, 'fig_{}.tsv'.format(metric)), float_format="%.3f", sep="\t")

        # Filter the dataframe for the best method and threshold
        df_best = df.loc[index_best, ['cov', 'colors'] + cols + [metric]]

        # Calculate the max coverage across all thresholds
        df_best['max_cov'] = df_methods.groupby(level=['group_unique', 'label', 'ns'])['cov'].max()

        # Set a label column for the plot legend
        df_best['label'] = df_best.index.get_level_values('group_unique')
        df_best['label'] = df_best.agg(lambda x: f"{x['label']} ({metric.upper()}={x[metric]:.3f} C={x['max_cov']:.3f})", axis=1)

        # Generate the figures
        plt.rcParams.update({'font.size': 22, 'legend.fontsize': 18})

        for ns, df_g in df_best.groupby(level='ns'):
            fig, ax = plt.subplots(figsize=(15, 15))

            # Contour lines. At the moment they are provided only for the F-score
            if metric.startswith('f'):
                CS = ax.contour(X, Y, Z, np.arange(0.1, 1.0, 0.1), colors='gray')
                ax.clabel(CS, inline=True)

            # Rank methods: best metric first, ties broken by higher coverage
            ranked = df_g.sort_values(by=[metric, 'max_cov'], ascending=[not metric.startswith('f'), False])

            # Iterate methods
            for i, (index, row) in enumerate(ranked.iterrows()):
                is_baseline = ('BLAST' in row['label']) or ('Naive' in row['label'])
                # i is zero-based, so "i < n_curves" keeps exactly n_curves methods
                within_limit = n_curves is None or i < n_curves
                if not (within_limit or is_baseline):
                    continue

                # index is (group_unique, label, ns, cafa_filename, tau)
                data = df_methods.loc[index[:-2]]

                # Precision-recall or mi-ru curves; baselines are drawn dotted
                line_style = {'linestyle': 'dotted'} if is_baseline else {}
                ax.plot(data[cols[0]], data[cols[1]], color=row['colors'], label=row['label'], lw=3.5, zorder=500-i, **line_style)

                # F-max or S-min dots
                ax.plot(row[cols[0]], row[cols[1]], color=row['colors'], marker='o', markersize=12, mfc='none', zorder=1000-i)
                ax.plot(row[cols[0]], row[cols[1]], color=row['colors'], marker='o', markersize=6, zorder=1000-i)

            # Set axes limits: S_min_coord holds S-plot coordinates, so it is
            # applied to S-type metrics only and only for benchmarks it lists
            if metric.startswith('s') and S_min_coord and file in S_min_coord:
                ax.set_xlim(S_min_coord[file][0])
                ax.set_ylim(S_min_coord[file][1])

            # Set titles
            ax.set_title(file)
            ax.set_xlabel(axis_title_dict[cols[0]], labelpad=20, fontsize=36)
            ax.set_ylabel(axis_title_dict[cols[1]], labelpad=20, fontsize=36)

            # Legend
            leg = ax.legend(markerscale=6, title=file)
            for legobj in leg.get_lines():
                legobj.set_linewidth(10.0)

            leg.set_bbox_to_anchor((1.65, 0.75))

            # Save figure on disk
            fig.savefig(os.path.join(out_folder, "fig_{}_{}.png".format(metric, ns)), bbox_inches='tight', dpi=300, transparent=True)




In [None]:
# Per-benchmark pairs of two-element ranges, keyed by benchmark folder name
S_min_coord = {
    'bpo_all_type1': [[17, 27], [0, 50]],
    'bpo_all_type2': [[12, 22], [0, 40]],
    'bpo_all_type3': [[16, 22], [0, 100]],
    'bpo_all_type12': [[15, 24], [0, 60]],
    'cco_all_type1': [[7, 10], [0, 20]],
    'cco_all_type2': [[5, 8], [0, 15]],
    'cco_all_type3': [[7, 10.5], [0, 15]],
    'cco_all_type12': [[6, 9], [0, 15]],
    'mfo_all_type1': [[2, 4.5], [0, 12]],
    'mfo_all_type2': [[3, 6], [0, 20]],
    'mfo_all_type3': [[7, 10], [0, 15]],
    'mfo_all_type12': [[3, 5], [0, 20]],
}

In [None]:
# Names file passed to every plotting call below
register = '/data/rashika/CAFA4/head2head/collected_eval/register_combined_modified.tsv'

# One output sub-folder per metric under combined_plots/
plots_path = f"{eval_path}combined_plots/"
plots_path_f = f"{plots_path}f/"
plots_path_f_micro = f"{plots_path}f_micro/"
plots_path_s_w = f"{plots_path}s_w/"

# F-score: recall vs precision
metric, cols = 'f', ['rc', 'pr']
create_plots_h2_h(combined_results, metric, cols, names_file=register, n_curves=10, out_path=plots_path_f)

# Micro-averaged F-score: micro recall vs micro precision
metric, cols = 'f_micro', ['rc_micro', 'pr_micro']
create_plots_h2_h(combined_results, metric, cols, names_file=register, n_curves=10, out_path=plots_path_f_micro)

# Weighted S-score: remaining uncertainty vs misinformation
metric, cols = 's_w', ['ru_w', 'mi_w']
create_plots_h2_h(combined_results, metric, cols, names_file=register, n_curves=10, out_path=plots_path_s_w)




In [None]:
# Echo the root evaluation directory as this cell's output
eval_path