In [23]:
import os 
import pandas as pd 
import numpy as np 
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# session. Note that this silences the warning globally, not just for one cell.
pd.options.mode.chained_assignment = None

In [3]:
# Path of the combined results CSV; the literal is split over two lines purely
# for readability (adjacent string literals are joined by the parser).
DATAPATH = (
    "/work/PCDC/s202005/Research/TransferLearning/Shelburne/Results/summary/"
    "Single_1_Anti_2_Dense_1024_Ensemble/Epochs_100_Batch_128/Epochs_100_Batch_128_total.csv"
)
# Load the whole table once; later cells read from DATA.
DATA = pd.read_csv(DATAPATH)

In [21]:
def get_results(data,epochs_list,batch_list,pred_weights_list,input_weights_list,metric):
    """Label every result row with its configuration and attach summary statistics.

    The rows of ``data`` are optionally filtered by epochs / batch size, split by
    the value of the ``model`` column ("PARP", "TL", "DTL", "Scratch"), given a
    human-readable ``config`` label, stacked back together and finally joined
    with the ``describe()`` statistics of ``metric`` computed per
    ``(config, model)`` group.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``model``, ``config_epochs``, ``config_batch``,
        ``config_pred_weights``, ``config_input_weights`` and ``metric``.
        The frame is not modified.
    epochs_list, batch_list : list or None
        Values of ``config_epochs`` / ``config_batch`` to keep. A falsy value
        (``None`` or an empty list) disables the filter.
    pred_weights_list : list or None
        Values of ``config_pred_weights`` to keep for the TL and DTL rows.
    input_weights_list : list or None
        Values of ``config_input_weights`` to keep for the DTL rows.
    metric : str
        Name of the column to summarise.

    Returns
    -------
    pandas.DataFrame or dict
        An empty dict when ``metric`` is falsy. Otherwise the labelled rows with
        the ``describe()`` columns (``count``, ``mean``, ``std``, ...) merged in
        and the convenience columns ``input_w``, ``pred_w``, ``epochs`` and
        ``batch`` added, sorted by model, input_w, pred_w, batch and epochs.

    Raises
    ------
    KeyError
        If ``metric`` is not a column of ``data``.
    """
    if not metric:
        return {}
    if metric not in data.columns:
        raise KeyError("metric column not found in data: " + str(metric))

    def _with_config(frame, model_name, extra_fields):
        """Return a copy of ``frame`` with the "config" label column added."""
        labelled = frame.copy()
        titles = ["Epochs", "Batch"] + [title for title, _ in extra_fields]
        source_cols = ["config_epochs", "config_batch"] + [col for _, col in extra_fields]
        labels = []
        # Building the labels from the columns (instead of a row-wise apply)
        # keeps this working for an empty frame, and str() keeps it working for
        # NaN or numeric weight values.
        for values in zip(*(labelled[col] for col in source_cols)):
            parts = ["Model: " + model_name + " "]
            parts.extend(title + ": " + str(value) for title, value in zip(titles, values))
            labels.append("<br>".join(parts))
        labelled["config"] = pd.Series(labels, dtype=object).to_numpy()
        return labelled

    ## 1. restrict to the requested epochs / batch sizes
    # Boolean indexing returns new frames, so the caller's data is never touched.
    selected = data
    if epochs_list:
        selected = selected[selected["config_epochs"].isin(epochs_list)]
    if batch_list:
        selected = selected[selected["config_batch"].isin(batch_list)]

    ## 2. get PARP data
    ### strategy: for the same seed + cv the PARP result should be the same, so
    ### drop every config column except epochs / batch and de-duplicate.
    parp = selected[selected["model"] == "PARP"]
    parp_cols = [col for col in parp.columns if "config" not in col] + ["config_epochs", "config_batch"]
    parp = parp[parp_cols].drop_duplicates().assign(config_method="PARP")
    parp = _with_config(parp, "PARP", [])

    ## 3. get DTL data
    dtl = selected[selected["model"] == "DTL"]
    if pred_weights_list:
        dtl = dtl[dtl["config_pred_weights"].isin(pred_weights_list)]
    if input_weights_list:
        dtl = dtl[dtl["config_input_weights"].isin(input_weights_list)]
    dtl = _with_config(
        dtl,
        "DTL",
        [("Pred_weights", "config_pred_weights"), ("Input_weights", "config_input_weights")],
    )

    ## 4. get TL data
    tl = selected[selected["model"] == "TL"]
    if pred_weights_list:
        tl = tl[tl["config_pred_weights"].isin(pred_weights_list)]
    tl = _with_config(tl, "TL", [("Pred_weights", "config_pred_weights")])

    ## 5. get Scratch data
    scratch = _with_config(selected[selected["model"] == "Scratch"], "Scratch", [])

    ## final data
    results = pd.concat([parp, tl, dtl, scratch])

    ## per-configuration statistics (used for plotting hover text / ranking)
    # describe() on the selected column yields flat, numeric columns directly.
    stats = results.groupby(["config", "model"])[metric].describe().reset_index()

    total_results = results.merge(stats, how="left", on=["config", "model"])
    total_results = total_results.assign(
        input_w=total_results["config_input_weights"],
        pred_w=total_results["config_pred_weights"],
        epochs=total_results["config_epochs"],
        batch=total_results["config_batch"],
    )

    return total_results.sort_values(["model", "input_w", "pred_w", "batch", "epochs"])



## Observation 1: Are the results robust?

In [92]:
# For every metric and every (batch, epochs) setting, rank the candidate
# configurations by their mean score and remember the top-ranked row.
overall_best = []
# Loop-invariant pieces are computed once up front.
cols = ["model","mean","std","input_w","pred_w","epochs","batch","method"]
batch_values = list(DATA.config_batch.unique())
epochs_values = list(DATA.config_epochs.unique())
for metric in ['f1score','precision_score','accuracy_score',
               'balanced_accuracy_score','recall_score','roc_auc_score']:
    for batch in batch_values:
        for epochs in epochs_values:
            batch_list = [batch]
            epochs_list = [epochs]
            # No restriction on the weight settings.
            pred_weights_list = None
            input_weights_list = None
            tempresults = get_results(DATA,epochs_list,batch_list,pred_weights_list,input_weights_list,metric)
            # Keep one row per distinct summary instead of one row per raw result.
            tempresults = tempresults[cols].drop_duplicates()

            # Only the first PARP row is kept as the PARP entry for this setting.
            temp_parp = tempresults[tempresults.model=="PARP"].iloc[[0],:]
            temp_others = tempresults[tempresults.model!="PARP"]

            # Ascending mean (ties: descending std) puts the best row last.
            temp_all = (
                pd.concat([temp_parp,temp_others],axis=0)
                .sort_values(["mean","std"],ascending=[True,False])
                .reset_index(drop=True)
                .assign(metric=metric)
            )
            temp_best = temp_all.iloc[[-1],:]
            overall_best.append(temp_best)

In [93]:
# Stack the top-ranked row of every metric / batch / epochs run into one table.
best_all = pd.concat(overall_best, axis=0)
# Saving is disabled for now: the target path below is incomplete and the
# to_csv call still has an empty filename.
# save_path = "/work/PCDC/s202005/Research/TransferLearning/Shelburne/Results/summary/\
# Single_1_Anti_2_Dense_1024_Ensemble/Epochs_100_Batch_128/best_"
# best_all.to_csv("")

In [98]:
# Collect the full recall ranking (not just the winner) for every
# (batch, epochs) setting.
temprecall = []
metric = 'recall_score'
for batch in list(DATA.config_batch.unique()):
    for epochs in list(DATA.config_epochs.unique()):
        batch_list, epochs_list = [batch], [epochs]
        # Weight settings are left unrestricted.
        pred_weights_list = input_weights_list = None
        tempresults = get_results(DATA,epochs_list,batch_list,pred_weights_list,input_weights_list,metric)
        cols = ["model","mean","std","input_w","pred_w","epochs","batch","method"]
        # One row per distinct summary.
        tempresults = tempresults[cols].drop_duplicates()

        is_parp = tempresults.model=="PARP"
        # Only the first PARP row is kept as the PARP entry.
        temp_parp = tempresults[is_parp].iloc[[0],:]
        temp_others = tempresults[tempresults.model!="PARP"]

        # Order by ascending mean, breaking ties by descending std, then
        # renumber the rows and tag them with the metric name.
        temp_all = (
            pd.concat([temp_parp,temp_others],axis=0)
            .sort_values(["mean","std"],ascending=[True,False])
            .reset_index(drop=True)
            .assign(metric=metric)
        )
        temprecall.append(temp_all)

In [103]:
# Merge the per-setting recall rankings into one table, ordered by ascending
# mean with ties broken by descending std, and display it.
recall = pd.concat(temprecall).sort_values(["mean","std"],ascending=[True,False])
recall

Unnamed: 0,model,mean,std,input_w,pred_w,epochs,batch,method,metric
0,Scratch,0.015225,0.083819,,,10,128,Scratch,recall_score
0,Scratch,0.038841,0.10723,,,10,64,Scratch,recall_score
0,Scratch,0.04389,0.102089,,,20,128,Scratch,recall_score
1,DTL,0.067419,0.104129,DefaultWeights,DefaultWeights,10,128,DTL,recall_score
0,Scratch,0.110905,0.188505,,,10,32,Scratch,recall_score
...,...,...,...,...,...,...,...,...,...
7,PARP,0.753227,0.062813,,,100,64,DTL,recall_score
7,PARP,0.753227,0.062813,,,10,128,DTL,recall_score
7,PARP,0.753227,0.062813,,,20,128,DTL,recall_score
7,PARP,0.753227,0.062813,,,50,128,DTL,recall_score
