In [1]:
import pandas as pd
import yaml

In [20]:
def get_model_spec(job_name):
    """Load the hyper-parameter settings recorded for a tuner job.

    Reads ``benchmarks/tuner_config/DeepFM_{job_name}/model_config.yaml``
    (relative to the current working directory) and converts its top-level
    mapping into a DataFrame with one row per top-level key.

    Parameters
    ----------
    job_name : str
        Job suffix interpolated into the config path.

    Returns
    -------
    pandas.DataFrame
        Indexed by the YAML's top-level keys (presumably experiment ids,
        since ``read_exp`` joins its ``expid`` column against this index)
        and restricted to the hyper-parameter columns listed below.

    Raises
    ------
    KeyError
        If any of the selected hyper-parameter columns is missing.
    """
    hyper_params = [
        "batch_norm",
        "hidden_units",
        "embedding_dim",
        "embedding_regularizer",
        "learning_rate",
        "net_dropout",
    ]
    config_path = f"benchmarks/tuner_config/DeepFM_{job_name}/model_config.yaml"
    with open(config_path) as handle:
        # NOTE(review): FullLoader can still construct some Python objects;
        # acceptable for locally generated configs, but prefer
        # yaml.safe_load if these files may come from an untrusted source.
        raw_specs = yaml.load(handle, Loader=yaml.FullLoader)
    # The YAML is fully parsed at this point, so the frame can be built after
    # the file has been closed.
    all_settings = pd.DataFrame.from_dict(raw_specs, orient="index")
    return all_settings[hyper_params]

In [25]:
def read_exp(job_name):
    """Load a job's experiment log and attach each run's hyper-parameters.

    Reads ``benchmarks/DeepFM_{job_name}.csv`` (relative to the current
    working directory; the file is read as having no header row), parses the
    validation AUC out of the ``validation`` text column, and left-joins the
    hyper-parameter table returned by ``get_model_spec(job_name)`` on the
    experiment id.

    Parameters
    ----------
    job_name : str
        Job suffix interpolated into the CSV path and passed on to
        ``get_model_spec``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``expid``; columns are ``train``, ``validation``,
        ``val_auc`` (float, NaN when no ``AUC:`` value is found) followed by
        the hyper-parameter columns.
    """
    file_path = f"benchmarks/DeepFM_{job_name}.csv"
    results = pd.read_csv(
        file_path,
        names=["time", "reproducing command", "expid", "dataset_id", "train", "validation"],
    )
    # Take the number that follows the "AUC:" label instead of "the second
    # decimal number in the string": the positional approach silently picks the
    # wrong metric if the metric order changes, and raises KeyError when no row
    # contains two numbers.
    results["val_auc"] = (
        results["validation"]
        .str.extract(r"AUC:\s*(\d+(?:\.\d+)?)", expand=False)
        .astype("float")
    )
    # Keep the second whitespace-separated token of the raw cell
    # (presumably "<tag> <experiment id>" -- confirm against the CSV writer).
    results["expid"] = results["expid"].str.split().str[1]
    results = results[["expid", "train", "validation", "val_auc"]]
    specs = get_model_spec(job_name)
    # Left join: runs without a matching spec entry are kept, with NaN settings.
    results = results.merge(specs, left_on="expid", right_index=True, how="left")
    return results.set_index("expid")
def find_best_model(df_exp):
    """Return the row of ``df_exp`` with the highest ``val_auc``.

    The row is looked up by index label, so the result is a Series named
    after the winning label when that label is unique. On ties the first
    occurrence of the maximum wins.
    """
    auc_scores = df_exp["val_auc"]
    best_label = auc_scores.idxmax()
    return df_exp.loc[best_label]

In [26]:
# Load the results of every feature-set variant. The i-th name on the left is
# bound to the frame loaded for the i-th job name on the right, in that order.
(
    no_feature,
    user_feature,
    business_feature,
    all_feature,
    business_feature_no_reviewcount,
    all_feature_no_reviewcount,
) = map(
    read_exp,
    (
        "no_feature",
        "user_feature",
        "business_feature",
        "all_feature",
        "business_feature_no_reviewcount",
        "all_feature_no_reviewcount",
    ),
)

In [27]:
# Display the parsed experiment table loaded for the "no_feature" job.
no_feature

Unnamed: 0_level_0,train,validation,val_auc,batch_norm,hidden_units,embedding_dim,embedding_regularizer,learning_rate,net_dropout
expid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DeepFM_yelp_no_feature_001_c064a006,[train] logloss: 0.460143 - AUC: 0.844091,[val] logloss: 0.533441 - AUC: 0.743259,0.743259,False,"[400, 400, 400]",64,0.0,0.001,0.0
DeepFM_yelp_no_feature_002_1bdc1976,[train] logloss: 0.462986 - AUC: 0.847233,[val] logloss: 0.533922 - AUC: 0.743106,0.743106,False,"[400, 400, 400]",128,0.0,0.001,0.0
DeepFM_yelp_no_feature_003_c6d05b27,[train] logloss: 0.457007 - AUC: 0.843631,[val] logloss: 0.534583 - AUC: 0.743084,0.743084,False,"[400, 400, 400]",256,0.0,0.001,0.0
DeepFM_yelp_no_feature_004_c1c82b08,[train] logloss: 0.460498 - AUC: 0.843219,[val] logloss: 0.533481 - AUC: 0.743233,0.743233,False,"[400, 400, 400]",64,0.0,0.001,0.03
DeepFM_yelp_no_feature_005_865134c9,[train] logloss: 0.463117 - AUC: 0.847518,[val] logloss: 0.533930 - AUC: 0.743185,0.743185,False,"[400, 400, 400]",128,0.0,0.001,0.03
DeepFM_yelp_no_feature_006_6e943c64,[train] logloss: 0.457579 - AUC: 0.842657,[val] logloss: 0.534547 - AUC: 0.743137,0.743137,False,"[400, 400, 400]",256,0.0,0.001,0.03
DeepFM_yelp_no_feature_007_b2b68949,[train] logloss: 0.346338 - AUC: 0.941701,[val] logloss: 0.545113 - AUC: 0.728938,0.728938,False,"[400, 400, 400]",64,0.0,0.01,0.0
DeepFM_yelp_no_feature_008_f0d6ac33,[train] logloss: 0.261858 - AUC: 0.960212,[val] logloss: 0.564460 - AUC: 0.716228,0.716228,False,"[400, 400, 400]",128,0.0,0.01,0.0
DeepFM_yelp_no_feature_009_6bc59109,[train] logloss: 0.198191 - AUC: 0.973141,[val] logloss: 0.615003 - AUC: 0.691695,0.691695,False,"[400, 400, 400]",256,0.0,0.01,0.0
DeepFM_yelp_no_feature_010_011ede31,[train] logloss: 0.364284 - AUC: 0.935936,[val] logloss: 0.556556 - AUC: 0.717854,0.717854,False,"[400, 400, 400]",64,0.0,0.01,0.03


In [28]:
# Gather the per-job result frames in a fixed order, pick the row with the
# highest validation AUC from each, and show the winners as one table.
jobs = [
    no_feature,
    user_feature,
    business_feature,
    all_feature,
    business_feature_no_reviewcount,
    all_feature_no_reviewcount,
]
best_models = list(map(find_best_model, jobs))
pd.DataFrame(best_models)

Unnamed: 0,train,validation,val_auc,batch_norm,hidden_units,embedding_dim,embedding_regularizer,learning_rate,net_dropout
DeepFM_yelp_no_feature_001_c064a006,[train] logloss: 0.460143 - AUC: 0.844091,[val] logloss: 0.533441 - AUC: 0.743259,0.743259,False,"[400, 400, 400]",64,0.0,0.001,0.0
DeepFM_yelp_user_feature_024_110a628a,[train] logloss: 0.440098 - AUC: 0.837953,[val] logloss: 0.458365 - AUC: 0.820935,0.820935,False,"[400, 400, 400]",256,0.01,0.01,0.03
DeepFM_yelp_business_feature_023_956d2fe1,[train] logloss: 0.456102 - AUC: 0.837478,[val] logloss: 0.533311 - AUC: 0.750399,0.750399,False,"[400, 400, 400]",128,0.01,0.01,0.03
DeepFM_yelp_all_feature_019_eabe7106,[train] logloss: 0.457232 - AUC: 0.823512,[val] logloss: 0.458665 - AUC: 0.822774,0.822774,False,"[400, 400, 400]",64,0.01,0.01,0.0
DeepFM_yelp_business_feature_no_reviewcount_004_65e9903d,[train] logloss: 0.459175 - AUC: 0.839088,[val] logloss: 0.524589 - AUC: 0.755297,0.755297,False,"[400, 400, 400]",64,0.0,0.001,0.03
DeepFM_yelp_all_feature_no_reviewcount_019_3c771df7,[train] logloss: 0.448807 - AUC: 0.829846,[val] logloss: 0.453373 - AUC: 0.826016,0.826016,False,"[400, 400, 400]",64,0.01,0.01,0.0
