In [1]:
import pandas as pd
import yaml

In [2]:
def get_model_spec(model_name, job_name):
    file_path = f"benchmarks/tuner_config/{model_name}_{job_name}/model_config.yaml"
    with open(file_path) as file:
        models_list = yaml.load(file, Loader=yaml.FullLoader)
        df_specs = pd.DataFrame.from_dict(models_list, orient="index")
        col_names = ["batch_norm", "embedding_dim", "embedding_regularizer", "learning_rate", "net_dropout"]
        df_specs = df_specs[col_names]
    return df_specs

In [3]:
def read_exp(model_name, job_name):
    file_path = f"benchmarks/{model_name}_{job_name}.csv"
    df_exp = pd.read_csv(file_path, 
                         names=["time", "reproducing command", "expid", "dataset_id", "train", "validation"])
    df_exp["val_auc"] = df_exp.validation.str.extractall(r'([0-9]\.\d+)').xs(1, level="match").astype("float")
    df_exp["expid"] = df_exp["expid"].str.split().str[1]
    df_exp = df_exp[["expid", "train", "validation", "val_auc"]]
    df_specs = get_model_spec(model_name, job_name)
    df_exp = df_exp.merge(df_specs, left_on="expid", right_index=True, how="left")
    return df_exp
def find_best_model(df_exp):
    return df_exp.loc[df_exp["val_auc"].idxmax()]

In [7]:
deepfm_no_feature = read_exp("DeepFM", "no_feature")
deepfm_user_feature = read_exp("DeepFM", "user_feature")
deepfm_business_feature = read_exp("DeepFM", "business_feature")
deepfm_all_feature = read_exp("DeepFM", "all_feature")
deepfm_business_feature_no_reviewcount = read_exp("DeepFM", "business_feature_no_reviewcount")
deepfm_all_feature_no_reviewcount = read_exp("DeepFM", "all_feature_no_reviewcount")
dcn_user_feature = read_exp("DCNv2", "user_feature")
deepfm_target_encoding = read_exp("DeepFM", "target_encoding")
deepfm_count_encoding = read_exp("DeepFM", "target_encoding")
deepfm_categorify = read_exp("DeepFM", "categorify")
# deepfm_feature_engineer = read_exp("DeepFM", "feature_engineer")

In [14]:
pwd

'/Users/jun/Library/Mobile Documents/com~apple~CloudDocs/jun_folder/skills/mids/w210-capstone/proj/deepfm'

In [15]:
pd.read_csv("benchmarks/DeepFM_feature_engineer.csv")

Unnamed: 0,20221128-075559,[command] python run_expid.py --version pytorch --config ./tuner_config/DeepFM_feature_engineer --expid DeepFM_yelp_feature_engineer_017_cdde7079 --gpu 0,[exp_id] DeepFM_yelp_feature_engineer_017_cdde7079,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000
0,20221128-075735,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_001_8388...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992074 - AUC: 0.499997,[val] logloss: 4.997325 - AUC: 0.499991
1,20221128-075821,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_018_9d27...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000
2,20221128-075915,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_002_a0b5...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992263 - AUC: 0.499995,[val] logloss: 4.997514 - AUC: 0.499983
3,20221128-075949,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_019_108d...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000
4,20221128-080128,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_003_2691...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000
5,20221128-080157,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_020_ceab...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000
6,20221128-080250,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_004_d81e...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992074 - AUC: 0.499997,[val] logloss: 4.997325 - AUC: 0.499991
7,20221128-080419,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_021_c38a...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000
8,20221128-080446,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_005_0177...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 4.992263 - AUC: 0.499995,[val] logloss: 4.997514 - AUC: 0.499983
9,20221128-080600,[command] python run_expid.py --version pytorc...,[exp_id] DeepFM_yelp_feature_engineer_022_cd49...,[dataset_id] yelp_feature_engineer_392d950c,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000


In [12]:
deepfm_count_encoding

Unnamed: 0,expid,train,validation,val_auc,batch_norm,embedding_dim,embedding_regularizer,learning_rate,net_dropout
0,DeepFM_yelp_target_encoding_001_f722b300,[train] logloss: 3.109159 - AUC: 0.690748,[val] logloss: 3.460628 - AUC: 0.669304,0.669304,False,64,0.0,0.001,0.0
1,DeepFM_yelp_target_encoding_002_c9567229,[train] logloss: 1.976968 - AUC: 0.749899,[val] logloss: 2.657613 - AUC: 0.687591,0.687591,False,128,0.0,0.001,0.0
2,DeepFM_yelp_target_encoding_003_9e76f344,[train] logloss: 2.157604 - AUC: 0.735420,[val] logloss: 2.865868 - AUC: 0.674289,0.674289,False,256,0.0,0.001,0.0
3,DeepFM_yelp_target_encoding_004_ef2a1424,[train] logloss: 2.202586 - AUC: 0.736793,[val] logloss: 2.837873 - AUC: 0.684501,0.684501,False,64,0.0,0.001,0.03
4,DeepFM_yelp_target_encoding_005_8fe3b01c,[train] logloss: 2.823474 - AUC: 0.706359,[val] logloss: 3.377609 - AUC: 0.670481,0.670481,False,128,0.0,0.001,0.03
5,DeepFM_yelp_target_encoding_006_8cb856c3,[train] logloss: 3.494208 - AUC: 0.669087,[val] logloss: 3.611921 - AUC: 0.663122,0.663122,False,256,0.0,0.001,0.03
6,DeepFM_yelp_target_encoding_007_75e70031,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000,0.5,False,64,0.0,0.01,0.0
7,DeepFM_yelp_target_encoding_008_4360451b,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,128,0.0,0.01,0.0
8,DeepFM_yelp_target_encoding_009_836c54c2,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,256,0.0,0.01,0.0
9,DeepFM_yelp_target_encoding_010_c763e306,[train] logloss: 11.126093 - AUC: 0.500000,[val] logloss: 11.120960 - AUC: 0.500000,0.5,False,64,0.0,0.01,0.03


In [69]:
jobs = [deepfm_no_feature, deepfm_user_feature, deepfm_business_feature, deepfm_all_feature,
       dcn_user_feature]
best_models = [find_best_model(job_name) for job_name in jobs]
best_models = pd.DataFrame(best_models).set_index("expid")
expid_lst = best_models.index.str.split("_")
best_models["model"] = expid_lst.str[0]
best_models["feature"] = expid_lst.str[2] + "_" + expid_lst.str[3]
best_models

Unnamed: 0_level_0,train,validation,val_auc,batch_norm,embedding_dim,embedding_regularizer,learning_rate,net_dropout,model,feature
expid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
DeepFM_yelp_no_feature_001_c064a006,[train] logloss: 0.460143 - AUC: 0.844091,[val] logloss: 0.533441 - AUC: 0.743259,0.743259,False,64,0.0,0.001,0.0,DeepFM,no_feature
DeepFM_yelp_user_feature_024_110a628a,[train] logloss: 0.440098 - AUC: 0.837953,[val] logloss: 0.458365 - AUC: 0.820935,0.820935,False,256,0.01,0.01,0.03,DeepFM,user_feature
DeepFM_yelp_business_feature_023_956d2fe1,[train] logloss: 0.456102 - AUC: 0.837478,[val] logloss: 0.533311 - AUC: 0.750399,0.750399,False,128,0.01,0.01,0.03,DeepFM,business_feature
DeepFM_yelp_all_feature_019_eabe7106,[train] logloss: 0.457232 - AUC: 0.823512,[val] logloss: 0.458665 - AUC: 0.822774,0.822774,False,64,0.01,0.01,0.0,DeepFM,all_feature
DCNv2_yelp_user_feature_008_116299d6,[train] logloss: 0.442238 - AUC: 0.836762,[val] logloss: 0.458743 - AUC: 0.820642,0.820642,False,64,0.01,0.001,0.03,DCNv2,user_feature


In [44]:
dcn_user_feature

Unnamed: 0,expid,train,validation,val_auc,batch_norm,embedding_dim,embedding_regularizer,learning_rate,net_dropout
0,DCNv2_yelp_user_feature_001_b32da414,[train] logloss: 0.400012 - AUC: 0.881854,[val] logloss: 0.532635 - AUC: 0.783822,0.783822,False,64,0.0,0.001,0.0
1,DCNv2_yelp_user_feature_002_e8fd6881,[train] logloss: 0.473130 - AUC: 0.855812,[val] logloss: 0.550826 - AUC: 0.786990,0.78699,False,64,0.0,0.001,0.03
2,DCNv2_yelp_user_feature_003_e75ec078,[train] logloss: 0.430476 - AUC: 0.861929,[val] logloss: 0.522598 - AUC: 0.791705,0.791705,False,128,0.0,0.001,0.0
3,DCNv2_yelp_user_feature_004_3aab9b92,[train] logloss: 0.469999 - AUC: 0.851583,[val] logloss: 0.547965 - AUC: 0.790724,0.790724,False,128,0.0,0.001,0.03
4,DCNv2_yelp_user_feature_005_248017e9,[train] logloss: 0.460542 - AUC: 0.826417,[val] logloss: 0.471341 - AUC: 0.815621,0.815621,False,256,0.0,0.001,0.0
5,DCNv2_yelp_user_feature_006_92fb047b,[train] logloss: 0.472180 - AUC: 0.825664,[val] logloss: 0.484545 - AUC: 0.812465,0.812465,False,256,0.0,0.001,0.03
6,DCNv2_yelp_user_feature_007_3abd61fe,[train] logloss: 0.440955 - AUC: 0.838073,[val] logloss: 0.459384 - AUC: 0.820128,0.820128,False,64,0.01,0.001,0.0
7,DCNv2_yelp_user_feature_008_116299d6,[train] logloss: 0.442238 - AUC: 0.836762,[val] logloss: 0.458743 - AUC: 0.820642,0.820642,False,64,0.01,0.001,0.03
8,DCNv2_yelp_user_feature_009_e99b51ff,[train] logloss: 0.440565 - AUC: 0.838300,[val] logloss: 0.458623 - AUC: 0.820576,0.820576,False,128,0.01,0.001,0.0
9,DCNv2_yelp_user_feature_010_d4893bc7,[train] logloss: 0.439812 - AUC: 0.839103,[val] logloss: 0.459108 - AUC: 0.820329,0.820329,False,128,0.01,0.001,0.03
