In [72]:
import pandas as pd
import yaml

In [87]:
def get_model_spec(job_name, columns=None):
    """Load the hyperparameter table for one tuning job.

    Reads ``benchmarks/tuner_config/{job_name}_tuner_config/model_config.yaml``
    and turns its top-level mapping into a DataFrame with one row per
    top-level key (``read_exp`` joins these keys against ``expid``).

    Parameters
    ----------
    job_name : str
        Tuning-job name used to build the config path.
    columns : sequence of str, optional
        Hyperparameter columns to keep, in this order. When omitted, the six
        columns compared in this notebook are used: ``batch_norm``,
        ``hidden_units``, ``embedding_dim``, ``embedding_regularizer``,
        ``learning_rate`` and ``net_dropout``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the top-level YAML keys and restricted to ``columns``.

    Raises
    ------
    FileNotFoundError
        If the config file does not exist.
    ValueError
        If the config file contains no model definitions.
    KeyError
        If a requested column is missing from the config.
    """
    if columns is None:
        columns = [
            "batch_norm",
            "hidden_units",
            "embedding_dim",
            "embedding_regularizer",
            "learning_rate",
            "net_dropout",
        ]
    file_path = f"benchmarks/tuner_config/{job_name}_tuner_config/model_config.yaml"
    # Explicit encoding so the result does not depend on the platform default.
    with open(file_path, encoding="utf-8") as file:
        # NOTE(review): yaml.safe_load is the recommended loader for configs
        # that contain only plain data; FullLoader is kept here to preserve
        # the existing behaviour -- confirm the configs need nothing more.
        models_list = yaml.load(file, Loader=yaml.FullLoader)
    # Fail early with the offending path instead of an opaque pandas error.
    if not models_list:
        raise ValueError(f"No model definitions found in {file_path}")
    df_specs = pd.DataFrame.from_dict(models_list, orient="index")
    return df_specs[list(columns)]
# Load the hyperparameter table for the "DeepFM_feature" tuning job.
model_feature_specs = get_model_spec("DeepFM_feature")

In [88]:
def read_exp(job_name):
    """Load a tuning job's result log and attach each run's hyperparameters.

    Reads the header-less CSV ``benchmarks/{job_name}_tuner_config.csv``,
    parses the validation AUC out of the ``validation`` text, reduces
    ``expid`` to its second whitespace-separated token, and left-joins the
    hyperparameter table from ``get_model_spec(job_name)`` on that id.

    Parameters
    ----------
    job_name : str
        Tuning-job name used to build both the CSV and the config paths.

    Returns
    -------
    pandas.DataFrame
        Columns ``expid``, ``train``, ``validation``, ``val_auc`` (float)
        followed by the hyperparameter columns. Runs without a matching
        config entry keep NaN hyperparameters (left join).
    """
    file_path = f"benchmarks/{job_name}_tuner_config.csv"
    df_exp = pd.read_csv(
        file_path,
        names=["time", "reproducing command", "expid", "dataset_id", "train", "validation"],
    )
    # Read the AUC by its label rather than as "the second decimal number in
    # the string": that positional rule silently picks the wrong metric if the
    # logged order changes and fails when no row contains two numbers.
    # Rows without an AUC entry end up as NaN.
    auc_text = df_exp["validation"].str.extract(r"AUC:\s*([0-9]*\.?[0-9]+)", expand=False)
    df_exp["val_auc"] = auc_text.astype("float")
    # The raw expid field carries a leading token; keep the id that follows it.
    df_exp["expid"] = df_exp["expid"].str.split().str[1]
    df_exp = df_exp[["expid", "train", "validation", "val_auc"]]
    df_specs = get_model_spec(job_name)
    return df_exp.merge(df_specs, left_on="expid", right_index=True, how="left")
def find_best_model(df_exp):
    """Return the row of ``df_exp`` that has the highest ``val_auc``.

    The row is looked up by the index label of the maximum, so the returned
    Series keeps that label as its ``name``.
    """
    best_label = df_exp["val_auc"].idxmax()
    best_row = df_exp.loc[best_label]
    return best_row

In [89]:
# Load the result tables (with hyperparameters attached) for the two tuning
# jobs: the run that uses feature data and the run that does not.
feature_exp = read_exp("DeepFM_feature")
nofeature_exp = read_exp("DeepFM_nofeature")

In [91]:
# Show the no-feature run with the highest validation AUC.
print("Best model with no feature data:")
find_best_model(nofeature_exp)

Best model with no feature data:


expid                          DeepFM_yelp_no_feature_001_c064a006
train                    [train] logloss: 0.460143 - AUC: 0.844091
validation                 [val] logloss: 0.533441 - AUC: 0.743259
val_auc                                                   0.743259
batch_norm                                                   False
hidden_units                                       [400, 400, 400]
embedding_dim                                                   64
embedding_regularizer                                          0.0
learning_rate                                                0.001
net_dropout                                                    0.0
Name: 0, dtype: object

In [92]:
# Show the feature-data run with the highest validation AUC.
print("Best model with feature data:")
find_best_model(feature_exp)

Best model with feature data:


expid                             DeepFM_yelp_feature_015_e310373d
train                    [train] logloss: 3.394628 - AUC: 0.683488
validation                 [val] logloss: 3.410920 - AUC: 0.684168
val_auc                                                   0.684168
batch_norm                                                   False
hidden_units                                       [400, 400, 400]
embedding_dim                                                  256
embedding_regularizer                                         0.01
learning_rate                                                0.001
net_dropout                                                    0.0
Name: 14, dtype: object

In [94]:
# Preview the first runs rather than rendering the whole table.
feature_exp.head(10)

Unnamed: 0,expid,train,validation,val_auc,batch_norm,hidden_units,embedding_dim,embedding_regularizer,learning_rate,net_dropout
0,DeepFM_yelp_feature_001_03e4b53d,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",64,0.0,0.001,0.0
1,DeepFM_yelp_feature_002_7f85efa5,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",128,0.0,0.001,0.0
2,DeepFM_yelp_feature_003_b71ca503,[train] logloss: 3.591318 - AUC: 0.659805,[val] logloss: 3.755078 - AUC: 0.650685,0.650685,False,"[400, 400, 400]",256,0.0,0.001,0.0
3,DeepFM_yelp_feature_004_da66f817,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",64,0.0,0.001,0.03
4,DeepFM_yelp_feature_005_2972f7f5,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",128,0.0,0.001,0.03
5,DeepFM_yelp_feature_006_9adf532d,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",256,0.0,0.001,0.03
6,DeepFM_yelp_feature_007_b53f1478,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",64,0.0,0.01,0.0
7,DeepFM_yelp_feature_008_762e09c2,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",128,0.0,0.01,0.0
8,DeepFM_yelp_feature_009_ed30b5fa,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",256,0.0,0.01,0.0
9,DeepFM_yelp_feature_010_6ae2c020,[train] logloss: 4.992003 - AUC: 0.500000,[val] logloss: 4.997136 - AUC: 0.500000,0.5,False,"[400, 400, 400]",64,0.0,0.01,0.03


In [93]:
# Keep only the feature-data runs whose validation AUC exceeds 0.55.
feature_exp.loc[feature_exp["val_auc"] > 0.55]

Unnamed: 0,expid,train,validation,val_auc,batch_norm,hidden_units,embedding_dim,embedding_regularizer,learning_rate,net_dropout
2,DeepFM_yelp_feature_003_b71ca503,[train] logloss: 3.591318 - AUC: 0.659805,[val] logloss: 3.755078 - AUC: 0.650685,0.650685,False,"[400, 400, 400]",256,0.0,0.001,0.0
14,DeepFM_yelp_feature_015_e310373d,[train] logloss: 3.394628 - AUC: 0.683488,[val] logloss: 3.410920 - AUC: 0.684168,0.684168,False,"[400, 400, 400]",256,0.01,0.001,0.0
17,DeepFM_yelp_feature_018_8ec8411f,[train] logloss: 3.205235 - AUC: 0.685170,[val] logloss: 3.248633 - AUC: 0.683407,0.683407,False,"[400, 400, 400]",256,0.01,0.001,0.03
