In [44]:
import sys
import numpy as np
import pandas as pd 
from ampligraph.latent_features import ComplEx
from ampligraph.discovery import query_topn
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from ampligraph.latent_features import save_model
from statistics import mode
from imblearn.metrics import geometric_mean_score


def get_most_frequent_link(X):
    """Return the most common element found while iterating over ``X``.

    The elements are materialised into a list and handed to
    ``statistics.mode``; any error raised by ``mode`` (e.g. for empty
    input) propagates to the caller.
    """
    return mode(list(X))

def get_score_dummy(X, predicted_class):
    """Score a constant classifier that always predicts ``predicted_class``.

    The true label of every item in ``X`` is read from index 1 of that item.

    Returns:
        A tuple ``(g_mean, accuracy, f1)``. The geometric mean and F1 are
        computed over the labels ``'solved'`` / ``'not-solved'``; F1 treats
        ``predicted_class`` as the positive label.
    """
    class_labels = ['solved', 'not-solved']

    y_true = [item[1] for item in X]
    # The dummy model answers with the same class for every sample.
    y_pred = [predicted_class for _ in y_true]

    g_mean = geometric_mean_score(y_true, y_pred, labels=class_labels)
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, labels=class_labels, pos_label=predicted_class)
    return g_mean, accuracy, f1
    
     

def HPO_KG_embedding(dim, budget, fixed_target, algo):
    """Print the F1 score of a majority-class baseline on the test split.

    Loads the train / validation / test arrays for the given configuration,
    determines which of the two link labels ('solved' / 'not-solved') is most
    frequent in train + validation, and scores a dummy classifier that always
    predicts that label on the test split. The F1 score is printed with three
    decimals; nothing is returned.

    Args:
        dim: Value substituted into the ``dim_{dim}`` part of the file names.
        budget: Value substituted into the ``budget_{budget}`` part.
        fixed_target: Target value, formatted with five decimals in the
            file names.
        algo: Sub-directory of ``../DATA/KG`` to read from.
    """
    prefix = f'../DATA/KG/{algo}/KG_dim_{dim}_budget_{budget}_target_{fixed_target:.5f}'
    X_train = np.load(f'{prefix}_train.npy')
    X_val = np.load(f'{prefix}_val.npy')
    X_test = np.load(f'{prefix}_test.npy')

    # np.append() without an axis flattens its inputs, which would mix
    # subjects, links and objects into one pool before taking the mode.
    # Only the link (index 1 of each row, the same position get_score_dummy
    # reads the true label from) may take part in the vote, and only the two
    # class labels are valid predictions.
    # NOTE(review): assumes every row is a (subject, link, object) triple.
    class_links = ('solved', 'not-solved')
    observed_links = [
        row[1]
        for split in (X_train, X_val)
        for row in split
        if row[1] in class_links
    ]
    majority_link = get_most_frequent_link(observed_links)

    _, _, dummy_f1 = get_score_dummy(X_test, majority_link)
    print(f'{float(dummy_f1):.3f}')
    

if __name__ == "__main__":
    # Sweep every (dim, target, budget, algorithm) combination; the nesting
    # order of the generator matches the order in which results are printed.
    for dim, fixed_target, budget, algo in (
        (d, t, b, a)
        for d in [30]
        for t in [10, 1, 0.1]
        for b in [2000, 5000, 10000, 50000]
        for a in ['modCMA']
    ):
        best_model = HPO_KG_embedding(dim, budget, fixed_target, algo)

0.787
0.699
0.667
0.811
0.944
0.913
0.883
0.743
0.968
0.954
0.933
0.813


In [36]:
import sys
import numpy as np
import pandas as pd 
from ampligraph.latent_features import ComplEx
from ampligraph.discovery import query_topn
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from ampligraph.latent_features import save_model
from statistics import mode
from imblearn.metrics import geometric_mean_score
from scipy import stats
from sklearn.metrics import precision_score


def get_most_frequent_link(X):
    """Return whichever of 'solved' / 'not-solved' occurs more often in ``X``.

    Elements equal to neither label are ignored. A tie (including an empty
    ``X``) resolves to 'solved'.
    """
    # Positive balance: more 'solved'; negative: more 'not-solved'.
    balance = 0
    for label in X:
        if label == 'solved':
            balance += 1
        elif label == 'not-solved':
            balance -= 1

    return 'solved' if balance >= 0 else 'not-solved'

def get_score_dummy(X, predicted_class):
    """Evaluate a baseline that outputs ``predicted_class`` for every sample.

    Ground-truth labels are taken from index 1 of each item in ``X``.

    Returns:
        ``(g_mean, accuracy, f1)`` — geometric mean and F1 are restricted to
        the labels ``'solved'`` and ``'not-solved'``, with ``predicted_class``
        used as the positive label for F1.
    """
    class_labels = ['solved', 'not-solved']

    y_true = [item[1] for item in X]
    # Constant prediction, one entry per ground-truth label.
    y_pred = [predicted_class for _ in y_true]

    g_mean = geometric_mean_score(y_true, y_pred, labels=class_labels)
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, labels=class_labels, pos_label=predicted_class)
    return g_mean, accuracy, f1
    
     

def HPO_KG_embedding_random(dim, budget, fixed_target, algo, mode, n_subsets=5):
    """Print mean (std) accuracy of a majority-class baseline over random subsets.

    For each subset index ``1..n_subsets`` the train-link, validation and test
    arrays are loaded, the majority label among the train + validation links
    is determined, and a dummy classifier always predicting that label is
    scored on the test array. The mean and standard deviation of the
    accuracies are printed with three decimals; nothing is returned.

    Args:
        dim: Value substituted into the ``dim_{dim}`` part of the file names.
        budget: Value substituted into the ``budget_{budget}`` part.
        fixed_target: Target value, formatted with five decimals in the
            file names.
        algo: First sub-directory below ``../DATA/KG_random_subset``.
        mode: Second sub-directory (below ``algo``). Note that this parameter
            shadows ``statistics.mode`` inside the function.
        n_subsets: Number of ``rand_<i>`` subsets to average over, numbered
            from 1. Defaults to 5.
    """
    scores = []
    for rand in range(1, n_subsets + 1):
        prefix = (
            f'../DATA/KG_random_subset/{algo}/{mode}/'
            f'rand_{rand}_KG_dim_{dim}_budget_{budget}_target_{fixed_target:.5f}'
        )
        # The full "_train.npy" array is not needed for the baseline; only
        # the link-only training array takes part in the majority vote.
        X_train_links = np.load(f'{prefix}_train_links.npy')
        X_val = np.load(f'{prefix}_val.npy')
        X_test = np.load(f'{prefix}_test.npy')

        # Index 1 of each row holds the link label.
        links_train = [t[1] for t in X_train_links]
        links_val = [t[1] for t in X_val]
        majority_link = get_most_frequent_link(np.append(links_train, links_val))

        _, dummy_acc, _ = get_score_dummy(X_test, majority_link)
        scores.append(dummy_acc)
    print(f'{float(np.mean(scores)):.3f} ({float(np.std(scores)):.3f})')

if __name__ == "__main__":
    # Named `subset_mode` rather than `mode` so the module-level import of
    # `statistics.mode` is not overwritten, and actually passed to the call
    # instead of repeating the literal.
    subset_mode = 'problem'
    for budget in [2000, 5000, 10000, 50000]:
        for algo in ['modDE']:
            # The function only prints its result and returns None, so there
            # is nothing to keep.
            HPO_KG_embedding_random(5, budget, 0.1, algo, subset_mode)

0.868 (0.005)
0.767 (0.010)
0.669 (0.010)
0.628 (0.012)
