# In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

from config import Config
from optim.hgwos import HGWOS

def run():
    """Search for a feature mask with HGWOS and save the best one found.

    Loads the train/validation feature matrices and labels from
    ``cfg.features_dir``, hands a mask-scoring function to the ``HGWOS``
    optimizer, writes the returned mask to ``best_mask.npy`` in the same
    directory, and prints the best fitness and the number of selected
    features.
    """
    cfg = Config()

    def load_array(filename):
        # All inputs live side by side in the configured features directory.
        return np.load(cfg.features_dir/filename)

    Xtr = load_array("X_train.npy")
    ytr = load_array("y_train.npy")
    Xv = load_array("X_val.npy")
    yv = load_array("y_val.npy")

    n_features = Xtr.shape[1]

    def fitness(mask):
        """Score one candidate mask (higher is presumably better).

        Columns where ``mask == 1`` are kept; a logistic regression is fit
        on the training split and scored by macro-F1 on the validation
        split. The score is weighted by ``cfg.hgwos_alpha`` and reduced by
        ``cfg.hgwos_beta`` times the fraction of features selected.
        """
        n_selected = mask.sum()

        # Reject masks that keep fewer than 5 features (this also covers
        # the empty selection, which the classifier could not be fit on).
        if n_selected < 5:
            return -1e9

        chosen = mask == 1
        train_cols = Xtr[:, chosen]
        val_cols = Xv[:, chosen]

        model = LogisticRegression(max_iter=300, n_jobs=1)
        model.fit(train_cols, ytr)
        val_pred = model.predict(val_cols)
        macro_f1 = f1_score(yv, val_pred, average="macro")

        # Fraction of all features in use; larger subsets cost more.
        size_penalty = n_selected / n_features
        return cfg.hgwos_alpha * macro_f1 - cfg.hgwos_beta * size_penalty

    optimizer = HGWOS(pop=cfg.hgwos_pop, iters=cfg.hgwos_iters, switch_prob=0.5)
    best_mask, best_fit = optimizer.optimize(n_features, fitness)

    np.save(cfg.features_dir/"best_mask.npy", best_mask)
    print("HGWOS Best fitness:", best_fit)
    print("Selected features:", int(best_mask.sum()), "/", n_features)

# Run the feature-selection search only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    run()
