In [None]:
import os
# Step two directory levels up from the kernel's starting directory before the
# project packages below are imported.
# NOTE(review): presumably this lands on the repository root so that `dataset`,
# `evaluation_metrics`, `optimization` and `models` resolve — confirm.
# Both moves are relative, so re-running this cell without restarting the
# kernel climbs two more levels each time.
os.chdir('..')
os.chdir('..')
from dataset.dataset import Dataset
from evaluation_metrics.diversity_metrics import Topic_diversity
from evaluation_metrics.topic_significance_metrics import KL_uniform
from skopt import gp_minimize, forest_minimize, dummy_minimize
from optimization.optimizer import Optimizer
from skopt.space.space import Real, Integer, Categorical
import multiprocessing as mp
from models import TorchETM
import torch
import numpy as np

In [None]:
# Location of the preprocessed corpus, resolved against the current working
# directory.
dataset_path = "preprocessed_datasets/20newsgroup_validation"

# Build an empty Dataset and populate it from that location.
dataset = Dataset()
dataset.load(dataset_path)

In [None]:
# Instantiate the ETM wrapper with no constructor arguments.
model = TorchETM.ETM_Wrapper()

In [None]:
# Override the two training settings used for this run in a single step.
model.hyperparameters.update({'num_epochs': 200, 'num_topics': 10})


In [None]:
# Call the wrapper's partitioning switch with True.
# NOTE(review): presumably this enables splitting the dataset into partitions
# (e.g. train/validation) during training — confirm against ETM_Wrapper.
model.partitioning(True)

In [None]:
# Train once with the hyperparameters currently stored on the model and
# top_words=10. As the last expression of the cell, the returned value is
# displayed as the cell output.
model.train_model(
    dataset,
    model.hyperparameters,
    top_words=10,
)

In [None]:
# Topic diversity metric, built with its default arguments. It is handed to
# the Optimizer further down as the metric for the search.
topic_diversity = Topic_diversity()

# KL_uniform metric (currently disabled). Uncomment together with the
# "extra_metrics" entry of opt_params to track it as an additional metric.
#kl_uniform = KL_uniform()

In [None]:
# Define optimization parameters.
# NOTE(review): n_calls equals n_random_starts. If the Optimizer forwards both
# values unchanged to the skopt minimizer, every evaluation is a random start
# and the surrogate model never guides the search — raise n_calls for a real
# run.
opt_params = {
    "n_calls": 5,
    "minimizer": forest_minimize,
    "n_random_starts": 5,
    # "extra_metrics": [kl_uniform],  # list of extra metrics (disabled)
    # Enable multiprocessing while leaving one core free. Clamp to at least
    # one worker: on a single-core machine cpu_count() - 1 is 0, which is not
    # a valid worker count for joblib-backed parallelism.
    "n_jobs": max(1, mp.cpu_count() - 1),
    "verbose": True,
    "save_path": "results",
}

# Create the output folder if it doesn't exist, so the notebook runs from a
# fresh checkout without a manual step.
os.makedirs(opt_params["save_path"], exist_ok=True)

In [None]:
# Create search space for optimization.
# Categorical choices are written as lists, not sets: a set has no defined
# order (and the order of string elements changes between kernels because of
# hash randomization), so the category order seen by the optimizer would not
# be reproducible from run to run.
search_space = {
    "t_hidden_size": Categorical([100, 200, 300, 400, 500, 600, 700,
                                  800, 900, 1000]),
    "rho_size": Categorical([100, 200, 300]),
    # 'glu' is not part of the search (it was already left out originally).
    "theta_act": Categorical(['tanh', 'relu', 'softplus', 'rrelu',
                              'leakyrelu', 'elu', 'selu']),
    # NOTE(review): presumably a dropout probability; the upper bound of 1.0
    # would then allow dropping every unit — consider a tighter bound.
    "enc_drop": Real(low=0.0, high=1.0),
    "lr": Real(low=1e-6, high=0.1, prior='log-uniform'),
    "optimizer": Categorical(['adam', 'adagrad', 'adadelta', 'rmsprop',
                              'asgd']),
    "wdecay": Real(low=1e-6, high=0.1, prior='log-uniform'),
    "bow_norm": Categorical([0, 1]),
    "clip": Real(low=0.0, high=10.0),
}

In [None]:
# Wire everything together: the model to tune, the corpus, the metric to
# optimize, the hyperparameter search space and the optimization settings
# (passed positionally, in that order).
optimizer = Optimizer(
    model, dataset, topic_diversity, search_space, opt_params
)

In [None]:
# Run the hyperparameter search.
res = optimizer.optimize()

# Report the outcome, one item per line:
#   1. best values found for the hyperparameters
#   2. score(s) of the optimized metric
print(res.hyperparameters, res.function_values, sep="\n")
#   3. name of the metric that was optimized
print("Optimized metric: " + res.optimized_metric)
