In [1]:
import os

# Make the parent directory the working directory. The relative dataset path
# used later in the notebook is resolved against it; presumably it is also
# what lets the project imports in this cell resolve (confirm against the
# repository layout).
# The target is resolved to an absolute path only once per kernel session:
# a bare os.chdir(os.path.pardir) climbs one more level every time this cell
# is re-run, which silently breaks every relative path afterwards.
if "NOTEBOOK_ROOT" not in globals():
    NOTEBOOK_ROOT = os.path.abspath(os.path.pardir)
os.chdir(NOTEBOOK_ROOT)
from models.LDA import LDA_Model
from dataset.dataset import Dataset
from evaluation_metrics.diversity_metrics import Topic_diversity
from evaluation_metrics.topic_significance_metrics import KL_uniform
from skopt import gp_minimize, forest_minimize, dummy_minimize
from optimization.optimizer import Optimizer
from skopt.space.space import Real, Integer
import multiprocessing as mp
from gensim.models import Word2Vec

In [2]:
# Read the preprocessed corpus from disk (the folder name suggests the
# lemmatized DBLP data). The path is relative to the current working directory.
dblp_path = "preprocessed_datasets/dblp/dblp_lemmatized_1"
dataset = Dataset()
dataset.load(dblp_path)

True

In [3]:
# Instantiate the LDA model wrapper. No constructor arguments are given,
# so its hyperparameters start at whatever defaults LDA_Model defines.
model = LDA_Model()

In [4]:
# Request 20 topics by merging an override into the model's hyperparameters
topic_overrides = dict(num_topics=20)
model.hyperparameters.update(topic_overrides)

In [5]:
# Metric instances used below: topic diversity is handed to the Optimizer as
# the metric to optimize, KL-uniform is listed among the extra metrics.
topic_diversity, kl_uniform = Topic_diversity(), KL_uniform()

In [6]:
# Settings passed to the Optimizer, gathered in a single literal
opt_params = {
    "n_calls": 10,
    "minimizer": forest_minimize,  # skopt minimizer function used for the search
    "different_iteration": 3,
    "n_random_starts": 2,
    "extra_metrics": [kl_uniform],  # List of extra metrics
    "n_jobs": mp.cpu_count(),  # Enable multiprocessing: one job per reported CPU
    "verbose": True,
}

In [7]:
# Search space for the optimization: alpha and eta are each explored over the
# same real-valued interval [0.001, 5.0], with a separate Real dimension per key.
search_space = {
    param_name: Real(low=0.001, high=5.0)
    for param_name in ("alpha", "eta")
}

In [8]:
# Build the optimizer from, in positional order: the model, the dataset,
# the metric to optimize, the search space and the optimization settings.
optimizer = Optimizer(model, dataset, topic_diversity, search_space, opt_params)

In [9]:
# Turn off computation of the topic-document matrix to speed up the search.
# Set here, ahead of optimizer.optimize(), so that it applies to that run.
optimizer.topic_document_matrix = False

In [10]:
# Run the hyperparameter search and keep the result object
res = optimizer.optimize()

# Report, in order: the best values found for the hyperparameters, then the
# score of the optimized metric.
for report_attr in ("hyperparameters", "function_values"):
    print(getattr(res, report_attr))
# Finally, the name of the metric that was optimized
print("Optimized metric: "+res.optimized_metric)

Start Bayesian Optimization
{'alpha': 0.45465639452257606, 'eta': 0.7759167762213843}
0.975
Optimized metric: Topic_diversity
