In [1]:
from models.NMF import NMF_Model
from dataset.dataset import Dataset
from evaluation_metrics.diversity_metrics import Topic_diversity
from optimization.optimizer import Optimizer
from skopt.space.space import Real, Integer

In [2]:
# Build an empty Dataset and fill it from the preprocessed newsgroup folder.
# The load() call is left as the final expression so its return value is
# shown as the cell output.
DATASET_PATH = "preprocessed_datasets/newsgroup/newsgroup_lemmatized_10"
dataset = Dataset()
dataset.load(DATASET_PATH)

True

In [3]:
# Instantiate the NMF topic model on the dataset loaded above.
# Nothing is read from disk here: this only constructs the model object
# that is later handed to the Optimizer.
model = NMF_Model(dataset)

In [7]:
# Hyperparameter ranges explored during optimization:
#   num_topics - integer in [5, 30]
#   alpha, eta - reals in [0.001, 5.0]
num_topics = Integer(low=5, high=30, name='num_topics')
alpha = Real(low=0.001, high=5.0, name='alpha')
eta = Real(low=0.001, high=5.0, name='eta')

# Map each hyperparameter name to its skopt dimension.
search_space = dict(num_topics=num_topics, alpha=alpha, eta=eta)

In [8]:
# Optimizer that tunes `model` against the Topic_diversity metric,
# with the metric configured through its 'topk' parameter.
optimizer = Optimizer(model, Topic_diversity, dict(topk=10))

# Turn off both matrix flags on the optimizer: the topic-document and
# topic-word matrices are not needed here, so skipping them saves work.
optimizer.topic_document_matrix = optimizer.topic_word_matrix = False

In [9]:
# Settings forwarded to optimizer.optimize().
# NOTE(review): presumably n_calls is the total number of evaluations and
# n_random_starts the number of initial random ones - confirm against the
# Optimizer implementation.
opt_params = {
    "n_calls": 30,
    "n_random_starts": 2,
}

# Run the optimization and keep the result in `res` only.
# The previous chained assignment (`res = optimizer = ...`) also rebound
# `optimizer` to the result, so re-running this cell would have called
# .optimize() on the result object instead of the Optimizer.
res = optimizer.optimize(search_space, opt_params)

print(res[0]) # Best values for the hyperparameters
print(res[1].fun) # Score of the metric with the best hyperparameters


{'alpha': 3.7719558683269185, 'eta': 0.24968770277662894, 'num_topics': 5}
0.92
