In [1]:
import os

# Make the parent of the notebook's folder the working directory; presumably
# this is what lets the project packages imported below and the relative data
# paths used later resolve.
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell always lands in the same place instead of climbing one
# more directory level on every execution.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.path.pardir))
from dataset.dataset import Dataset
from evaluation_metrics.diversity_metrics import Topic_diversity
from evaluation_metrics.topic_significance_metrics import KL_uniform
from skopt import gp_minimize, forest_minimize, dummy_minimize
from optimization.optimizer import Optimizer
from skopt.space.space import Real, Integer
import multiprocessing as mp
from models.TorchAvitm import TorchAvitm
from gensim.models import Word2Vec

In [2]:
# Read the preprocessed newsgroup dataset from disk.
# The path is relative to the current working directory (changed in the first cell).
dataset_dir = "preprocessed_datasets/newsgroup/newsgroup_lemmatized_10"
dataset = Dataset()
dataset.load(dataset_dir)

True

In [3]:
# Create the model instance; its hyperparameters are adjusted in the next cell.
model = TorchAvitm()

In [8]:

# Override a handful of hyperparameters; every other entry of
# model.hyperparameters is left at whatever value it already holds.
model.hyperparameters.update({
    'prior_variance': 0.2,
    'num_topics': 10,
    'num_epochs': 1,
})

In [9]:
# Display the complete hyperparameter dictionary (the overrides set above plus
# the entries that were not touched).
model.hyperparameters

{'prior_variance': 0.2,
 'num_topics': 10,
 'num_epochs': 1,
 'model_type': 'prodLDA',
 'activation': 'softplus',
 'dropout': 0.2,
 'learn_priors': True,
 'batch_size': 64,
 'lr': 0.002,
 'momentum': 0.99,
 'solver': 'adam',
 'reduce_on_plateau': False,
 'prior_mean': 0.0,
 'hidden_sizes': (100, 100)}

In [10]:
# NOTE(review): presumably switches dataset partitioning on for training --
# confirm against TorchAvitm.partitioning.
model.partitioning(True)

In [11]:
# Train once with the hyperparameters configured above, with both matrix flags
# switched off. The call's return value is shown as the cell output: a dict
# whose 'topics' entry holds one list of words per topic.
model.train_model(dataset, model.hyperparameters, topic_word_matrix=False,
                    topic_document_matrix=False)

Settings: 
               N Components: 10
               Topic Prior Mean: 0.0
               Topic Prior Variance: 0.2
               Model Type: prodLDA
               Hidden Sizes: (100, 100)
               Activation: softplus
               Dropout: 0.2
               Learn Priors: True
               Learning Rate: 0.002
               Momentum: 0.99
               Reduce On Plateau: False
               Save Dir: None
Epoch: [1/1]	Samples: [10758/10758]	Train Loss: 587.91229380806	Time: 0:00:01.492657


{'topics': [['baby',
   'decent',
   'prepare',
   'cop',
   'plug',
   'detect',
   'bar',
   'spread',
   'debate',
   'generation'],
  ['manner',
   'solve',
   'direct',
   'touch',
   'button',
   'join',
   'engineering',
   'refuse',
   'deserve',
   'twice'],
  ['back',
   'different',
   'file',
   'already',
   'maybe',
   'thing',
   'application',
   'go',
   'example',
   'set'],
  ['engineering',
   'anywhere',
   'baby',
   'prior',
   'join',
   'fair',
   'cop',
   'baseball',
   'class',
   'rich'],
  ['seek',
   'generation',
   'manner',
   'basically',
   'prior',
   'originator',
   'judge',
   'animal',
   'capability',
   'ahead'],
  ['call',
   'give',
   'people',
   'use',
   'make',
   'number',
   'good',
   'life',
   'say',
   'way'],
  ['good',
   'also',
   'people',
   'time',
   'give',
   'first',
   'article',
   'find',
   'different',
   'well'],
  ['crime',
   'right',
   'hope',
   'little',
   'weapon',
   'vote',
   'gun',
   'power',
   'leas

In [6]:
# Metric instance handed to the optimizer further down: topic diversity
topic_diversity = Topic_diversity()

# Optional additional metric (KL_uniform), currently disabled
#kl_uniform = KL_uniform()

In [7]:
# Define optimization parameters (passed to the Optimizer further down)
opt_params = {
    "n_calls": 30,
    "minimizer": forest_minimize,
    "different_iteration": 3,
    "n_random_starts": 5,
    # "extra_metrics": [kl_uniform],  # List of extra metrics
    # Enable multiprocessing: leave one core free, but never request fewer
    # than one worker -- cpu_count() - 1 evaluates to 0 on a single-core machine.
    "n_jobs": max(1, mp.cpu_count() - 1),
    "verbose": True,
    # create folder if it doesn't exist
    # NOTE(review): unclear whether the Optimizer creates this folder itself
    # or expects it to exist beforehand -- confirm.
    "save_path": "results",
}

In [8]:
# Search space for the optimization: hyperparameter name -> skopt dimension.
# Only the number of training epochs is tuned here.
search_space = dict(
    num_epochs=Integer(low=1, high=50),
    # eta=Real(low=0.01, high=5.0),
)

In [9]:
# Initialize optimizer with everything built in the cells above
optimizer = Optimizer(
    model,            # TorchAvitm instance created above
    dataset,          # Dataset instance loaded above
    topic_diversity,  # Topic_diversity metric instance
    search_space,     # dict: hyperparameter name -> skopt dimension
    opt_params)       # dict of optimization settings

In [None]:
# Turn off both the topic-document and the topic-word matrix flags on the
# optimizer (done to optimize performance).
optimizer.topic_document_matrix = optimizer.topic_word_matrix = False

In [None]:
# Run the optimization and report the outcome
res = optimizer.optimize()

print(res.hyperparameters)  # Best values for the hyperparameters
print(res.function_values)  # Score of the optimized metric
# f-string rather than "+" concatenation: the output is identical when
# optimized_metric is a string, and it no longer raises TypeError otherwise.
print(f"Optimized metric: {res.optimized_metric}")