### Install Libraries

In [None]:
%%capture
!pip install octis

### Import Libraries

In [None]:
from octis.models.CTM import CTM
from octis.dataset.dataset import Dataset
from octis.optimization.optimizer import Optimizer
from skopt.space.space import Real, Categorical, Integer
from octis.evaluation_metrics.coherence_metrics import Coherence
from octis.models.LDA import LDA
from octis.evaluation_metrics.diversity_metrics import TopicDiversity
from octis.evaluation_metrics.coherence_metrics import Coherence

### Load Data

In [None]:
# Folder containing the custom corpus to load (path is relative to the
# notebook's working directory).
DATASET_DIR = "STEM"

# `Dataset` is already imported in the imports cell, so it is not re-imported here.
dataset = Dataset()
dataset.load_custom_dataset_from_folder(DATASET_DIR)

### Define Topic Model

In [None]:
# CTM instance used as the starting point for the hyperparameter search:
# 10 topics, 30 training epochs, zero-shot inference, and a multilingual
# DistilUSE sentence-embedding model.
model = CTM(
    num_topics=10,
    num_epochs=30,
    inference_type="zeroshot",
    bert_model="distiluse-base-multilingual-cased",
)

### Define Evaluation Metrics

In [None]:
# Coherence metric, computed against the corpus of the loaded dataset.
# This is the objective passed to the optimizer.
reference_corpus = dataset.get_corpus()
npmi = Coherence(texts=reference_corpus)

# Topic diversity, measured over the top 10 words of each topic.
topic_diversity = TopicDiversity(topk=10)

In [None]:
# Hyperparameter ranges explored by the optimizer.
# Categorical choices are given as lists rather than sets: a set has no
# defined order, and for strings its iteration order can change between
# interpreter sessions, which makes the search harder to reproduce.
search_space = {
    "num_layers": Categorical([1, 2, 3]),
    "num_neurons": Categorical([100, 200, 300]),
    "activation": Categorical(["relu", "softplus"]),
    "dropout": Real(0.0, 0.95),
}

In [None]:
# Budget for the search: how many optimizer calls to make ...
optimization_runs = 30
# ... and how many times the model is run within each call.
model_runs = 1

In [None]:
# Run the hyperparameter search for the CTM model, optimizing the coherence
# metric over `search_space`. Results (and, with save_models=True, the trained
# models) are written under `save_path`.
optimizer = Optimizer()
optimization_result = optimizer.optimize(
    model,
    dataset,
    npmi,
    search_space,
    number_of_call=optimization_runs,
    model_runs=model_runs,
    save_models=True,
    # Record topic diversity for every call alongside the optimized metric.
    extra_metrics=[topic_diversity],
    plot_best_seen=True,
    plot_model=True,
    plot_name="B0_plot",
    # Single trailing slash, so the saved path matches the one the analysis
    # section reads back ("results2/test_ctm/result.json").
    save_path="results2/test_ctm/",
)

Current call:  0
Current call:  1
Current call:  2
Current call:  3
Current call:  4
Current call:  5
Current call:  6
Current call:  7
Current call:  8
Current call:  9
Current call:  10
Current call:  11
Current call:  12
Current call:  13
Current call:  14
Current call:  15
Current call:  16
Current call:  17
Current call:  18
Current call:  19
Current call:  20
Current call:  21
Current call:  22
Current call:  23
Current call:  24
Current call:  25
Current call:  26
Current call:  27
Current call:  28
Current call:  29


### Save Results of Optimization

In [None]:
# Export the optimization results as a CSV file next to the notebook.
results_csv = "results_ctm.csv"
optimization_result.save_to_csv(results_csv)

### Analysis of the Results

In [None]:
import json

# Read back the JSON summary written by the optimizer. The context manager
# closes the file handle as soon as parsing is done.
with open("results2/test_ctm/result.json", "r", encoding="utf-8") as result_file:
    res = json.load(result_file)

# Show which fields the result file contains.
res.keys()

dict_keys(['dataset_name', 'dataset_path', 'is_cached', 'kernel', 'acq_func', 'surrogate_model', 'optimization_type', 'model_runs', 'save_models', 'save_step', 'save_name', 'save_path', 'early_stop', 'early_step', 'plot_model', 'plot_best_seen', 'plot_name', 'log_scale_plot', 'search_space', 'model_name', 'model_attributes', 'use_partitioning', 'metric_name', 'extra_metric_names', 'metric_attributes', 'extra_metric_attributes', 'current_call', 'number_of_call', 'random_state', 'x0', 'y0', 'n_random_starts', 'initial_point_generator', 'topk', 'time_eval', 'dict_model_runs', 'f_val', 'x_iters'])

In [None]:
# Only the last expression of a cell is echoed, so the sampled activation
# values are printed explicitly; otherwise they would be silently discarded.
print(res["x_iters"]["activation"])

# Objective values stored in the result file.
res["f_val"]

[-0.012696563489589929,
 0.05897518467201388,
 0.09764432158527746,
 0.03325646498718681,
 -0.010605091138006256,
 -0.013034346318947954,
 0.0001274738384572576,
 0.026845095309759075,
 0.10034473768889023,
 0.0684881494880597,
 0.019186981499777364,
 0.07814368834514229,
 0.09009253740505901,
 0.13083181181227352,
 0.079152198172026,
 0.0770071020984817,
 0.12020691795739855,
 0.07501535182346523,
 0.07204116320827589,
 0.07955485832140077,
 0.10674771374324746,
 0.0770850920407112,
 0.09114470531326471,
 0.12135515938870212,
 0.08592797544504167,
 0.06275972380628435,
 0.10379583911538526,
 0.1182030964387675,
 0.0508371731843613,
 0.09071049920663729]

In [None]:
import matplotlib.pyplot as plt

# Plot the stored objective values in the order they appear in the result
# file. Explicit axes and labels let the figure stand on its own, and
# plt.show() keeps the Line2D repr out of the cell output.
fig, ax = plt.subplots()
ax.plot(res["f_val"])
ax.set_xlabel("Optimization call")
ax.set_ylabel("Objective value (f_val)")
ax.set_title("CTM hyperparameter optimization")
plt.show()

[<matplotlib.lines.Line2D at 0x7f9f1666e950>]

### Get Hyperparameters

In [None]:
# Show the hyperparameters currently set on the model object.
# NOTE(review): this cell runs after the optimization, so these are presumably
# the values of the last evaluated configuration rather than the best-scoring
# one; confirm against res["x_iters"] / res["f_val"] before reusing them.
model.hyperparameters


{'activation': 'softplus',
 'batch_size': 64,
 'bert_model': 'distiluse-base-multilingual-cased',
 'bert_path': '',
 'dropout': 0.6528626452408228,
 'hidden_sizes': (100, 100, 100),
 'inference_type': 'zeroshot',
 'learn_priors': True,
 'lr': 0.002,
 'model_type': 'prodLDA',
 'momentum': 0.99,
 'num_epochs': 30,
 'num_layers': 3,
 'num_neurons': 100,
 'num_samples': 10,
 'num_topics': 10,
 'prior_mean': 0.0,
 'prior_variance': None,
 'reduce_on_plateau': False,
 'solver': 'adam'}