In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import time
import artm

# change log style
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
lib = artm.wrapper.LibArtm(logging_config=lc)

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
from copy import deepcopy
from topicnet.cooking_machine.models.topic_model import TopicModel
from topicnet.cooking_machine.cubes import RegularizersModifierCube

from topicnet.cooking_machine.experiment import Experiment
from topicnet.cooking_machine.cubes import *
from topicnet.cooking_machine.dataset import Dataset

%load_ext autoreload
%autoreload 2

In [3]:
import topicnet.cooking_machine.cubes as tncubes

In [4]:
from IPython.core.display import display, HTML
from IPython.display import clear_output, display_html
display(HTML("""<style>
.container { width:90% !important; }
div.output_scroll .output_subarea { white-space: pre; }
</style>"""))

### Get dataset

In [5]:
DATA_PATH = "/home/sultan/datasets/PScience/PScience.csv"

In [6]:
dataset = Dataset(DATA_PATH)
dictionary = dataset.get_dictionary()

### Make initial model

In [7]:
from topicnet.cooking_machine.model_constructor import init_simple_default_model

n_topics = 200
n_background = 10
model_artm = init_simple_default_model(
    dataset=dataset,
    modalities_to_use=dataset.get_possible_modalities(),
    main_modality='@word',
    specific_topics=n_topics - n_background,
    background_topics=n_background,
)
specific_topics, background_topics = model_artm.topic_names[:-n_background], model_artm.topic_names[-n_background:]
model_artm.scores

[PerplexityScore@all, SparsityThetaScore, SparsityPhiScore@word, PerplexityScore@word, TopicKernel@word]

In [8]:
model_artm.regularizers.add(artm.DecorrelatorPhiRegularizer(
        name="decorrelation_phi",
        topic_names=specific_topics,
        tau=1,
        class_ids=['{modality}']))
model_artm.regularizers.add(artm.SmoothSparsePhiRegularizer(
        name="smooth_phi",
        topic_names=specific_topics,
        tau=1,
        class_ids=['{modality}']))
model_artm.regularizers.add(artm.SmoothSparseThetaRegularizer(
        name="smooth_theta",
        topic_names=specific_topics,
        tau=1))

### Define Topic Model

In [9]:
from topicnet.cooking_machine.models.blei_lafferty_score import BleiLaffertyScore

In [10]:
model = TopicModel(model_artm, model_id='MuratDemo', scores=BleiLaffertyScore(num_top_tokens=15))

In [13]:
from topicnet.cooking_machine.experiment import Experiment
if os.path.exists(f'./ExploratoryResearchDemo') == True:
    os.system("rm -rf ./ExploratoryResearchDemo")
experiment = Experiment(experiment_id='ExploratoryResearchDemo', save_path='ExploratoryResearchDemo', topic_model=model)

### Cubes

In [11]:
cube_0 = RegularizersModifierCube(
    num_iter=8,
    reg_search='mul',
    regularizer_parameters=[{
            'name':"decorrelation_phi"}],
    verbose=False,
    strategy=PerplexityStrategy(
        start_point=100000,
        step=10,
        max_len=6),
    tracked_score_function=retrieve_score_for_strategy('PerplexityScore@all'),
    relative_coefficients=False)
        
cube_1 = RegularizersModifierCube(
    num_iter=8,
    reg_search='add',
    strategy=PerplexityStrategy(
            start_point=0.25,
            step=0.25,
            max_len=6),
    regularizer_parameters={
        'name':"smooth_phi"},
    # parameters of this strategy are intended for revision
    tracked_score_function=retrieve_score_for_strategy('PerplexityScore@all'),
    verbose=False,
    relative_coefficients=False)

cube_2 = RegularizersModifierCube(
    num_iter=8,
    reg_search='add',
    strategy=PerplexityStrategy(
            start_point=-0.5,
            step=-0.5,
            max_len=6),
    regularizer_parameters={
        'name':"sparse_theta"},
    # parameters of this strategy are intended for revision
    tracked_score_function=retrieve_score_for_strategy('PerplexityScore@all'),
    verbose=False,
    relative_coefficients=False)

In [16]:
start_time = time.time()
models = cube_0(model, dataset)
print("--- %s seconds ---" % (time.time() - start_time))



--- 102.20537781715393 seconds ---


In [18]:
models = experiment.select(f"PerplexityScore@all < 1.01 * MINIMUM(PerplexityScore@all) and BleiLaffertyScore -> max")

In [14]:
start_time = time.time()
models = cube_1(models, dataset)
print("--- %s seconds ---" % (time.time() - start_time))



--- 321.52917313575745 seconds ---


In [15]:
models = experiment.select(f'PerplexityScore@all < 1.01 * MINIMUM(PerplexityScore@all) and BleiLaffertyScore -> max')

ValueError: There is no BleiLaffertyScore metric for model ##11h11m03s_11d11m2019y###.
The following scores are available: ['PerplexityScore@all', 'SparsityThetaScore', 'SparsityPhiScore@word', 'PerplexityScore@word', 'TopicKernel@word.average_coherence', 'TopicKernel@word.average_contrast', 'TopicKernel@word.average_purity', 'TopicKernel@word.average_size', 'TopicKernel@word.coherence', 'TopicKernel@word.contrast', 'TopicKernel@word.purity', 'TopicKernel@word.size', 'TopicKernel@word.tokens']