In [19]:
from functools import wraps


def singledispatch(fn):
  registry = {}
  
  def decorated_function(self, lda_modeller, vocab, words, id2word):
    my_func = registry.get(self.lib, None)
    if my_func is None:
      result = my_func(self, lda_modeller, vocab, words, id2word)
    return result
    
  def register(libr_str):
    def inner(method):
      registry[libr_str] = method
      return method
    return inner
  
  decorated_function.register = register
  decorated_function.registry = registry
  
  def dispatch(libr_str):
    return registry.get(libr_str, None)
    
  decorated_function.dispatch = dispatch

  
  return decorated_function

In [20]:
class ModelSelector:
  def __init__(self, vectorizer, lib, df=df, 
               limit=10, start=2, step=1, passes=200, iterations=200, 
               per_word_topics=False, alpha=8, eta=0.9, gamma_threshold=8, 
               verbose=True, lda_random_state = 12345):
    
    self.df = df
    self.limit = limit
    self.start = start
    self.step = step
    self.vectorizer = vectorizer
    self.lib = lib
    self.passes = passes
    self.iterations = iterations
    self.per_word_topics = per_word_topics
    self.alpha = alpha
    self.eta = eta
    self.gamma_threshold = gamma_threshold
    self.verbose = verbose
    self.lda_random_state = lda_random_state
    self.model_list = []
    self.coherence_list = []
    self.perplexity_list = []
    self.log_likelihood_list = []
    
  @singledispatch
  def calculate_metrics(self, lda_modeller, vocab, words, id2word):
    return None
    
  @calculate_metrics.register('skl')
  def calculate_metrics_lda_skl(self, lda_modeller, vocab, words, id2word):
    
    topics = []
    for i in range(lda_modeller.lda_model.n_components):
      topic_words = [vocab[j] for j in lda_modeller.lda_model.components_[i].argsort()[:-10 - 1:-1]]
      topics.append(topic_words)
      
    coherencemodel = CoherenceModel(topics=topics, texts=words, dictionary=id2word, coherence='c_v')
    self.coherence_list.append(coherencemodel.get_coherence())
      
    perplexity = np.log(lda_modeller.lda_model.perplexity(lda_modeller.vectorized_sentences))
    self.perplexity_list.append(perplexity)
        
    log_likelihood = lda_modeller.lda_model.score(lda_modeller.vectorized_sentences)
    self.log_likelihood_list.append(log_likelihood)
    

  def calculate_metrics_lda_gensim(self, lda_modeller, vocab, words, id2word):
    coherencemodel = CoherenceModel(model=lda_modeller.lda_model, texts=words, dictionary=id2word, coherence='c_v')
    self.coherence_list.append(coherencemodel.get_coherence())
        
    perplexity = lda_modeller.lda_model.log_perplexity(lda_modeller.corpus)
    self.perplexity_list.append(perplexity)
    
  def plot_metrics_lda_skl(self):
    
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
    
    x = range(self.start, self.limit, self.step)
    
    axs[0].plot(x, self.coherence_list)
    axs[0].set_title('Coherence')
    axs[0].set_xlabel('Num Topics')
    axs[0].set_ylabel('Coherence Score')

    axs[1].plot(x, self.perplexity_list)
    axs[1].set_title('Perplexity')
    axs[1].set_xlabel('Num Topics')
    axs[1].set_ylabel('Perplexity Score')

    axs[2].plot(x, self.log_likelihood_list)
    axs[2].set_title('Log likelihood')
    axs[2].set_xlabel('Num Topics')
    axs[2].set_ylabel('Log-likelihood Score')
    
    
  def plot_metrics_lda_gensim(self):
    fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15, 4))
    
    x = range(self.start, self.limit, self.step)
    
    axs[0].plot(x, self.coherence_list)
    axs[0].set_title('Coherence')
    axs[0].set_xlabel('Num Topics')
    axs[0].set_ylabel('Coherence Score')

    axs[1].plot(x, self.perplexity_list)
    axs[1].set_title('Perplexity')
    axs[1].set_xlabel('Num Topics')
    axs[1].set_ylabel('Perplexity Score')
    

  
  def plot_metrics(self):
    registry = {'skl': self.plot_metrics_lda_skl,
                'gensim': self.plot_metrics_lda_gensim}
    
    fn = registry[self.lib]
    return fn()
  
  

  
  
  
  @time_dec
  def __call__(self):
    
    for num_topics in tqdm(range(self.start, self.limit, self.step)):
      lda_modeller = LDAmodeller(df = self.df, 
                                 vectorizer = self.vectorizer, 
                                 lib = self.lib, 
                                 num_topics = num_topics, 
                                 verbose = False,
                                 alpha = self.alpha,
                                 eta = self.eta,
                                 lda_random_state = self.lda_random_state,
                                 gamma_threshold = self.gamma_threshold
                                 )
      
      lda_modeller.train_lda()
      self.model_list.append(lda_modeller.lda_model)
      vocab, words, id2word = coherence_perp_logl(lda_modeller = lda_modeller, verbose = False
                                                 )
      
      self.calculate_metrics(lda_modeller=lda_modeller, vocab=vocab, words=words, id2word=id2word)

    self.plot_metrics()

    return self.model_list, self.coherence_list, self.perplexity_list, self.log_likelihood_list

NameError: name 'df' is not defined