In [12]:
class LDAmodeller:
  """Pair a bag-of-words vectorizer with an LDA topic model for one DataFrame.

  The instance keeps the DataFrame, an (unfitted) scikit-learn text
  vectorizer and an (unfitted) ``LatentDirichletAllocation`` estimator.
  ``vectorize_fit`` fits both on the text column; ``transform`` then returns
  the LDA output for either the training matrix or a caller-supplied matrix.

  Attributes:
    df: The DataFrame passed to the constructor.
    text_column: Name of the column of ``df`` that is vectorized.
    vectorizer: ``CountVectorizer`` or ``TfidfVectorizer`` instance.
    lda: ``LatentDirichletAllocation`` instance.
    vectorized_sentences: Matrix produced by ``vectorize_fit`` (``None``
      until that method has been called).
    new_vectorized_sentences: Matrix produced by ``vectorize`` (``None``
      until that method has been called).
  """

  def __init__(self, df, vectorizer, stops=stops, min_df=2, max_df=0.95,
               num_topics=5, lda_random_state=12345, max_iter=100,
               topic_word_prior=0.6, text_column='text'):
    """Build the vectorizer and the LDA estimator; nothing is fitted yet.

    Args:
      df: DataFrame holding the documents in ``text_column``.
      vectorizer: ``'count'`` for ``CountVectorizer`` or ``'tfidf'`` for
        ``TfidfVectorizer``.
      stops: Stop words. An iterable is copied into a list; ``None`` or a
        string (e.g. ``'english'``) is handed to the vectorizer unchanged.
      min_df: Forwarded to the vectorizer as ``min_df``.
      max_df: Forwarded to the vectorizer as ``max_df``.
      num_topics: Forwarded to the LDA estimator as ``n_components``.
      lda_random_state: Forwarded to the LDA estimator as ``random_state``.
      max_iter: Forwarded to the LDA estimator as ``max_iter``.
      topic_word_prior: Forwarded to the LDA estimator as
        ``topic_word_prior``.
      text_column: Column of ``df`` to vectorize. Defaults to ``'text'``.

    Raises:
      ValueError: If ``vectorizer`` is neither ``'count'`` nor ``'tfidf'``.
    """
    self.df = df
    self.text_column = text_column

    # Reject an unknown vectorizer name before touching any other argument.
    if vectorizer not in ('count', 'tfidf'):
      raise ValueError('The vectorizer value can be either "count" or "tfidf"')

    # list('english') would turn the string into single-character stop words
    # and list(None) would raise, so only real iterables are copied.
    if stops is None or isinstance(stops, str):
      stop_words = stops
    else:
      stop_words = list(stops)

    if vectorizer == 'count':
      self.vectorizer = CountVectorizer(stop_words=stop_words,
                                        min_df=min_df,
                                        max_df=max_df)
    else:
      self.vectorizer = TfidfVectorizer(stop_words=stop_words,
                                        max_df=max_df,
                                        min_df=min_df,
                                        use_idf=True,
                                        norm=None)

    self.lda = LatentDirichletAllocation(n_components=num_topics,
                                         random_state=lda_random_state,
                                         max_iter=max_iter,
                                         topic_word_prior=topic_word_prior)

    self.vectorized_sentences = None
    self.new_vectorized_sentences = None

  def _fit_vectorizer(self):
    """Fit the vectorizer on the text column and return the resulting matrix."""
    return self.vectorizer.fit_transform(self.df[self.text_column])

  def vectorize(self):
    """Vectorize the text column and store it in ``new_vectorized_sentences``.

    Returns:
      The matrix returned by the vectorizer's ``fit_transform``.
    """
    # NOTE(review): this re-fits the vectorizer. If ``self.df`` was replaced
    # after ``vectorize_fit``, the vocabulary changes and the matrix may no
    # longer match the fitted LDA model -- confirm whether ``transform`` on
    # the already-fitted vectorizer was intended here.
    self.new_vectorized_sentences = self._fit_vectorizer()
    return self.new_vectorized_sentences

  def vectorize_fit(self):
    """Fit the vectorizer on the text column, then fit LDA on the result.

    The vectorized matrix is kept in ``vectorized_sentences``.

    Returns:
      The ``LatentDirichletAllocation`` instance held in ``self.lda``.
    """
    self.vectorized_sentences = self._fit_vectorizer()
    self.lda.fit(self.vectorized_sentences)
    return self.lda

  def transform(self, sentences=None):
    """Run the LDA model's ``transform`` on a vectorized matrix.

    Args:
      sentences: Already-vectorized documents to pass to ``lda.transform``.
        When ``None``, the matrix stored by ``vectorize_fit`` is used.

    Returns:
      Whatever ``self.lda.transform`` returns for the chosen matrix.

    Raises:
      sklearn.exceptions.NotFittedError: If ``sentences`` is ``None`` and
        ``vectorize_fit`` has not stored a matrix yet.
    """
    if sentences is None:
      if self.vectorized_sentences is None:
        # Imported here so the guard does not depend on the notebook's
        # import cell; NotFittedError subclasses ValueError and
        # AttributeError, so existing handlers keep working.
        from sklearn.exceptions import NotFittedError
        raise NotFittedError(
            'No vectorized sentences available: call vectorize_fit() first '
            'or pass a vectorized matrix to transform().')
      sentences = self.vectorized_sentences
    return self.lda.transform(sentences)