# Load Data

In [1]:
import re
import ssl

import pandas as pd
import numpy as np
import tqdm
from scipy.stats import chi2_contingency

import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize

ssl._create_default_https_context = ssl._create_unverified_context
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)

from langdetect import detect

import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

import spacy

from pprint import pprint


In [2]:
# load data
lords = pd.read_csv('/Users/javier/Desktop/cmgt-597/steam-review-data/lords_df.csv') # Lords of the Fallen
lords.shape

(14509, 8)

In [3]:
# filter helpful and actual reviews
# note: weighted_vote_score measures how helpful a review is. so reviews with a weighted_vote_score of 0 are filtered out

lords_df = lords[(lords['weighted_vote_score'] != 0.0) & # helpful reviews only
             (lords['playtime_at_review'] != 0.0) & # actual reviews only (can't review a game you never played!)
             (lords['review'].notnull()) # reviews with words only
             ]

lords_reviews = lords_df[['recommendationid', 'review', 'voted_up']]
lords_reviews

Unnamed: 0,recommendationid,review,voted_up
0,148136042,Having issues with performance? \n\nChange bot...,True
1,148896243,"This is a 'Souls-Like' through and through, if...",True
2,148457883,Giving this a positive review because it's gen...,True
3,148328489,This game is honestly way better than I origin...,True
4,148383765,This game has some of the most incredible vist...,True
...,...,...,...
6312,148177186,This game is absolute trash..\n\nYou should pl...,False
6313,148157180,after hour and half to learn controls and play...,True
6314,149433369,I'm gooning in Umbral Realm,True
6315,148133087,"released 5 minutes late, actually seething",False


# Text Prep

In [4]:
# First tokenize each sentence into a list of words, removing punctuations and unnecessary characters altogether

def sent_to_words(sentences): # define function sent_to_words (sentences to words), argument is the dataframe and column of your reviews.
    for sentence in sentences: # for every sentence (review) in the sentences (review df)
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations  | # use the gensim package to tokenize your reviews

lords_reviews['data_words'] = list(sent_to_words(lords_reviews['review'])) # declare a new column in the lop_review df called 'data_words'
                                                                        # then use the sent_to_words function to tokenize the review column of the lop_review df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lords_reviews['data_words'] = list(sent_to_words(lords_reviews['review'])) # declare a new column in the lop_review df called 'data_words'


In [5]:
# After tokenization remove stoword and apply lemmatization 

stop_words = stopwords.words('english') # declare english as the stopword language
# stop_words.extend(['app', 'bank', 'account','try','issue','problem', 'face'])  #adding my own stop words

def process_words(texts, stop_words=stop_words, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']): # define function process_words. arguments are the column of the tokenized words,
                                                                                                 # stop words library, allowed_postags
    # remove stop words
    texts = [[word for word in simple_preprocess(str(doc)) if word not in stop_words] for doc in texts] # remove stop words from tokenized reviews
    texts_out = [] # define variable texts_out. it is an empty list.

    # Lemmatization
    nlp = spacy.load("en_core_web_sm",disable=['parser', 'ner']) # define variable nlp. this is for lemmatization
    for sent in texts: # for every sentence in texts (review df)
        doc = nlp(" ".join(sent)) # declare variable doc
        texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags]) # append the lemmatized words to the texts_out empty list
    
    # remove stopwords once more after lemmatization
    texts_out = [[word for word in simple_preprocess(str(doc)) if word not in stop_words] for doc in texts_out] # remove stopwords again
    return texts_out # return the texts_out list

lords_reviews['data_ready'] = process_words(lords_reviews['data_words'])  # processed Text Data!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lords_reviews['data_ready'] = process_words(lords_reviews['data_words'])  # processed Text Data!


In [6]:
# cleaning the reviews further to remove non-English text

def detect_my(text):
    try:
        return detect(text)
    except:
        return 'unknown'
    
lords_reviews['language'] = lords_reviews['review'].apply(detect_my)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lords_reviews['language'] = lords_reviews['review'].apply(detect_my)


In [7]:
lords_reviews

Unnamed: 0,recommendationid,review,voted_up,data_words,data_ready,language
0,148136042,Having issues with performance? \n\nChange bot...,True,"[having, issues, with, performance, change, bo...","[issue, performance, change, reflection, globa...",en
1,148896243,"This is a 'Souls-Like' through and through, if...",True,"[this, is, souls, like, through, and, through,...","[soul, willing, take, leap, faith, good, tackl...",en
2,148457883,Giving this a positive review because it's gen...,True,"[giving, this, positive, review, because, it, ...","[give, positive, review, genuinely, great, als...",en
3,148328489,This game is honestly way better than I origin...,True,"[this, game, is, honestly, way, better, than, ...","[game, honestly, way, well, originally, expect...",en
4,148383765,This game has some of the most incredible vist...,True,"[this, game, has, some, of, the, most, incredi...","[game, incredible, vista, see, video, game, co...",en
...,...,...,...,...,...,...
6312,148177186,This game is absolute trash..\n\nYou should pl...,False,"[this, game, is, absolute, trash, you, should,...","[game, absolute, trash, play, literally, game]",en
6313,148157180,after hour and half to learn controls and play...,True,"[after, hour, and, half, to, learn, controls, ...","[hour, half, learn, control, playing, go, love]",en
6314,149433369,I'm gooning in Umbral Realm,True,"[gooning, in, umbral, realm]","[goone, umbral, realm]",tl
6315,148133087,"released 5 minutes late, actually seething",False,"[released, minutes, late, actually, seething]","[release, minute, late, actually, seethe]",en


In [8]:
drop_languages = ['ar', 'fa', 'hi', 'hu', 'ko', 'pt', 'ru', 'uk', 'zh-cn'] # these review tagged with these languages are not in English

lords_reviews = lords_reviews[(~lords_reviews['language'].isin(drop_languages)) &
                          ~((lords_reviews['language'] == 'unknown') & (lords_reviews['data_ready'].apply(len) == 0))
                          ]

lords_reviews = lords_reviews.drop(columns='language')

In [9]:
lords_reviews

Unnamed: 0,recommendationid,review,voted_up,data_words,data_ready
0,148136042,Having issues with performance? \n\nChange bot...,True,"[having, issues, with, performance, change, bo...","[issue, performance, change, reflection, globa..."
1,148896243,"This is a 'Souls-Like' through and through, if...",True,"[this, is, souls, like, through, and, through,...","[soul, willing, take, leap, faith, good, tackl..."
2,148457883,Giving this a positive review because it's gen...,True,"[giving, this, positive, review, because, it, ...","[give, positive, review, genuinely, great, als..."
3,148328489,This game is honestly way better than I origin...,True,"[this, game, is, honestly, way, better, than, ...","[game, honestly, way, well, originally, expect..."
4,148383765,This game has some of the most incredible vist...,True,"[this, game, has, some, of, the, most, incredi...","[game, incredible, vista, see, video, game, co..."
...,...,...,...,...,...
6312,148177186,This game is absolute trash..\n\nYou should pl...,False,"[this, game, is, absolute, trash, you, should,...","[game, absolute, trash, play, literally, game]"
6313,148157180,after hour and half to learn controls and play...,True,"[after, hour, and, half, to, learn, controls, ...","[hour, half, learn, control, playing, go, love]"
6314,149433369,I'm gooning in Umbral Realm,True,"[gooning, in, umbral, realm]","[goone, umbral, realm]"
6315,148133087,"released 5 minutes late, actually seething",False,"[released, minutes, late, actually, seething]","[release, minute, late, actually, seethe]"


# Base LDA Model

In [10]:
# Create the Dictionary and Corpus needed for Topic Modelling


#  Dictionary
id2word = corpora.Dictionary(lords_reviews['data_ready'])

#  Corpus: Term Document Frequency
corpus = [id2word.doc2bow(text) for text in lords_reviews['data_ready']]

In [11]:
# Build LDA model
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                            id2word=id2word,
                                            num_topics=10, 
                                            random_state=100,
                                            chunksize=100,
                                            passes=10,
                                            per_word_topics=True)

pprint(lda_model.print_topics())

[(0,
  '0.106*"ring" + 0.100*"elden" + 0.099*"soul" + 0.097*"game" + 0.068*"good" + '
  '0.059*"dark" + 0.036*"bad" + 0.035*"play" + 0.032*"well" + 0.025*"buy"'),
 (1,
  '0.006*"seemless" + 0.006*"meaning" + 0.005*"working" + 0.001*"candy" + '
  '0.000*"tomorrow" + 0.000*"rad" + 0.000*"bb" + 0.000*"fecking" + '
  '0.000*"rollback" + 0.000*"sack"'),
 (2,
  '0.018*"feel" + 0.017*"attack" + 0.016*"lock" + 0.013*"control" + '
  '0.012*"garbage" + 0.011*"combat" + 0.011*"character" + 0.010*"make" + '
  '0.010*"animation" + 0.010*"hit"'),
 (3,
  '0.091*"fall" + 0.072*"lord" + 0.035*"soul" + 0.020*"experience" + '
  '0.015*"title" + 0.015*"dark" + 0.015*"genre" + 0.012*"love" + 0.011*"game" '
  '+ 0.010*"challenge"'),
 (4,
  '0.073*"op" + 0.057*"co" + 0.041*"friend" + 0.040*"host" + '
  '0.027*"multiplayer" + 0.027*"player" + 0.026*"coop" + 0.026*"play" + '
  '0.021*"get" + 0.018*"seamless"'),
 (5,
  '0.114*"game" + 0.026*"play" + 0.017*"get" + 0.015*"run" + 0.015*"issue" + '
  '0.014*"review

In [12]:
print('\nPerplexity: ', lda_model.log_perplexity(corpus))  # a measure of how good the model is. lower the better.

coherence_model_lda = CoherenceModel(model=lda_model, texts=lords_reviews['data_ready'], dictionary=id2word, coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)


Perplexity:  -7.467307780209206

Coherence Score:  0.4109652903667495


# Hyperparameter Tuning

In [None]:
def compute_coherence_values(corpus, dictionary, k, a, b):
    
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, # changed the LDA method to Rolly John's
                                                id2word=dictionary,
                                                num_topics=k, 
                                                random_state=100,
                                                chunksize=100,
                                                passes=10,
                                                alpha=a,
                                                eta=b)
    
    coherence_model_lda = CoherenceModel(model=lda_model, texts=lords_reviews['data_ready'], dictionary=id2word, coherence='c_v')
    
    return coherence_model_lda.get_coherence()

In [None]:
grid = {}
grid['Validation_Set'] = {}

# Topics range
min_topics = 2
max_topics = 11
step_size = 1
topics_range = range(min_topics, max_topics, step_size)

# Alpha parameter
alpha = list(np.arange(0.01, 1, 0.3))
alpha.append('symmetric')
alpha.append('asymmetric')

# Beta parameter
beta = list(np.arange(0.01, 1, 0.3))
beta.append('symmetric')

# Validation sets
num_of_docs = len(corpus)
corpus_sets = [gensim.utils.ClippedCorpus(corpus, int(num_of_docs*0.75)), 
               corpus]

corpus_title = ['75% Corpus', '100% Corpus']

model_results = {'Validation_Set': [],
                 'Topics': [],
                 'Alpha': [],
                 'Beta': [],
                 'Coherence': []
                }

# Can take a long time to run
if 1 == 1:
    pbar = tqdm.tqdm(total=(len(beta)*len(alpha)*len(topics_range)*len(corpus_title)))
    
    # iterate through validation corpuses
    for i in range(len(corpus_sets)):
        # iterate through number of topics
        for k in topics_range:
            # iterate through alpha values
            for a in alpha:
                # iterare through beta values
                for b in beta:
                    # get the coherence score for the given parameters
                    cv = compute_coherence_values(corpus=corpus_sets[i], dictionary=id2word, 
                                                  k=k, a=a, b=b)
                    # Save the model results
                    model_results['Validation_Set'].append(corpus_title[i])
                    model_results['Topics'].append(k)
                    model_results['Alpha'].append(a)
                    model_results['Beta'].append(b)
                    model_results['Coherence'].append(cv)
                    
                    pbar.update(1)

    # pd.DataFrame(model_results).to_csv('./results/lda_tuning_results.csv', index=False)
                    
    lda_tuning_results = pd.DataFrame(model_results)

    file_name = 'lords-lda-tuning-results.csv'

    lda_tuning_results.to_csv(f'{file_name}', index = False)
    output_filepath = f'/Users/javier/Desktop/cmgt-597/lords-of-the-fallen/{file_name}'
    lda_tuning_results.to_csv(output_filepath, index = False)
    
    pbar.close()

In [None]:
lda_tuning_results = pd.read_csv('/Users/javier/Desktop/cmgt-597/lords-of-the-fallen/lords-lda-tuning-results.csv')
lda_tuning_results

# Final Model

In [13]:
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                            id2word=id2word,
                                            num_topics=9, 
                                            random_state=100,
                                            chunksize=100,
                                            passes=10,
                                            alpha=0.01,
                                            eta=0.6)
                                            #the alpha and beta will yield the highest coherence score, according to the tuning results
                                            

pprint(lda_model.print_topics())

[(0,
  '0.003*"poop" + 0.002*"bucket" + 0.001*"cheek" + 0.001*"pet" + 0.001*"dog" + '
  '0.001*"fever" + 0.001*"mucho" + 0.001*"tiene" + 0.001*"wah" + 0.001*"werk"'),
 (1,
  '0.005*"grind" + 0.003*"average" + 0.003*"paint" + 0.003*"dry" + '
  '0.003*"long" + 0.002*"spare" + 0.002*"brain" + 0.002*"adult" + 0.002*"boi" '
  '+ 0.002*"easy"'),
 (2,
  '0.010*"cancer" + 0.005*"disorder" + 0.004*"disease" + 0.004*"syndrome" + '
  '0.003*"anal" + 0.002*"infection" + 0.001*"prostate" + 0.001*"personality" + '
  '0.001*"cold" + 0.001*"fatigue"'),
 (3,
  '0.004*"brown" + 0.004*"pink" + 0.002*"birth" + 0.002*"unclean" + '
  '0.002*"atmospheric" + 0.002*"rpgs" + 0.002*"strategic" + 0.001*"depth" + '
  '0.001*"glory" + 0.001*"shave"'),
 (4,
  '0.048*"op" + 0.039*"co" + 0.027*"host" + 0.013*"seamless" + 0.012*"friend" '
  '+ 0.006*"coop" + 0.005*"item" + 0.005*"guest" + 0.004*"loot" + '
  '0.004*"partner"'),
 (5,
  '0.083*"game" + 0.021*"play" + 0.013*"good" + 0.013*"get" + 0.013*"soul" + '
  '0.009*

In [14]:
# Final Model Evaluation
print('\nPerplexity: ', lda_model.log_perplexity(corpus))  # a measure of how good the model is. lower the better.

coherence_model_lda = CoherenceModel(model=lda_model, texts=lords_reviews['data_ready'], dictionary=id2word, coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)


Perplexity:  -7.164313506560036

Coherence Score:  0.5271473191212089


# Topic-Review Allocation

In [15]:
def format_topics_sentences(ldamodel=None, corpus=None, texts=None):
    # Initialize an empty list to store data
    data = []

    # Get main topic in each document
    for i, row_list in enumerate(ldamodel[corpus]):
        row = row_list[0] if ldamodel.per_word_topics else row_list            
        row = sorted(row, key=lambda x: (x[1]), reverse=True)
        
        # Initialize variables to store topic information
        dominant_topic = None
        perc_contribution = None
        topic_keywords = None
        
        # Loop through topics in the row
        for j, (topic_num, prop_topic) in enumerate(row):
            if j == 0:  # => dominant topic
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                dominant_topic = int(topic_num)
                perc_contribution = round(prop_topic, 4)
                break
        
        # Store the extracted information
        data.append([dominant_topic, perc_contribution, topic_keywords])

    # Convert the list of data into a DataFrame
    sent_topics_df = pd.DataFrame(data, columns=['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords'])

    # Add original text to the end of the output
    contents = pd.Series(texts)
    contents.reset_index(drop=True, inplace=True)
    sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
    return sent_topics_df

# Call the function with appropriate arguments
df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=lords_reviews.review)

# Format the DataFrame
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'review']

# Display the first 5 rows of the DataFrame
display(df_dominant_topic.head(5))


Unnamed: 0,Document_No,Dominant_Topic,Topic_Perc_Contrib,Keywords,review
0,0,5,0.9947,"game, play, good, get, soul, run, issue, perfo...",Having issues with performance? \n\nChange bot...
1,1,5,0.5895,"game, play, good, get, soul, run, issue, perfo...","This is a 'Souls-Like' through and through, if..."
2,2,5,0.9953,"game, play, good, get, soul, run, issue, perfo...",Giving this a positive review because it's gen...
3,3,8,0.9988,"game, enemy, boss, get, feel, make, soul, time...",This game is honestly way better than I origin...
4,4,8,0.8843,"game, enemy, boss, get, feel, make, soul, time...",This game has some of the most incredible vist...


In [16]:
lords_reviews = df_dominant_topic.merge(lords_reviews, how='left')
lords_reviews.drop_duplicates(subset=['Document_No'], keep='first', inplace=True)

In [17]:
lords_reviews = lords_reviews[['Document_No', 'review', 'data_words', 'data_ready', 'Keywords', 'Dominant_Topic', 'Topic_Perc_Contrib', 'voted_up']]

In [18]:
# final dataframe
lords_reviews

Unnamed: 0,Document_No,review,data_words,data_ready,Keywords,Dominant_Topic,Topic_Perc_Contrib,voted_up
0,0,Having issues with performance? \n\nChange bot...,"[having, issues, with, performance, change, bo...","[issue, performance, change, reflection, globa...","game, play, good, get, soul, run, issue, perfo...",5,0.9947,True
1,1,"This is a 'Souls-Like' through and through, if...","[this, is, souls, like, through, and, through,...","[soul, willing, take, leap, faith, good, tackl...","game, play, good, get, soul, run, issue, perfo...",5,0.5895,True
2,2,Giving this a positive review because it's gen...,"[giving, this, positive, review, because, it, ...","[give, positive, review, genuinely, great, als...","game, play, good, get, soul, run, issue, perfo...",5,0.9953,True
3,3,This game is honestly way better than I origin...,"[this, game, is, honestly, way, better, than, ...","[game, honestly, way, well, originally, expect...","game, enemy, boss, get, feel, make, soul, time...",8,0.9988,True
4,4,This game has some of the most incredible vist...,"[this, game, has, some, of, the, most, incredi...","[game, incredible, vista, see, video, game, co...","game, enemy, boss, get, feel, make, soul, time...",8,0.8843,True
...,...,...,...,...,...,...,...,...
6632,6220,This game is absolute trash..\n\nYou should pl...,"[this, game, is, absolute, trash, you, should,...","[game, absolute, trash, play, literally, game]","game, play, good, get, soul, run, issue, perfo...",5,0.9869,False
6633,6221,after hour and half to learn controls and play...,"[after, hour, and, half, to, learn, controls, ...","[hour, half, learn, control, playing, go, love]","game, play, good, get, soul, run, issue, perfo...",5,0.9887,True
6634,6222,I'm gooning in Umbral Realm,"[gooning, in, umbral, realm]","[goone, umbral, realm]","game, enemy, boss, get, feel, make, soul, time...",8,0.9741,True
6635,6223,"released 5 minutes late, actually seething","[released, minutes, late, actually, seething]","[release, minute, late, actually, seethe]","game, play, good, get, soul, run, issue, perfo...",5,0.9843,False


In [None]:
file_name = 'lords-lda.csv'

lords_reviews.to_csv(f'{file_name}', index = False)
output_filepath = f'/Users/javier/Desktop/cmgt-597/lords-of-the-fallen/{file_name}'
lords_reviews.to_csv(output_filepath, index = False)

# Data Analysis

In [19]:
lords_lda_final = pd.read_csv('/Users/javier/Desktop/cmgt-597/lords-of-the-fallen/lords-lda.csv')
lords_lda_final

Unnamed: 0,Document_No,review,data_words,data_ready,Keywords,Dominant_Topic,Topic_Perc_Contrib,voted_up
0,0,Having issues with performance? \n\nChange bot...,"['having', 'issues', 'with', 'performance', 'c...","['issue', 'performance', 'change', 'reflection...","fps, setting, run, hate, ultra, high, low, gra...",2,0.8939,True
1,1,"This is a 'Souls-Like' through and through, if...","['this', 'is', 'souls', 'like', 'through', 'an...","['soul', 'willing', 'take', 'leap', 'faith', '...","game, enemy, boss, soul, get, feel, good, make...",6,0.8535,True
2,2,Giving this a positive review because it's gen...,"['giving', 'this', 'positive', 'review', 'beca...","['give', 'positive', 'review', 'genuinely', 'g...","game, play, get, crash, review, good, issue, b...",5,0.9953,True
3,3,This game is honestly way better than I origin...,"['this', 'game', 'is', 'honestly', 'way', 'bet...","['game', 'honestly', 'way', 'well', 'originall...","game, enemy, boss, soul, get, feel, good, make...",6,0.9988,True
4,4,This game has some of the most incredible vist...,"['this', 'game', 'has', 'some', 'of', 'the', '...","['game', 'incredible', 'vista', 'see', 'video'...","game, enemy, boss, soul, get, feel, good, make...",6,0.8887,True
...,...,...,...,...,...,...,...,...
6220,6220,This game is absolute trash..\n\nYou should pl...,"['this', 'game', 'is', 'absolute', 'trash', 'y...","['game', 'absolute', 'trash', 'play', 'literal...","game, play, get, crash, review, good, issue, b...",5,0.9869,False
6221,6221,after hour and half to learn controls and play...,"['after', 'hour', 'and', 'half', 'to', 'learn'...","['hour', 'half', 'learn', 'control', 'playing'...","game, enemy, boss, soul, get, feel, good, make...",6,0.9887,True
6222,6222,I'm gooning in Umbral Realm,"['gooning', 'in', 'umbral', 'realm']","['goone', 'umbral', 'realm']","game, enemy, boss, soul, get, feel, good, make...",6,0.9741,True
6223,6223,"released 5 minutes late, actually seething","['released', 'minutes', 'late', 'actually', 's...","['release', 'minute', 'late', 'actually', 'see...","game, play, get, crash, review, good, issue, b...",5,0.7994,False


In [20]:
# distribution of topics across reviews

topic_count = lords_lda_final['Dominant_Topic'].value_counts()
topic_count

Dominant_Topic
6    3532
5    2316
2     169
0      97
3      44
1      23
8      17
7      15
4      12
Name: count, dtype: int64

In [21]:
# Visualize topics

import pyLDAvis
import pyLDAvis.gensim

pyLDAvis.enable_notebook()
vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)
vis

---