In [None]:
!pip install -U pip setuptools wheel
!pip install -U spacy

In [None]:
!pip install spacy download en_core_web_sm

In [None]:
!pip install -U gensim

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import spacy
import random

from gensim import models,corpora
from gensim import similarities
from gensim.models.coherencemodel import CoherenceModel
from wordcloud import WordCloud

from spacy.lang.en import stop_words
from gensim import corpora


In [None]:
# Read the whole corpus file and split it into articles on the literal
# "@delimiter" marker. The encoding is passed explicitly so decoding does not
# depend on the platform's locale default.
# NOTE(review): assumes the file is UTF-8 encoded — confirm against the dataset.
with open("/kaggle/input/cnn-articles-text-lda/cnn_articles.txt","r",encoding="utf-8") as f:
    articles=f.read().split("@delimiter")

In [None]:
# Number of pieces produced by splitting the file on "@delimiter".
print("Length of articles is: ",len(articles))

In [None]:
# Peek at one randomly chosen article. random.sample returns a list, so rand_i
# is a one-element list holding the article text (not an index).
rand_i=random.sample(articles,1)
rand_i

In [None]:
# Keep only the first 20000 articles for modelling.
dataset=articles[:20000]

In [None]:
# Size of the working subset.
len(dataset)

In [None]:
# Load the small English pipeline with the parser and NER components disabled.
nlp = spacy.load('en_core_web_sm',disable=["parser","ner"])
# Add "say" to the stop-word set held on the pipeline's Defaults.
nlp.Defaults.stop_words.add("say")
def basic_filter(tokenized_doc, stop_lemmas=None):
    """Return the lemmas of the content words in a parsed document.

    A token is kept when it is alphabetic, tagged NOUN, VERB or ADJ, is not
    flagged as a stop word, and its lower-cased lemma is not in ``stop_lemmas``.

    The previous condition ``t not in stop_words`` tested a Token object against
    the imported ``stop_words`` module, so it never matched a stop word.

    Args:
        tokenized_doc: iterable of tokens exposing ``lemma_``, ``pos_``,
            ``is_alpha`` and ``is_stop``.
        stop_lemmas: optional collection of lower-case lemmas to drop. Defaults
            to ``nlp.Defaults.stop_words``, which includes the "say" entry added
            when the pipeline is set up.

    Returns:
        list of str: lemmas in document order.
    """
    if stop_lemmas is None:
        stop_lemmas = nlp.Defaults.stop_words
    keep_pos = ("NOUN", "VERB", "ADJ")
    return [
        t.lemma_
        for t in tokenized_doc
        # is_alpha already rules out punctuation and whitespace tokens.
        if t.is_alpha
        and t.pos_ in keep_pos
        and not t.is_stop
        # Checked on the lemma so inflected forms of a stop lemma are dropped too.
        and t.lemma_.lower() not in stop_lemmas
    ]

In [None]:
# Run every article through the spaCy pipeline (4 worker processes) and reduce
# each parsed document to its filtered lemma list.
tokenized_articles=[basic_filter(parsed_doc) for parsed_doc in nlp.pipe(dataset,n_process=4)]

In [None]:
# Number of lemmas kept for the article at position 5.
len(tokenized_articles[5])

In [None]:
# Number of topics requested from the LDA models trained below.
NUM_TOPICS=20

In [None]:
%%time

# Build the token <-> integer id mapping from the tokenised articles.
dictionary=corpora.Dictionary(tokenized_articles)

In [None]:
# Token whose dictionary id is looked up in the next cell.
sample_token="news"

In [None]:
# Direct token2id lookup: raises KeyError if the token is not in the dictionary.
print(f"Id for \'{sample_token}\' token: {dictionary.token2id[sample_token]}")

In [None]:
%%time
# Convert each tokenised article to its bag-of-words representation.
corpus_bow=[dictionary.doc2bow(article) for article in tokenized_articles]

In [None]:
# One bag-of-words entry per article.
print(len(corpus_bow))

In [None]:
%%time
# Baseline LDA on the unfiltered dictionary; only the topic count and the
# random seed are set explicitly.
ldamodel=models.LdaModel(corpus=corpus_bow,
                                num_topics=NUM_TOPICS,
                                id2word=dictionary,
                                random_state=1)

In [None]:
# Inspect the topics of the baseline model.
ldamodel.print_topics()

In [None]:
# Vocabulary size before filtering.
len(dictionary)

In [None]:
# Prune the vocabulary in place with no_below=5 and no_above=0.6.
# The dictionary object is mutated, so corpus_bow built earlier refers to the
# pre-filter vocabulary; the bag-of-words corpus is rebuilt in a later cell.
dictionary.filter_extremes(no_below=5,no_above=0.6)


In [None]:
# Vocabulary size after filtering.
len(dictionary)

In [None]:
# Rebuild the bag-of-words corpus against the filtered dictionary.
corpus_bow_w_pos_filtered=[dictionary.doc2bow(article) for article in tokenized_articles]

In [None]:
# Still one entry per article.
len(corpus_bow_w_pos_filtered)

In [None]:
%%time
# Retrain on the filtered corpus with 10 passes and explicit priors
# (eta=0.1, alpha=0.1); this replaces the baseline model bound to ldamodel.
ldamodel=models.LdaModel(corpus=corpus_bow_w_pos_filtered,
                        num_topics=NUM_TOPICS,
                        id2word=dictionary,
                        random_state=1,
                        passes=10,
                        eta=0.1,
                        alpha=0.1)

In [None]:
# Inspect the topics of the retrained model.
ldamodel.print_topics()

In [None]:
# Show the eta and alpha values stored on the trained model.
print(ldamodel.eta)
print(ldamodel.alpha)

In [None]:
def show_articles_models(article_idx=10):
    """Print a 300-character preview of one article and return its topic mix.

    The (topic_id, probability) pairs come from the module-level ``ldamodel``
    applied to the article's filtered bag-of-words and are ordered from the
    most to the least probable topic.
    """
    preview=dataset[article_idx][:300]
    print(preview)
    print("--------------------------\n")
    article_bow=corpus_bow_w_pos_filtered[article_idx]
    ranked=list(ldamodel.get_document_topics(article_bow))
    # Ascending sort followed by a reversal puts the highest probability first.
    ranked.sort(key=lambda pair:pair[1])
    ranked.reverse()
    return ranked

In [None]:
# Topic mix of the article at position 10.
topics=show_articles_models(10)

In [None]:
# For each topic of the article, print its (word, weight) pairs ordered by
# descending weight.
for pair in topics:
    print(sorted(ldamodel.show_topic(pair[0]),key=lambda tup:tup[1])[::-1])

In [None]:
# Topic mix of the article at position 100.
topics=show_articles_models(100)

In [None]:
# Words of the article's third-ranked topic; raises IndexError when the article
# has fewer than three topics in `topics`.
ldamodel.show_topic(topics[2][0])

In [None]:
def get_top_topics(article_idx,min_topic_prob):
    """Tabulate the dominant topics of one article together with their words.

    Args:
        article_idx: position of the article in ``corpus_bow_w_pos_filtered``.
        min_topic_prob: passed to ``get_document_topics`` as
            ``minimum_probability``.

    Returns:
        pandas.DataFrame with a "Major Topics" column of (topic_id, probability)
        pairs, most probable first, and a "Topic Words" column listing the
        words ``ldamodel.show_topic`` reports for that topic.
    """
    article_bow=corpus_bow_w_pos_filtered[article_idx]
    topic_prob_pairs=list(ldamodel.get_document_topics(article_bow,
                                                       minimum_probability=min_topic_prob))
    # Ascending sort then reverse: highest probability first.
    topic_prob_pairs.sort(key=lambda pair:pair[1])
    topic_prob_pairs.reverse()

    topic_words=[]
    for pair in topic_prob_pairs:
        word_weights=ldamodel.show_topic(pair[0])
        topic_words.append([entry[0] for entry in word_weights])

    return pd.DataFrame({"Major Topics":topic_prob_pairs,"Topic Words":topic_words})
    

In [None]:
# Widen the column display so the topic word lists are not truncated.
pd.set_option("max_colwidth",600)
# Shared settings reused by the following cells.
snippet_length=300
min_topic_prob=0.30
article_idx=1
# Preview the article, then list its topics at or above min_topic_prob.
print(dataset[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
# Same inspection for the article at position 10.
article_idx=10
print(dataset[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
# Same inspection for the article at position 1000.
article_idx=1000
print(dataset[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
# Same inspection for the article at position 10000.
article_idx=10000
print(dataset[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
import pyLDAvis
# Newer pyLDAvis releases ship the gensim adapter as `gensim_models`; older
# ones expose it as `gensim`. Try the new name first and fall back.
try:
    import pyLDAvis.gensim_models as gensimvis
except ImportError:
    import pyLDAvis.gensim as gensimvis
pyLDAvis.enable_notebook()
# Interactive topic visualisation for ldamodel on the filtered corpus.
gensimvis.prepare(ldamodel, corpus_bow_w_pos_filtered, dictionary)

In [None]:
import os
os.makedirs("file",exist_ok=True)
def render_word_cloud(model,rows,cols,max_words,out_path="file/WordCloud.jpg"):
    """Draw one word cloud per topic on a rows x cols grid and save the figure.

    Args:
        model: topic model exposing ``show_topic(topic_id, topn=...)``.
        rows, cols: grid shape; panel i shows topic i.
        max_words: maximum words per cloud, also used as ``topn`` so values
            above 10 are no longer capped by ``show_topic``'s default.
        out_path: where the figure is written; the parent directory is created
            if it does not exist.

    Panels beyond the model's ``num_topics`` (when the model has that
    attribute) are left blank instead of raising.
    """
    word_cloud=WordCloud(background_color="white",max_words=max_words,prefer_horizontal=1.0)
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
    fig,axes=plt.subplots(rows,cols,figsize=(15,15),squeeze=False)
    topic_count=getattr(model,"num_topics",None)

    for i,ax in enumerate(axes.flatten()):
        ax.axis("off")
        if topic_count is not None and i>=topic_count:
            continue
        topic_words=dict(model.show_topic(i,topn=max_words))
        word_cloud.generate_from_frequencies(topic_words)
        # Draw on the explicit Axes rather than relying on pyplot's current axes.
        ax.imshow(word_cloud,interpolation="bilinear")
        ax.set_title("Topic {id}".format(id=i))

    out_dir=os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir,exist_ok=True)
    fig.savefig(out_path)
    plt.show()
    

In [None]:
# 3x3 grid: shows topics 0-8 of ldamodel.
render_word_cloud(ldamodel,3,3,10)

In [None]:
# Dense similarity index over the articles' topic vectors. The vectors live in
# topic space, so the feature count is the number of topics; sizing the matrix
# by len(dictionary) allocates one column per vocabulary term for no benefit.
lda_index=similarities.MatrixSimilarity(ldamodel[corpus_bow_w_pos_filtered],num_features=ldamodel.num_topics)
def get_similar_articles(index, model, article_bow, top_n=5, first_m_words=300, skip_first=True):
    """Return the indexed articles most similar to a bag-of-words query.

    Args:
        index: similarity index queried with the transformed query vector.
        model: topic model used to transform the query (``model[article_bow]``).
        article_bow: bag-of-words of the query text.
        top_n: number of hits to return.
        first_m_words: length of the preview. The slice is taken on the article
            string, so it counts characters despite the name.
        skip_first: when True (the default, matching the previous behaviour)
            the best-scoring hit is discarded on the assumption that it is the
            query article itself. Pass False for text that is not part of the
            index, otherwise its best match is lost.

    Returns:
        list of (article position, similarity, preview) tuples, best first.
    """
    similar_docs = index[model[article_bow]]
    ranked = sorted(enumerate(similar_docs), key=lambda item: -item[1])
    start = 1 if skip_first else 0
    top_n_docs = ranked[start:start + top_n]
    return [(doc_id, score, articles[doc_id][:first_m_words]) for doc_id, score in top_n_docs]

In [None]:
%%time
# Coherence of ldamodel using the 'u_mass' measure.
coherence_model_lda = CoherenceModel(model=ldamodel, texts=tokenized_articles, dictionary=dictionary, coherence='u_mass')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)

In [None]:
# Preview one article and list the articles most similar to it. The query must
# use the same index as the preview; the earlier `article_idx` variable still
# holds a value from a previous cell and would query a different article.
article_index=0
print(dataset[article_index][:snippet_length],"\n")
get_similar_articles(lda_index,ldamodel,corpus_bow_w_pos_filtered[article_index])

In [None]:
# Same lookup for the article at position 100, again querying with
# `article_index` (not the stale `article_idx`) so preview and query agree.
article_index=100
print(dataset[article_index][:snippet_length],"\n")
get_similar_articles(lda_index,ldamodel,corpus_bow_w_pos_filtered[article_index])

In [None]:
# Unseen query text: parse it, filter it to lemmas, then map it to a
# bag-of-words vector over the filtered dictionary.
test_article = "Capricorn Business Acquisitions Inc. (TSXV: CAK.H) (the “Company“)  is pleased to announce that its board has approved the issuance of 70,000 stock options (“Stock Options“) to directors on April 19, 2020."
article_tokens=basic_filter(nlp(test_article))
article_bow=dictionary.doc2bow(article_tokens)

In [None]:
# NOTE(review): get_similar_articles discards the top-ranked hit; for this
# unseen query that means the best match is not shown.
get_similar_articles(lda_index,ldamodel,article_bow)

In [None]:
# Second unseen query: parse, filter, vectorise, then look up similar articles.
# NOTE(review): get_similar_articles discards the top-ranked hit; for an unseen
# query that means the best match is not shown.
test_article = "DEA agent sentenced to 12 years in prison for conspiring with Colombian drug cartel."
article_tokens=basic_filter(nlp(test_article))
article_bow=dictionary.doc2bow(article_tokens)
get_similar_articles(lda_index,ldamodel,article_bow)

In [None]:
def evaluate_perplexity(corpus_bow_w_pos_filtered, dictionary, num_topics_list):
    """Train one LDA model per topic count and return each model's perplexity.

    Args:
        corpus_bow_w_pos_filtered: bag-of-words corpus used both for training
            and for scoring.
        dictionary: id-to-word mapping passed to the model as ``id2word``.
        num_topics_list: topic counts to try.

    Returns:
        list of float: one perplexity per entry of ``num_topics_list``
        (lower is better).
    """
    perplexities = []
    for num_topics in num_topics_list:
        lda_model = models.LdaModel(corpus=corpus_bow_w_pos_filtered,
                        num_topics=num_topics,
                        id2word=dictionary,
                        random_state=1,
                        passes=10,
                        eta=0.1,
                        alpha=0.1)
        # log_perplexity() returns the per-word likelihood bound (higher is
        # better), not a perplexity. Convert it with perplexity = 2^(-bound) so
        # that "smaller is better" holds for the values returned here.
        per_word_bound = lda_model.log_perplexity(corpus_bow_w_pos_filtered)
        perplexity = 2 ** (-per_word_bound)
        perplexities.append(perplexity)
        print(f'Number of topics: {num_topics}, Perplexity: {perplexity}')
    return perplexities

# Example usage
num_topics_list = [5, 10, 15, 20, 25]
perplexities = evaluate_perplexity(corpus_bow_w_pos_filtered, dictionary, num_topics_list)
# Lowest perplexity wins. The score is computed on the training corpus itself.
best_num_topics = num_topics_list[np.argmin(perplexities)]
print(f'Best number of topics based on perplexity: {best_num_topics}')


In [None]:
from gensim.models.coherencemodel import CoherenceModel

def evaluate_topic_models(corpus_bow_w_pos_filtered, dictionary, num_topics_list, texts=None, random_state=1):
    """Train one LDA model per topic count and return the c_v coherence of each.

    Args:
        corpus_bow_w_pos_filtered: bag-of-words corpus to train on.
        dictionary: id-to-word mapping for the corpus.
        num_topics_list: topic counts to try.
        texts: tokenised documents for the coherence computation; defaults to
            the module-level ``tokenized_articles``.
        random_state: seed passed to every model so repeated runs give the same
            scores (the other models in this notebook are seeded with 1 too).

    Returns:
        list of float: coherence score per entry of ``num_topics_list``.
    """
    if texts is None:
        texts = tokenized_articles
    coherence_scores = []
    for num_topics in num_topics_list:
        lda_model = models.LdaModel(corpus_bow_w_pos_filtered, num_topics=num_topics, id2word=dictionary,
                                    passes=15, random_state=random_state)
        coherence_model = CoherenceModel(model=lda_model, texts=texts, dictionary=dictionary, coherence='c_v')
        coherence_score = coherence_model.get_coherence()
        coherence_scores.append(coherence_score)
        print(f'Number of topics: {num_topics}, Coherence Score: {coherence_score}')
    return coherence_scores

num_topics_list = [5, 10, 15, 20, 25]
coherence_scores = evaluate_topic_models(corpus_bow_w_pos_filtered, dictionary, num_topics_list)
# Highest coherence wins.
best_num_topics = num_topics_list[np.argmax(coherence_scores)]
print(f'Best number of topics: {best_num_topics}')


In [None]:
from gensim.models import HdpModel


# Hierarchical Dirichlet Process model on the filtered corpus. Seeded like the
# LDA models in this notebook so the topics are reproducible between runs.
hdp_model = HdpModel(corpus_bow_w_pos_filtered, id2word=dictionary, random_state=1)

# Print the words of each topic returned by show_topics.
for topic_id, topic in hdp_model.show_topics(formatted=False):
    print(f"Topic {topic_id}:")
    print(" ".join([word for word, _ in topic]))


# c_v coherence of the HDP topics.
coherence_model = CoherenceModel(model=hdp_model, texts=tokenized_articles, dictionary=dictionary, coherence='c_v')
coherence_score = coherence_model.get_coherence()
print(f'Coherence Score: {coherence_score}')

In [None]:
def calculate_coherence_score(NUM_TOPICS=20, alpha=0.1, eta=0.1):
    """Train an LDA model on the filtered CNN corpus and score it with c_v.

    Args:
        NUM_TOPICS: number of topics for the model.
        alpha: value passed to the model as ``alpha``.
        eta: value passed to the model as ``eta``.

    Returns:
        tuple: (coherence score, trained model).
    """
    lda_settings = dict(
        corpus=corpus_bow_w_pos_filtered,
        num_topics=NUM_TOPICS,
        id2word=dictionary,
        random_state=1,
        passes=10,
        eta=eta,
        alpha=alpha,
    )
    lda_model = models.LdaModel(**lda_settings)

    # Score the trained model against the tokenised documents.
    scorer = CoherenceModel(model=lda_model, texts=tokenized_articles,
                            dictionary=dictionary, coherence='c_v')
    return scorer.get_coherence(), lda_model

In [None]:
# Topic count used by the hyper-parameter search below.
NUM_TOPICS=20

In [None]:
# Grid search over the alpha / eta ("beta") priors.
alpha_list = ['symmetric',0.3,0.5,0.7]
beta_list = ['auto',0.3,0.5,0.7]

for alpha in alpha_list:
    for beta in beta_list:
        # calculate_coherence_score returns (score, model); unpack it so that
        # only the score is printed rather than the whole tuple.
        coherence_score, _candidate_model = calculate_coherence_score(NUM_TOPICS, alpha, beta)
        print(f"alpha : {alpha} ; beta : {beta} ; Score : {coherence_score}")

In [None]:
# Train the configuration kept after the search (symmetric alpha, eta=0.7) and
# keep both the score and the model.
coherence_score,lda_model= calculate_coherence_score(NUM_TOPICS=20, alpha="symmetric", eta=0.7)
print("Coherence score is: ",coherence_score)

In [None]:
def show_articles_models(article_idx=10):
    """Print a 300-character preview of one article and return its topic mix.

    Returns the (topic_id, probability) pairs of the article, most probable
    topic first.

    NOTE(review): this reads the module-level ``ldamodel``, not the
    ``lda_model`` returned by calculate_coherence_score in the preceding cell —
    confirm which model is intended here.
    """
    preview=dataset[article_idx][:300]
    print(preview)
    print("--------------------------\n")
    article_bow=corpus_bow_w_pos_filtered[article_idx]
    ranked=list(ldamodel.get_document_topics(article_bow))
    # Ascending sort followed by a reversal puts the highest probability first.
    ranked.sort(key=lambda pair:pair[1])
    ranked.reverse()
    return ranked
topics=show_articles_models()
print(topics)

In [None]:
def get_top_topics(article_idx,min_topic_prob):
    """Tabulate the dominant topics of one article together with their words.

    Args:
        article_idx: position of the article in ``corpus_bow_w_pos_filtered``.
        min_topic_prob: passed to ``get_document_topics`` as
            ``minimum_probability``.

    Returns:
        pandas.DataFrame with "Major Topics" ((topic_id, probability) pairs,
        most probable first) and "Topic Words" (the words of each topic).

    NOTE(review): uses the module-level ``ldamodel`` rather than the
    ``lda_model`` produced after the hyper-parameter search — confirm.
    """
    article_bow=corpus_bow_w_pos_filtered[article_idx]
    topic_prob_pairs=list(ldamodel.get_document_topics(article_bow,
                                                       minimum_probability=min_topic_prob))
    # Ascending sort then reverse: highest probability first.
    topic_prob_pairs.sort(key=lambda pair:pair[1])
    topic_prob_pairs.reverse()

    topic_words=[]
    for pair in topic_prob_pairs:
        word_weights=ldamodel.show_topic(pair[0])
        topic_words.append([entry[0] for entry in word_weights])

    return pd.DataFrame({"Major Topics":topic_prob_pairs,"Topic Words":topic_words})

In [None]:
# Topics of article 1000 with a minimum probability of 0.12.
get_top_topics(1000,0.12)

In [None]:
# Words of the third-ranked topic in `topics`; raises IndexError when `topics`
# holds fewer than three entries.
ldamodel.show_topic(topics[2][0])

In [None]:
import pyLDAvis
# Newer pyLDAvis releases ship the gensim adapter as `gensim_models`; older
# ones expose it as `gensim`. Try the new name first and fall back.
try:
    import pyLDAvis.gensim_models as gensimvis
except ImportError:
    import pyLDAvis.gensim as gensimvis
pyLDAvis.enable_notebook()
# Interactive topic visualisation for ldamodel on the filtered corpus.
gensimvis.prepare(ldamodel, corpus_bow_w_pos_filtered, dictionary)

In [None]:
def render_word_cloud(model,rows,cols,max_words,out_path="file/WordCloud.jpg"):
    """Draw one word cloud per topic on a rows x cols grid and save the figure.

    Args:
        model: topic model exposing ``show_topic(topic_id, topn=...)``.
        rows, cols: grid shape; panel i shows topic i.
        max_words: maximum words per cloud, also used as ``topn`` so values
            above 10 are no longer capped by ``show_topic``'s default.
        out_path: where the figure is written; the parent directory is created
            if it does not exist.

    Panels beyond the model's ``num_topics`` (when the model has that
    attribute) are left blank instead of raising.
    """
    import os

    word_cloud=WordCloud(background_color="white",max_words=max_words,prefer_horizontal=1.0)
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
    fig,axes=plt.subplots(rows,cols,figsize=(15,15),squeeze=False)
    topic_count=getattr(model,"num_topics",None)

    for i,ax in enumerate(axes.flatten()):
        ax.axis("off")
        if topic_count is not None and i>=topic_count:
            continue
        topic_words=dict(model.show_topic(i,topn=max_words))
        word_cloud.generate_from_frequencies(topic_words)
        # Draw on the explicit Axes rather than relying on pyplot's current axes.
        ax.imshow(word_cloud,interpolation="bilinear")
        ax.set_title("Topic {id}".format(id=i))

    out_dir=os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir,exist_ok=True)
    fig.savefig(out_path)
    plt.show()
    

In [None]:
# 4x4 grid: shows topics 0-15 of ldamodel.
render_word_cloud(ldamodel,4,4,10)

In [None]:
# Dense similarity index over the articles' topic vectors. The vectors live in
# topic space, so the feature count is the number of topics; sizing the matrix
# by len(dictionary) allocates one column per vocabulary term for no benefit.
lda_index=similarities.MatrixSimilarity(ldamodel[corpus_bow_w_pos_filtered],num_features=ldamodel.num_topics)

def get_similar_articles(index, model, article_bow, top_n=5, first_m_words=300, skip_first=True):
    """Return the indexed articles most similar to a bag-of-words query.

    Args:
        index: similarity index queried with the transformed query vector.
        model: topic model used to transform the query (``model[article_bow]``).
        article_bow: bag-of-words of the query text.
        top_n: number of hits to return.
        first_m_words: length of the preview. The slice is taken on the article
            string, so it counts characters despite the name.
        skip_first: when True (the default, matching the previous behaviour)
            the best-scoring hit is discarded on the assumption that it is the
            query article itself. Pass False for text that is not part of the
            index, otherwise its best match is lost.

    Returns:
        list of (article position, similarity, preview) tuples, best first.
    """
    similar_docs = index[model[article_bow]]
    ranked = sorted(enumerate(similar_docs), key=lambda item: -item[1])
    start = 1 if skip_first else 0
    top_n_docs = ranked[start:start + top_n]
    return [(doc_id, score, articles[doc_id][:first_m_words]) for doc_id, score in top_n_docs]

In [None]:
%%time
# Coherence of ldamodel using the 'c_v' measure.
coherence_model_lda = CoherenceModel(model=ldamodel, texts=tokenized_articles, dictionary=dictionary, coherence='c_v')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)


In [None]:
%%time
# Coherence of ldamodel using the 'u_mass' measure.
coherence_model_lda = CoherenceModel(model=ldamodel, texts=tokenized_articles, dictionary=dictionary, coherence='u_mass')
coherence_lda = coherence_model_lda.get_coherence()
print('\nCoherence Score: ', coherence_lda)

In [None]:
# Preview one article and list the articles most similar to it. The query must
# use the same index as the preview; `article_idx` still holds a value set in
# an earlier cell and would query a different article.
article_index=0
print(dataset[article_index][:snippet_length],"\n")
get_similar_articles(lda_index,ldamodel,corpus_bow_w_pos_filtered[article_index])

In [None]:
# Unseen query: parse, filter, vectorise, then look up similar articles.
# NOTE(review): get_similar_articles discards the top-ranked hit; for an unseen
# query that means the best match is not shown.
test_article = "DEA agent sentenced to 12 years in prison for conspiring with Colombian drug cartel."
article_tokens=basic_filter(nlp(test_article))
article_bow=dictionary.doc2bow(article_tokens)
get_similar_articles(lda_index,ldamodel,article_bow)

In [None]:
# Persist lda_model (the model returned by calculate_coherence_score).
# NOTE(review): the inspection cells above read `ldamodel`, a different
# object — confirm that this is the model meant to be saved.
lda_model.save('/kaggle/working/file/lda_model.gensim')

# **Second Dataset for Topic Modelling**


In [None]:
# Load the research-articles CSV and display it.
df_articles=pd.read_csv('/kaggle/input/topic-modeling-for-research-articles/test.csv')
df_articles

In [None]:
# Frequency of each distinct row of the frame.
df_articles.value_counts()

In [None]:
# Missing values per column.
df_articles.isna().sum()

In [None]:
# Drop the TITLE and ID columns; the original frame is left untouched.
df_articles_create=df_articles.drop(columns=["TITLE","ID"])

In [None]:
# Last two rows of the reduced frame.
df_articles_create.tail(2)

In [None]:
# Abstract texts as a plain Python list.
research_articles=list(df_articles_create["ABSTRACT"])

In [None]:
# First ten abstracts.
research_articles[:10]

In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Copy spaCy's English stop-word list into a separate set that basic_filter
# (defined below) reads; further entries can be added to this copy.
custom_stop_words = set(STOP_WORDS)

In [None]:
def basic_filter(tokenized_doc, stop_lemmas=None):
    """Return the lemmas of the content words in a parsed document.

    A token is kept when it is alphabetic, tagged NOUN, VERB or ADJ, is not
    flagged as a stop word, and its lower-cased lemma is not in ``stop_lemmas``.

    The previous condition ``t not in custom_stop_words`` tested a Token object
    against a set of strings, which never matches, so no stop word was removed.

    Args:
        tokenized_doc: iterable of tokens exposing ``lemma_``, ``pos_``,
            ``is_alpha`` and ``is_stop``.
        stop_lemmas: optional collection of lower-case lemmas to drop. Defaults
            to the module-level ``custom_stop_words``.

    Returns:
        list of str: lemmas in document order.
    """
    if stop_lemmas is None:
        stop_lemmas = custom_stop_words
    keep_pos = ("NOUN", "VERB", "ADJ")
    return [
        t.lemma_
        for t in tokenized_doc
        # is_alpha already rules out punctuation and whitespace tokens.
        if t.is_alpha
        and t.pos_ in keep_pos
        and not t.is_stop
        # Checked on the lemma so inflected forms of a stop lemma are dropped too.
        and t.lemma_.lower() not in stop_lemmas
    ]

In [None]:
# Run every abstract through the spaCy pipeline (4 worker processes) and reduce
# each parsed document to its filtered lemma list.
tokenized_research_articles=[basic_filter(parsed_doc) for parsed_doc in nlp.pipe(research_articles,n_process=4)]


In [None]:
%%time
# Build the token <-> integer id mapping for the research abstracts.
dictionary_research_artcles=corpora.Dictionary(tokenized_research_articles)

In [None]:
# Look up the id of a sample token; the direct token2id lookup raises KeyError
# if the token is not in the dictionary.
sample_token_research="science"
print(f"Id for \'{sample_token_research}\' token: {dictionary_research_artcles.token2id[sample_token_research]}")

In [None]:
%%time
# Bag-of-words corpus built against the unfiltered dictionary.
corpus_bow_research_articles=[dictionary_research_artcles.doc2bow(article) for article in tokenized_research_articles]

In [None]:
# One bag-of-words entry per abstract.
print(len(corpus_bow_research_articles))

In [None]:
# Prune the vocabulary in place (no_below=5, no_above=0.6), then rebuild the
# corpus against the filtered dictionary. corpus_bow_research_articles from the
# earlier cell was built before this filtering.
dictionary_research_artcles.filter_extremes(no_below=5,no_above=0.6)
corpus_bow_research_articles_filtered=[dictionary_research_artcles.doc2bow(article) for article in tokenized_research_articles]

In [None]:
def evaluate_perplexity(corpus_bow_w_pos_filtered, dictionary, num_topics_list):
    """Train one LDA model per topic count and return each model's perplexity.

    Args:
        corpus_bow_w_pos_filtered: bag-of-words corpus used both for training
            and for scoring.
        dictionary: id-to-word mapping passed to the model as ``id2word``.
        num_topics_list: topic counts to try.

    Returns:
        list of float: one perplexity per entry of ``num_topics_list``
        (lower is better).
    """
    perplexities = []
    for num_topics in num_topics_list:
        lda_model = models.LdaModel(corpus=corpus_bow_w_pos_filtered,
                        num_topics=num_topics,
                        id2word=dictionary,
                        random_state=1,
                        passes=10,
                        eta=0.1,
                        alpha=0.1)
        # log_perplexity() returns the per-word likelihood bound (higher is
        # better), not a perplexity. Convert it with perplexity = 2^(-bound) so
        # that "smaller is better" holds for the values returned here.
        per_word_bound = lda_model.log_perplexity(corpus_bow_w_pos_filtered)
        perplexity = 2 ** (-per_word_bound)
        perplexities.append(perplexity)
        print(f'Number of topics: {num_topics}, Perplexity: {perplexity}')
    return perplexities

# Example usage
num_topics_list = [3, 6, 9, 12, 15]
perplexities = evaluate_perplexity(corpus_bow_research_articles_filtered, dictionary_research_artcles, num_topics_list)
# Lowest perplexity wins. The score is computed on the training corpus itself.
best_num_topics = num_topics_list[np.argmin(perplexities)]
print(f'Best number of topics based on perplexity: {best_num_topics}')

In [None]:
from gensim.models.coherencemodel import CoherenceModel

def evaluate_topic_models(corpus_bow_w_pos_filtered, dictionary, num_topics_list, texts=None, random_state=1):
    """Train one LDA model per topic count and return the c_v coherence of each.

    Args:
        corpus_bow_w_pos_filtered: bag-of-words corpus to train on.
        dictionary: id-to-word mapping for the corpus.
        num_topics_list: topic counts to try.
        texts: tokenised documents for the coherence computation; defaults to
            the module-level ``tokenized_research_articles``.
        random_state: seed passed to every model so repeated runs give the same
            scores (the other models in this notebook are seeded with 1 too).

    Returns:
        list of float: coherence score per entry of ``num_topics_list``.
    """
    if texts is None:
        texts = tokenized_research_articles
    coherence_scores = []
    for num_topics in num_topics_list:
        lda_model = models.LdaModel(corpus_bow_w_pos_filtered, num_topics=num_topics, id2word=dictionary,
                                    passes=15, random_state=random_state)
        coherence_model = CoherenceModel(model=lda_model, texts=texts, dictionary=dictionary, coherence='c_v')
        coherence_score = coherence_model.get_coherence()
        coherence_scores.append(coherence_score)
        print(f'Number of topics: {num_topics}, Coherence Score: {coherence_score}')
    return coherence_scores

num_topics_list = [3, 6, 9, 12, 15]
coherence_scores = evaluate_topic_models(corpus_bow_research_articles_filtered, dictionary_research_artcles, num_topics_list)
# Highest coherence wins.
best_num_topics = num_topics_list[np.argmax(coherence_scores)]
print(f'Best number of topics: {best_num_topics}')


In [None]:
# Second coherence sweep over larger topic counts (18-30); best_num_topics is
# overwritten with the winner of this sweep only.
num_topics_list = [30, 27, 18, 21, 24]
coherence_scores = evaluate_topic_models(corpus_bow_research_articles_filtered, dictionary_research_artcles, num_topics_list)
best_num_topics = num_topics_list[np.argmax(coherence_scores)]
print(f'Best number of topics: {best_num_topics}')

In [None]:
from gensim.models import HdpModel


# Hierarchical Dirichlet Process model on the filtered research corpus. Seeded
# like the LDA models in this notebook so the topics are reproducible.
hdp_model = HdpModel(corpus_bow_research_articles_filtered, id2word=dictionary_research_artcles, random_state=1)

# Print the words of each topic returned by show_topics.
for topic_id, topic in hdp_model.show_topics(formatted=False):
    print(f"Topic {topic_id}:")
    print(" ".join([word for word, _ in topic]))


# c_v coherence of the HDP topics.
coherence_model = CoherenceModel(model=hdp_model, texts=tokenized_research_articles, dictionary=dictionary_research_artcles, coherence='c_v')
coherence_score = coherence_model.get_coherence()
print(f'Coherence Score: {coherence_score}')

In [None]:
import pyLDAvis
# Newer pyLDAvis releases ship the gensim adapter as `gensim_models`; older
# ones expose it as `gensim`. Try the new name first and fall back.
try:
    import pyLDAvis.gensim_models as gensimvis
except ImportError:
    import pyLDAvis.gensim as gensimvis
pyLDAvis.enable_notebook()
# NOTE(review): this passes the HDP model, not an LDA model — confirm that the
# installed pyLDAvis adapter supports HdpModel.
gensimvis.prepare(hdp_model, corpus_bow_research_articles_filtered, dictionary_research_artcles)

In [None]:
def calculate_coherence_score(NUM_TOPICS=6, alpha=0.1, eta=0.1):
    """Train an LDA model on the filtered research corpus and score it with c_v.

    Args:
        NUM_TOPICS: number of topics for the model.
        alpha: value passed to the model as ``alpha``.
        eta: value passed to the model as ``eta``.

    Returns:
        tuple: (coherence score, trained model).
    """
    lda_settings = dict(
        corpus=corpus_bow_research_articles_filtered,
        num_topics=NUM_TOPICS,
        id2word=dictionary_research_artcles,
        random_state=1,
        passes=10,
        eta=eta,
        alpha=alpha,
    )
    lda_model = models.LdaModel(**lda_settings)

    # Score the trained model against the tokenised abstracts.
    scorer = CoherenceModel(model=lda_model, texts=tokenized_research_articles,
                            dictionary=dictionary_research_artcles, coherence='c_v')
    return scorer.get_coherence(), lda_model


In [None]:
# Topic count used for the research-articles models below.
NUM_TOPICS=6

In [None]:
# Grid search over the alpha / eta ("beta") priors. lda_model is overwritten on
# every iteration, so after the loop it holds the last combination tried.
alpha_list = ['symmetric',0.3,0.5,0.7]
beta_list = ['auto',0.3,0.5,0.7]

for alpha in alpha_list:
        for beta in beta_list:
            coherence_score,lda_model = calculate_coherence_score(NUM_TOPICS, alpha, beta)
            print(f"alpha : {alpha} ; beta : {beta} ; Score : {coherence_score}")

In [None]:
# Train the configuration kept after the search (alpha=0.7, eta=0.7); this is
# the lda_model used by the remaining cells.
coherence_score,lda_model= calculate_coherence_score(NUM_TOPICS=6, alpha=0.7, eta=0.7)
print("Coherence score is: ",coherence_score)

In [None]:
import pyLDAvis
# Newer pyLDAvis releases ship the gensim adapter as `gensim_models`; older
# ones expose it as `gensim`. Try the new name first and fall back.
try:
    import pyLDAvis.gensim_models as gensimvis
except ImportError:
    import pyLDAvis.gensim as gensimvis
pyLDAvis.enable_notebook()
# Interactive topic visualisation for lda_model on the research corpus.
gensimvis.prepare(lda_model, corpus_bow_research_articles_filtered, dictionary_research_artcles)

In [None]:
def show_articles_models(article_idx=10):
    """Print a 300-character preview of one abstract and return its topic mix.

    The (topic_id, probability) pairs come from the module-level ``lda_model``
    applied to the abstract's filtered bag-of-words, most probable topic first.
    """
    preview=research_articles[article_idx][:300]
    print(preview)
    print("--------------------------\n")
    article_bow=corpus_bow_research_articles_filtered[article_idx]
    ranked=list(lda_model.get_document_topics(article_bow))
    # Ascending sort followed by a reversal puts the highest probability first.
    ranked.sort(key=lambda pair:pair[1])
    ranked.reverse()
    return ranked
topics=show_articles_models()

In [None]:
# For each topic of the abstract, print its (word, weight) pairs ordered by
# descending weight.
for pair in topics:
    print(sorted(lda_model.show_topic(pair[0]),key=lambda tup:tup[1])[::-1])

In [None]:
import os
os.makedirs("file",exist_ok=True)
def render_word_cloud(model,rows,cols,max_words,out_path="file/WordCloud.jpg"):
    """Draw one word cloud per topic on a rows x cols grid and save the figure.

    Args:
        model: topic model exposing ``show_topic(topic_id, topn=...)``.
        rows, cols: grid shape; panel i shows topic i.
        max_words: maximum words per cloud, also used as ``topn`` so values
            above 10 are no longer capped by ``show_topic``'s default.
        out_path: where the figure is written; the parent directory is created
            if it does not exist.

    Panels beyond the model's ``num_topics`` (when the model has that
    attribute) are left blank instead of raising.
    """
    word_cloud=WordCloud(background_color="white",max_words=max_words,prefer_horizontal=1.0)
    # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
    fig,axes=plt.subplots(rows,cols,figsize=(15,15),squeeze=False)
    topic_count=getattr(model,"num_topics",None)

    for i,ax in enumerate(axes.flatten()):
        ax.axis("off")
        if topic_count is not None and i>=topic_count:
            continue
        topic_words=dict(model.show_topic(i,topn=max_words))
        word_cloud.generate_from_frequencies(topic_words)
        # Draw on the explicit Axes rather than relying on pyplot's current axes.
        ax.imshow(word_cloud,interpolation="bilinear")
        ax.set_title("Topic {id}".format(id=i))

    out_dir=os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir,exist_ok=True)
    fig.savefig(out_path)
    plt.show()

In [None]:
def get_top_topics(article_idx,min_topic_prob=0.25):
    """Tabulate the dominant topics of one abstract together with their words.

    Args:
        article_idx: position of the abstract in
            ``corpus_bow_research_articles_filtered``.
        min_topic_prob: passed to ``get_document_topics`` as
            ``minimum_probability``.

    Returns:
        pandas.DataFrame with "Major Topics" ((topic_id, probability) pairs,
        most probable first) and "Topic Words" (the words of each topic).
    """
    article_bow=corpus_bow_research_articles_filtered[article_idx]
    topic_prob_pairs=list(lda_model.get_document_topics(article_bow,
                                                        minimum_probability=min_topic_prob))
    # Ascending sort then reverse: highest probability first.
    topic_prob_pairs.sort(key=lambda pair:pair[1])
    topic_prob_pairs.reverse()

    topic_words=[]
    for pair in topic_prob_pairs:
        word_weights=lda_model.show_topic(pair[0])
        topic_words.append([entry[0] for entry in word_weights])

    return pd.DataFrame({"Major Topics":topic_prob_pairs,"Topic Words":topic_words})

In [None]:
# Widen the column display so the topic word lists are not truncated.
pd.set_option("max_colwidth",600)
# Shared settings reused by the following cells.
snippet_length=300
min_topic_prob=0.30
article_idx=1
# Preview the abstract, then list its topics at or above min_topic_prob.
print(research_articles[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
# Same inspection for the abstract at position 78.
article_idx=78
print(research_articles[article_idx][:snippet_length])
get_top_topics(article_idx,min_topic_prob)

In [None]:
# 2x2 grid: shows topics 0-3 of lda_model.
render_word_cloud(lda_model,2,2,10)

In [None]:
# Dense similarity index over the abstracts' topic vectors. The vectors live in
# topic space, so the feature count is the number of topics; sizing the matrix
# by the dictionary length allocates one column per vocabulary term for no
# benefit.
lda_index=similarities.MatrixSimilarity(lda_model[corpus_bow_research_articles_filtered],num_features=lda_model.num_topics)
def get_similar_articles(index, model, article_bow, top_n=5, first_m_words=300, skip_first=True):
    """Return the indexed abstracts most similar to a bag-of-words query.

    Args:
        index: similarity index queried with the transformed query vector.
        model: topic model used to transform the query (``model[article_bow]``).
        article_bow: bag-of-words of the query text.
        top_n: number of hits to return.
        first_m_words: length of the preview. The slice is taken on the
            abstract string, so it counts characters despite the name.
        skip_first: when True (the default, matching the previous behaviour)
            the best-scoring hit is discarded on the assumption that it is the
            query abstract itself. Pass False for text that is not part of the
            index, otherwise its best match is lost.

    Returns:
        list of (abstract position, similarity, preview) tuples, best first.
    """
    similar_docs = index[model[article_bow]]
    ranked = sorted(enumerate(similar_docs), key=lambda item: -item[1])
    start = 1 if skip_first else 0
    top_n_docs = ranked[start:start + top_n]
    return [(doc_id, score, research_articles[doc_id][:first_m_words]) for doc_id, score in top_n_docs]

In [None]:
# Display the similarity index object.
lda_index

In [None]:
# Preview one abstract and list the abstracts most similar to it. The query
# must use the same index as the preview; `article_idx` still holds a value set
# in an earlier cell and would query a different abstract.
article_index=0
print(research_articles[article_index][:snippet_length],"\n")
get_similar_articles(lda_index,lda_model,corpus_bow_research_articles_filtered[article_index])

In [None]:
# Query text: parse it, filter it to lemmas, map it to a bag-of-words vector,
# then look up similar abstracts.
# NOTE(review): get_similar_articles discards the top-ranked hit; if this text
# is not part of the indexed abstracts, the best match is not shown.
test_article = "We study the proportional chore division problem where a protocol wants to\ndivide an undesirable object, called chore, among $n$ different players. The\ngoal is to find an allocation such that the cost of the chore assigned to each\nplayer be at most $1/n$ of the total cost. This problem is the dual"
article_tokens=list(map(basic_filter,[nlp(test_article)]))[0]
article_bow=dictionary_research_artcles.doc2bow(article_tokens)
get_similar_articles(lda_index,lda_model,article_bow)
# list(map(basic_filter,nlp.pipe(research_articles,n_process=4)))

In [None]:
# Persist the research-articles LDA model.
lda_model.save('/kaggle/working/file/lda_model_research_articles.gensim')

In [None]:
# Take the topic count from the model instead of hard-coding it, so the loop
# stays correct if the model is retrained with a different number of topics.
num_topics = lda_model.num_topics

for i in range(num_topics):
    fig, ax = plt.subplots()
    # Extracting the words and their weights for the ith topic
    topic_words = {word: value for word, value in lda_model.show_topic(i, topn=50)}

    # Generating the word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(topic_words)

    # Display the word cloud; the title uses 1-based numbering (topic id i is
    # shown as "Topic #i+1").
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(f' \n\nTopic #{i+1}')
    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures.
    plt.close(fig)