# t-SNE Interactive Visualization

In [1]:
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.ldamodel import LdaModel

In [2]:
# Load the previously saved artifacts from the current working directory:
# the token dictionary, the serialized (.mm) corpus and the trained LDA model.
tweets_dict = Dictionary.load('dictionary_tweets.dict')
tweets_corpus = MmCorpus('corpus_tweets.mm')
tweets_lda = LdaModel.load('lda_tweet.model')

In [3]:
# Sanity check: list each topic of the loaded model with its top weighted terms.
tweets_lda.show_topics()

[(0,
  '0.018*"year" + 0.014*"talk" + 0.013*"thei" + 0.012*"wa" + 0.010*"care" + 0.009*"love" + 0.009*"like" + 0.009*"work" + 0.009*"chang" + 0.008*"great"'),
 (1,
  '0.018*"trump" + 0.014*"hi" + 0.010*"know" + 0.009*"nation" + 0.008*"question" + 0.008*"sai" + 0.008*"white" + 0.008*"ha" + 0.008*"dai" + 0.007*"onli"'),
 (2,
  '0.016*"ai" + 0.014*"data" + 0.014*"releas" + 0.012*"spaci" + 0.011*"python" + 0.011*"open" + 0.010*"model" + 0.010*"like" + 0.010*"nlp" + 0.009*"wa"')]

In [14]:
from collections import OrderedDict
import numpy as np

# Apply the trained LDA model to the whole corpus.
lda_corpus = tweets_lda[tweets_corpus]
# Take the topic count from the model itself rather than hard-coding it, so
# the notebook stays correct if the model is retrained with another size.
num_topics = tweets_lda.num_topics

def get_doc_topic_dist(model, corpus, kwords=False):
    '''
    Project every document of `corpus` into the topic space of `model`.

    `model[doc]` yields (topic_id, weight) pairs only for the topics it
    reports for that document; every topic it leaves out is given weight 0,
    so each document becomes a dense vector of length `model.num_topics`.

    Parameters
    ----------
    model : topic model exposing `num_topics` and `model[doc]` lookup
        (e.g. a gensim `LdaModel`).
    corpus : iterable of documents accepted by `model[doc]`.
    kwords : bool, default False
        When True, also collect the index of the highest-weighted topic
        of each document.

    Returns
    -------
    (numpy.ndarray, list)
        The document-topic matrix of shape (n_docs, model.num_topics) and
        the list of dominant topic indices (empty when `kwords` is False).
    '''
    # Read the topic count from the model being transformed, not from a
    # notebook-level global that may be missing or out of sync.
    n_topics = model.num_topics

    top_dist = []
    keys = []

    for d in corpus:
        weights = np.zeros(n_topics)
        for topic_id, weight in model[d]:
            weights[topic_id] = weight
        top_dist.append(weights)
        if kwords:
            keys.append(weights.argmax())

    if not top_dist:
        # Keep a 2-D result for an empty corpus so downstream code can still
        # rely on the (n_docs, n_topics) shape.
        return np.empty((0, n_topics)), keys

    return np.array(top_dist), keys

# Document-topic matrix plus the dominant topic index of every document.
top_dist, lda_keys = get_doc_topic_dist(
    tweets_lda,
    tweets_corpus,
    kwords=True,
)

In [15]:
from sklearn.manifold import TSNE

# Reduce the document-topic matrix to two dimensions for plotting.
# random_state is fixed so repeated runs give the same embedding.
tsne_model = TSNE(
    n_components=2,
    init='pca',
    angle=0.99,
    random_state=0,
    verbose=1,
)

tsne_lda = tsne_model.fit_transform(top_dist)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 788 samples in 0.001s...
[t-SNE] Computed neighbors for 788 samples in 0.009s...
[t-SNE] Computed conditional probabilities for sample 788 / 788
[t-SNE] Mean sigma: 0.000000
[t-SNE] KL divergence after 250 iterations with early exaggeration: 46.890472
[t-SNE] Error after 1000 iterations: 0.193966


In [None]:
import bokeh.plotting as bp
from bokeh.plotting import save
from bokeh.models import HoverTool

# One colour per integer key; presumably keyed by dominant topic id
# (the values collected in lda_keys) -- confirm against the plotting code.
colormap = {
    0: 'blue',
    1: 'green',
    2: 'yellow',
}