### Thesis ADS: Rosa Lucassen

# Loading the data

In [1]:
import pickle
import pandas as pd
import numpy as np
from numpy import array

import spacy
import stop_words
nlp = spacy.load("en_core_web_sm")
nlp.vocab["covid"].is_stop = True
nlp.vocab["corona"].is_stop = True
nlp.vocab["covid19"].is_stop = True
nlp.vocab["covid-19"].is_stop = True
nlp.vocab["coronavirus"].is_stop = True

import re
import emoji
from collections import Counter
from scipy.stats import chi2_contingency
from urllib.parse import urlparse
from nltk.stem.porter import PorterStemmer

import gensim
from gensim.models import Phrases
from gensim.corpora import Dictionary
from gensim.models.wrappers import LdaMallet
from gensim.models.coherencemodel import CoherenceModel
from gensim.models.ldamodel import LdaModel
from gensim.corpora.dictionary import Dictionary

import logging
import pyLDAvis.gensim
import json

import warnings
warnings.filterwarnings('ignore')  # To ignore all warnings that arise here to enhance clarity
stemmer = PorterStemmer()

# Function: Cleaning data

In [2]:
def remove_single_char(text):
    single_char_pattern = r'\s+[a-zA-Z]\s+'
    without_sc = re.sub(pattern=single_char_pattern, repl=" ", string=text)
    return without_sc

def remove_numbers(text):
    number_pattern = r'\b(?<![0-9-])(\d+)(?![0-9-])'
    without_number = re.sub(pattern=number_pattern, repl=" ", string=text)
    return without_number


def remove_urls(text):
    url_pattern = r'https?://\S+|www\.\S+'
    without_urls = re.sub(pattern=url_pattern, repl=" ", string=text)
    return without_urls

def remove_nonalpha(text):
    nonalpha_pattern = "[+$@#?~]"
    without_nonalpha = re.sub(pattern=nonalpha_pattern, repl=" ", string=text)
    return without_nonalpha
    
def remove_rt(text):
    rt_pattern = 'rt @[\w_]+: '
    without_pattern = re.sub(pattern = rt_pattern, repl=" ", string = text)
    return without_pattern 

def remove_extra_spaces(text):
    space_pattern = r'\s+'
    without_space = re.sub(pattern=space_pattern, repl=" ", string=text)
    return without_space

def strip_list_noempty(mylist):
    newlist = (item.strip() if hasattr(item, 'strip') else item for item in mylist)
    return [item for item in newlist if item != '']

# Function: BiGrams

In [3]:
from gensim.models import Phrases
def make_n_grams(docs):
    # Add bigrams and trigrams to docs (only ones that appear 20 times or more).
    bigram = Phrases(docs, min_count=1)
    for idx in range(len(docs)):
        for token in bigram[docs[idx]]:
            if '_' in token:
                # Token is a bigram, add to document.
                docs[idx].append(token)
    return docs

# Function: Lemmatizing texts

In [4]:
def pre_processer(corpus):
    corpus = [remove_urls(text) for text in corpus]
    corpus = [remove_single_char(text) for text in corpus]
    corpus = [remove_numbers(text) for text in corpus]
    corpus = [remove_rt(text) for text in corpus]
    corpus = [remove_nonalpha(text) for text in corpus]
    corpus = [text.encode("ascii", "ignore") for text in corpus]
    corpus = [text.decode() for text in corpus]
    
    my_doc = [nlp(text) for text in corpus]
    
    token_list = []
    for token in my_doc:
        token_list.append(token.text)

    # Create list of word tokens after removing stopwords
    filtered_posts = []
    for post in token_list:
        words = post.split()
        post_new = ""
        for word in words:
            if nlp.vocab[word].is_stop == False:
                post_new += " " + word
        filtered_posts.append(post_new)
    filtered_posts = [remove_extra_spaces(text) for text in filtered_posts]
    filtered_posts = strip_list_noempty(filtered_posts)
    processed_texts = [text for text in nlp.pipe(filtered_posts, 
                                              disable=["ner",
                                                       "parser"])]
    tokenized_texts = [[word.lemma_ for word in lemma_ if not word.is_punct if not word.is_stop] 
                            for lemma_ in processed_texts]
    tokenized_texts = make_n_grams(tokenized_texts)
    
    return filtered_posts, tokenized_texts

# Function: Topic modelling LDA Gensim

In [5]:
# Shows all words instead of just the 10 most popular words
def topic_modelling_results(tokenized_text, n_topics, n_iterations):
    results = []

    dictionary = Dictionary(tokenized_text) # get the vocabulary
    corpus = [dictionary.doc2bow(text) for text in tokenized_text]

    PATH_TO_MALLET = '/Users/rosalucassen/Documents/UU/DataMining/mallet-2.0.8/bin/mallet'
    N_TOPICS = n_topics
    N_ITERATIONS = n_iterations

    lda = LdaMallet(PATH_TO_MALLET,
                    corpus=corpus,
                    id2word=dictionary,
                    num_topics=N_TOPICS,
                    iterations=N_ITERATIONS)
    
    for topic in range(N_TOPICS):
        words = lda.show_topic(topic)
        topic_n_words = ' '.join([word[0] for word in words])
        result = {'Topic':str(topic), 
                    'Words':topic_n_words}
        results.append(result)
    results_DF = pd.DataFrame(results)
    pd.set_option('display.max_colwidth', None)
    return results_DF, lda, dictionary, corpus

# Function: Compute Coherence Scores

In [7]:
def compute_coherence_values(texts, start, step, limit, topn):
    dictionary = Dictionary(texts) # get the vocabulary
    dictionary.filter_extremes(no_below=5, no_above=0.5)
    corpus = [dictionary.doc2bow(text) for text in texts]

    PATH_TO_MALLET = '/Users/rosalucassen/Documents/UU/DataMining/mallet-2.0.8/bin/mallet'
    
    coherence_values = []
    model_list = []
    for num_topics in range(start, limit, step):
        model = gensim.models.wrappers.LdaMallet(PATH_TO_MALLET, corpus=corpus, num_topics=num_topics, id2word=dictionary, alpha=0.1, iterations = 400)
        model_list.append(model)
        coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v', topn=topn)
        coherence_values.append(coherencemodel.get_coherence())

    return model_list, coherence_values

## Exploring the data

In [8]:
d_test = pd.read_excel("/Users/rosalucassen/Desktop/Thesis/Code/Data/Constraint_English_Test2.xlsx", engine='openpyxl')
d_train = pd.read_excel("/Users/rosalucassen/Desktop/Thesis/Code/Data/Constraint_English_Train.xlsx", engine='openpyxl')
d_val = pd.read_excel("/Users/rosalucassen/Desktop/Thesis/Code/Data/Constraint_English_Val.xlsx", engine='openpyxl')
d_test_labeled = pd.read_excel("/Users/rosalucassen/Desktop/Thesis/Code/Data/english_test_with_labels.xlsx", engine='openpyxl')

#### Combine data

In [9]:
data_train = pd.concat([d_train, d_test_labeled, d_val])
data_train.head()

Unnamed: 0,id,tweet,label
0,1,The CDC currently reports 99031 deaths. In gen...,real
1,2,States reported 1121 deaths a small rise from ...,real
2,3,Politically Correct Woman (Almost) Uses Pandem...,fake
3,4,#IndiaFightsCorona: We have 1524 #COVID testin...,real
4,5,Populous states can generate large case counts...,real


In [9]:
data_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10700 entries, 0 to 2139
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10700 non-null  int64 
 1   tweet   10700 non-null  object
 2   label   10700 non-null  object
dtypes: int64(1), object(2)
memory usage: 334.4+ KB


In [10]:
grouped_train = data_train.groupby('label').count() 
grouped_train.head()

Unnamed: 0_level_0,id,tweet
label,Unnamed: 1_level_1,Unnamed: 2_level_1
fake,5100,5100
real,5600,5600


# Pre-processing emotion detection data

In [10]:
corpus_fake = data_train[data_train.label == "fake"]["tweet"]
corpus_real = data_train[data_train.label == "real"]["tweet"]
#corpus = data_train["tweet"]
corpus_fake, tokenized_texts_fake = pre_processer(corpus_fake)
print(corpus_fake)
corpus_real, tokenized_texts_real = pre_processer(corpus_real)
print(corpus_real)
#corpus = pre_processer_fake(corpus)





In [11]:
count = 0
for tweet in corpus_fake:
    if (sum(len(i) for i in tweet) < 200):
        count += 1
print(count)

4843


In [12]:
count = 0
for tweet in corpus_real:
    if (sum(len(i) for i in tweet) < 200):
        count += 1
print(count)

5067


## Results: Topic modelling LDA Gensim

#### Fake news

In [11]:
topics, lda_fake, dictionary, corpus_f = topic_modelling_results(tokenized_texts_fake, 8, n_iterations = 1000)
topics

Unnamed: 0,Topic,Words
0,0,claim video show post time China Facebook man share lockdown
1,1,people government home NEWS Gates number Bill lockdown Bill_Gates stay
2,2,pandemic Coronavirus covid-19 health medium social donaldtrump fact check news
3,3,virus mask spread wear day work amp stop message covid
4,4,COVID-19 die patient doctor hospital covid-19 Italy year treat image
5,5,vaccine death COVID19 COVID world start treatment call find good
6,6,COVID-19 test cure positive kill Covid-19 rate water flu test_positive
7,7,case Trump India President country state Donald report infection day


#### Real news

In [13]:
topics_real, lda_real, dictionary, corpus_r = topic_modelling_results(tokenized_texts_real, 27, n_iterations = 1000)
topics_real

Unnamed: 0,Topic,Words
0,0,case time April pm death state continue discharge confirm March
1,1,test positive testing result negative state big antibody cumulative positive_rate
2,2,RT PIB_India drharshvardhan RT_PIB_India issue PMOIndia Health RT_drharshvardhan AshwiniKChoubey ddnewslive
3,3,number test total complete yesterday bring date laboratory test_complete total_number
4,4,case total active active_case confirm_case Health confirm COVID-19 number total_number
5,5,people COVID-19 hospital person infection care today group North ICU
6,6,case death confirm COVID19Nigeria discharge case_COVID19Nigeria discharged discharged_death discharge_death Akwa
7,7,high recovery rate India low covid19 record day indiafightscorona Rate
8,8,case confirm COVID19 confirm_case States total state COVID19_Nigeria Total Nigeria
9,9,amp lab COVID19 Nigeria covid19 capacity support Lagos State TakeResponsibility


## Document topic distribution

###### Loop over documents to get topic distribution: Fake

In [12]:
transformed_docs = lda_fake.load_document_topics()

In [13]:
for i, document in enumerate(transformed_docs):
    print('Topic distributions for document {}'.format(i))
    for topic in document:
        print(topic)

Topic distributions for document 0
(0, 0.13749999999999998)
(1, 0.1208333333333333)
(2, 0.15416666666666665)
(3, 0.10416666666666664)
(4, 0.13749999999999998)
(5, 0.1208333333333333)
(6, 0.1208333333333333)
(7, 0.10416666666666664)
Topic distributions for document 1
(0, 0.10775862068965517)
(1, 0.125)
(2, 0.14224137931034483)
(3, 0.15948275862068967)
(4, 0.125)
(5, 0.125)
(6, 0.10775862068965517)
(7, 0.10775862068965517)
Topic distributions for document 2
(0, 0.12083333333333335)
(1, 0.13750000000000004)
(2, 0.13750000000000004)
(3, 0.10416666666666669)
(4, 0.10416666666666669)
(5, 0.18750000000000003)
(6, 0.10416666666666669)
(7, 0.10416666666666669)
Topic distributions for document 3
(0, 0.11693548387096774)
(1, 0.11693548387096774)
(2, 0.10080645161290322)
(3, 0.11693548387096774)
(4, 0.14919354838709678)
(5, 0.16532258064516128)
(6, 0.10080645161290322)
(7, 0.13306451612903225)
Topic distributions for document 4
(0, 0.125)
(1, 0.0946969696969697)
(2, 0.125)
(3, 0.10984848484848485)

(2, 0.11885245901639344)
(3, 0.10245901639344263)
(4, 0.11885245901639344)
(5, 0.10245901639344263)
(6, 0.15163934426229508)
(7, 0.20081967213114754)
Topic distributions for document 212
(0, 0.13942307692307693)
(1, 0.1201923076923077)
(2, 0.1201923076923077)
(3, 0.1201923076923077)
(4, 0.1201923076923077)
(5, 0.13942307692307693)
(6, 0.1201923076923077)
(7, 0.1201923076923077)
Topic distributions for document 213
(0, 0.15416666666666667)
(1, 0.12083333333333333)
(2, 0.10416666666666667)
(3, 0.10416666666666667)
(4, 0.10416666666666667)
(5, 0.1375)
(6, 0.10416666666666667)
(7, 0.17083333333333334)
Topic distributions for document 214
(0, 0.1267123287671233)
(1, 0.1267123287671233)
(2, 0.09931506849315068)
(3, 0.1404109589041096)
(4, 0.11301369863013698)
(5, 0.09931506849315068)
(6, 0.08561643835616438)
(7, 0.2089041095890411)
Topic distributions for document 215
(0, 0.11885245901639346)
(1, 0.16803278688524592)
(2, 0.1352459016393443)
(3, 0.11885245901639346)
(4, 0.11885245901639346)
(

Topic distributions for document 378
(0, 0.09765625)
(1, 0.11328125)
(2, 0.16015625)
(3, 0.20703125)
(4, 0.11328125)
(5, 0.11328125)
(6, 0.09765625)
(7, 0.09765625)
Topic distributions for document 379
(0, 0.11328125)
(1, 0.12890625)
(2, 0.09765625)
(3, 0.09765625)
(4, 0.16015625)
(5, 0.09765625)
(6, 0.11328125)
(7, 0.19140625)
Topic distributions for document 380
(0, 0.10593220338983052)
(1, 0.17372881355932207)
(2, 0.12288135593220341)
(3, 0.10593220338983052)
(4, 0.10593220338983052)
(5, 0.12288135593220341)
(6, 0.12288135593220341)
(7, 0.1398305084745763)
Topic distributions for document 381
(0, 0.14473684210526316)
(1, 0.12719298245614036)
(2, 0.12719298245614036)
(3, 0.10964912280701754)
(4, 0.10964912280701754)
(5, 0.12719298245614036)
(6, 0.14473684210526316)
(7, 0.10964912280701754)
Topic distributions for document 382
(0, 0.1059322033898305)
(1, 0.17372881355932204)
(2, 0.13983050847457626)
(3, 0.1228813559322034)
(4, 0.1228813559322034)
(5, 0.1059322033898305)
(6, 0.12288135

(0, 0.125)
(1, 0.10775862068965517)
(2, 0.125)
(3, 0.10775862068965517)
(4, 0.14224137931034483)
(5, 0.125)
(6, 0.14224137931034483)
(7, 0.125)
Topic distributions for document 601
(0, 0.09920634920634921)
(1, 0.14682539682539686)
(2, 0.09920634920634921)
(3, 0.09920634920634921)
(4, 0.130952380952381)
(5, 0.19444444444444448)
(6, 0.09920634920634921)
(7, 0.130952380952381)
Topic distributions for document 602
(0, 0.09191176470588237)
(1, 0.18014705882352944)
(2, 0.16544117647058826)
(3, 0.09191176470588237)
(4, 0.10661764705882354)
(5, 0.1360294117647059)
(6, 0.12132352941176472)
(7, 0.10661764705882354)
Topic distributions for document 603
(0, 0.10775862068965517)
(1, 0.15948275862068967)
(2, 0.125)
(3, 0.10775862068965517)
(4, 0.10775862068965517)
(5, 0.14224137931034483)
(6, 0.10775862068965517)
(7, 0.14224137931034483)
Topic distributions for document 604
(0, 0.10080645161290322)
(1, 0.1814516129032258)
(2, 0.1975806451612903)
(3, 0.10080645161290322)
(4, 0.11693548387096774)
(5, 

(2, 0.14436619718309862)
(3, 0.10211267605633806)
(4, 0.21478873239436624)
(5, 0.08802816901408451)
(6, 0.1302816901408451)
(7, 0.11619718309859157)
Topic distributions for document 823
(0, 0.11153846153846152)
(1, 0.09615384615384613)
(2, 0.15769230769230766)
(3, 0.11153846153846152)
(4, 0.09615384615384613)
(5, 0.14230769230769227)
(6, 0.11153846153846152)
(7, 0.17307692307692304)
Topic distributions for document 824
(0, 0.11328125)
(1, 0.14453125)
(2, 0.19140625)
(3, 0.09765625)
(4, 0.09765625)
(5, 0.09765625)
(6, 0.11328125)
(7, 0.14453125)
Topic distributions for document 825
(0, 0.113013698630137)
(1, 0.0993150684931507)
(2, 0.113013698630137)
(3, 0.19520547945205483)
(4, 0.15410958904109592)
(5, 0.0993150684931507)
(6, 0.12671232876712332)
(7, 0.0993150684931507)
Topic distributions for document 826
(0, 0.09191176470588237)
(1, 0.1360294117647059)
(2, 0.09191176470588237)
(3, 0.09191176470588237)
(4, 0.1360294117647059)
(5, 0.19485294117647062)
(6, 0.1360294117647059)
(7, 0.1213

(3, 0.19202898550724637)
(4, 0.09057971014492754)
(5, 0.13405797101449277)
(6, 0.10507246376811594)
(7, 0.09057971014492754)
Topic distributions for document 1045
(0, 0.1342592592592593)
(1, 0.11574074074074076)
(2, 0.11574074074074076)
(3, 0.11574074074074076)
(4, 0.11574074074074076)
(5, 0.11574074074074076)
(6, 0.17129629629629634)
(7, 0.11574074074074076)
Topic distributions for document 1046
(0, 0.10245901639344263)
(1, 0.15163934426229508)
(2, 0.15163934426229508)
(3, 0.11885245901639344)
(4, 0.11885245901639344)
(5, 0.15163934426229508)
(6, 0.10245901639344263)
(7, 0.10245901639344263)
Topic distributions for document 1047
(0, 0.10964912280701751)
(1, 0.12719298245614033)
(2, 0.16228070175438594)
(3, 0.10964912280701751)
(4, 0.12719298245614033)
(5, 0.12719298245614033)
(6, 0.10964912280701751)
(7, 0.12719298245614033)
Topic distributions for document 1048
(0, 0.13306451612903225)
(1, 0.10080645161290322)
(2, 0.13306451612903225)
(3, 0.13306451612903225)
(4, 0.10080645161290322)

(0, 0.16228070175438597)
(1, 0.10964912280701754)
(2, 0.12719298245614036)
(3, 0.12719298245614036)
(4, 0.14473684210526316)
(5, 0.10964912280701754)
(6, 0.10964912280701754)
(7, 0.10964912280701754)
Topic distributions for document 1212
(0, 0.12692307692307692)
(1, 0.20384615384615384)
(2, 0.11153846153846154)
(3, 0.11153846153846154)
(4, 0.11153846153846154)
(5, 0.12692307692307692)
(6, 0.09615384615384616)
(7, 0.11153846153846154)
Topic distributions for document 1213
(0, 0.1294642857142857)
(1, 0.1651785714285714)
(2, 0.11160714285714284)
(3, 0.11160714285714284)
(4, 0.11160714285714284)
(5, 0.1294642857142857)
(6, 0.11160714285714284)
(7, 0.1294642857142857)
Topic distributions for document 1214
(0, 0.1398305084745763)
(1, 0.12288135593220342)
(2, 0.12288135593220342)
(3, 0.12288135593220342)
(4, 0.10593220338983053)
(5, 0.17372881355932207)
(6, 0.10593220338983053)
(7, 0.10593220338983053)
Topic distributions for document 1215
(0, 0.12288135593220341)
(1, 0.15677966101694918)
(2,

(5, 0.09269662921348314)
(6, 0.09269662921348314)
(7, 0.1039325842696629)
Topic distributions for document 1430
(0, 0.09666666666666666)
(1, 0.16333333333333333)
(2, 0.08333333333333333)
(3, 0.17666666666666667)
(4, 0.09666666666666666)
(5, 0.11)
(6, 0.09666666666666666)
(7, 0.17666666666666667)
Topic distributions for document 1431
(0, 0.12946428571428573)
(1, 0.11160714285714286)
(2, 0.11160714285714286)
(3, 0.11160714285714286)
(4, 0.11160714285714286)
(5, 0.11160714285714286)
(6, 0.16517857142857142)
(7, 0.14732142857142858)
Topic distributions for document 1432
(0, 0.09765625)
(1, 0.09765625)
(2, 0.14453125)
(3, 0.11328125)
(4, 0.12890625)
(5, 0.09765625)
(6, 0.12890625)
(7, 0.19140625)
Topic distributions for document 1433
(0, 0.12719298245614036)
(1, 0.10964912280701754)
(2, 0.16228070175438597)
(3, 0.14473684210526316)
(4, 0.12719298245614036)
(5, 0.10964912280701754)
(6, 0.10964912280701754)
(7, 0.10964912280701754)
Topic distributions for document 1434
(0, 0.13942307692307693

(7, 0.08928571428571429)
Topic distributions for document 1600
(0, 0.07621951219512195)
(1, 0.10060975609756098)
(2, 0.07621951219512195)
(3, 0.07621951219512195)
(4, 0.07621951219512195)
(5, 0.10060975609756098)
(6, 0.11280487804878049)
(7, 0.38109756097560976)
Topic distributions for document 1601
(0, 0.10245901639344264)
(1, 0.1352459016393443)
(2, 0.10245901639344264)
(3, 0.1516393442622951)
(4, 0.11885245901639345)
(5, 0.11885245901639345)
(6, 0.1516393442622951)
(7, 0.11885245901639345)
Topic distributions for document 1602
(0, 0.0992063492063492)
(1, 0.11507936507936507)
(2, 0.13095238095238096)
(3, 0.14682539682539683)
(4, 0.13095238095238096)
(5, 0.0992063492063492)
(6, 0.0992063492063492)
(7, 0.17857142857142858)
Topic distributions for document 1603
(0, 0.11693548387096774)
(1, 0.10080645161290322)
(2, 0.11693548387096774)
(3, 0.10080645161290322)
(4, 0.24596774193548387)
(5, 0.10080645161290322)
(6, 0.10080645161290322)
(7, 0.11693548387096774)
Topic distributions for docum

Topic distributions for document 1822
(0, 0.08680555555555555)
(1, 0.11458333333333333)
(2, 0.08680555555555555)
(3, 0.1701388888888889)
(4, 0.08680555555555555)
(5, 0.21180555555555555)
(6, 0.1284722222222222)
(7, 0.11458333333333333)
Topic distributions for document 1823
(0, 0.125)
(1, 0.0946969696969697)
(2, 0.0946969696969697)
(3, 0.0946969696969697)
(4, 0.0946969696969697)
(5, 0.2159090909090909)
(6, 0.125)
(7, 0.1553030303030303)
Topic distributions for document 1824
(0, 0.1375)
(1, 0.10416666666666667)
(2, 0.12083333333333333)
(3, 0.15416666666666667)
(4, 0.1375)
(5, 0.10416666666666667)
(6, 0.12083333333333333)
(7, 0.12083333333333333)
Topic distributions for document 1825
(0, 0.11574074074074076)
(1, 0.11574074074074076)
(2, 0.11574074074074076)
(3, 0.11574074074074076)
(4, 0.18981481481481485)
(5, 0.11574074074074076)
(6, 0.11574074074074076)
(7, 0.11574074074074076)
Topic distributions for document 1826
(0, 0.10775862068965517)
(1, 0.125)
(2, 0.10775862068965517)
(3, 0.14224

(1, 0.125)
(2, 0.125)
(3, 0.125)
(4, 0.125)
(5, 0.14224137931034483)
(6, 0.10775862068965517)
(7, 0.125)
Topic distributions for document 2045
(0, 0.13749999999999998)
(1, 0.1208333333333333)
(2, 0.13749999999999998)
(3, 0.1208333333333333)
(4, 0.13749999999999998)
(5, 0.1208333333333333)
(6, 0.1208333333333333)
(7, 0.10416666666666664)
Topic distributions for document 2046
(0, 0.11885245901639344)
(1, 0.10245901639344263)
(2, 0.11885245901639344)
(3, 0.1680327868852459)
(4, 0.15163934426229508)
(5, 0.10245901639344263)
(6, 0.13524590163934427)
(7, 0.10245901639344263)
Topic distributions for document 2047
(0, 0.2138157894736842)
(1, 0.10855263157894737)
(2, 0.08223684210526316)
(3, 0.08223684210526316)
(4, 0.12171052631578948)
(5, 0.10855263157894737)
(6, 0.12171052631578948)
(7, 0.1611842105263158)
Topic distributions for document 2048
(0, 0.14224137931034483)
(1, 0.10775862068965517)
(2, 0.15948275862068967)
(3, 0.10775862068965517)
(4, 0.125)
(5, 0.125)
(6, 0.125)
(7, 0.10775862068

(1, 0.113013698630137)
(2, 0.14041095890410962)
(3, 0.18150684931506852)
(4, 0.14041095890410962)
(5, 0.0993150684931507)
(6, 0.14041095890410962)
(7, 0.08561643835616439)
Topic distributions for document 2249
(0, 0.1024590163934426)
(1, 0.23360655737704913)
(2, 0.1024590163934426)
(3, 0.13524590163934425)
(4, 0.11885245901639341)
(5, 0.1024590163934426)
(6, 0.1024590163934426)
(7, 0.1024590163934426)
Topic distributions for document 2250
(0, 0.09615384615384617)
(1, 0.14230769230769233)
(2, 0.11153846153846156)
(3, 0.14230769230769233)
(4, 0.11153846153846156)
(5, 0.1730769230769231)
(6, 0.12692307692307694)
(7, 0.09615384615384617)
Topic distributions for document 2251
(0, 0.11160714285714286)
(1, 0.11160714285714286)
(2, 0.16517857142857142)
(3, 0.11160714285714286)
(4, 0.12946428571428573)
(5, 0.14732142857142858)
(6, 0.11160714285714286)
(7, 0.11160714285714286)
Topic distributions for document 2252
(0, 0.1541666666666667)
(1, 0.10416666666666669)
(2, 0.12083333333333335)
(3, 0.10

Topic distributions for document 2433
(0, 0.11363636363636363)
(1, 0.11363636363636363)
(2, 0.11363636363636363)
(3, 0.11363636363636363)
(4, 0.1318181818181818)
(5, 0.11363636363636363)
(6, 0.18636363636363637)
(7, 0.11363636363636363)
Topic distributions for document 2434
(0, 0.13524590163934425)
(1, 0.1024590163934426)
(2, 0.15163934426229506)
(3, 0.1024590163934426)
(4, 0.1024590163934426)
(5, 0.1844262295081967)
(6, 0.1024590163934426)
(7, 0.11885245901639341)
Topic distributions for document 2435
(0, 0.1228813559322034)
(1, 0.1059322033898305)
(2, 0.1228813559322034)
(3, 0.1059322033898305)
(4, 0.1906779661016949)
(5, 0.1228813559322034)
(6, 0.1059322033898305)
(7, 0.1228813559322034)
Topic distributions for document 2436
(0, 0.15416666666666667)
(1, 0.1375)
(2, 0.10416666666666667)
(3, 0.12083333333333333)
(4, 0.1375)
(5, 0.10416666666666667)
(6, 0.12083333333333333)
(7, 0.12083333333333333)
Topic distributions for document 2437
(0, 0.19805194805194806)
(1, 0.09415584415584416)


(3, 0.09328358208955224)
(4, 0.10820895522388059)
(5, 0.12313432835820895)
(6, 0.12313432835820895)
(7, 0.09328358208955224)
Topic distributions for document 2620
(0, 0.1398305084745763)
(1, 0.12288135593220341)
(2, 0.10593220338983052)
(3, 0.12288135593220341)
(4, 0.10593220338983052)
(5, 0.1398305084745763)
(6, 0.12288135593220341)
(7, 0.1398305084745763)
Topic distributions for document 2621
(0, 0.13983050847457626)
(1, 0.1228813559322034)
(2, 0.1059322033898305)
(3, 0.13983050847457626)
(4, 0.13983050847457626)
(5, 0.1228813559322034)
(6, 0.1228813559322034)
(7, 0.1059322033898305)
Topic distributions for document 2622
(0, 0.1228813559322034)
(1, 0.15677966101694915)
(2, 0.1059322033898305)
(3, 0.13983050847457626)
(4, 0.1228813559322034)
(5, 0.1059322033898305)
(6, 0.13983050847457626)
(7, 0.1059322033898305)
Topic distributions for document 2623
(0, 0.10245901639344263)
(1, 0.11885245901639344)
(2, 0.15163934426229508)
(3, 0.10245901639344263)
(4, 0.15163934426229508)
(5, 0.10245

(6, 0.09615384615384616)
(7, 0.1576923076923077)
Topic distributions for document 2822
(0, 0.10964912280701754)
(1, 0.10964912280701754)
(2, 0.10964912280701754)
(3, 0.16228070175438597)
(4, 0.14473684210526316)
(5, 0.14473684210526316)
(6, 0.10964912280701754)
(7, 0.10964912280701754)
Topic distributions for document 2823
(0, 0.15948275862068967)
(1, 0.10775862068965517)
(2, 0.125)
(3, 0.14224137931034483)
(4, 0.125)
(5, 0.10775862068965517)
(6, 0.10775862068965517)
(7, 0.125)
Topic distributions for document 2824
(0, 0.14015151515151514)
(1, 0.10984848484848485)
(2, 0.125)
(3, 0.10984848484848485)
(4, 0.14015151515151514)
(5, 0.125)
(6, 0.125)
(7, 0.125)
Topic distributions for document 2825
(0, 0.12288135593220341)
(1, 0.10593220338983052)
(2, 0.10593220338983052)
(3, 0.10593220338983052)
(4, 0.19067796610169493)
(5, 0.12288135593220341)
(6, 0.10593220338983052)
(7, 0.1398305084745763)
Topic distributions for document 2826
(0, 0.1342592592592593)
(1, 0.15277777777777782)
(2, 0.11574

(7, 0.125)
Topic distributions for document 3044
(0, 0.1594827586206897)
(1, 0.10775862068965518)
(2, 0.10775862068965518)
(3, 0.10775862068965518)
(4, 0.1594827586206897)
(5, 0.12500000000000003)
(6, 0.10775862068965518)
(7, 0.12500000000000003)
Topic distributions for document 3045
(0, 0.09269662921348315)
(1, 0.0702247191011236)
(2, 0.0702247191011236)
(3, 0.10393258426966293)
(4, 0.09269662921348315)
(5, 0.13764044943820225)
(6, 0.0702247191011236)
(7, 0.36235955056179775)
Topic distributions for document 3046
(0, 0.15163934426229508)
(1, 0.10245901639344263)
(2, 0.10245901639344263)
(3, 0.13524590163934427)
(4, 0.11885245901639344)
(5, 0.10245901639344263)
(6, 0.13524590163934427)
(7, 0.15163934426229508)
Topic distributions for document 3047
(0, 0.11574074074074074)
(1, 0.11574074074074074)
(2, 0.11574074074074074)
(3, 0.1527777777777778)
(4, 0.13425925925925927)
(5, 0.11574074074074074)
(6, 0.11574074074074074)
(7, 0.13425925925925927)
Topic distributions for document 3048
(0, 0

(0, 0.10245901639344263)
(1, 0.11885245901639344)
(2, 0.11885245901639344)
(3, 0.10245901639344263)
(4, 0.11885245901639344)
(5, 0.20081967213114754)
(6, 0.13524590163934427)
(7, 0.10245901639344263)
Topic distributions for document 3267
(0, 0.0992063492063492)
(1, 0.13095238095238096)
(2, 0.0992063492063492)
(3, 0.1626984126984127)
(4, 0.13095238095238096)
(5, 0.13095238095238096)
(6, 0.13095238095238096)
(7, 0.11507936507936507)
Topic distributions for document 3268
(0, 0.13306451612903225)
(1, 0.13306451612903225)
(2, 0.13306451612903225)
(3, 0.14919354838709678)
(4, 0.11693548387096774)
(5, 0.10080645161290322)
(6, 0.11693548387096774)
(7, 0.11693548387096774)
Topic distributions for document 3269
(0, 0.08802816901408451)
(1, 0.10211267605633804)
(2, 0.15845070422535215)
(3, 0.21478873239436622)
(4, 0.11619718309859156)
(5, 0.14436619718309862)
(6, 0.08802816901408451)
(7, 0.08802816901408451)
Topic distributions for document 3270
(0, 0.14919354838709678)
(1, 0.10080645161290322)
(

(6, 0.10820895522388059)
(7, 0.10820895522388059)
Topic distributions for document 3433
(0, 0.11363636363636363)
(1, 0.15)
(2, 0.1318181818181818)
(3, 0.11363636363636363)
(4, 0.1318181818181818)
(5, 0.11363636363636363)
(6, 0.1318181818181818)
(7, 0.11363636363636363)
Topic distributions for document 3434
(0, 0.12719298245614036)
(1, 0.14473684210526316)
(2, 0.10964912280701754)
(3, 0.10964912280701754)
(4, 0.12719298245614036)
(5, 0.12719298245614036)
(6, 0.10964912280701754)
(7, 0.14473684210526316)
Topic distributions for document 3435
(0, 0.10964912280701751)
(1, 0.12719298245614033)
(2, 0.12719298245614033)
(3, 0.14473684210526314)
(4, 0.12719298245614033)
(5, 0.12719298245614033)
(6, 0.12719298245614033)
(7, 0.10964912280701751)
Topic distributions for document 3436
(0, 0.13306451612903225)
(1, 0.11693548387096774)
(2, 0.10080645161290322)
(3, 0.13306451612903225)
(4, 0.13306451612903225)
(5, 0.10080645161290322)
(6, 0.14919354838709678)
(7, 0.13306451612903225)
Topic distributi

Topic distributions for document 3629
(0, 0.11363636363636363)
(1, 0.11363636363636363)
(2, 0.15)
(3, 0.11363636363636363)
(4, 0.15)
(5, 0.1318181818181818)
(6, 0.11363636363636363)
(7, 0.11363636363636363)
Topic distributions for document 3630
(0, 0.11363636363636363)
(1, 0.11363636363636363)
(2, 0.11363636363636363)
(3, 0.11363636363636363)
(4, 0.1318181818181818)
(5, 0.16818181818181818)
(6, 0.11363636363636363)
(7, 0.1318181818181818)
Topic distributions for document 3631
(0, 0.09765625)
(1, 0.11328125)
(2, 0.12890625)
(3, 0.09765625)
(4, 0.09765625)
(5, 0.12890625)
(6, 0.09765625)
(7, 0.23828125)
Topic distributions for document 3632
(0, 0.15909090909090912)
(1, 0.12012987012987014)
(2, 0.08116883116883118)
(3, 0.15909090909090912)
(4, 0.09415584415584417)
(5, 0.08116883116883118)
(6, 0.15909090909090912)
(7, 0.14610389610389612)
Topic distributions for document 3633
(0, 0.13749999999999998)
(1, 0.10416666666666664)
(2, 0.1208333333333333)
(3, 0.1708333333333333)
(4, 0.10416666666

(3, 0.125)
(4, 0.10984848484848485)
(5, 0.125)
(6, 0.0946969696969697)
(7, 0.10984848484848485)
Topic distributions for document 3822
(0, 0.1398305084745763)
(1, 0.10593220338983053)
(2, 0.1398305084745763)
(3, 0.10593220338983053)
(4, 0.15677966101694918)
(5, 0.1398305084745763)
(6, 0.10593220338983053)
(7, 0.10593220338983053)
Topic distributions for document 3823
(0, 0.09191176470588236)
(1, 0.13602941176470587)
(2, 0.13602941176470587)
(3, 0.15073529411764705)
(4, 0.09191176470588236)
(5, 0.15073529411764705)
(6, 0.1213235294117647)
(7, 0.1213235294117647)
Topic distributions for document 3824
(0, 0.1201923076923077)
(1, 0.1201923076923077)
(2, 0.1201923076923077)
(3, 0.15865384615384615)
(4, 0.1201923076923077)
(5, 0.1201923076923077)
(6, 0.1201923076923077)
(7, 0.1201923076923077)
Topic distributions for document 3825
(0, 0.12719298245614036)
(1, 0.14473684210526316)
(2, 0.10964912280701754)
(3, 0.12719298245614036)
(4, 0.10964912280701754)
(5, 0.12719298245614036)
(6, 0.14473684

(5, 0.0946969696969697)
(6, 0.0946969696969697)
(7, 0.0946969696969697)
Topic distributions for document 4044
(0, 0.15)
(1, 0.11363636363636363)
(2, 0.1318181818181818)
(3, 0.15)
(4, 0.11363636363636363)
(5, 0.11363636363636363)
(6, 0.11363636363636363)
(7, 0.11363636363636363)
Topic distributions for document 4045
(0, 0.0992063492063492)
(1, 0.0992063492063492)
(2, 0.13095238095238096)
(3, 0.13095238095238096)
(4, 0.0992063492063492)
(5, 0.11507936507936507)
(6, 0.2261904761904762)
(7, 0.0992063492063492)
Topic distributions for document 4046
(0, 0.1284722222222222)
(1, 0.1423611111111111)
(2, 0.1284722222222222)
(3, 0.1423611111111111)
(4, 0.08680555555555555)
(5, 0.1423611111111111)
(6, 0.1423611111111111)
(7, 0.08680555555555555)
Topic distributions for document 4047
(0, 0.11693548387096774)
(1, 0.1975806451612903)
(2, 0.11693548387096774)
(3, 0.10080645161290322)
(4, 0.10080645161290322)
(5, 0.13306451612903225)
(6, 0.11693548387096774)
(7, 0.11693548387096774)
Topic distributions

(6, 0.13805970149253732)
(7, 0.10820895522388059)
Topic distributions for document 4266
(0, 0.14919354838709678)
(1, 0.10080645161290322)
(2, 0.14919354838709678)
(3, 0.13306451612903225)
(4, 0.16532258064516128)
(5, 0.10080645161290322)
(6, 0.10080645161290322)
(7, 0.10080645161290322)
Topic distributions for document 4267
(0, 0.1318181818181818)
(1, 0.11363636363636363)
(2, 0.11363636363636363)
(3, 0.11363636363636363)
(4, 0.16818181818181818)
(5, 0.1318181818181818)
(6, 0.11363636363636363)
(7, 0.11363636363636363)
Topic distributions for document 4268
(0, 0.13425925925925927)
(1, 0.11574074074074074)
(2, 0.13425925925925927)
(3, 0.11574074074074074)
(4, 0.11574074074074074)
(5, 0.13425925925925927)
(6, 0.13425925925925927)
(7, 0.11574074074074074)
Topic distributions for document 4269
(0, 0.11574074074074076)
(1, 0.11574074074074076)
(2, 0.11574074074074076)
(3, 0.1342592592592593)
(4, 0.17129629629629634)
(5, 0.11574074074074076)
(6, 0.11574074074074076)
(7, 0.11574074074074076)
T

Topic distributions for document 4488
(0, 0.1059322033898305)
(1, 0.1059322033898305)
(2, 0.1228813559322034)
(3, 0.13983050847457626)
(4, 0.1228813559322034)
(5, 0.1228813559322034)
(6, 0.17372881355932204)
(7, 0.1059322033898305)
Topic distributions for document 4489
(0, 0.1228813559322034)
(1, 0.1059322033898305)
(2, 0.1059322033898305)
(3, 0.1059322033898305)
(4, 0.1059322033898305)
(5, 0.1059322033898305)
(6, 0.2076271186440678)
(7, 0.13983050847457626)
Topic distributions for document 4490
(0, 0.15163934426229506)
(1, 0.1024590163934426)
(2, 0.11885245901639341)
(3, 0.1024590163934426)
(4, 0.2172131147540983)
(5, 0.1024590163934426)
(6, 0.1024590163934426)
(7, 0.1024590163934426)
Topic distributions for document 4491
(0, 0.12083333333333335)
(1, 0.12083333333333335)
(2, 0.13750000000000004)
(3, 0.1541666666666667)
(4, 0.1541666666666667)
(5, 0.10416666666666669)
(6, 0.10416666666666669)
(7, 0.10416666666666669)
Topic distributions for document 4492
(0, 0.10775862068965517)
(1, 0.

(1, 0.13851351351351351)
(2, 0.11148648648648649)
(3, 0.08445945945945946)
(4, 0.125)
(5, 0.125)
(6, 0.125)
(7, 0.20608108108108109)
Topic distributions for document 4673
(0, 0.10964912280701754)
(1, 0.12719298245614036)
(2, 0.10964912280701754)
(3, 0.10964912280701754)
(4, 0.10964912280701754)
(5, 0.12719298245614036)
(6, 0.17982456140350878)
(7, 0.12719298245614036)
Topic distributions for document 4674
(0, 0.10661764705882353)
(1, 0.1213235294117647)
(2, 0.15073529411764705)
(3, 0.09191176470588236)
(4, 0.09191176470588236)
(5, 0.15073529411764705)
(6, 0.13602941176470587)
(7, 0.15073529411764705)
Topic distributions for document 4675
(0, 0.115625)
(1, 0.140625)
(2, 0.103125)
(3, 0.078125)
(4, 0.128125)
(5, 0.128125)
(6, 0.203125)
(7, 0.103125)
Topic distributions for document 4676
(0, 0.09920634920634921)
(1, 0.11507936507936509)
(2, 0.09920634920634921)
(3, 0.11507936507936509)
(4, 0.11507936507936509)
(5, 0.16269841269841273)
(6, 0.09920634920634921)
(7, 0.19444444444444448)
Topi

(6, 0.10775862068965518)
(7, 0.10775862068965518)
Topic distributions for document 4877
(0, 0.1516393442622951)
(1, 0.1352459016393443)
(2, 0.10245901639344264)
(3, 0.11885245901639345)
(4, 0.11885245901639345)
(5, 0.11885245901639345)
(6, 0.1352459016393443)
(7, 0.11885245901639345)
Topic distributions for document 4878
(0, 0.14015151515151514)
(1, 0.10984848484848485)
(2, 0.0946969696969697)
(3, 0.0946969696969697)
(4, 0.2765151515151515)
(5, 0.0946969696969697)
(6, 0.0946969696969697)
(7, 0.0946969696969697)
Topic distributions for document 4879
(0, 0.1228813559322034)
(1, 0.1228813559322034)
(2, 0.1228813559322034)
(3, 0.17372881355932204)
(4, 0.1228813559322034)
(5, 0.1228813559322034)
(6, 0.1059322033898305)
(7, 0.1059322033898305)
Topic distributions for document 4880
(0, 0.0992063492063492)
(1, 0.13095238095238096)
(2, 0.17857142857142858)
(3, 0.14682539682539683)
(4, 0.13095238095238096)
(5, 0.0992063492063492)
(6, 0.11507936507936507)
(7, 0.0992063492063492)
Topic distributio

(7, 0.10080645161290322)
Topic distributions for document 5099
(0, 0.1318181818181818)
(1, 0.1318181818181818)
(2, 0.1318181818181818)
(3, 0.11363636363636363)
(4, 0.1318181818181818)
(5, 0.1318181818181818)
(6, 0.11363636363636363)
(7, 0.11363636363636363)


In [14]:
lda_corpus = [max(prob,key=lambda y:y[1]) for prob in lda_fake[corpus_f] ]
playlists = [[] for i in range(8)]
for i, x in enumerate(lda_corpus):
    playlists[x[0]].append(corpus_fake[i])

In [17]:
playlists[1]

['BREAKING NEWS president Cryill Ramaphosa asked foreign nations depart south Africa june increasing cases COVID .',
 'George Soros people responsible new born laboratory.',
 'article claiming "Bill Gates\' vaccine" modify human DNA.',
 'person Galicia / / .',
 'Camila Pitanga Brazilian actress contracted COVID-19 said caught malaria chloroquine admitting wrong.',
 "Bill Gates profits vaccination wants reduce world's population.",
 'Indore India Police doctors injecting positive blood Muslims.',
 'Indian politician Uddhav Thackeray asked Christians pray Jesus stop COVID-19.',
 '"States like New York, Illinois California vocal demands funding bail pension systems, failing long COVID-19 outbreak, programs aren t related crisis."',
 'Trumps effort blame Obama sluggish testing.',
 'Coronavirus spread bananas',
 'Covid19 NYC people saw this, stay home.HORRIBLE. StayHome StayAtHome StaySafe NYC NYCLockdown FridayFeeling',
 'puts facemask skin, gets lockdown &gt;&gt;&gt;',
 'Simpsons predicte

In [15]:
transformed_docs = lda_fake.load_document_topics()
topic_distributions_fake = pd.DataFrame([[x[1] for x in doc] for doc in transformed_docs], 
             columns=['topic_{}'.format(i) for i in range(8)])
topic_distributions_fake.tail()
topic_distributions_fake.mean().sort_values(ascending=False)

topic_7    0.125586
topic_4    0.125574
topic_0    0.125395
topic_2    0.125382
topic_6    0.124854
topic_3    0.124627
topic_1    0.124529
topic_5    0.124053
dtype: float64

In [27]:
count = 0
for i in range(8):
    count += len(playlists[i])
print(count)

5100


###### Loop over documents to get topic distribution: Real

In [32]:
transformed_docs = lda_real.load_document_topics()
for i, document in enumerate(transformed_docs):
    print('Topic distributions for document {}'.format(i))
    for topic in document:
        print(topic)
        
lda_corpus = [max(prob,key=lambda y:y[1]) for prob in lda_real[corpus_r]]
playlists_real = [[] for i in range(27)]
for i, x in enumerate(lda_corpus):
    playlists_real[x[0]].append(corpus_real[i])

Topic distributions for document 0
(0, 0.02723311546840959)
(1, 0.02723311546840959)
(2, 0.02723311546840959)
(3, 0.02723311546840959)
(4, 0.04193899782135077)
(5, 0.07135076252723313)
(6, 0.04193899782135077)
(7, 0.02723311546840959)
(8, 0.05664488017429195)
(9, 0.02723311546840959)
(10, 0.02723311546840959)
(11, 0.02723311546840959)
(12, 0.02723311546840959)
(13, 0.02723311546840959)
(14, 0.02723311546840959)
(15, 0.04193899782135077)
(16, 0.04193899782135077)
(17, 0.02723311546840959)
(18, 0.11546840958605666)
(19, 0.02723311546840959)
(20, 0.02723311546840959)
(21, 0.02723311546840959)
(22, 0.02723311546840959)
(23, 0.04193899782135077)
(24, 0.02723311546840959)
(25, 0.04193899782135077)
(26, 0.04193899782135077)
Topic distributions for document 1
(0, 0.02986857825567504)
(1, 0.02986857825567504)
(2, 0.02986857825567504)
(3, 0.0782556750298686)
(4, 0.02986857825567504)
(5, 0.09438470728793312)
(6, 0.02986857825567504)
(7, 0.02986857825567504)
(8, 0.02986857825567504)
(9, 0.02986857

(25, 0.04938271604938273)
(26, 0.02374169040835708)
Topic distributions for document 73
(0, 0.026455026455026443)
(1, 0.026455026455026443)
(2, 0.04074074074074072)
(3, 0.026455026455026443)
(4, 0.026455026455026443)
(5, 0.026455026455026443)
(6, 0.04074074074074072)
(7, 0.1264550264550264)
(8, 0.026455026455026443)
(9, 0.026455026455026443)
(10, 0.06931216931216928)
(11, 0.04074074074074072)
(12, 0.026455026455026443)
(13, 0.026455026455026443)
(14, 0.026455026455026443)
(15, 0.026455026455026443)
(16, 0.055026455026455)
(17, 0.06931216931216928)
(18, 0.026455026455026443)
(19, 0.026455026455026443)
(20, 0.026455026455026443)
(21, 0.04074074074074072)
(22, 0.026455026455026443)
(23, 0.026455026455026443)
(24, 0.04074074074074072)
(25, 0.026455026455026443)
(26, 0.026455026455026443)
Topic distributions for document 74
(0, 0.03906646372399797)
(1, 0.02536783358701166)
(2, 0.03906646372399797)
(3, 0.02536783358701166)
(4, 0.02536783358701166)
(5, 0.02536783358701166)
(6, 0.0390664637239

(12, 0.02893518518518518)
(13, 0.02893518518518518)
(14, 0.02893518518518518)
(15, 0.02893518518518518)
(16, 0.02893518518518518)
(17, 0.02893518518518518)
(18, 0.02893518518518518)
(19, 0.13831018518518515)
(20, 0.02893518518518518)
(21, 0.04456018518518518)
(22, 0.02893518518518518)
(23, 0.07581018518518517)
(24, 0.02893518518518518)
(25, 0.04456018518518518)
(26, 0.04456018518518518)
Topic distributions for document 127
(0, 0.026838432635534086)
(1, 0.026838432635534086)
(2, 0.026838432635534086)
(3, 0.273215244229737)
(4, 0.026838432635534086)
(5, 0.026838432635534086)
(6, 0.026838432635534086)
(7, 0.026838432635534086)
(8, 0.026838432635534086)
(9, 0.026838432635534086)
(10, 0.026838432635534086)
(11, 0.026838432635534086)
(12, 0.026838432635534086)
(13, 0.041331186258722494)
(14, 0.026838432635534086)
(15, 0.026838432635534086)
(16, 0.026838432635534086)
(17, 0.026838432635534086)
(18, 0.026838432635534086)
(19, 0.026838432635534086)
(20, 0.026838432635534086)
(21, 0.026838432635

(0, 0.030358227079538575)
(1, 0.07953855494839107)
(2, 0.04675166970248941)
(3, 0.030358227079538575)
(4, 0.04675166970248941)
(5, 0.04675166970248941)
(6, 0.030358227079538575)
(7, 0.030358227079538575)
(8, 0.030358227079538575)
(9, 0.030358227079538575)
(10, 0.06314511232544023)
(11, 0.030358227079538575)
(12, 0.030358227079538575)
(13, 0.030358227079538575)
(14, 0.030358227079538575)
(15, 0.04675166970248941)
(16, 0.030358227079538575)
(17, 0.030358227079538575)
(18, 0.030358227079538575)
(19, 0.030358227079538575)
(20, 0.030358227079538575)
(21, 0.04675166970248941)
(22, 0.030358227079538575)
(23, 0.030358227079538575)
(24, 0.04675166970248941)
(25, 0.030358227079538575)
(26, 0.030358227079538575)
Topic distributions for document 181
(0, 0.02608242044861763)
(1, 0.040166927490871154)
(2, 0.02608242044861763)
(3, 0.02608242044861763)
(4, 0.040166927490871154)
(5, 0.040166927490871154)
(6, 0.05425143453312468)
(7, 0.02608242044861763)
(8, 0.02608242044861763)
(9, 0.02608242044861763)

(15, 0.05425143453312468)
(16, 0.02608242044861763)
(17, 0.0683359415753782)
(18, 0.02608242044861763)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.02608242044861763)
(22, 0.02608242044861763)
(23, 0.02608242044861763)
(24, 0.02608242044861763)
(25, 0.05425143453312468)
(26, 0.02608242044861763)
Topic distributions for document 234
(0, 0.02436647173489278)
(1, 0.02436647173489278)
(2, 0.03752436647173489)
(3, 0.02436647173489278)
(4, 0.02436647173489278)
(5, 0.02436647173489278)
(6, 0.02436647173489278)
(7, 0.02436647173489278)
(8, 0.03752436647173489)
(9, 0.03752436647173489)
(10, 0.02436647173489278)
(11, 0.02436647173489278)
(12, 0.02436647173489278)
(13, 0.02436647173489278)
(14, 0.02436647173489278)
(15, 0.02436647173489278)
(16, 0.02436647173489278)
(17, 0.02436647173489278)
(18, 0.2612085769980506)
(19, 0.02436647173489278)
(20, 0.03752436647173489)
(21, 0.02436647173489278)
(22, 0.02436647173489278)
(23, 0.06384015594541909)
(24, 0.02436647173489278)
(25, 0.037524

(3, 0.027233115468409595)
(4, 0.027233115468409595)
(5, 0.04193899782135077)
(6, 0.04193899782135077)
(7, 0.027233115468409595)
(8, 0.027233115468409595)
(9, 0.027233115468409595)
(10, 0.027233115468409595)
(11, 0.027233115468409595)
(12, 0.04193899782135077)
(13, 0.1007625272331155)
(14, 0.027233115468409595)
(15, 0.04193899782135077)
(16, 0.05664488017429196)
(17, 0.027233115468409595)
(18, 0.027233115468409595)
(19, 0.027233115468409595)
(20, 0.027233115468409595)
(21, 0.027233115468409595)
(22, 0.027233115468409595)
(23, 0.027233115468409595)
(24, 0.027233115468409595)
(25, 0.04193899782135077)
(26, 0.1007625272331155)
Topic distributions for document 288
(0, 0.03248862897985705)
(1, 0.03248862897985705)
(2, 0.03248862897985705)
(3, 0.03248862897985705)
(4, 0.03248862897985705)
(5, 0.03248862897985705)
(6, 0.03248862897985705)
(7, 0.03248862897985705)
(8, 0.08512020792722548)
(9, 0.03248862897985705)
(10, 0.03248862897985705)
(11, 0.03248862897985705)
(12, 0.03248862897985705)
(13,

(18, 0.028058361391694736)
(19, 0.028058361391694736)
(20, 0.028058361391694736)
(21, 0.043209876543209895)
(22, 0.028058361391694736)
(23, 0.028058361391694736)
(24, 0.058361391694725054)
(25, 0.028058361391694736)
(26, 0.028058361391694736)
Topic distributions for document 341
(0, 0.023441162681669018)
(1, 0.08673230192217536)
(2, 0.048757618377871556)
(3, 0.036099390529770285)
(4, 0.036099390529770285)
(5, 0.023441162681669018)
(6, 0.036099390529770285)
(7, 0.048757618377871556)
(8, 0.023441162681669018)
(9, 0.023441162681669018)
(10, 0.036099390529770285)
(11, 0.036099390529770285)
(12, 0.023441162681669018)
(13, 0.023441162681669018)
(14, 0.048757618377871556)
(15, 0.0740740740740741)
(16, 0.048757618377871556)
(17, 0.023441162681669018)
(18, 0.023441162681669018)
(19, 0.036099390529770285)
(20, 0.023441162681669018)
(21, 0.023441162681669018)
(22, 0.023441162681669018)
(23, 0.023441162681669018)
(24, 0.08673230192217536)
(25, 0.023441162681669018)
(26, 0.036099390529770285)
Topic

(22, 0.029394473838918297)
(23, 0.029394473838918297)
(24, 0.09288653733098182)
(25, 0.04526748971193418)
(26, 0.029394473838918297)
Topic distributions for document 395
(0, 0.02986857825567504)
(1, 0.02986857825567504)
(2, 0.02986857825567504)
(3, 0.02986857825567504)
(4, 0.02986857825567504)
(5, 0.02986857825567504)
(6, 0.02986857825567504)
(7, 0.02986857825567504)
(8, 0.02986857825567504)
(9, 0.02986857825567504)
(10, 0.02986857825567504)
(11, 0.02986857825567504)
(12, 0.20728793309438476)
(13, 0.02986857825567504)
(14, 0.02986857825567504)
(15, 0.02986857825567504)
(16, 0.045997610513739566)
(17, 0.02986857825567504)
(18, 0.02986857825567504)
(19, 0.02986857825567504)
(20, 0.02986857825567504)
(21, 0.02986857825567504)
(22, 0.02986857825567504)
(23, 0.02986857825567504)
(24, 0.02986857825567504)
(25, 0.02986857825567504)
(26, 0.02986857825567504)
Topic distributions for document 396
(0, 0.02469135802469136)
(1, 0.02469135802469136)
(2, 0.02469135802469136)
(3, 0.02469135802469136)


(17, 0.023441162681669018)
(18, 0.31458040318799824)
(19, 0.023441162681669018)
(20, 0.023441162681669018)
(21, 0.023441162681669018)
(22, 0.023441162681669018)
(23, 0.048757618377871556)
(24, 0.023441162681669018)
(25, 0.023441162681669018)
(26, 0.023441162681669018)
Topic distributions for document 466
(0, 0.03367003367003368)
(1, 0.03367003367003368)
(2, 0.03367003367003368)
(3, 0.03367003367003368)
(4, 0.03367003367003368)
(5, 0.03367003367003368)
(6, 0.03367003367003368)
(7, 0.03367003367003368)
(8, 0.03367003367003368)
(9, 0.05185185185185188)
(10, 0.03367003367003368)
(11, 0.03367003367003368)
(12, 0.03367003367003368)
(13, 0.05185185185185188)
(14, 0.03367003367003368)
(15, 0.03367003367003368)
(16, 0.03367003367003368)
(17, 0.03367003367003368)
(18, 0.05185185185185188)
(19, 0.03367003367003368)
(20, 0.03367003367003368)
(21, 0.03367003367003368)
(22, 0.03367003367003368)
(23, 0.05185185185185188)
(24, 0.05185185185185188)
(25, 0.03367003367003368)
(26, 0.03367003367003368)
To

(5, 0.02939447383891829)
(6, 0.02939447383891829)
(7, 0.02939447383891829)
(8, 0.02939447383891829)
(9, 0.02939447383891829)
(10, 0.02939447383891829)
(11, 0.07701352145796592)
(12, 0.02939447383891829)
(13, 0.02939447383891829)
(14, 0.12463256907701355)
(15, 0.02939447383891829)
(16, 0.045267489711934165)
(17, 0.045267489711934165)
(18, 0.02939447383891829)
(19, 0.02939447383891829)
(20, 0.02939447383891829)
(21, 0.02939447383891829)
(22, 0.02939447383891829)
(23, 0.02939447383891829)
(24, 0.02939447383891829)
(25, 0.045267489711934165)
(26, 0.045267489711934165)
Topic distributions for document 520
(0, 0.02436647173489278)
(1, 0.03752436647173489)
(2, 0.02436647173489278)
(3, 0.02436647173489278)
(4, 0.03752436647173489)
(5, 0.02436647173489278)
(6, 0.03752436647173489)
(7, 0.03752436647173489)
(8, 0.03752436647173489)
(9, 0.05068226120857699)
(10, 0.02436647173489278)
(11, 0.11647173489278749)
(12, 0.1296296296296296)
(13, 0.02436647173489278)
(14, 0.02436647173489278)
(15, 0.037524

(20, 0.02683843263553409)
(21, 0.02683843263553409)
(22, 0.02683843263553409)
(23, 0.02683843263553409)
(24, 0.02683843263553409)
(25, 0.0413311862587225)
(26, 0.0413311862587225)
Topic distributions for document 573
(0, 0.02763957987838585)
(1, 0.02763957987838585)
(2, 0.02763957987838585)
(3, 0.02763957987838585)
(4, 0.02763957987838585)
(5, 0.02763957987838585)
(6, 0.042564953012714214)
(7, 0.02763957987838585)
(8, 0.07241569928137094)
(9, 0.042564953012714214)
(10, 0.02763957987838585)
(11, 0.02763957987838585)
(12, 0.02763957987838585)
(13, 0.02763957987838585)
(14, 0.02763957987838585)
(15, 0.02763957987838585)
(16, 0.07241569928137094)
(17, 0.02763957987838585)
(18, 0.042564953012714214)
(19, 0.057490326147042574)
(20, 0.02763957987838585)
(21, 0.042564953012714214)
(22, 0.042564953012714214)
(23, 0.02763957987838585)
(24, 0.07241569928137094)
(25, 0.042564953012714214)
(26, 0.02763957987838585)
Topic distributions for document 574
(0, 0.024691358024691357)
(1, 0.024691358024691

Topic distributions for document 637
(0, 0.028058361391694736)
(1, 0.058361391694725054)
(2, 0.028058361391694736)
(3, 0.028058361391694736)
(4, 0.028058361391694736)
(5, 0.028058361391694736)
(6, 0.08866442199775536)
(7, 0.028058361391694736)
(8, 0.058361391694725054)
(9, 0.028058361391694736)
(10, 0.043209876543209895)
(11, 0.028058361391694736)
(12, 0.028058361391694736)
(13, 0.043209876543209895)
(14, 0.028058361391694736)
(15, 0.043209876543209895)
(16, 0.043209876543209895)
(17, 0.028058361391694736)
(18, 0.028058361391694736)
(19, 0.028058361391694736)
(20, 0.028058361391694736)
(21, 0.043209876543209895)
(22, 0.043209876543209895)
(23, 0.028058361391694736)
(24, 0.028058361391694736)
(25, 0.043209876543209895)
(26, 0.043209876543209895)
Topic distributions for document 638
(0, 0.02763957987838585)
(1, 0.02763957987838585)
(2, 0.02763957987838585)
(3, 0.042564953012714214)
(4, 0.042564953012714214)
(5, 0.02763957987838585)
(6, 0.07241569928137094)
(7, 0.02763957987838585)
(8, 0.

(19, 0.024050024050024033)
(20, 0.05002405002404999)
(21, 0.024050024050024033)
(22, 0.024050024050024033)
(23, 0.037037037037037014)
(24, 0.037037037037037014)
(25, 0.024050024050024033)
(26, 0.05002405002404999)
Topic distributions for document 698
(0, 0.054251434533124664)
(1, 0.026082420448617624)
(2, 0.026082420448617624)
(3, 0.026082420448617624)
(4, 0.026082420448617624)
(5, 0.026082420448617624)
(6, 0.054251434533124664)
(7, 0.054251434533124664)
(8, 0.026082420448617624)
(9, 0.026082420448617624)
(10, 0.026082420448617624)
(11, 0.026082420448617624)
(12, 0.026082420448617624)
(13, 0.026082420448617624)
(14, 0.026082420448617624)
(15, 0.026082420448617624)
(16, 0.026082420448617624)
(17, 0.026082420448617624)
(18, 0.026082420448617624)
(19, 0.04016692749087115)
(20, 0.026082420448617624)
(21, 0.054251434533124664)
(22, 0.026082420448617624)
(23, 0.026082420448617624)
(24, 0.18101199791340633)
(25, 0.026082420448617624)
(26, 0.04016692749087115)
Topic distributions for document 

Topic distributions for document 755
(0, 0.14469135802469135)
(1, 0.05135802469135803)
(2, 0.03802469135802469)
(3, 0.024691358024691357)
(4, 0.024691358024691357)
(5, 0.024691358024691357)
(6, 0.03802469135802469)
(7, 0.024691358024691357)
(8, 0.03802469135802469)
(9, 0.03802469135802469)
(10, 0.024691358024691357)
(11, 0.024691358024691357)
(12, 0.024691358024691357)
(13, 0.024691358024691357)
(14, 0.06469135802469136)
(15, 0.024691358024691357)
(16, 0.03802469135802469)
(17, 0.024691358024691357)
(18, 0.024691358024691357)
(19, 0.05135802469135803)
(20, 0.024691358024691357)
(21, 0.03802469135802469)
(22, 0.024691358024691357)
(23, 0.05135802469135803)
(24, 0.03802469135802469)
(25, 0.024691358024691357)
(26, 0.024691358024691357)
Topic distributions for document 756
(0, 0.1795735129068463)
(1, 0.0432098765432099)
(2, 0.02805836139169474)
(3, 0.02805836139169474)
(4, 0.02805836139169474)
(5, 0.02805836139169474)
(6, 0.02805836139169474)
(7, 0.02805836139169474)
(8, 0.058361391694725

(22, 0.023441162681669018)
(23, 0.048757618377871556)
(24, 0.023441162681669018)
(25, 0.03609939052977029)
(26, 0.023441162681669018)
Topic distributions for document 805
(0, 0.022862368541380882)
(1, 0.035208047553726564)
(2, 0.022862368541380882)
(3, 0.047553726566072235)
(4, 0.047553726566072235)
(5, 0.022862368541380882)
(6, 0.022862368541380882)
(7, 0.035208047553726564)
(8, 0.022862368541380882)
(9, 0.022862368541380882)
(10, 0.035208047553726564)
(11, 0.022862368541380882)
(12, 0.022862368541380882)
(13, 0.022862368541380882)
(14, 0.022862368541380882)
(15, 0.047553726566072235)
(16, 0.035208047553726564)
(17, 0.022862368541380882)
(18, 0.035208047553726564)
(19, 0.035208047553726564)
(20, 0.022862368541380882)
(21, 0.022862368541380882)
(22, 0.022862368541380882)
(23, 0.25743026977594874)
(24, 0.022862368541380882)
(25, 0.022862368541380882)
(26, 0.022862368541380882)
Topic distributions for document 806
(0, 0.029394473838918293)
(1, 0.029394473838918293)
(2, 0.0293944738389182

(10, 0.04074074074074072)
(11, 0.04074074074074072)
(12, 0.04074074074074072)
(13, 0.09788359788359785)
(14, 0.04074074074074072)
(15, 0.026455026455026443)
(16, 0.026455026455026443)
(17, 0.055026455026455)
(18, 0.026455026455026443)
(19, 0.04074074074074072)
(20, 0.026455026455026443)
(21, 0.026455026455026443)
(22, 0.026455026455026443)
(23, 0.026455026455026443)
(24, 0.026455026455026443)
(25, 0.026455026455026443)
(26, 0.026455026455026443)
Topic distributions for document 859
(0, 0.023741690408357077)
(1, 0.0365622032288699)
(2, 0.023741690408357077)
(3, 0.023741690408357077)
(4, 0.023741690408357077)
(5, 0.0365622032288699)
(6, 0.04938271604938272)
(7, 0.0365622032288699)
(8, 0.04938271604938272)
(9, 0.07502374169040836)
(10, 0.0365622032288699)
(11, 0.0365622032288699)
(12, 0.11348528015194681)
(13, 0.023741690408357077)
(14, 0.023741690408357077)
(15, 0.0365622032288699)
(16, 0.023741690408357077)
(17, 0.0365622032288699)
(18, 0.0365622032288699)
(19, 0.06220322886989554)
(20,

(11, 0.02893518518518518)
(12, 0.02893518518518518)
(13, 0.02893518518518518)
(14, 0.02893518518518518)
(15, 0.02893518518518518)
(16, 0.02893518518518518)
(17, 0.04456018518518518)
(18, 0.02893518518518518)
(19, 0.060185185185185175)
(20, 0.04456018518518518)
(21, 0.13831018518518515)
(22, 0.02893518518518518)
(23, 0.02893518518518518)
(24, 0.02893518518518518)
(25, 0.04456018518518518)
(26, 0.02893518518518518)
Topic distributions for document 920
(0, 0.027233115468409595)
(1, 0.04193899782135077)
(2, 0.05664488017429196)
(3, 0.027233115468409595)
(4, 0.027233115468409595)
(5, 0.027233115468409595)
(6, 0.04193899782135077)
(7, 0.027233115468409595)
(8, 0.027233115468409595)
(9, 0.027233115468409595)
(10, 0.027233115468409595)
(11, 0.04193899782135077)
(12, 0.027233115468409595)
(13, 0.027233115468409595)
(14, 0.04193899782135077)
(15, 0.027233115468409595)
(16, 0.04193899782135077)
(17, 0.027233115468409595)
(18, 0.027233115468409595)
(19, 0.027233115468409595)
(20, 0.027233115468409

(9, 0.055026455026455)
(10, 0.055026455026455)
(11, 0.04074074074074072)
(12, 0.026455026455026443)
(13, 0.04074074074074072)
(14, 0.055026455026455)
(15, 0.026455026455026443)
(16, 0.04074074074074072)
(17, 0.04074074074074072)
(18, 0.026455026455026443)
(19, 0.026455026455026443)
(20, 0.026455026455026443)
(21, 0.026455026455026443)
(22, 0.026455026455026443)
(23, 0.026455026455026443)
(24, 0.026455026455026443)
(25, 0.04074074074074072)
(26, 0.04074074074074072)
Topic distributions for document 984
(0, 0.022862368541380886)
(1, 0.03520804755372657)
(2, 0.022862368541380886)
(3, 0.022862368541380886)
(4, 0.022862368541380886)
(5, 0.022862368541380886)
(6, 0.022862368541380886)
(7, 0.022862368541380886)
(8, 0.022862368541380886)
(9, 0.03520804755372657)
(10, 0.022862368541380886)
(11, 0.022862368541380886)
(12, 0.04755372656607225)
(13, 0.04755372656607225)
(14, 0.04755372656607225)
(15, 0.022862368541380886)
(16, 0.022862368541380886)
(17, 0.03520804755372657)
(18, 0.0228623685413808

(11, 0.020128824476650566)
(12, 0.020128824476650566)
(13, 0.020128824476650566)
(14, 0.020128824476650566)
(15, 0.020128824476650566)
(16, 0.020128824476650566)
(17, 0.020128824476650566)
(18, 0.4440418679549116)
(19, 0.020128824476650566)
(20, 0.020128824476650566)
(21, 0.020128824476650566)
(22, 0.020128824476650566)
(23, 0.020128824476650566)
(24, 0.020128824476650566)
(25, 0.020128824476650566)
(26, 0.020128824476650566)
Topic distributions for document 1049
(0, 0.028058361391694736)
(1, 0.028058361391694736)
(2, 0.028058361391694736)
(3, 0.028058361391694736)
(4, 0.028058361391694736)
(5, 0.028058361391694736)
(6, 0.043209876543209895)
(7, 0.028058361391694736)
(8, 0.028058361391694736)
(9, 0.22502805836139178)
(10, 0.028058361391694736)
(11, 0.028058361391694736)
(12, 0.028058361391694736)
(13, 0.028058361391694736)
(14, 0.028058361391694736)
(15, 0.028058361391694736)
(16, 0.028058361391694736)
(17, 0.028058361391694736)
(18, 0.028058361391694736)
(19, 0.028058361391694736)
(20

(26, 0.03192848020434229)
Topic distributions for document 1100
(0, 0.02178649237472767)
(1, 0.05708061002178649)
(2, 0.03355119825708061)
(3, 0.03355119825708061)
(4, 0.02178649237472767)
(5, 0.02178649237472767)
(6, 0.03355119825708061)
(7, 0.02178649237472767)
(8, 0.03355119825708061)
(9, 0.02178649237472767)
(10, 0.02178649237472767)
(11, 0.03355119825708061)
(12, 0.02178649237472767)
(13, 0.03355119825708061)
(14, 0.03355119825708061)
(15, 0.03355119825708061)
(16, 0.05708061002178649)
(17, 0.19825708061002179)
(18, 0.02178649237472767)
(19, 0.05708061002178649)
(20, 0.03355119825708061)
(21, 0.02178649237472767)
(22, 0.02178649237472767)
(23, 0.03355119825708061)
(24, 0.02178649237472767)
(25, 0.02178649237472767)
(26, 0.03355119825708061)
Topic distributions for document 1101
(0, 0.025025025025025033)
(1, 0.025025025025025033)
(2, 0.025025025025025033)
(3, 0.025025025025025033)
(4, 0.025025025025025033)
(5, 0.025025025025025033)
(6, 0.05205205205205207)
(7, 0.025025025025025033)

(23, 0.030864197530864185)
(24, 0.030864197530864185)
(25, 0.030864197530864185)
(26, 0.030864197530864185)
Topic distributions for document 1162
(0, 0.029868578255675036)
(1, 0.029868578255675036)
(2, 0.06212664277180408)
(3, 0.04599761051373956)
(4, 0.029868578255675036)
(5, 0.029868578255675036)
(6, 0.029868578255675036)
(7, 0.029868578255675036)
(8, 0.06212664277180408)
(9, 0.029868578255675036)
(10, 0.029868578255675036)
(11, 0.029868578255675036)
(12, 0.029868578255675036)
(13, 0.029868578255675036)
(14, 0.029868578255675036)
(15, 0.029868578255675036)
(16, 0.029868578255675036)
(17, 0.029868578255675036)
(18, 0.029868578255675036)
(19, 0.029868578255675036)
(20, 0.029868578255675036)
(21, 0.029868578255675036)
(22, 0.14277180406212667)
(23, 0.029868578255675036)
(24, 0.029868578255675036)
(25, 0.029868578255675036)
(26, 0.029868578255675036)
Topic distributions for document 1163
(0, 0.025025025025025033)
(1, 0.11961961961961964)
(2, 0.025025025025025033)
(3, 0.03853853853853855)

(1, 0.03960905349794239)
(2, 0.0257201646090535)
(3, 0.0257201646090535)
(4, 0.0257201646090535)
(5, 0.0257201646090535)
(6, 0.0257201646090535)
(7, 0.03960905349794239)
(8, 0.0257201646090535)
(9, 0.03960905349794239)
(10, 0.03960905349794239)
(11, 0.0257201646090535)
(12, 0.0257201646090535)
(13, 0.03960905349794239)
(14, 0.0257201646090535)
(15, 0.0257201646090535)
(16, 0.0257201646090535)
(17, 0.1646090534979424)
(18, 0.0257201646090535)
(19, 0.0257201646090535)
(20, 0.03960905349794239)
(21, 0.0257201646090535)
(22, 0.0257201646090535)
(23, 0.03960905349794239)
(24, 0.03960905349794239)
(25, 0.08127572016460906)
(26, 0.0257201646090535)
Topic distributions for document 1232
(0, 0.027233115468409595)
(1, 0.027233115468409595)
(2, 0.08605664488017431)
(3, 0.05664488017429196)
(4, 0.04193899782135077)
(5, 0.027233115468409595)
(6, 0.027233115468409595)
(7, 0.027233115468409595)
(8, 0.027233115468409595)
(9, 0.027233115468409595)
(10, 0.04193899782135077)
(11, 0.027233115468409595)
(1

(22, 0.05135802469135803)
(23, 0.024691358024691357)
(24, 0.03802469135802469)
(25, 0.024691358024691357)
(26, 0.03802469135802469)
Topic distributions for document 1287
(0, 0.09288653733098179)
(1, 0.02939447383891829)
(2, 0.045267489711934165)
(3, 0.02939447383891829)
(4, 0.02939447383891829)
(5, 0.02939447383891829)
(6, 0.02939447383891829)
(7, 0.02939447383891829)
(8, 0.02939447383891829)
(9, 0.02939447383891829)
(10, 0.02939447383891829)
(11, 0.02939447383891829)
(12, 0.02939447383891829)
(13, 0.02939447383891829)
(14, 0.045267489711934165)
(15, 0.02939447383891829)
(16, 0.02939447383891829)
(17, 0.02939447383891829)
(18, 0.02939447383891829)
(19, 0.02939447383891829)
(20, 0.02939447383891829)
(21, 0.045267489711934165)
(22, 0.045267489711934165)
(23, 0.02939447383891829)
(24, 0.07701352145796592)
(25, 0.061140505584950045)
(26, 0.02939447383891829)
Topic distributions for document 1288
(0, 0.02436647173489278)
(1, 0.02436647173489278)
(2, 0.02436647173489278)
(3, 0.03752436647173

Topic distributions for document 1352
(0, 0.027639579878385855)
(1, 0.027639579878385855)
(2, 0.042564953012714214)
(3, 0.05749032614704258)
(4, 0.027639579878385855)
(5, 0.027639579878385855)
(6, 0.027639579878385855)
(7, 0.027639579878385855)
(8, 0.027639579878385855)
(9, 0.042564953012714214)
(10, 0.027639579878385855)
(11, 0.027639579878385855)
(12, 0.027639579878385855)
(13, 0.042564953012714214)
(14, 0.027639579878385855)
(15, 0.027639579878385855)
(16, 0.07241569928137094)
(17, 0.027639579878385855)
(18, 0.027639579878385855)
(19, 0.027639579878385855)
(20, 0.027639579878385855)
(21, 0.027639579878385855)
(22, 0.027639579878385855)
(23, 0.042564953012714214)
(24, 0.027639579878385855)
(25, 0.10226644555002767)
(26, 0.07241569928137094)
Topic distributions for document 1353
(0, 0.0284900284900285)
(1, 0.0284900284900285)
(2, 0.0284900284900285)
(3, 0.05925925925925928)
(4, 0.07464387464387467)
(5, 0.10541310541310546)
(6, 0.0284900284900285)
(7, 0.04387464387464389)
(8, 0.0438746

Topic distributions for document 1410
(0, 0.023741690408357084)
(1, 0.023741690408357084)
(2, 0.023741690408357084)
(3, 0.03656220322886991)
(4, 0.25451092117758795)
(5, 0.023741690408357084)
(6, 0.023741690408357084)
(7, 0.023741690408357084)
(8, 0.023741690408357084)
(9, 0.023741690408357084)
(10, 0.023741690408357084)
(11, 0.07502374169040839)
(12, 0.023741690408357084)
(13, 0.023741690408357084)
(14, 0.023741690408357084)
(15, 0.023741690408357084)
(16, 0.023741690408357084)
(17, 0.03656220322886991)
(18, 0.023741690408357084)
(19, 0.023741690408357084)
(20, 0.023741690408357084)
(21, 0.023741690408357084)
(22, 0.023741690408357084)
(23, 0.03656220322886991)
(24, 0.023741690408357084)
(25, 0.06220322886989556)
(26, 0.023741690408357084)
Topic distributions for document 1411
(0, 0.02723311546840959)
(1, 0.02723311546840959)
(2, 0.02723311546840959)
(3, 0.02723311546840959)
(4, 0.02723311546840959)
(5, 0.02723311546840959)
(6, 0.02723311546840959)
(7, 0.02723311546840959)
(8, 0.08605

(9, 0.026838432635534086)
(10, 0.026838432635534086)
(11, 0.026838432635534086)
(12, 0.026838432635534086)
(13, 0.041331186258722494)
(14, 0.026838432635534086)
(15, 0.07031669350509931)
(16, 0.026838432635534086)
(17, 0.026838432635534086)
(18, 0.026838432635534086)
(19, 0.026838432635534086)
(20, 0.026838432635534086)
(21, 0.026838432635534086)
(22, 0.026838432635534086)
(23, 0.041331186258722494)
(24, 0.026838432635534086)
(25, 0.08480944712828771)
(26, 0.026838432635534086)
Topic distributions for document 1466
(0, 0.03138731952291275)
(1, 0.03138731952291275)
(2, 0.03138731952291275)
(3, 0.03138731952291275)
(4, 0.03138731952291275)
(5, 0.03138731952291275)
(6, 0.03138731952291275)
(7, 0.048336472065285636)
(8, 0.03138731952291275)
(9, 0.03138731952291275)
(10, 0.048336472065285636)
(11, 0.03138731952291275)
(12, 0.03138731952291275)
(13, 0.03138731952291275)
(14, 0.03138731952291275)
(15, 0.048336472065285636)
(16, 0.03138731952291275)
(17, 0.048336472065285636)
(18, 0.0313873195

Topic distributions for document 1527
(0, 0.025367833587011668)
(1, 0.025367833587011668)
(2, 0.025367833587011668)
(3, 0.025367833587011668)
(4, 0.20345002536783358)
(5, 0.025367833587011668)
(6, 0.025367833587011668)
(7, 0.025367833587011668)
(8, 0.025367833587011668)
(9, 0.025367833587011668)
(10, 0.025367833587011668)
(11, 0.10755961440892949)
(12, 0.025367833587011668)
(13, 0.025367833587011668)
(14, 0.025367833587011668)
(15, 0.052765093860984276)
(16, 0.025367833587011668)
(17, 0.025367833587011668)
(18, 0.025367833587011668)
(19, 0.025367833587011668)
(20, 0.025367833587011668)
(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.052765093860984276)
(24, 0.025367833587011668)
(25, 0.025367833587011668)
(26, 0.025367833587011668)
Topic distributions for document 1528
(0, 0.03192848020434229)
(1, 0.03192848020434229)
(2, 0.06641123882503196)
(3, 0.03192848020434229)
(4, 0.049169859514687116)
(5, 0.03192848020434229)
(6, 0.03192848020434229)
(7, 0.03192848020434229)
(8, 0.

(8, 0.02608242044861763)
(9, 0.02608242044861763)
(10, 0.040166927490871154)
(11, 0.05425143453312468)
(12, 0.02608242044861763)
(13, 0.02608242044861763)
(14, 0.02608242044861763)
(15, 0.02608242044861763)
(16, 0.0683359415753782)
(17, 0.02608242044861763)
(18, 0.02608242044861763)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.02608242044861763)
(22, 0.02608242044861763)
(23, 0.0683359415753782)
(24, 0.02608242044861763)
(25, 0.05425143453312468)
(26, 0.040166927490871154)
Topic distributions for document 1591
(0, 0.025720164609053502)
(1, 0.025720164609053502)
(2, 0.025720164609053502)
(3, 0.16460905349794241)
(4, 0.025720164609053502)
(5, 0.025720164609053502)
(6, 0.025720164609053502)
(7, 0.025720164609053502)
(8, 0.025720164609053502)
(9, 0.025720164609053502)
(10, 0.05349794238683129)
(11, 0.025720164609053502)
(12, 0.039609053497942394)
(13, 0.09516460905349795)
(14, 0.025720164609053502)
(15, 0.039609053497942394)
(16, 0.025720164609053502)
(17, 0.02572016460905350

Topic distributions for document 1651
(0, 0.026838432635534086)
(1, 0.026838432635534086)
(2, 0.026838432635534086)
(3, 0.026838432635534086)
(4, 0.07031669350509931)
(5, 0.026838432635534086)
(6, 0.026838432635534086)
(7, 0.026838432635534086)
(8, 0.026838432635534086)
(9, 0.18625872249060654)
(10, 0.026838432635534086)
(11, 0.0558239398819109)
(12, 0.026838432635534086)
(13, 0.026838432635534086)
(14, 0.026838432635534086)
(15, 0.026838432635534086)
(16, 0.026838432635534086)
(17, 0.026838432635534086)
(18, 0.026838432635534086)
(19, 0.026838432635534086)
(20, 0.041331186258722494)
(21, 0.041331186258722494)
(22, 0.026838432635534086)
(23, 0.041331186258722494)
(24, 0.026838432635534086)
(25, 0.026838432635534086)
(26, 0.026838432635534086)
Topic distributions for document 1652
(0, 0.0284900284900285)
(1, 0.0284900284900285)
(2, 0.05925925925925928)
(3, 0.0284900284900285)
(4, 0.0284900284900285)
(5, 0.0284900284900285)
(6, 0.04387464387464389)
(7, 0.04387464387464389)
(8, 0.02849002

(7, 0.04387464387464388)
(8, 0.028490028490028498)
(9, 0.028490028490028498)
(10, 0.059259259259259275)
(11, 0.028490028490028498)
(12, 0.028490028490028498)
(13, 0.04387464387464388)
(14, 0.028490028490028498)
(15, 0.04387464387464388)
(16, 0.059259259259259275)
(17, 0.028490028490028498)
(18, 0.028490028490028498)
(19, 0.04387464387464388)
(20, 0.028490028490028498)
(21, 0.04387464387464388)
(22, 0.028490028490028498)
(23, 0.028490028490028498)
(24, 0.028490028490028498)
(25, 0.028490028490028498)
(26, 0.04387464387464388)
Topic distributions for document 1716
(0, 0.02258355916892502)
(1, 0.2542908762420957)
(2, 0.02258355916892502)
(3, 0.046973803071364034)
(4, 0.02258355916892502)
(5, 0.02258355916892502)
(6, 0.02258355916892502)
(7, 0.02258355916892502)
(8, 0.02258355916892502)
(9, 0.02258355916892502)
(10, 0.02258355916892502)
(11, 0.03477868112014453)
(12, 0.03477868112014453)
(13, 0.03477868112014453)
(14, 0.02258355916892502)
(15, 0.03477868112014453)
(16, 0.02258355916892502)

Topic distributions for document 1782
(0, 0.026455026455026443)
(1, 0.04074074074074072)
(2, 0.026455026455026443)
(3, 0.026455026455026443)
(4, 0.055026455026455)
(5, 0.04074074074074072)
(6, 0.026455026455026443)
(7, 0.026455026455026443)
(8, 0.04074074074074072)
(9, 0.04074074074074072)
(10, 0.04074074074074072)
(11, 0.026455026455026443)
(12, 0.026455026455026443)
(13, 0.04074074074074072)
(14, 0.026455026455026443)
(15, 0.026455026455026443)
(16, 0.026455026455026443)
(17, 0.026455026455026443)
(18, 0.15502645502645496)
(19, 0.026455026455026443)
(20, 0.026455026455026443)
(21, 0.026455026455026443)
(22, 0.026455026455026443)
(23, 0.055026455026455)
(24, 0.04074074074074072)
(25, 0.026455026455026443)
(26, 0.026455026455026443)
Topic distributions for document 1783
(0, 0.023441162681669018)
(1, 0.048757618377871556)
(2, 0.09939052977027664)
(3, 0.036099390529770285)
(4, 0.023441162681669018)
(5, 0.036099390529770285)
(6, 0.023441162681669018)
(7, 0.023441162681669018)
(8, 0.023441

(6, 0.023441162681669018)
(7, 0.023441162681669018)
(8, 0.03609939052977029)
(9, 0.03609939052977029)
(10, 0.03609939052977029)
(11, 0.023441162681669018)
(12, 0.023441162681669018)
(13, 0.023441162681669018)
(14, 0.023441162681669018)
(15, 0.023441162681669018)
(16, 0.023441162681669018)
(17, 0.03609939052977029)
(18, 0.023441162681669018)
(19, 0.061415846225972834)
(20, 0.03609939052977029)
(21, 0.03609939052977029)
(22, 0.03609939052977029)
(23, 0.03609939052977029)
(24, 0.023441162681669018)
(25, 0.023441162681669018)
(26, 0.023441162681669018)
Topic distributions for document 1841
(0, 0.02893518518518518)
(1, 0.02893518518518518)
(2, 0.02893518518518518)
(3, 0.02893518518518518)
(4, 0.04456018518518518)
(5, 0.02893518518518518)
(6, 0.02893518518518518)
(7, 0.02893518518518518)
(8, 0.02893518518518518)
(9, 0.02893518518518518)
(10, 0.02893518518518518)
(11, 0.02893518518518518)
(12, 0.02893518518518518)
(13, 0.04456018518518518)
(14, 0.02893518518518518)
(15, 0.21643518518518512)
(

(6, 0.025367833587011668)
(7, 0.025367833587011668)
(8, 0.2993404363267377)
(9, 0.025367833587011668)
(10, 0.025367833587011668)
(11, 0.025367833587011668)
(12, 0.025367833587011668)
(13, 0.025367833587011668)
(14, 0.025367833587011668)
(15, 0.052765093860984276)
(16, 0.025367833587011668)
(17, 0.025367833587011668)
(18, 0.025367833587011668)
(19, 0.025367833587011668)
(20, 0.025367833587011668)
(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.025367833587011668)
(24, 0.025367833587011668)
(25, 0.039066463723997975)
(26, 0.025367833587011668)
Topic distributions for document 1904
(0, 0.039609053497942394)
(1, 0.025720164609053502)
(2, 0.039609053497942394)
(3, 0.025720164609053502)
(4, 0.039609053497942394)
(5, 0.025720164609053502)
(6, 0.06738683127572018)
(7, 0.025720164609053502)
(8, 0.025720164609053502)
(9, 0.025720164609053502)
(10, 0.025720164609053502)
(11, 0.039609053497942394)
(12, 0.025720164609053502)
(13, 0.025720164609053502)
(14, 0.12294238683127573)
(15, 0.0

(5, 0.02608242044861763)
(6, 0.040166927490871154)
(7, 0.040166927490871154)
(8, 0.02608242044861763)
(9, 0.02608242044861763)
(10, 0.02608242044861763)
(11, 0.02608242044861763)
(12, 0.040166927490871154)
(13, 0.040166927490871154)
(14, 0.02608242044861763)
(15, 0.02608242044861763)
(16, 0.02608242044861763)
(17, 0.22326551904016692)
(18, 0.02608242044861763)
(19, 0.040166927490871154)
(20, 0.02608242044861763)
(21, 0.02608242044861763)
(22, 0.02608242044861763)
(23, 0.02608242044861763)
(24, 0.02608242044861763)
(25, 0.02608242044861763)
(26, 0.040166927490871154)
Topic distributions for document 1966
(0, 0.025720164609053502)
(1, 0.025720164609053502)
(2, 0.025720164609053502)
(3, 0.05349794238683129)
(4, 0.025720164609053502)
(5, 0.09516460905349795)
(6, 0.025720164609053502)
(7, 0.025720164609053502)
(8, 0.025720164609053502)
(9, 0.039609053497942394)
(10, 0.05349794238683129)
(11, 0.05349794238683129)
(12, 0.039609053497942394)
(13, 0.025720164609053502)
(14, 0.025720164609053502

(12, 0.026455026455026447)
(13, 0.16931216931216928)
(14, 0.055026455026455014)
(15, 0.055026455026455014)
(16, 0.026455026455026447)
(17, 0.026455026455026447)
(18, 0.026455026455026447)
(19, 0.026455026455026447)
(20, 0.0693121693121693)
(21, 0.026455026455026447)
(22, 0.026455026455026447)
(23, 0.026455026455026447)
(24, 0.026455026455026447)
(25, 0.04074074074074074)
(26, 0.026455026455026447)
Topic distributions for document 2028
(0, 0.024691358024691357)
(1, 0.024691358024691357)
(2, 0.024691358024691357)
(3, 0.024691358024691357)
(4, 0.23802469135802468)
(5, 0.024691358024691357)
(6, 0.024691358024691357)
(7, 0.024691358024691357)
(8, 0.024691358024691357)
(9, 0.024691358024691357)
(10, 0.024691358024691357)
(11, 0.03802469135802469)
(12, 0.024691358024691357)
(13, 0.024691358024691357)
(14, 0.024691358024691357)
(15, 0.06469135802469136)
(16, 0.024691358024691357)
(17, 0.024691358024691357)
(18, 0.024691358024691357)
(19, 0.024691358024691357)
(20, 0.024691358024691357)
(21, 0.

(4, 0.025720164609053502)
(5, 0.039609053497942394)
(6, 0.025720164609053502)
(7, 0.025720164609053502)
(8, 0.025720164609053502)
(9, 0.025720164609053502)
(10, 0.025720164609053502)
(11, 0.05349794238683129)
(12, 0.025720164609053502)
(13, 0.039609053497942394)
(14, 0.025720164609053502)
(15, 0.05349794238683129)
(16, 0.039609053497942394)
(17, 0.025720164609053502)
(18, 0.039609053497942394)
(19, 0.025720164609053502)
(20, 0.05349794238683129)
(21, 0.025720164609053502)
(22, 0.039609053497942394)
(23, 0.17849794238683128)
(24, 0.025720164609053502)
(25, 0.025720164609053502)
(26, 0.025720164609053502)
Topic distributions for document 2091
(0, 0.028935185185185164)
(1, 0.028935185185185164)
(2, 0.028935185185185164)
(3, 0.028935185185185164)
(4, 0.028935185185185164)
(5, 0.028935185185185164)
(6, 0.10706018518518512)
(7, 0.028935185185185164)
(8, 0.028935185185185164)
(9, 0.1383101851851851)
(10, 0.028935185185185164)
(11, 0.028935185185185164)
(12, 0.028935185185185164)
(13, 0.028935

Topic distributions for document 2154
(0, 0.02436647173489278)
(1, 0.1033138401559454)
(2, 0.02436647173489278)
(3, 0.02436647173489278)
(4, 0.02436647173489278)
(5, 0.02436647173489278)
(6, 0.03752436647173489)
(7, 0.02436647173489278)
(8, 0.02436647173489278)
(9, 0.02436647173489278)
(10, 0.03752436647173489)
(11, 0.02436647173489278)
(12, 0.02436647173489278)
(13, 0.02436647173489278)
(14, 0.05068226120857699)
(15, 0.02436647173489278)
(16, 0.02436647173489278)
(17, 0.1954191033138401)
(18, 0.02436647173489278)
(19, 0.02436647173489278)
(20, 0.03752436647173489)
(21, 0.03752436647173489)
(22, 0.02436647173489278)
(23, 0.02436647173489278)
(24, 0.03752436647173489)
(25, 0.02436647173489278)
(26, 0.02436647173489278)
Topic distributions for document 2155
(0, 0.05003248862897986)
(1, 0.03248862897985705)
(2, 0.03248862897985705)
(3, 0.03248862897985705)
(4, 0.03248862897985705)
(5, 0.03248862897985705)
(6, 0.03248862897985705)
(7, 0.03248862897985705)
(8, 0.03248862897985705)
(9, 0.032

(3, 0.02893518518518517)
(4, 0.02893518518518517)
(5, 0.02893518518518517)
(6, 0.02893518518518517)
(7, 0.02893518518518517)
(8, 0.06018518518518516)
(9, 0.02893518518518517)
(10, 0.02893518518518517)
(11, 0.02893518518518517)
(12, 0.04456018518518517)
(13, 0.02893518518518517)
(14, 0.04456018518518517)
(15, 0.04456018518518517)
(16, 0.02893518518518517)
(17, 0.02893518518518517)
(18, 0.02893518518518517)
(19, 0.04456018518518517)
(20, 0.04456018518518517)
(21, 0.06018518518518516)
(22, 0.04456018518518517)
(23, 0.02893518518518517)
(24, 0.04456018518518517)
(25, 0.02893518518518517)
(26, 0.04456018518518517)
Topic distributions for document 2216
(0, 0.03192848020434229)
(1, 0.03192848020434229)
(2, 0.03192848020434229)
(3, 0.03192848020434229)
(4, 0.03192848020434229)
(5, 0.049169859514687116)
(6, 0.03192848020434229)
(7, 0.03192848020434229)
(8, 0.03192848020434229)
(9, 0.03192848020434229)
(10, 0.03192848020434229)
(11, 0.03192848020434229)
(12, 0.03192848020434229)
(13, 0.049169859

(9, 0.02436647173489278)
(10, 0.03752436647173489)
(11, 0.02436647173489278)
(12, 0.02436647173489278)
(13, 0.0769980506822612)
(14, 0.06384015594541909)
(15, 0.02436647173489278)
(16, 0.02436647173489278)
(17, 0.02436647173489278)
(18, 0.02436647173489278)
(19, 0.02436647173489278)
(20, 0.03752436647173489)
(21, 0.06384015594541909)
(22, 0.06384015594541909)
(23, 0.02436647173489278)
(24, 0.03752436647173489)
(25, 0.02436647173489278)
(26, 0.03752436647173489)
Topic distributions for document 2279
(0, 0.11896745230078568)
(1, 0.043209876543209895)
(2, 0.028058361391694736)
(3, 0.028058361391694736)
(4, 0.043209876543209895)
(5, 0.028058361391694736)
(6, 0.028058361391694736)
(7, 0.028058361391694736)
(8, 0.028058361391694736)
(9, 0.028058361391694736)
(10, 0.043209876543209895)
(11, 0.043209876543209895)
(12, 0.043209876543209895)
(13, 0.028058361391694736)
(14, 0.028058361391694736)
(15, 0.028058361391694736)
(16, 0.058361391694725054)
(17, 0.043209876543209895)
(18, 0.04320987654320

(2, 0.026838432635534086)
(3, 0.2877079978529254)
(4, 0.026838432635534086)
(5, 0.026838432635534086)
(6, 0.026838432635534086)
(7, 0.026838432635534086)
(8, 0.026838432635534086)
(9, 0.026838432635534086)
(10, 0.026838432635534086)
(11, 0.026838432635534086)
(12, 0.026838432635534086)
(13, 0.026838432635534086)
(14, 0.026838432635534086)
(15, 0.026838432635534086)
(16, 0.026838432635534086)
(17, 0.026838432635534086)
(18, 0.026838432635534086)
(19, 0.026838432635534086)
(20, 0.026838432635534086)
(21, 0.026838432635534086)
(22, 0.026838432635534086)
(23, 0.026838432635534086)
(24, 0.026838432635534086)
(25, 0.041331186258722494)
(26, 0.026838432635534086)
Topic distributions for document 2341
(0, 0.029868578255675033)
(1, 0.07825567502986859)
(2, 0.029868578255675033)
(3, 0.029868578255675033)
(4, 0.062126642771804075)
(5, 0.029868578255675033)
(6, 0.029868578255675033)
(7, 0.029868578255675033)
(8, 0.04599761051373956)
(9, 0.04599761051373956)
(10, 0.029868578255675033)
(11, 0.029868

Topic distributions for document 2402
(0, 0.025367833587011668)
(1, 0.025367833587011668)
(2, 0.025367833587011668)
(3, 0.2719431760527651)
(4, 0.025367833587011668)
(5, 0.025367833587011668)
(6, 0.025367833587011668)
(7, 0.025367833587011668)
(8, 0.025367833587011668)
(9, 0.025367833587011668)
(10, 0.025367833587011668)
(11, 0.025367833587011668)
(12, 0.025367833587011668)
(13, 0.08016235413495688)
(14, 0.025367833587011668)
(15, 0.039066463723997975)
(16, 0.025367833587011668)
(17, 0.025367833587011668)
(18, 0.025367833587011668)
(19, 0.025367833587011668)
(20, 0.025367833587011668)
(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.025367833587011668)
(24, 0.025367833587011668)
(25, 0.025367833587011668)
(26, 0.025367833587011668)
Topic distributions for document 2403
(0, 0.03192848020434229)
(1, 0.03192848020434229)
(2, 0.03192848020434229)
(3, 0.03192848020434229)
(4, 0.03192848020434229)
(5, 0.03192848020434229)
(6, 0.03192848020434229)
(7, 0.049169859514687116)
(8, 0.0

(1, 0.027639579878385855)
(2, 0.027639579878385855)
(3, 0.027639579878385855)
(4, 0.027639579878385855)
(5, 0.027639579878385855)
(6, 0.027639579878385855)
(7, 0.042564953012714214)
(8, 0.042564953012714214)
(9, 0.17689331122166946)
(10, 0.027639579878385855)
(11, 0.027639579878385855)
(12, 0.027639579878385855)
(13, 0.027639579878385855)
(14, 0.027639579878385855)
(15, 0.027639579878385855)
(16, 0.027639579878385855)
(17, 0.027639579878385855)
(18, 0.027639579878385855)
(19, 0.027639579878385855)
(20, 0.0873410724156993)
(21, 0.042564953012714214)
(22, 0.027639579878385855)
(23, 0.027639579878385855)
(24, 0.027639579878385855)
(25, 0.027639579878385855)
(26, 0.027639579878385855)
Topic distributions for document 2466
(0, 0.0257201646090535)
(1, 0.03960905349794239)
(2, 0.0257201646090535)
(3, 0.0257201646090535)
(4, 0.0257201646090535)
(5, 0.0257201646090535)
(6, 0.0257201646090535)
(7, 0.0257201646090535)
(8, 0.05349794238683128)
(9, 0.0257201646090535)
(10, 0.0257201646090535)
(11, 

Topic distributions for document 2519
(0, 0.0257201646090535)
(1, 0.0257201646090535)
(2, 0.0257201646090535)
(3, 0.0257201646090535)
(4, 0.0257201646090535)
(5, 0.0257201646090535)
(6, 0.0257201646090535)
(7, 0.03960905349794239)
(8, 0.0257201646090535)
(9, 0.03960905349794239)
(10, 0.0257201646090535)
(11, 0.0257201646090535)
(12, 0.0257201646090535)
(13, 0.0257201646090535)
(14, 0.03960905349794239)
(15, 0.05349794238683128)
(16, 0.0257201646090535)
(17, 0.03960905349794239)
(18, 0.17849794238683125)
(19, 0.0257201646090535)
(20, 0.0257201646090535)
(21, 0.0257201646090535)
(22, 0.0257201646090535)
(23, 0.03960905349794239)
(24, 0.0257201646090535)
(25, 0.06738683127572016)
(26, 0.03960905349794239)
Topic distributions for document 2520
(0, 0.026455026455026447)
(1, 0.026455026455026447)
(2, 0.026455026455026447)
(3, 0.026455026455026447)
(4, 0.026455026455026447)
(5, 0.026455026455026447)
(6, 0.04074074074074074)
(7, 0.026455026455026447)
(8, 0.04074074074074074)
(9, 0.026455026455

(11, 0.024366471734892776)
(12, 0.024366471734892776)
(13, 0.024366471734892776)
(14, 0.024366471734892776)
(15, 0.024366471734892776)
(16, 0.024366471734892776)
(17, 0.024366471734892776)
(18, 0.024366471734892776)
(19, 0.024366471734892776)
(20, 0.024366471734892776)
(21, 0.024366471734892776)
(22, 0.024366471734892776)
(23, 0.024366471734892776)
(24, 0.024366471734892776)
(25, 0.09015594541910328)
(26, 0.037524366471734884)
Topic distributions for document 2584
(0, 0.02986857825567504)
(1, 0.02986857825567504)
(2, 0.02986857825567504)
(3, 0.02986857825567504)
(4, 0.02986857825567504)
(5, 0.02986857825567504)
(6, 0.02986857825567504)
(7, 0.02986857825567504)
(8, 0.02986857825567504)
(9, 0.02986857825567504)
(10, 0.02986857825567504)
(11, 0.02986857825567504)
(12, 0.02986857825567504)
(13, 0.02986857825567504)
(14, 0.02986857825567504)
(15, 0.045997610513739566)
(16, 0.02986857825567504)
(17, 0.02986857825567504)
(18, 0.02986857825567504)
(19, 0.045997610513739566)
(20, 0.029868578255

(15, 0.02469135802469136)
(16, 0.02469135802469136)
(17, 0.051358024691358035)
(18, 0.06469135802469138)
(19, 0.02469135802469136)
(20, 0.02469135802469136)
(21, 0.02469135802469136)
(22, 0.02469135802469136)
(23, 0.0380246913580247)
(24, 0.02469135802469136)
(25, 0.07802469135802471)
(26, 0.02469135802469136)
Topic distributions for document 2644
(0, 0.02469135802469136)
(1, 0.02469135802469136)
(2, 0.02469135802469136)
(3, 0.02469135802469136)
(4, 0.02469135802469136)
(5, 0.21135802469135803)
(6, 0.02469135802469136)
(7, 0.02469135802469136)
(8, 0.0380246913580247)
(9, 0.07802469135802471)
(10, 0.02469135802469136)
(11, 0.02469135802469136)
(12, 0.06469135802469138)
(13, 0.0380246913580247)
(14, 0.02469135802469136)
(15, 0.0380246913580247)
(16, 0.02469135802469136)
(17, 0.02469135802469136)
(18, 0.02469135802469136)
(19, 0.02469135802469136)
(20, 0.02469135802469136)
(21, 0.02469135802469136)
(22, 0.02469135802469136)
(23, 0.0380246913580247)
(24, 0.02469135802469136)
(25, 0.0246913

Topic distributions for document 2709
(0, 0.02608242044861763)
(1, 0.02608242044861763)
(2, 0.02608242044861763)
(3, 0.02608242044861763)
(4, 0.02608242044861763)
(5, 0.02608242044861763)
(6, 0.02608242044861763)
(7, 0.02608242044861763)
(8, 0.05425143453312468)
(9, 0.040166927490871154)
(10, 0.02608242044861763)
(11, 0.02608242044861763)
(12, 0.02608242044861763)
(13, 0.11058946270213876)
(14, 0.12467396974439227)
(15, 0.08242044861763172)
(16, 0.02608242044861763)
(17, 0.02608242044861763)
(18, 0.02608242044861763)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.02608242044861763)
(22, 0.02608242044861763)
(23, 0.040166927490871154)
(24, 0.02608242044861763)
(25, 0.02608242044861763)
(26, 0.02608242044861763)
Topic distributions for document 2710
(0, 0.029394473838918293)
(1, 0.029394473838918293)
(2, 0.029394473838918293)
(3, 0.04526748971193417)
(4, 0.04526748971193417)
(5, 0.06114050558495005)
(6, 0.029394473838918293)
(7, 0.04526748971193417)
(8, 0.029394473838918293)


(14, 0.025367833587011668)
(15, 0.025367833587011668)
(16, 0.025367833587011668)
(17, 0.025367833587011668)
(18, 0.025367833587011668)
(19, 0.025367833587011668)
(20, 0.025367833587011668)
(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.025367833587011668)
(24, 0.025367833587011668)
(25, 0.052765093860984276)
(26, 0.052765093860984276)
Topic distributions for document 2769
(0, 0.046973803071364055)
(1, 0.0835591689250226)
(2, 0.046973803071364055)
(3, 0.022583559168925026)
(4, 0.022583559168925026)
(5, 0.022583559168925026)
(6, 0.034778681120144546)
(7, 0.022583559168925026)
(8, 0.046973803071364055)
(9, 0.022583559168925026)
(10, 0.022583559168925026)
(11, 0.022583559168925026)
(12, 0.022583559168925026)
(13, 0.022583559168925026)
(14, 0.09575429087624211)
(15, 0.046973803071364055)
(16, 0.034778681120144546)
(17, 0.022583559168925026)
(18, 0.022583559168925026)
(19, 0.034778681120144546)
(20, 0.046973803071364055)
(21, 0.09575429087624211)
(22, 0.022583559168925026)
(23,

(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.039066463723997975)
(24, 0.025367833587011668)
(25, 0.025367833587011668)
(26, 0.025367833587011668)
Topic distributions for document 2827
(0, 0.036099390529770285)
(1, 0.023441162681669014)
(2, 0.023441162681669014)
(3, 0.023441162681669014)
(4, 0.023441162681669014)
(5, 0.023441162681669014)
(6, 0.036099390529770285)
(7, 0.023441162681669014)
(8, 0.023441162681669014)
(9, 0.023441162681669014)
(10, 0.023441162681669014)
(11, 0.023441162681669014)
(12, 0.023441162681669014)
(13, 0.023441162681669014)
(14, 0.036099390529770285)
(15, 0.023441162681669014)
(16, 0.036099390529770285)
(17, 0.023441162681669014)
(18, 0.27660571964369435)
(19, 0.023441162681669014)
(20, 0.04875761837787155)
(21, 0.023441162681669014)
(22, 0.023441162681669014)
(23, 0.023441162681669014)
(24, 0.036099390529770285)
(25, 0.036099390529770285)
(26, 0.036099390529770285)
Topic distributions for document 2828
(0, 0.02986857825567504)
(1, 0.02986857825567

(2, 0.02763957987838585)
(3, 0.02763957987838585)
(4, 0.02763957987838585)
(5, 0.042564953012714214)
(6, 0.02763957987838585)
(7, 0.042564953012714214)
(8, 0.02763957987838585)
(9, 0.02763957987838585)
(10, 0.02763957987838585)
(11, 0.02763957987838585)
(12, 0.02763957987838585)
(13, 0.02763957987838585)
(14, 0.02763957987838585)
(15, 0.02763957987838585)
(16, 0.02763957987838585)
(17, 0.1918186843559978)
(18, 0.02763957987838585)
(19, 0.02763957987838585)
(20, 0.042564953012714214)
(21, 0.02763957987838585)
(22, 0.02763957987838585)
(23, 0.042564953012714214)
(24, 0.02763957987838585)
(25, 0.02763957987838585)
(26, 0.042564953012714214)
Topic distributions for document 2885
(0, 0.021786492374727674)
(1, 0.021786492374727674)
(2, 0.021786492374727674)
(3, 0.021786492374727674)
(4, 0.021786492374727674)
(5, 0.03355119825708062)
(6, 0.021786492374727674)
(7, 0.021786492374727674)
(8, 0.021786492374727674)
(9, 0.021786492374727674)
(10, 0.021786492374727674)
(11, 0.16296296296296298)
(12,

(1, 0.03802469135802469)
(2, 0.03802469135802469)
(3, 0.03802469135802469)
(4, 0.024691358024691357)
(5, 0.024691358024691357)
(6, 0.03802469135802469)
(7, 0.05135802469135803)
(8, 0.024691358024691357)
(9, 0.024691358024691357)
(10, 0.03802469135802469)
(11, 0.03802469135802469)
(12, 0.024691358024691357)
(13, 0.024691358024691357)
(14, 0.024691358024691357)
(15, 0.024691358024691357)
(16, 0.06469135802469136)
(17, 0.05135802469135803)
(18, 0.024691358024691357)
(19, 0.024691358024691357)
(20, 0.05135802469135803)
(21, 0.024691358024691357)
(22, 0.1580246913580247)
(23, 0.024691358024691357)
(24, 0.024691358024691357)
(25, 0.024691358024691357)
(26, 0.024691358024691357)
Topic distributions for document 2948
(0, 0.03494060097833682)
(1, 0.03494060097833682)
(2, 0.03494060097833682)
(3, 0.03494060097833682)
(4, 0.03494060097833682)
(5, 0.03494060097833682)
(6, 0.03494060097833682)
(7, 0.03494060097833682)
(8, 0.03494060097833682)
(9, 0.03494060097833682)
(10, 0.03494060097833682)
(11, 

(21, 0.04193899782135077)
(22, 0.027233115468409595)
(23, 0.027233115468409595)
(24, 0.05664488017429196)
(25, 0.027233115468409595)
(26, 0.027233115468409595)
Topic distributions for document 3006
(0, 0.03248862897985705)
(1, 0.03248862897985705)
(2, 0.10266406757634829)
(3, 0.03248862897985705)
(4, 0.03248862897985705)
(5, 0.03248862897985705)
(6, 0.03248862897985705)
(7, 0.03248862897985705)
(8, 0.03248862897985705)
(9, 0.03248862897985705)
(10, 0.03248862897985705)
(11, 0.03248862897985705)
(12, 0.03248862897985705)
(13, 0.03248862897985705)
(14, 0.03248862897985705)
(15, 0.03248862897985705)
(16, 0.03248862897985705)
(17, 0.03248862897985705)
(18, 0.03248862897985705)
(19, 0.05003248862897986)
(20, 0.03248862897985705)
(21, 0.05003248862897986)
(22, 0.03248862897985705)
(23, 0.03248862897985705)
(24, 0.05003248862897986)
(25, 0.03248862897985705)
(26, 0.03248862897985705)
Topic distributions for document 3007
(0, 0.034778681120144525)
(1, 0.059168925022583536)
(2, 0.02258355916892

(0, 0.027639579878385855)
(1, 0.027639579878385855)
(2, 0.13211719181868437)
(3, 0.027639579878385855)
(4, 0.027639579878385855)
(5, 0.027639579878385855)
(6, 0.027639579878385855)
(7, 0.027639579878385855)
(8, 0.05749032614704258)
(9, 0.027639579878385855)
(10, 0.042564953012714214)
(11, 0.027639579878385855)
(12, 0.027639579878385855)
(13, 0.07241569928137094)
(14, 0.027639579878385855)
(15, 0.027639579878385855)
(16, 0.027639579878385855)
(17, 0.042564953012714214)
(18, 0.027639579878385855)
(19, 0.027639579878385855)
(20, 0.027639579878385855)
(21, 0.027639579878385855)
(22, 0.05749032614704258)
(23, 0.027639579878385855)
(24, 0.027639579878385855)
(25, 0.042564953012714214)
(26, 0.027639579878385855)
Topic distributions for document 3073
(0, 0.028058361391694736)
(1, 0.028058361391694736)
(2, 0.028058361391694736)
(3, 0.17957351290684628)
(4, 0.028058361391694736)
(5, 0.028058361391694736)
(6, 0.028058361391694736)
(7, 0.0735129068462402)
(8, 0.028058361391694736)
(9, 0.0280583613

(0, 0.02608242044861763)
(1, 0.02608242044861763)
(2, 0.02608242044861763)
(3, 0.02608242044861763)
(4, 0.05425143453312468)
(5, 0.02608242044861763)
(6, 0.02608242044861763)
(7, 0.02608242044861763)
(8, 0.1950965049556599)
(9, 0.02608242044861763)
(10, 0.02608242044861763)
(11, 0.02608242044861763)
(12, 0.02608242044861763)
(13, 0.02608242044861763)
(14, 0.02608242044861763)
(15, 0.0683359415753782)
(16, 0.02608242044861763)
(17, 0.040166927490871154)
(18, 0.02608242044861763)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.02608242044861763)
(22, 0.02608242044861763)
(23, 0.040166927490871154)
(24, 0.02608242044861763)
(25, 0.05425143453312468)
(26, 0.02608242044861763)
Topic distributions for document 3132
(0, 0.026838432635534086)
(1, 0.026838432635534086)
(2, 0.026838432635534086)
(3, 0.026838432635534086)
(4, 0.026838432635534086)
(5, 0.026838432635534086)
(6, 0.041331186258722494)
(7, 0.026838432635534086)
(8, 0.026838432635534086)
(9, 0.026838432635534086)
(10, 0.026

Topic distributions for document 3197
(0, 0.02683843263553408)
(1, 0.0703166935050993)
(2, 0.02683843263553408)
(3, 0.02683843263553408)
(4, 0.04133118625872249)
(5, 0.02683843263553408)
(6, 0.02683843263553408)
(7, 0.04133118625872249)
(8, 0.02683843263553408)
(9, 0.02683843263553408)
(10, 0.02683843263553408)
(11, 0.02683843263553408)
(12, 0.02683843263553408)
(13, 0.09930220075147608)
(14, 0.02683843263553408)
(15, 0.0848094471282877)
(16, 0.02683843263553408)
(17, 0.05582393988191089)
(18, 0.02683843263553408)
(19, 0.02683843263553408)
(20, 0.04133118625872249)
(21, 0.02683843263553408)
(22, 0.02683843263553408)
(23, 0.02683843263553408)
(24, 0.02683843263553408)
(25, 0.05582393988191089)
(26, 0.02683843263553408)
Topic distributions for document 3198
(0, 0.02502502502502503)
(1, 0.02502502502502503)
(2, 0.02502502502502503)
(3, 0.1601601601601602)
(4, 0.02502502502502503)
(5, 0.0790790790790791)
(6, 0.02502502502502503)
(7, 0.02502502502502503)
(8, 0.02502502502502503)
(9, 0.02502

Topic distributions for document 3255
(0, 0.023741690408357084)
(1, 0.023741690408357084)
(2, 0.049382716049382734)
(3, 0.023741690408357084)
(4, 0.023741690408357084)
(5, 0.03656220322886991)
(6, 0.03656220322886991)
(7, 0.03656220322886991)
(8, 0.023741690408357084)
(9, 0.023741690408357084)
(10, 0.023741690408357084)
(11, 0.023741690408357084)
(12, 0.023741690408357084)
(13, 0.023741690408357084)
(14, 0.023741690408357084)
(15, 0.03656220322886991)
(16, 0.03656220322886991)
(17, 0.06220322886989556)
(18, 0.023741690408357084)
(19, 0.06220322886989556)
(20, 0.03656220322886991)
(21, 0.11348528015194685)
(22, 0.023741690408357084)
(23, 0.023741690408357084)
(24, 0.0878442545109212)
(25, 0.049382716049382734)
(26, 0.023741690408357084)
Topic distributions for document 3256
(0, 0.026838432635534086)
(1, 0.026838432635534086)
(2, 0.0558239398819109)
(3, 0.026838432635534086)
(4, 0.041331186258722494)
(5, 0.026838432635534086)
(6, 0.026838432635534086)
(7, 0.026838432635534086)
(8, 0.0413

(26, 0.028058361391694733)
Topic distributions for document 3322
(0, 0.04599761051373956)
(1, 0.029868578255675036)
(2, 0.029868578255675036)
(3, 0.04599761051373956)
(4, 0.029868578255675036)
(5, 0.029868578255675036)
(6, 0.06212664277180408)
(7, 0.04599761051373956)
(8, 0.06212664277180408)
(9, 0.029868578255675036)
(10, 0.029868578255675036)
(11, 0.029868578255675036)
(12, 0.029868578255675036)
(13, 0.029868578255675036)
(14, 0.029868578255675036)
(15, 0.029868578255675036)
(16, 0.04599761051373956)
(17, 0.029868578255675036)
(18, 0.029868578255675036)
(19, 0.029868578255675036)
(20, 0.029868578255675036)
(21, 0.04599761051373956)
(22, 0.029868578255675036)
(23, 0.04599761051373956)
(24, 0.029868578255675036)
(25, 0.04599761051373956)
(26, 0.04599761051373956)
Topic distributions for document 3323
(0, 0.02723311546840959)
(1, 0.07135076252723313)
(2, 0.02723311546840959)
(3, 0.02723311546840959)
(4, 0.02723311546840959)
(5, 0.04193899782135077)
(6, 0.02723311546840959)
(7, 0.0419389

(25, 0.026455026455026454)
(26, 0.026455026455026454)
Topic distributions for document 3382
(0, 0.024691358024691357)
(1, 0.03802469135802469)
(2, 0.024691358024691357)
(3, 0.024691358024691357)
(4, 0.024691358024691357)
(5, 0.05135802469135803)
(6, 0.024691358024691357)
(7, 0.024691358024691357)
(8, 0.03802469135802469)
(9, 0.024691358024691357)
(10, 0.024691358024691357)
(11, 0.024691358024691357)
(12, 0.024691358024691357)
(13, 0.024691358024691357)
(14, 0.024691358024691357)
(15, 0.024691358024691357)
(16, 0.05135802469135803)
(17, 0.03802469135802469)
(18, 0.05135802469135803)
(19, 0.0780246913580247)
(20, 0.03802469135802469)
(21, 0.0780246913580247)
(22, 0.0780246913580247)
(23, 0.024691358024691357)
(24, 0.03802469135802469)
(25, 0.024691358024691357)
(26, 0.05135802469135803)
Topic distributions for document 3383
(0, 0.026455026455026436)
(1, 0.026455026455026436)
(2, 0.026455026455026436)
(3, 0.24074074074074056)
(4, 0.026455026455026436)
(5, 0.026455026455026436)
(6, 0.02645

(24, 0.06528562460765852)
(25, 0.03138731952291275)
(26, 0.03138731952291275)
Topic distributions for document 3435
(0, 0.026455026455026447)
(1, 0.026455026455026447)
(2, 0.04074074074074074)
(3, 0.026455026455026447)
(4, 0.026455026455026447)
(5, 0.026455026455026447)
(6, 0.026455026455026447)
(7, 0.04074074074074074)
(8, 0.055026455026455014)
(9, 0.026455026455026447)
(10, 0.026455026455026447)
(11, 0.026455026455026447)
(12, 0.026455026455026447)
(13, 0.055026455026455014)
(14, 0.026455026455026447)
(15, 0.026455026455026447)
(16, 0.026455026455026447)
(17, 0.04074074074074074)
(18, 0.026455026455026447)
(19, 0.026455026455026447)
(20, 0.04074074074074074)
(21, 0.055026455026455014)
(22, 0.15502645502645498)
(23, 0.026455026455026447)
(24, 0.04074074074074074)
(25, 0.026455026455026447)
(26, 0.026455026455026447)
Topic distributions for document 3436
(0, 0.023741690408357084)
(1, 0.036562203228869904)
(2, 0.023741690408357084)
(3, 0.023741690408357084)
(4, 0.023741690408357084)
(5,

(12, 0.027639579878385858)
(13, 0.027639579878385858)
(14, 0.027639579878385858)
(15, 0.027639579878385858)
(16, 0.027639579878385858)
(17, 0.027639579878385858)
(18, 0.027639579878385858)
(19, 0.027639579878385858)
(20, 0.05749032614704259)
(21, 0.1619679380873411)
(22, 0.04256495301271422)
(23, 0.027639579878385858)
(24, 0.027639579878385858)
(25, 0.027639579878385858)
(26, 0.027639579878385858)
Topic distributions for document 3501
(0, 0.022311468094600623)
(1, 0.022311468094600623)
(2, 0.022311468094600623)
(3, 0.022311468094600623)
(4, 0.022311468094600623)
(5, 0.022311468094600623)
(6, 0.022311468094600623)
(7, 0.022311468094600623)
(8, 0.022311468094600623)
(9, 0.022311468094600623)
(10, 0.03435966086568496)
(11, 0.03435966086568496)
(12, 0.022311468094600623)
(13, 0.03435966086568496)
(14, 0.022311468094600623)
(15, 0.022311468094600623)
(16, 0.022311468094600623)
(17, 0.022311468094600623)
(18, 0.3596608656849621)
(19, 0.022311468094600623)
(20, 0.0464078536367693)
(21, 0.0223

Topic distributions for document 3561
(0, 0.03192848020434229)
(1, 0.049169859514687116)
(2, 0.049169859514687116)
(3, 0.03192848020434229)
(4, 0.03192848020434229)
(5, 0.03192848020434229)
(6, 0.06641123882503196)
(7, 0.049169859514687116)
(8, 0.03192848020434229)
(9, 0.03192848020434229)
(10, 0.03192848020434229)
(11, 0.03192848020434229)
(12, 0.03192848020434229)
(13, 0.03192848020434229)
(14, 0.03192848020434229)
(15, 0.03192848020434229)
(16, 0.03192848020434229)
(17, 0.03192848020434229)
(18, 0.03192848020434229)
(19, 0.03192848020434229)
(20, 0.03192848020434229)
(21, 0.03192848020434229)
(22, 0.03192848020434229)
(23, 0.049169859514687116)
(24, 0.03192848020434229)
(25, 0.06641123882503196)
(26, 0.03192848020434229)
Topic distributions for document 3562
(0, 0.025367833587011668)
(1, 0.08016235413495688)
(2, 0.025367833587011668)
(3, 0.025367833587011668)
(4, 0.025367833587011668)
(5, 0.039066463723997975)
(6, 0.039066463723997975)
(7, 0.025367833587011668)
(8, 0.039066463723997

(26, 0.02723311546840959)
Topic distributions for document 3623
(0, 0.02502502502502503)
(1, 0.02502502502502503)
(2, 0.02502502502502503)
(3, 0.02502502502502503)
(4, 0.02502502502502503)
(5, 0.02502502502502503)
(6, 0.02502502502502503)
(7, 0.03853853853853855)
(8, 0.02502502502502503)
(9, 0.03853853853853855)
(10, 0.02502502502502503)
(11, 0.02502502502502503)
(12, 0.052052052052052065)
(13, 0.0790790790790791)
(14, 0.1736736736736737)
(15, 0.06556556556556559)
(16, 0.02502502502502503)
(17, 0.02502502502502503)
(18, 0.02502502502502503)
(19, 0.02502502502502503)
(20, 0.02502502502502503)
(21, 0.03853853853853855)
(22, 0.02502502502502503)
(23, 0.02502502502502503)
(24, 0.02502502502502503)
(25, 0.03853853853853855)
(26, 0.02502502502502503)
Topic distributions for document 3624
(0, 0.02763957987838585)
(1, 0.02763957987838585)
(2, 0.02763957987838585)
(3, 0.042564953012714214)
(4, 0.02763957987838585)
(5, 0.02763957987838585)
(6, 0.057490326147042574)
(7, 0.02763957987838585)
(8, 0

Topic distributions for document 3679
(0, 0.05205205205205207)
(1, 0.03853853853853855)
(2, 0.09259259259259262)
(3, 0.025025025025025033)
(4, 0.025025025025025033)
(5, 0.025025025025025033)
(6, 0.025025025025025033)
(7, 0.025025025025025033)
(8, 0.025025025025025033)
(9, 0.025025025025025033)
(10, 0.025025025025025033)
(11, 0.025025025025025033)
(12, 0.025025025025025033)
(13, 0.025025025025025033)
(14, 0.13313313313313316)
(15, 0.025025025025025033)
(16, 0.025025025025025033)
(17, 0.025025025025025033)
(18, 0.03853853853853855)
(19, 0.10610610610610614)
(20, 0.025025025025025033)
(21, 0.025025025025025033)
(22, 0.025025025025025033)
(23, 0.025025025025025033)
(24, 0.03853853853853855)
(25, 0.025025025025025033)
(26, 0.025025025025025033)
Topic distributions for document 3680
(0, 0.03035822707953858)
(1, 0.03035822707953858)
(2, 0.03035822707953858)
(3, 0.03035822707953858)
(4, 0.03035822707953858)
(5, 0.12871888281724359)
(6, 0.03035822707953858)
(7, 0.06314511232544025)
(8, 0.046751

(9, 0.03429355281207132)
(10, 0.03429355281207132)
(11, 0.03429355281207132)
(12, 0.03429355281207132)
(13, 0.052812071330589835)
(14, 0.03429355281207132)
(15, 0.03429355281207132)
(16, 0.03429355281207132)
(17, 0.03429355281207132)
(18, 0.03429355281207132)
(19, 0.052812071330589835)
(20, 0.03429355281207132)
(21, 0.03429355281207132)
(22, 0.03429355281207132)
(23, 0.052812071330589835)
(24, 0.03429355281207132)
(25, 0.03429355281207132)
(26, 0.052812071330589835)
Topic distributions for document 3743
(0, 0.029394473838918293)
(1, 0.029394473838918293)
(2, 0.04526748971193417)
(3, 0.04526748971193417)
(4, 0.029394473838918293)
(5, 0.029394473838918293)
(6, 0.06114050558495005)
(7, 0.029394473838918293)
(8, 0.029394473838918293)
(9, 0.029394473838918293)
(10, 0.029394473838918293)
(11, 0.029394473838918293)
(12, 0.029394473838918293)
(13, 0.029394473838918293)
(14, 0.029394473838918293)
(15, 0.029394473838918293)
(16, 0.1563786008230453)
(17, 0.029394473838918293)
(18, 0.0293944738389

Topic distributions for document 3802
(0, 0.02683843263553408)
(1, 0.04133118625872249)
(2, 0.02683843263553408)
(3, 0.02683843263553408)
(4, 0.04133118625872249)
(5, 0.02683843263553408)
(6, 0.02683843263553408)
(7, 0.02683843263553408)
(8, 0.02683843263553408)
(9, 0.04133118625872249)
(10, 0.02683843263553408)
(11, 0.02683843263553408)
(12, 0.04133118625872249)
(13, 0.0703166935050993)
(14, 0.02683843263553408)
(15, 0.02683843263553408)
(16, 0.02683843263553408)
(17, 0.02683843263553408)
(18, 0.04133118625872249)
(19, 0.02683843263553408)
(20, 0.02683843263553408)
(21, 0.02683843263553408)
(22, 0.09930220075147608)
(23, 0.02683843263553408)
(24, 0.1137949543746645)
(25, 0.02683843263553408)
(26, 0.02683843263553408)
Topic distributions for document 3803
(0, 0.025025025025025033)
(1, 0.025025025025025033)
(2, 0.025025025025025033)
(3, 0.03853853853853855)
(4, 0.025025025025025033)
(5, 0.025025025025025033)
(6, 0.03853853853853855)
(7, 0.03853853853853855)
(8, 0.025025025025025033)
(9,

(5, 0.0558239398819109)
(6, 0.026838432635534086)
(7, 0.026838432635534086)
(8, 0.026838432635534086)
(9, 0.041331186258722494)
(10, 0.07031669350509931)
(11, 0.026838432635534086)
(12, 0.026838432635534086)
(13, 0.07031669350509931)
(14, 0.026838432635534086)
(15, 0.026838432635534086)
(16, 0.026838432635534086)
(17, 0.026838432635534086)
(18, 0.026838432635534086)
(19, 0.026838432635534086)
(20, 0.026838432635534086)
(21, 0.026838432635534086)
(22, 0.026838432635534086)
(23, 0.026838432635534086)
(24, 0.041331186258722494)
(25, 0.026838432635534086)
(26, 0.026838432635534086)
Topic distributions for document 3859
(0, 0.02939447383891829)
(1, 0.02939447383891829)
(2, 0.02939447383891829)
(3, 0.02939447383891829)
(4, 0.02939447383891829)
(5, 0.045267489711934165)
(6, 0.061140505584950045)
(7, 0.02939447383891829)
(8, 0.02939447383891829)
(9, 0.02939447383891829)
(10, 0.02939447383891829)
(11, 0.045267489711934165)
(12, 0.02939447383891829)
(13, 0.02939447383891829)
(14, 0.0452674897119

Topic distributions for document 3925
(0, 0.02893518518518517)
(1, 0.02893518518518517)
(2, 0.02893518518518517)
(3, 0.02893518518518517)
(4, 0.06018518518518516)
(5, 0.02893518518518517)
(6, 0.02893518518518517)
(7, 0.02893518518518517)
(8, 0.07581018518518516)
(9, 0.06018518518518516)
(10, 0.04456018518518517)
(11, 0.02893518518518517)
(12, 0.02893518518518517)
(13, 0.02893518518518517)
(14, 0.02893518518518517)
(15, 0.02893518518518517)
(16, 0.02893518518518517)
(17, 0.02893518518518517)
(18, 0.02893518518518517)
(19, 0.04456018518518517)
(20, 0.02893518518518517)
(21, 0.04456018518518517)
(22, 0.06018518518518516)
(23, 0.02893518518518517)
(24, 0.02893518518518517)
(25, 0.04456018518518517)
(26, 0.04456018518518517)
Topic distributions for document 3926
(0, 0.023741690408357084)
(1, 0.03656220322886991)
(2, 0.07502374169040839)
(3, 0.03656220322886991)
(4, 0.049382716049382734)
(5, 0.1391263057929725)
(6, 0.023741690408357084)
(7, 0.023741690408357084)
(8, 0.023741690408357084)
(9,

(12, 0.2006563525550868)
(13, 0.036099390529770285)
(14, 0.023441162681669018)
(15, 0.036099390529770285)
(16, 0.036099390529770285)
(17, 0.036099390529770285)
(18, 0.023441162681669018)
(19, 0.036099390529770285)
(20, 0.023441162681669018)
(21, 0.023441162681669018)
(22, 0.023441162681669018)
(23, 0.023441162681669018)
(24, 0.023441162681669018)
(25, 0.036099390529770285)
(26, 0.023441162681669018)
Topic distributions for document 3983
(0, 0.026455026455026443)
(1, 0.026455026455026443)
(2, 0.055026455026455)
(3, 0.026455026455026443)
(4, 0.04074074074074072)
(5, 0.026455026455026443)
(6, 0.11216931216931211)
(7, 0.04074074074074072)
(8, 0.04074074074074072)
(9, 0.04074074074074072)
(10, 0.026455026455026443)
(11, 0.026455026455026443)
(12, 0.026455026455026443)
(13, 0.026455026455026443)
(14, 0.026455026455026443)
(15, 0.04074074074074072)
(16, 0.026455026455026443)
(17, 0.04074074074074072)
(18, 0.04074074074074072)
(19, 0.055026455026455)
(20, 0.026455026455026443)
(21, 0.026455026

(25, 0.03703703703703703)
(26, 0.02405002405002404)
Topic distributions for document 4050
(0, 0.031387319522912745)
(1, 0.031387319522912745)
(2, 0.031387319522912745)
(3, 0.031387319522912745)
(4, 0.031387319522912745)
(5, 0.09918392969240428)
(6, 0.031387319522912745)
(7, 0.031387319522912745)
(8, 0.06528562460765851)
(9, 0.031387319522912745)
(10, 0.04833647206528563)
(11, 0.031387319522912745)
(12, 0.04833647206528563)
(13, 0.031387319522912745)
(14, 0.031387319522912745)
(15, 0.031387319522912745)
(16, 0.031387319522912745)
(17, 0.031387319522912745)
(18, 0.031387319522912745)
(19, 0.031387319522912745)
(20, 0.031387319522912745)
(21, 0.031387319522912745)
(22, 0.031387319522912745)
(23, 0.031387319522912745)
(24, 0.04833647206528563)
(25, 0.031387319522912745)
(26, 0.031387319522912745)
Topic distributions for document 4051
(0, 0.03520804755372656)
(1, 0.20804755372656597)
(2, 0.022862368541380875)
(3, 0.022862368541380875)
(4, 0.022862368541380875)
(5, 0.022862368541380875)
(6, 

(11, 0.02893518518518517)
(12, 0.02893518518518517)
(13, 0.02893518518518517)
(14, 0.02893518518518517)
(15, 0.04456018518518517)
(16, 0.02893518518518517)
(17, 0.02893518518518517)
(18, 0.02893518518518517)
(19, 0.04456018518518517)
(20, 0.04456018518518517)
(21, 0.02893518518518517)
(22, 0.10706018518518515)
(23, 0.02893518518518517)
(24, 0.09143518518518515)
(25, 0.02893518518518517)
(26, 0.02893518518518517)
Topic distributions for document 4108
(0, 0.02608242044861763)
(1, 0.02608242044861763)
(2, 0.02608242044861763)
(3, 0.02608242044861763)
(4, 0.02608242044861763)
(5, 0.02608242044861763)
(6, 0.0683359415753782)
(7, 0.02608242044861763)
(8, 0.02608242044861763)
(9, 0.02608242044861763)
(10, 0.02608242044861763)
(11, 0.22326551904016692)
(12, 0.0683359415753782)
(13, 0.02608242044861763)
(14, 0.02608242044861763)
(15, 0.02608242044861763)
(16, 0.02608242044861763)
(17, 0.02608242044861763)
(18, 0.040166927490871154)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.0260

Topic distributions for document 4166
(0, 0.06018518518518516)
(1, 0.02893518518518517)
(2, 0.07581018518518516)
(3, 0.02893518518518517)
(4, 0.02893518518518517)
(5, 0.02893518518518517)
(6, 0.02893518518518517)
(7, 0.02893518518518517)
(8, 0.02893518518518517)
(9, 0.02893518518518517)
(10, 0.02893518518518517)
(11, 0.02893518518518517)
(12, 0.02893518518518517)
(13, 0.02893518518518517)
(14, 0.02893518518518517)
(15, 0.02893518518518517)
(16, 0.04456018518518517)
(17, 0.02893518518518517)
(18, 0.02893518518518517)
(19, 0.02893518518518517)
(20, 0.02893518518518517)
(21, 0.07581018518518516)
(22, 0.06018518518518516)
(23, 0.04456018518518517)
(24, 0.04456018518518517)
(25, 0.04456018518518517)
(26, 0.02893518518518517)
Topic distributions for document 4167
(0, 0.02405002405002404)
(1, 0.03703703703703703)
(2, 0.03703703703703703)
(3, 0.02405002405002404)
(4, 0.02405002405002404)
(5, 0.02405002405002404)
(6, 0.02405002405002404)
(7, 0.03703703703703703)
(8, 0.02405002405002404)
(9, 0.0

(10, 0.022583559168925012)
(11, 0.022583559168925012)
(12, 0.034778681120144525)
(13, 0.022583559168925012)
(14, 0.022583559168925012)
(15, 0.022583559168925012)
(16, 0.022583559168925012)
(17, 0.022583559168925012)
(18, 0.022583559168925012)
(19, 0.022583559168925012)
(20, 0.022583559168925012)
(21, 0.022583559168925012)
(22, 0.059168925022583536)
(23, 0.07136404697380305)
(24, 0.04697380307136403)
(25, 0.034778681120144525)
(26, 0.022583559168925012)
Topic distributions for document 4233
(0, 0.027639579878385848)
(1, 0.04256495301271421)
(2, 0.027639579878385848)
(3, 0.04256495301271421)
(4, 0.04256495301271421)
(5, 0.027639579878385848)
(6, 0.027639579878385848)
(7, 0.04256495301271421)
(8, 0.04256495301271421)
(9, 0.04256495301271421)
(10, 0.027639579878385848)
(11, 0.027639579878385848)
(12, 0.027639579878385848)
(13, 0.027639579878385848)
(14, 0.04256495301271421)
(15, 0.027639579878385848)
(16, 0.07241569928137093)
(17, 0.027639579878385848)
(18, 0.027639579878385848)
(19, 0.027

(6, 0.022862368541380882)
(7, 0.022862368541380882)
(8, 0.022862368541380882)
(9, 0.022862368541380882)
(10, 0.022862368541380882)
(11, 0.022862368541380882)
(12, 0.022862368541380882)
(13, 0.022862368541380882)
(14, 0.022862368541380882)
(15, 0.022862368541380882)
(16, 0.022862368541380882)
(17, 0.022862368541380882)
(18, 0.022862368541380882)
(19, 0.07224508459076359)
(20, 0.022862368541380882)
(21, 0.035208047553726564)
(22, 0.022862368541380882)
(23, 0.022862368541380882)
(24, 0.035208047553726564)
(25, 0.022862368541380882)
(26, 0.022862368541380882)
Topic distributions for document 4292
(0, 0.02893518518518517)
(1, 0.04456018518518517)
(2, 0.02893518518518517)
(3, 0.02893518518518517)
(4, 0.02893518518518517)
(5, 0.02893518518518517)
(6, 0.02893518518518517)
(7, 0.02893518518518517)
(8, 0.07581018518518516)
(9, 0.02893518518518517)
(10, 0.02893518518518517)
(11, 0.02893518518518517)
(12, 0.02893518518518517)
(13, 0.06018518518518516)
(14, 0.09143518518518515)
(15, 0.0289351851851

Topic distributions for document 4355
(0, 0.05135802469135804)
(1, 0.0380246913580247)
(2, 0.0380246913580247)
(3, 0.024691358024691364)
(4, 0.0380246913580247)
(5, 0.024691358024691364)
(6, 0.024691358024691364)
(7, 0.024691358024691364)
(8, 0.024691358024691364)
(9, 0.024691358024691364)
(10, 0.024691358024691364)
(11, 0.024691358024691364)
(12, 0.024691358024691364)
(13, 0.024691358024691364)
(14, 0.15802469135802472)
(15, 0.024691358024691364)
(16, 0.024691358024691364)
(17, 0.0380246913580247)
(18, 0.024691358024691364)
(19, 0.09135802469135804)
(20, 0.024691358024691364)
(21, 0.024691358024691364)
(22, 0.024691358024691364)
(23, 0.024691358024691364)
(24, 0.0380246913580247)
(25, 0.0380246913580247)
(26, 0.05135802469135804)
Topic distributions for document 4356
(0, 0.02405002405002404)
(1, 0.02405002405002404)
(2, 0.02405002405002404)
(3, 0.02405002405002404)
(4, 0.02405002405002404)
(5, 0.02405002405002404)
(6, 0.02405002405002404)
(7, 0.050024050024050006)
(8, 0.03703703703703

(10, 0.061140505584950045)
(11, 0.02939447383891829)
(12, 0.02939447383891829)
(13, 0.045267489711934165)
(14, 0.02939447383891829)
(15, 0.02939447383891829)
(16, 0.02939447383891829)
(17, 0.02939447383891829)
(18, 0.02939447383891829)
(19, 0.02939447383891829)
(20, 0.045267489711934165)
(21, 0.02939447383891829)
(22, 0.02939447383891829)
(23, 0.02939447383891829)
(24, 0.061140505584950045)
(25, 0.045267489711934165)
(26, 0.045267489711934165)
Topic distributions for document 4410
(0, 0.026455026455026447)
(1, 0.026455026455026447)
(2, 0.055026455026455014)
(3, 0.026455026455026447)
(4, 0.026455026455026447)
(5, 0.04074074074074074)
(6, 0.04074074074074074)
(7, 0.08359788359788359)
(8, 0.026455026455026447)
(9, 0.026455026455026447)
(10, 0.026455026455026447)
(11, 0.026455026455026447)
(12, 0.026455026455026447)
(13, 0.026455026455026447)
(14, 0.026455026455026447)
(15, 0.026455026455026447)
(16, 0.026455026455026447)
(17, 0.026455026455026447)
(18, 0.04074074074074074)
(19, 0.11216931

Topic distributions for document 4479
(0, 0.027233115468409595)
(1, 0.027233115468409595)
(2, 0.07135076252723313)
(3, 0.027233115468409595)
(4, 0.04193899782135077)
(5, 0.027233115468409595)
(6, 0.027233115468409595)
(7, 0.04193899782135077)
(8, 0.04193899782135077)
(9, 0.027233115468409595)
(10, 0.04193899782135077)
(11, 0.027233115468409595)
(12, 0.027233115468409595)
(13, 0.027233115468409595)
(14, 0.04193899782135077)
(15, 0.027233115468409595)
(16, 0.027233115468409595)
(17, 0.027233115468409595)
(18, 0.027233115468409595)
(19, 0.08605664488017431)
(20, 0.027233115468409595)
(21, 0.027233115468409595)
(22, 0.04193899782135077)
(23, 0.027233115468409595)
(24, 0.1007625272331155)
(25, 0.027233115468409595)
(26, 0.027233115468409595)
Topic distributions for document 4480
(0, 0.030864197530864185)
(1, 0.030864197530864185)
(2, 0.030864197530864185)
(3, 0.030864197530864185)
(4, 0.030864197530864185)
(5, 0.030864197530864185)
(6, 0.047530864197530845)
(7, 0.030864197530864185)
(8, 0.0

(24, 0.029868578255675033)
(25, 0.029868578255675033)
(26, 0.029868578255675033)
Topic distributions for document 4536
(0, 0.02536783358701166)
(1, 0.02536783358701166)
(2, 0.02536783358701166)
(3, 0.02536783358701166)
(4, 0.02536783358701166)
(5, 0.02536783358701166)
(6, 0.05276509386098426)
(7, 0.02536783358701166)
(8, 0.03906646372399797)
(9, 0.02536783358701166)
(10, 0.03906646372399797)
(11, 0.02536783358701166)
(12, 0.02536783358701166)
(13, 0.02536783358701166)
(14, 0.02536783358701166)
(15, 0.02536783358701166)
(16, 0.03906646372399797)
(17, 0.02536783358701166)
(18, 0.02536783358701166)
(19, 0.03906646372399797)
(20, 0.02536783358701166)
(21, 0.09386098427194316)
(22, 0.02536783358701166)
(23, 0.02536783358701166)
(24, 0.03906646372399797)
(25, 0.05276509386098426)
(26, 0.14865550481988835)
Topic distributions for document 4537
(0, 0.04320987654320989)
(1, 0.028058361391694733)
(2, 0.028058361391694733)
(3, 0.028058361391694733)
(4, 0.028058361391694733)
(5, 0.0280583613916947

(21, 0.030358227079538575)
(22, 0.04675166970248941)
(23, 0.04675166970248941)
(24, 0.030358227079538575)
(25, 0.04675166970248941)
(26, 0.030358227079538575)
Topic distributions for document 4595
(0, 0.025367833587011668)
(1, 0.025367833587011668)
(2, 0.025367833587011668)
(3, 0.039066463723997975)
(4, 0.025367833587011668)
(5, 0.025367833587011668)
(6, 0.025367833587011668)
(7, 0.025367833587011668)
(8, 0.13495687468290207)
(9, 0.025367833587011668)
(10, 0.025367833587011668)
(11, 0.039066463723997975)
(12, 0.025367833587011668)
(13, 0.025367833587011668)
(14, 0.08016235413495688)
(15, 0.039066463723997975)
(16, 0.025367833587011668)
(17, 0.025367833587011668)
(18, 0.025367833587011668)
(19, 0.025367833587011668)
(20, 0.039066463723997975)
(21, 0.025367833587011668)
(22, 0.025367833587011668)
(23, 0.025367833587011668)
(24, 0.025367833587011668)
(25, 0.10755961440892949)
(26, 0.039066463723997975)
Topic distributions for document 4596
(0, 0.02608242044861763)
(1, 0.02608242044861763)

(24, 0.025367833587011668)
(25, 0.025367833587011668)
(26, 0.039066463723997975)
Topic distributions for document 4659
(0, 0.027639579878385855)
(1, 0.042564953012714214)
(2, 0.027639579878385855)
(3, 0.027639579878385855)
(4, 0.027639579878385855)
(5, 0.027639579878385855)
(6, 0.042564953012714214)
(7, 0.027639579878385855)
(8, 0.027639579878385855)
(9, 0.042564953012714214)
(10, 0.027639579878385855)
(11, 0.10226644555002767)
(12, 0.13211719181868437)
(13, 0.027639579878385855)
(14, 0.027639579878385855)
(15, 0.027639579878385855)
(16, 0.027639579878385855)
(17, 0.027639579878385855)
(18, 0.027639579878385855)
(19, 0.042564953012714214)
(20, 0.027639579878385855)
(21, 0.027639579878385855)
(22, 0.027639579878385855)
(23, 0.027639579878385855)
(24, 0.027639579878385855)
(25, 0.027639579878385855)
(26, 0.042564953012714214)
Topic distributions for document 4660
(0, 0.024691358024691357)
(1, 0.024691358024691357)
(2, 0.024691358024691357)
(3, 0.024691358024691357)
(4, 0.0246913580246913

Topic distributions for document 4720
(0, 0.02939447383891829)
(1, 0.02939447383891829)
(2, 0.045267489711934165)
(3, 0.02939447383891829)
(4, 0.02939447383891829)
(5, 0.02939447383891829)
(6, 0.02939447383891829)
(7, 0.02939447383891829)
(8, 0.02939447383891829)
(9, 0.02939447383891829)
(10, 0.02939447383891829)
(11, 0.02939447383891829)
(12, 0.02939447383891829)
(13, 0.02939447383891829)
(14, 0.02939447383891829)
(15, 0.02939447383891829)
(16, 0.02939447383891829)
(17, 0.02939447383891829)
(18, 0.02939447383891829)
(19, 0.02939447383891829)
(20, 0.02939447383891829)
(21, 0.02939447383891829)
(22, 0.15637860082304528)
(23, 0.045267489711934165)
(24, 0.045267489711934165)
(25, 0.02939447383891829)
(26, 0.061140505584950045)
Topic distributions for document 4721
(0, 0.03192848020434228)
(1, 0.03192848020434228)
(2, 0.049169859514687116)
(3, 0.03192848020434228)
(4, 0.03192848020434228)
(5, 0.03192848020434228)
(6, 0.03192848020434228)
(7, 0.03192848020434228)
(8, 0.03192848020434228)
(9

(22, 0.021786492374727674)
(23, 0.021786492374727674)
(24, 0.021786492374727674)
(25, 0.04531590413943356)
(26, 0.021786492374727674)
Topic distributions for document 4786
(0, 0.02893518518518517)
(1, 0.02893518518518517)
(2, 0.07581018518518516)
(3, 0.02893518518518517)
(4, 0.02893518518518517)
(5, 0.02893518518518517)
(6, 0.02893518518518517)
(7, 0.02893518518518517)
(8, 0.02893518518518517)
(9, 0.04456018518518517)
(10, 0.02893518518518517)
(11, 0.02893518518518517)
(12, 0.02893518518518517)
(13, 0.02893518518518517)
(14, 0.02893518518518517)
(15, 0.02893518518518517)
(16, 0.04456018518518517)
(17, 0.04456018518518517)
(18, 0.02893518518518517)
(19, 0.04456018518518517)
(20, 0.02893518518518517)
(21, 0.10706018518518515)
(22, 0.04456018518518517)
(23, 0.04456018518518517)
(24, 0.02893518518518517)
(25, 0.02893518518518517)
(26, 0.02893518518518517)
Topic distributions for document 4787
(0, 0.02344116268166903)
(1, 0.02344116268166903)
(2, 0.0360993905297703)
(3, 0.02344116268166903)

(4, 0.0284900284900285)
(5, 0.0284900284900285)
(6, 0.0284900284900285)
(7, 0.0284900284900285)
(8, 0.0284900284900285)
(9, 0.0284900284900285)
(10, 0.04387464387464389)
(11, 0.0284900284900285)
(12, 0.0284900284900285)
(13, 0.0284900284900285)
(14, 0.04387464387464389)
(15, 0.0284900284900285)
(16, 0.05925925925925928)
(17, 0.0284900284900285)
(18, 0.0284900284900285)
(19, 0.0284900284900285)
(20, 0.0284900284900285)
(21, 0.0284900284900285)
(22, 0.166951566951567)
(23, 0.04387464387464389)
(24, 0.0284900284900285)
(25, 0.0284900284900285)
(26, 0.0284900284900285)
Topic distributions for document 4851
(0, 0.024050024050024044)
(1, 0.024050024050024044)
(2, 0.024050024050024044)
(3, 0.024050024050024044)
(4, 0.05002405002405001)
(5, 0.024050024050024044)
(6, 0.05002405002405001)
(7, 0.024050024050024044)
(8, 0.024050024050024044)
(9, 0.024050024050024044)
(10, 0.024050024050024044)
(11, 0.024050024050024044)
(12, 0.024050024050024044)
(13, 0.024050024050024044)
(14, 0.03703703703703703

(1, 0.02986857825567504)
(2, 0.045997610513739566)
(3, 0.02986857825567504)
(4, 0.045997610513739566)
(5, 0.09438470728793312)
(6, 0.02986857825567504)
(7, 0.02986857825567504)
(8, 0.02986857825567504)
(9, 0.02986857825567504)
(10, 0.06212664277180409)
(11, 0.02986857825567504)
(12, 0.02986857825567504)
(13, 0.06212664277180409)
(14, 0.02986857825567504)
(15, 0.02986857825567504)
(16, 0.045997610513739566)
(17, 0.02986857825567504)
(18, 0.02986857825567504)
(19, 0.02986857825567504)
(20, 0.02986857825567504)
(21, 0.02986857825567504)
(22, 0.02986857825567504)
(23, 0.02986857825567504)
(24, 0.02986857825567504)
(25, 0.045997610513739566)
(26, 0.02986857825567504)
Topic distributions for document 4909
(0, 0.039609053497942394)
(1, 0.025720164609053502)
(2, 0.039609053497942394)
(3, 0.025720164609053502)
(4, 0.025720164609053502)
(5, 0.025720164609053502)
(6, 0.025720164609053502)
(7, 0.025720164609053502)
(8, 0.10905349794238685)
(9, 0.05349794238683129)
(10, 0.025720164609053502)
(11, 0

(12, 0.028058361391694733)
(13, 0.028058361391694733)
(14, 0.028058361391694733)
(15, 0.028058361391694733)
(16, 0.07351290684624019)
(17, 0.028058361391694733)
(18, 0.028058361391694733)
(19, 0.028058361391694733)
(20, 0.028058361391694733)
(21, 0.028058361391694733)
(22, 0.028058361391694733)
(23, 0.04320987654320989)
(24, 0.028058361391694733)
(25, 0.028058361391694733)
(26, 0.04320987654320989)
Topic distributions for document 4964
(0, 0.02608242044861763)
(1, 0.02608242044861763)
(2, 0.02608242044861763)
(3, 0.02608242044861763)
(4, 0.0683359415753782)
(5, 0.02608242044861763)
(6, 0.02608242044861763)
(7, 0.02608242044861763)
(8, 0.02608242044861763)
(9, 0.02608242044861763)
(10, 0.02608242044861763)
(11, 0.040166927490871154)
(12, 0.02608242044861763)
(13, 0.02608242044861763)
(14, 0.02608242044861763)
(15, 0.08242044861763172)
(16, 0.02608242044861763)
(17, 0.02608242044861763)
(18, 0.02608242044861763)
(19, 0.02608242044861763)
(20, 0.02608242044861763)
(21, 0.02608242044861763

(23, 0.10905349794238685)
(24, 0.06738683127572018)
(25, 0.025720164609053502)
(26, 0.025720164609053502)
Topic distributions for document 5026
(0, 0.027233115468409598)
(1, 0.027233115468409598)
(2, 0.04193899782135078)
(3, 0.04193899782135078)
(4, 0.027233115468409598)
(5, 0.13017429193899788)
(6, 0.027233115468409598)
(7, 0.027233115468409598)
(8, 0.027233115468409598)
(9, 0.027233115468409598)
(10, 0.027233115468409598)
(11, 0.027233115468409598)
(12, 0.027233115468409598)
(13, 0.056644880174291964)
(14, 0.027233115468409598)
(15, 0.027233115468409598)
(16, 0.027233115468409598)
(17, 0.027233115468409598)
(18, 0.027233115468409598)
(19, 0.027233115468409598)
(20, 0.027233115468409598)
(21, 0.027233115468409598)
(22, 0.04193899782135078)
(23, 0.056644880174291964)
(24, 0.027233115468409598)
(25, 0.08605664488017432)
(26, 0.027233115468409598)
Topic distributions for document 5027
(0, 0.020350020350020356)
(1, 0.020350020350020356)
(2, 0.020350020350020356)
(3, 0.020350020350020356)


(9, 0.026455026455026443)
(10, 0.026455026455026443)
(11, 0.09788359788359785)
(12, 0.026455026455026443)
(13, 0.055026455026455)
(14, 0.04074074074074072)
(15, 0.026455026455026443)
(16, 0.026455026455026443)
(17, 0.08359788359788356)
(18, 0.026455026455026443)
(19, 0.026455026455026443)
(20, 0.026455026455026443)
(21, 0.026455026455026443)
(22, 0.026455026455026443)
(23, 0.026455026455026443)
(24, 0.026455026455026443)
(25, 0.11216931216931211)
(26, 0.026455026455026443)
Topic distributions for document 5096
(0, 0.029868578255675036)
(1, 0.029868578255675036)
(2, 0.029868578255675036)
(3, 0.029868578255675036)
(4, 0.029868578255675036)
(5, 0.029868578255675036)
(6, 0.029868578255675036)
(7, 0.029868578255675036)
(8, 0.029868578255675036)
(9, 0.029868578255675036)
(10, 0.04599761051373956)
(11, 0.029868578255675036)
(12, 0.029868578255675036)
(13, 0.04599761051373956)
(14, 0.029868578255675036)
(15, 0.029868578255675036)
(16, 0.04599761051373956)
(17, 0.029868578255675036)
(18, 0.0298

(2, 0.02469135802469136)
(3, 0.02469135802469136)
(4, 0.06469135802469138)
(5, 0.02469135802469136)
(6, 0.02469135802469136)
(7, 0.0380246913580247)
(8, 0.02469135802469136)
(9, 0.0380246913580247)
(10, 0.02469135802469136)
(11, 0.02469135802469136)
(12, 0.10469135802469137)
(13, 0.02469135802469136)
(14, 0.02469135802469136)
(15, 0.02469135802469136)
(16, 0.07802469135802471)
(17, 0.02469135802469136)
(18, 0.0380246913580247)
(19, 0.06469135802469138)
(20, 0.02469135802469136)
(21, 0.06469135802469138)
(22, 0.0380246913580247)
(23, 0.02469135802469136)
(24, 0.0380246913580247)
(25, 0.02469135802469136)
(26, 0.02469135802469136)
Topic distributions for document 5156
(0, 0.023441162681669018)
(1, 0.023441162681669018)
(2, 0.023441162681669018)
(3, 0.03609939052977029)
(4, 0.023441162681669018)
(5, 0.023441162681669018)
(6, 0.023441162681669018)
(7, 0.023441162681669018)
(8, 0.023441162681669018)
(9, 0.023441162681669018)
(10, 0.03609939052977029)
(11, 0.048757618377871556)
(12, 0.023441

(6, 0.05664488017429197)
(7, 0.027233115468409598)
(8, 0.05664488017429197)
(9, 0.1889978213507626)
(10, 0.027233115468409598)
(11, 0.027233115468409598)
(12, 0.027233115468409598)
(13, 0.027233115468409598)
(14, 0.027233115468409598)
(15, 0.027233115468409598)
(16, 0.027233115468409598)
(17, 0.027233115468409598)
(18, 0.027233115468409598)
(19, 0.027233115468409598)
(20, 0.027233115468409598)
(21, 0.027233115468409598)
(22, 0.027233115468409598)
(23, 0.027233115468409598)
(24, 0.027233115468409598)
(25, 0.027233115468409598)
(26, 0.027233115468409598)
Topic distributions for document 5222
(0, 0.02572016460905351)
(1, 0.02572016460905351)
(2, 0.02572016460905351)
(3, 0.03960905349794241)
(4, 0.02572016460905351)
(5, 0.02572016460905351)
(6, 0.02572016460905351)
(7, 0.02572016460905351)
(8, 0.26183127572016474)
(9, 0.02572016460905351)
(10, 0.02572016460905351)
(11, 0.02572016460905351)
(12, 0.02572016460905351)
(13, 0.02572016460905351)
(14, 0.03960905349794241)
(15, 0.0396090534979424

(25, 0.0284900284900285)
(26, 0.0284900284900285)
Topic distributions for document 5285
(0, 0.0293944738389183)
(1, 0.0293944738389183)
(2, 0.0293944738389183)
(3, 0.1881246325690771)
(4, 0.0293944738389183)
(5, 0.0293944738389183)
(6, 0.0293944738389183)
(7, 0.045267489711934186)
(8, 0.0293944738389183)
(9, 0.0293944738389183)
(10, 0.0293944738389183)
(11, 0.0293944738389183)
(12, 0.0293944738389183)
(13, 0.0293944738389183)
(14, 0.0293944738389183)
(15, 0.0293944738389183)
(16, 0.0293944738389183)
(17, 0.0293944738389183)
(18, 0.0293944738389183)
(19, 0.0293944738389183)
(20, 0.0293944738389183)
(21, 0.0293944738389183)
(22, 0.0293944738389183)
(23, 0.0293944738389183)
(24, 0.0293944738389183)
(25, 0.061140505584950065)
(26, 0.0293944738389183)
Topic distributions for document 5286
(0, 0.02405002405002403)
(1, 0.03703703703703701)
(2, 0.02405002405002403)
(3, 0.3487253487253484)
(4, 0.03703703703703701)
(5, 0.02405002405002403)
(6, 0.02405002405002403)
(7, 0.02405002405002403)
(8, 0.

Topic distributions for document 5347
(0, 0.029394473838918293)
(1, 0.029394473838918293)
(2, 0.04526748971193417)
(3, 0.029394473838918293)
(4, 0.029394473838918293)
(5, 0.029394473838918293)
(6, 0.029394473838918293)
(7, 0.04526748971193417)
(8, 0.04526748971193417)
(9, 0.06114050558495005)
(10, 0.029394473838918293)
(11, 0.029394473838918293)
(12, 0.029394473838918293)
(13, 0.029394473838918293)
(14, 0.029394473838918293)
(15, 0.029394473838918293)
(16, 0.04526748971193417)
(17, 0.029394473838918293)
(18, 0.029394473838918293)
(19, 0.06114050558495005)
(20, 0.029394473838918293)
(21, 0.04526748971193417)
(22, 0.029394473838918293)
(23, 0.029394473838918293)
(24, 0.06114050558495005)
(25, 0.029394473838918293)
(26, 0.06114050558495005)
Topic distributions for document 5348
(0, 0.023741690408357084)
(1, 0.16476733143399816)
(2, 0.03656220322886991)
(3, 0.03656220322886991)
(4, 0.06220322886989556)
(5, 0.06220322886989556)
(6, 0.023741690408357084)
(7, 0.03656220322886991)
(8, 0.023741

(17, 0.04256495301271422)
(18, 0.05749032614704259)
(19, 0.027639579878385858)
(20, 0.027639579878385858)
(21, 0.027639579878385858)
(22, 0.027639579878385858)
(23, 0.04256495301271422)
(24, 0.027639579878385858)
(25, 0.027639579878385858)
(26, 0.027639579878385858)
Topic distributions for document 5411
(0, 0.13439599937490232)
(1, 0.0331301765900922)
(2, 0.06688545085169557)
(3, 0.012033130176590093)
(4, 0.02469135802469136)
(5, 0.012033130176590093)
(6, 0.0331301765900922)
(7, 0.020471948741990938)
(8, 0.012033130176590093)
(9, 0.016252539459290515)
(10, 0.02469135802469136)
(11, 0.00781372089388967)
(12, 0.00781372089388967)
(13, 0.00781372089388967)
(14, 0.00781372089388967)
(15, 0.016252539459290515)
(16, 0.071104860134396)
(17, 0.020471948741990938)
(18, 0.00781372089388967)
(19, 0.06688545085169557)
(20, 0.012033130176590093)
(21, 0.11751836224410064)
(22, 0.020471948741990938)
(23, 0.02469135802469136)
(24, 0.12173777152680106)
(25, 0.05844663228629473)
(26, 0.04156899515549304

Topic distributions for document 5471
(0, 0.047530864197530845)
(1, 0.030864197530864185)
(2, 0.030864197530864185)
(3, 0.047530864197530845)
(4, 0.030864197530864185)
(5, 0.047530864197530845)
(6, 0.030864197530864185)
(7, 0.030864197530864185)
(8, 0.030864197530864185)
(9, 0.030864197530864185)
(10, 0.030864197530864185)
(11, 0.030864197530864185)
(12, 0.030864197530864185)
(13, 0.030864197530864185)
(14, 0.030864197530864185)
(15, 0.030864197530864185)
(16, 0.030864197530864185)
(17, 0.030864197530864185)
(18, 0.030864197530864185)
(19, 0.047530864197530845)
(20, 0.030864197530864185)
(21, 0.030864197530864185)
(22, 0.09753086419753082)
(23, 0.030864197530864185)
(24, 0.030864197530864185)
(25, 0.030864197530864185)
(26, 0.06419753086419751)
Topic distributions for document 5472
(0, 0.023441162681669032)
(1, 0.023441162681669032)
(2, 0.023441162681669032)
(3, 0.03609939052977031)
(4, 0.35255508673230224)
(5, 0.023441162681669032)
(6, 0.023441162681669032)
(7, 0.023441162681669032)
(

(16, 0.02763957987838585)
(17, 0.02763957987838585)
(18, 0.057490326147042574)
(19, 0.042564953012714214)
(20, 0.02763957987838585)
(21, 0.02763957987838585)
(22, 0.042564953012714214)
(23, 0.042564953012714214)
(24, 0.02763957987838585)
(25, 0.057490326147042574)
(26, 0.0873410724156993)
Topic distributions for document 5536
(0, 0.028490028490028494)
(1, 0.04387464387464388)
(2, 0.028490028490028494)
(3, 0.05925925925925927)
(4, 0.028490028490028494)
(5, 0.028490028490028494)
(6, 0.028490028490028494)
(7, 0.028490028490028494)
(8, 0.028490028490028494)
(9, 0.028490028490028494)
(10, 0.05925925925925927)
(11, 0.028490028490028494)
(12, 0.04387464387464388)
(13, 0.04387464387464388)
(14, 0.028490028490028494)
(15, 0.04387464387464388)
(16, 0.10541310541310543)
(17, 0.028490028490028494)
(18, 0.028490028490028494)
(19, 0.028490028490028494)
(20, 0.028490028490028494)
(21, 0.04387464387464388)
(22, 0.028490028490028494)
(23, 0.028490028490028494)
(24, 0.028490028490028494)
(25, 0.02849002

(2, 0.02314814814814815)
(3, 0.02314814814814815)
(4, 0.04814814814814816)
(5, 0.02314814814814815)
(6, 0.02314814814814815)
(7, 0.02314814814814815)
(8, 0.02314814814814815)
(9, 0.02314814814814815)
(10, 0.03564814814814816)
(11, 0.07314814814814817)
(12, 0.02314814814814815)
(13, 0.060648148148148166)
(14, 0.03564814814814816)
(15, 0.03564814814814816)
(16, 0.03564814814814816)
(17, 0.02314814814814815)
(18, 0.02314814814814815)
(19, 0.02314814814814815)
(20, 0.03564814814814816)
(21, 0.02314814814814815)
(22, 0.02314814814814815)
(23, 0.2106481481481482)
(24, 0.03564814814814816)
(25, 0.02314814814814815)
(26, 0.02314814814814815)
Topic distributions for document 5597
(0, 0.029868578255675036)
(1, 0.07825567502986859)
(2, 0.029868578255675036)
(3, 0.029868578255675036)
(4, 0.029868578255675036)
(5, 0.04599761051373956)
(6, 0.06212664277180408)
(7, 0.029868578255675036)
(8, 0.029868578255675036)
(9, 0.04599761051373956)
(10, 0.029868578255675036)
(11, 0.029868578255675036)
(12, 0.094

In [33]:
count = 0
for i in range(27):
    count += len(playlists_real[i])
print(count)

5600


In [101]:
transformed_docs = lda_real.load_document_topics()
topic_distributions_real = pd.DataFrame([[x[1] for x in doc] for doc in transformed_docs], 
             columns=['topic_{}'.format(i) for i in range(27)])
topic_distributions_real.tail()
topic_distributions_real.mean().sort_values(ascending=False)

topic_3     0.042825
topic_4     0.042771
topic_9     0.041676
topic_16    0.041601
topic_10    0.041561
topic_5     0.041045
topic_19    0.040365
topic_22    0.040339
topic_14    0.040319
topic_11    0.040177
topic_7     0.039967
topic_2     0.039790
topic_13    0.039737
topic_20    0.039648
topic_0     0.039393
topic_15    0.039371
topic_17    0.039073
topic_8     0.039051
topic_23    0.038970
topic_21    0.038929
topic_1     0.038830
topic_24    0.038768
topic_18    0.038680
topic_6     0.038670
topic_12    0.038445
dtype: float64

## VADER Sentiment Analysis

In [102]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/rosalucassen/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


#### Real News

In [None]:
df_real = pd.DataFrame(corpus_real)
df_real.columns = ["Tweet"]
df_real

In [None]:
df_real['scores'] = df_real['Tweet'].apply(lambda tweet: sid.polarity_scores(tweet))
df_real.head()

In [None]:
df_real['compound']  = df_real['scores'].apply(lambda score_dict: score_dict['compound'])
df_real.head()

In [None]:
# Create a function that returns desired values
# You only need to check upper bound as the next elif-statement will catch the value
def func(compound):
    if compound < -0.1:
        return "neg"
    elif compound > 0.1:
        return "pos"
    else:
        return 'neu'
df_real['Class'] = df_real['compound'].apply(func)
df_real.head()

In [None]:
df_real["compound"].mean()

In [None]:
df_real['compound'].plot(kind = 'hist')

#### Fake News

In [109]:
def compound_func(compound):
    if compound < -0.1:
        return "neg"
    elif compound > 0.1:
        return "pos"
    else:
        return 'neu'

def VADER(playlists):
    df = pd.DataFrame(playlists)
    df.columns = ["Tweet"]
    df['scores'] = df['Tweet'].apply(lambda tweet: sid.polarity_scores(tweet))
    df['compound']  = df['scores'].apply(lambda score_dict: score_dict['compound'])
    df['Class'] = df['compound'].apply(compound_func)
    return df

In [110]:
df_topic1_fake = VADER(playlists[0])
df_topic2_fake = VADER(playlists[1])
df_topic3_fake = VADER(playlists[2])
df_topic4_fake = VADER(playlists[3])

In [111]:
df_topic1_real = VADER(playlists_real[0])
df_topic2_real = VADER(playlists_real[1])
df_topic3_real = VADER(playlists_real[2])
df_topic4_real = VADER(playlists_real[3])
df_topic5_real = VADER(playlists_real[4])
df_topic6_real = VADER(playlists_real[5])
df_topic7_real = VADER(playlists_real[6])
df_topic8_real = VADER(playlists_real[7])
df_topic9_real = VADER(playlists_real[8])
df_topic10_real = VADER(playlists_real[9])
df_topic11_real = VADER(playlists_real[10])
df_topic12_real = VADER(playlists_real[11])
df_topic13_real = VADER(playlists_real[12])
df_topic14_real = VADER(playlists_real[13])
df_topic15_real = VADER(playlists_real[14])
df_topic16_real = VADER(playlists_real[15])
df_topic17_real = VADER(playlists_real[16])
df_topic18_real = VADER(playlists_real[17])
df_topic19_real = VADER(playlists_real[18])
df_topic20_real = VADER(playlists_real[19])
df_topic21_real = VADER(playlists_real[20])
df_topic22_real = VADER(playlists_real[21])
df_topic23_real = VADER(playlists_real[22])
df_topic24_real = VADER(playlists_real[23])
df_topic25_real = VADER(playlists_real[24])

Unnamed: 0,Tweet,scores,compound,Class
0,government come pressure lack availability tests. Read here:,"{'neg': 0.429, 'neu': 0.571, 'pos': 0.0, 'compound': -0.5423}",-0.5423,neg
1,James_Gross reporting effectively. It's refuse numbers table dashboard reasons me.,"{'neg': 0.168, 'neu': 0.611, 'pos': 0.221, 'compound': 0.1779}",0.1779,pos
2,important work data collection analysis : pm day data Team collates results received testing laboratories inform Nigerians number Results received time reported day,"{'neg': 0.0, 'neu': 0.866, 'pos': 0.134, 'compound': 0.2732}",0.2732,pos
3,CoronaVirusUpdates: testing status update: ICMRDELHI stated samples upto September sample September StaySafe IndiaWillWin,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0000,neu
4,RT imperialcollege: UK government announced . million support Prof Robin Shattocks roboskis teams phase trials,"{'neg': 0.0, 'neu': 0.828, 'pos': 0.172, 'compound': 0.4019}",0.4019,pos
...,...,...,...,...
160,Avoid Cs. certain places spreads easily: Crowded places Close-contact settings Confined enclosed spaces,"{'neg': 0.132, 'neu': 0.599, 'pos': 0.269, 'compound': 0.3182}",0.3182,pos
161,CoronaVirusUpdates: testing status update: ICMRDELHI stated samples upto August sample August StaySafe IndiaWillWin,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0000,neu
162,important continues play safe. person charge social gathering ensure records kept contact tracing purposes person gathering knows other.,"{'neg': 0.0, 'neu': 0.591, 'pos': 0.409, 'compound': 0.8271}",0.8271,pos
163,model TheLancet studied contact tracing data people UK. researchers found combining contact tracing quarantining better contained compared mass testing self-isolation alone.,"{'neg': 0.084, 'neu': 0.795, 'pos': 0.121, 'compound': 0.2263}",0.2263,pos


In [None]:
df_fake["compound"].mean()

In [None]:
df_fake['compound'].plot(kind = 'hist')

#### All News

In [None]:
df = pd.DataFrame(corpus)
df.columns = ["Tweet"]
df

In [None]:
df['scores'] = df['Tweet'].apply(lambda tweet: sid.polarity_scores(tweet))
df.head()

In [None]:
df['compound']  = df['scores'].apply(lambda score_dict: score_dict['compound'])
df.head()

In [None]:
# Create a function that returns desired values
# You only need to check upper bound as the next elif-statement will catch the value
def func(compound):
    if compound < -0.1:
        return "neg"
    elif compound > 0.1:
        return "pos"
    else:
        return 'neu'
df['Class'] = df['compound'].apply(func)
df.head()

In [None]:
df["compound"].mean()

In [None]:
df['compound'].plot(kind = 'hist')

## LIWC Sentiment Analysis

#### Fake news

In [None]:
def pre_processer_LIWC(corpus):
    corpus = [text.lower() for text in corpus]
    corpus = [remove_urls(text) for text in corpus]
    corpus = [remove_single_char(text) for text in corpus]
    corpus = [remove_numbers(text) for text in corpus]
    corpus = [remove_rt(text) for text in corpus]
    corpus = [remove_emojis(text) for text in corpus]
    corpus = [remove_nonalpha(text) for text in corpus]
    corpus = [text.encode("ascii", "ignore") for text in corpus]
    corpus = [text.decode() for text in corpus]
    corpus = [remove_extra_spaces(text) for text in corpus]
    corpus = strip_list_noempty(corpus)
    
    return corpus

import liwc
parse, category_names = liwc.load_token_parser('liwc.dic')

In [None]:
corpus_fake = data_train[data_train.label == "fake"]["tweet"]
corpus_real = data_train[data_train.label == "real"]["tweet"]
corpus = data_train["tweet"]
corpus_fake = pre_processer_LIWC(corpus_fake)
print(corpus_fake)
corpus_real = pre_processer_LIWC(corpus_real)
print(corpus_real)
corpus = pre_processer(corpus)

In [34]:
import liwc
parse, category_names = liwc.load_token_parser('liwc.dic')

def LIWC(playlists):
    df = Counter(category for token in playlists for category in parse(token))
    return df

In [26]:
dfLIWC_topic1_fake = LIWC(playlists[0])
dfLIWC_topic2_fake = LIWC(playlists[1])
dfLIWC_topic3_fake = LIWC(playlists[2])
dfLIWC_topic4_fake = LIWC(playlists[3])
dfLIWC_topic5_fake = LIWC(playlists[4])
dfLIWC_topic6_fake = LIWC(playlists[5])
dfLIWC_topic7_fake = LIWC(playlists[6])
dfLIWC_topic8_fake = LIWC(playlists[7])

In [35]:
dfLIWC_topic1_real = LIWC(playlists_real[0])
dfLIWC_topic2_real = LIWC(playlists_real[1])
dfLIWC_topic3_real = LIWC(playlists_real[2])
dfLIWC_topic4_real = LIWC(playlists_real[3])
dfLIWC_topic5_real = LIWC(playlists_real[4])
dfLIWC_topic6_real = LIWC(playlists_real[5])
dfLIWC_topic7_real = LIWC(playlists_real[6])
dfLIWC_topic8_real = LIWC(playlists_real[7])
dfLIWC_topic9_real = LIWC(playlists_real[8])
dfLIWC_topic10_real = LIWC(playlists_real[9])
dfLIWC_topic11_real = LIWC(playlists_real[10])
dfLIWC_topic12_real = LIWC(playlists_real[11])
dfLIWC_topic13_real = LIWC(playlists_real[12])
dfLIWC_topic14_real = LIWC(playlists_real[13])
dfLIWC_topic15_real = LIWC(playlists_real[14])
dfLIWC_topic16_real = LIWC(playlists_real[15])
dfLIWC_topic17_real = LIWC(playlists_real[16])
dfLIWC_topic18_real = LIWC(playlists_real[17])
dfLIWC_topic19_real = LIWC(playlists_real[18])
dfLIWC_topic20_real = LIWC(playlists_real[19])
dfLIWC_topic21_real = LIWC(playlists_real[20])
dfLIWC_topic22_real = LIWC(playlists_real[21])
dfLIWC_topic23_real = LIWC(playlists_real[22])
dfLIWC_topic24_real = LIWC(playlists_real[23])
dfLIWC_topic25_real = LIWC(playlists_real[24])
dfLIWC_topic26_real = LIWC(playlists_real[25])
dfLIWC_topic27_real = LIWC(playlists_real[26])

In [36]:
dfLIWC_topic27_real

Counter({'affect': 8,
         'negemo': 6,
         'bio': 5,
         'health': 5,
         'posemo': 2,
         'cogmech': 3,
         'certain': 1,
         'space': 2,
         'relativ': 7,
         'anx': 1,
         'tentat': 1,
         'achieve': 3,
         'inhib': 1,
         'time': 4,
         'funct': 1,
         'number': 1,
         'motion': 1,
         'social': 1})

In [None]:
#from collections import Counter
feature_counts = Counter(category for token in corpus_fake for category in parse(token))
print(feature_counts)

#### Real news

In [None]:
#from collections import Counter
feature_counts = Counter(category for token in corpus_real for category in parse(token))
print(feature_counts)

#### All news

In [None]:
#from collections import Counter
feature_counts = Counter(category for token in corpus for category in parse(token))
print(feature_counts)

## SentiNet

In [42]:
import nltk
from nltk.corpus import sentiwordnet as swn
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('sentiwordnet')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/rosalucassen/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/rosalucassen/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /Users/rosalucassen/nltk_data...
[nltk_data]   Unzipping corpora/sentiwordnet.zip.


True

In [75]:
def pre_processer_SENT(corpus):
    corpus = [remove_urls(text) for text in corpus]
    corpus = [remove_single_char(text) for text in corpus]
    corpus = [remove_numbers(text) for text in corpus]
    corpus = [remove_rt(text) for text in corpus]
    corpus = [remove_nonalpha(text) for text in corpus]
    corpus = [text.encode("ascii", "ignore") for text in corpus]
    corpus = [text.decode() for text in corpus]
    corpus = [remove_extra_spaces(text) for text in corpus]
    corpus = strip_list_noempty(corpus)
    
    processed_texts = [text for text in nlp.pipe(corpus, 
                                              disable=["ner",
                                                       "parser"])]
    tokenized_texts = [[word.lemma_ for word in lemma_ if not word.is_punct if not word.is_stop] 
                            for lemma_ in processed_texts]
    tokens = make_n_grams(tokenized_texts)
    return tokens

def penn_to_wn(tag):
    if tag.startswith('J'):
        return wn.ADJ
    elif tag.startswith('N'):
        return wn.NOUN
    elif tag.startswith('R'):
        return wn.ADV
    elif tag.startswith('V'):
        return wn.VERB
    return None

lemmatizer = nltk.WordNetLemmatizer()

def get_sentiment(word,tag):
    wn_tag = penn_to_wn(tag)
    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
        return []

    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []

    synsets = wn.synsets(word, pos=wn_tag)
    if not synsets:
        return []

    synset = synsets[0]
    swn_synset = swn.senti_synset(synset.name())
    return [swn_synset.pos_score(),swn_synset.neg_score(),swn_synset.obj_score()]

data = data_train[data_train.label == "fake"]["tweet"]
data["text_n"] = data.str.replace('[^\w\s]',' ')
data['tokens']= data['text_n'].apply(pre_processer_SENT)

word_list=[]
for i in range(len(data['tokens'])):
    word_list.append([])

import numbers
for i in range(len(data['tokens'])): 
    for word in data['tokens'][i]:
        if word[:].isalpha():
            word_list[i].append(word[:])

tagged_tokens=[]
for token in word_list:
    tagged_tokens.append(nltk.pos_tag(token))
    
senti_val=[]
for i in range(len(tagged_tokens)):
    t=tuple(get_sentiment(x,y) for x,y in tagged_tokens[i])
    values= [x for x in t if x]
    senti_val.append(values)
data['value']=senti_val

calc=[]
#iterating over each sentence 
for i in range(len(senti_val)):
    # subtract positive and negative score of each token in the sentence 
    scores = [token_score[0] - token_score[1] for token_score in senti_val[i]]
    # if you want to sum up all the score within the sentence
    # scores = sum(scores) 
    print(scores)
    calc.append(scores)

AttributeError: 'list' object has no attribute 'isalpha'

In [83]:
playlists[1]

['Multiple Facebook posts claim Aussies fined found talking conspiracies [COVID-19]. posts included screenshot segment Australian breakfast television evidence claim.',
 'NYT invented video doctor fighting hospital.',
 'RT EllenCutch: Coronavirus misinformation moving offline. reddit user posted flyer site told delive',
 'Multiple posts shared thousands times Facebook Twitter YouTube claim salt effective remedy novel coronavirus.',
 "video shows Italian doctor says died Italy it's dangerous flu.",
 "Leaked Report Says 'Too Humans' Planet globalwarming conspiracy",
 'Video shows infected patient COVID-19 Dhanbad Hospital.',
 "photo funeral recently deceased Sri Lankan politician Arumugam Thondaman shared thousands times multiple Facebook posts. posts claim image shows Thondamans daughters violated Sri Lanka's mandatory quarantine policy travelers attending ceremony shortly returning country overseas.",
 'photo shows people infected lying sidewalk China.',
 'Man beaten Deoria India lockd

In [102]:
def pre_processer_SENT(corpus):
    corpus = [remove_urls(text) for text in corpus]
    corpus = [remove_single_char(text) for text in corpus]
    corpus = [remove_numbers(text) for text in corpus]
    corpus = [remove_rt(text) for text in corpus]
    corpus = [remove_nonalpha(text) for text in corpus]
    corpus = [text.encode("ascii", "ignore") for text in corpus]
    corpus = [text.decode() for text in corpus]
    corpus = [remove_extra_spaces(text) for text in corpus]
    corpus = strip_list_noempty(corpus)
    
    data = pd.DataFrame(corpus)
    data.columns = ["Tweet"]

    return data

In [107]:
data = pre_processer_SENT(playlists[1])

In [108]:
data

Unnamed: 0,Tweet
0,Multiple Facebook posts claim Aussies fined found talking conspiracies [COVID-19]. posts included screenshot segment Australian breakfast television evidence claim.
1,NYT invented video doctor fighting hospital.
2,RT EllenCutch: Coronavirus misinformation moving offline. reddit user posted flyer site told delive
3,Multiple posts shared thousands times Facebook Twitter YouTube claim salt effective remedy novel coronavirus.
4,video shows Italian doctor says died Italy it's dangerous flu.
...,...
588,Boris Johnson shopping England time coronavirus.
589,video viewed thousands times multiple posts Facebook Twitter claim shows President Donald Trump tested positive novel coronavirus.
590,Central Park hospital tents housed thousands abused children released underground captivity.
591,men women clamped wooden plank claim people Thailand punished disobeying lockdown stepping houses.


In [109]:
# Getting nlp from spacy.load
#nlp=spacy.load('en')
# Making the function to get the sentiments out of the dataframe
def get_sentiment(data, name):
    count=1
    l=len(data)
    positive_sentiments=[]
    negative_sentiments=[]
    for tex in data[name]:
        print('The current status is :',count*100/l,'%')
        tex=nlp(tex)
        noun=[]
        verb=[]
        adj=[]
        adv=[]
        for i in tex :
            if i.pos_=='NOUN':
                noun.append(i)
            elif i.pos_ =='ADJ':
                adj.append(i)
            elif i.pos_ =='VERB':
                verb.append(i)
            elif i.pos_=='ADV':
                adv.append(i)
        clear_output(wait=True)
        count+=1
        neg_score=[]
        pos_score=[]
        for i in tex :
            try:
                if i in noun:
                    x=swn.senti_synset(str(i)+'.n.01')
                    neg_score.append(x.neg_score())
                    pos_score.append(x.pos_score())
                elif i in adj:
                    x=swn.senti_synset(str(i)+'.a.02')
                    neg_score.append(x.neg_score())
                    pos_score.append(x.pos_score())
                elif i in adv :
                    x=swn.senti_synset(str(i)+'.r.02')
                    neg_score.append(x.neg_score())
                    pos_score.append(x.pos_score())
                elif i in verb :
                    x=swn.senti_synset(str(i)+'.v.02')
                    neg_score.append(x.neg_score())
                    pos_score.append(x.pos_score())

            except:
                pass
        positive_sentiments.append(np.mean(pos_score))
        negative_sentiments.append(np.mean(neg_score))

    data['Positive Sentiment']=positive_sentiments
    data['Negative Sentiment']=negative_sentiments

In [110]:
from IPython.display import clear_output

import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud

get_sentiment(data, "Tweet")

The current status is : 100.0 %


In [113]:
overall=[]
for i in range(len(data)):
    if data['Positive Sentiment'][i]>data['Negative Sentiment'][i]:
        overall.append('Positive')
    elif data['Positive Sentiment'][i]<data['Negative Sentiment'][i]:
        overall.append('Negative')
    else:
        overall.append('Neutral')
data['Overall Sentiment']=overall

In [112]:
data.head()

Unnamed: 0,Tweet,Positive Sentiment,Negative Sentiment,Overall Sentiment
0,Multiple Facebook posts claim Aussies fined found talking conspiracies [COVID-19]. posts included screenshot segment Australian breakfast television evidence claim.,0.017857,0.0,Positive
1,NYT invented video doctor fighting hospital.,0.083333,0.0,Positive
2,RT EllenCutch: Coronavirus misinformation moving offline. reddit user posted flyer site told delive,0.03125,0.09375,Negative
3,Multiple posts shared thousands times Facebook Twitter YouTube claim salt effective remedy novel coronavirus.,0.208333,0.020833,Positive
4,video shows Italian doctor says died Italy it's dangerous flu.,0.15625,0.1875,Negative
