In [1]:
import pandas as pd

# Load the title/abstract corpus, skipping (with a warning) rows that cannot
# be parsed. `error_bad_lines` was deprecated in pandas 1.3 and removed in
# 2.0, so prefer `on_bad_lines` and fall back only on older pandas releases
# that reject the newer keyword.
try:
    data = pd.read_csv('title_abstract.csv', on_bad_lines='warn')
except TypeError:
    data = pd.read_csv('title_abstract.csv', error_bad_lines=False)

# Copy the selection so the column assignments below write to an independent
# frame rather than to a slice of `data` (avoids SettingWithCopyWarning).
# The resulting column order is: text, index, paper_id.
data_text = data[['text']].copy()
data_text['index'] = data_text.index
data_text['paper_id'] = data['paperId']
documents = data_text

In [2]:
# Number of rows (documents) in the corpus.
documents.shape[0]

3982

In [3]:
# Peek at the first five rows.
documents.head(5)

Unnamed: 0,text,index
0,A strategy for managing content complexity in ...,0
1,Efficient passage ranking for document databas...,1
2,The aditi deductive database system:Deductive ...,2
3,Housekeeping for prefix coding:We consider the...,3
4,Memory efficient ranking:Fast and effective ra...,4


### Data Preprocessing

In [3]:
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
np.random.seed(2019)

In [4]:
import nltk
# nltk.download('wordnet')

#### Lemmatize example

In [5]:
# Lemmatize a single verb as a quick sanity check.
lemmatizer = WordNetLemmatizer()
print(lemmatizer.lemmatize('went', pos='v'))

go


#### Stemmer Example

In [6]:
# English Snowball stemmer; also used by lemmatize_stemming() further down.
stemmer = SnowballStemmer('english')
original_words = [
    'caresses', 'flies', 'dies', 'mules', 'denied', 'died', 'agreed', 'owned',
    'humbled', 'sized', 'meeting', 'stating', 'siezing', 'itemization', 'sensational',
    'traditional', 'reference', 'colonizer', 'plotted',
]
# Stem every example word and show the before/after pairs side by side.
singles = list(map(stemmer.stem, original_words))
pd.DataFrame({'original word': original_words, 'stemmed': singles})

Unnamed: 0,original word,stemmed
0,caresses,caress
1,flies,fli
2,dies,die
3,mules,mule
4,denied,deni
5,died,die
6,agreed,agre
7,owned,own
8,humbled,humbl
9,sized,size


In [7]:
def lemmatize_stemming(text):
    """Lemmatize ``text`` as a verb with WordNet, then stem the lemma.

    Uses the module-level ``stemmer`` for the stemming step.
    """
    lemma = WordNetLemmatizer().lemmatize(text, pos='v')
    return stemmer.stem(lemma)

def preprocess(text):
    """Tokenize ``text`` and return the lemmatized, stemmed tokens.

    Tokens that are gensim stop words or have three characters or fewer are
    dropped before lemmatization/stemming.
    """
    stop_words = gensim.parsing.preprocessing.STOPWORDS
    return [
        lemmatize_stemming(tok)
        for tok in gensim.utils.simple_preprocess(text)
        if tok not in stop_words and len(tok) > 3
    ]

In [9]:
# Text (first column) of the row whose 'index' value is 200.
sample_rows = documents[documents['index'] == 200]
doc_sample = sample_rows.values[0][0]

print('original document: ')
# Naive whitespace split, shown for comparison with the preprocessed tokens.
words = list(doc_sample.split(' '))
print(words)
print('\n\n tokenized and lemmatized document: ')
print(preprocess(doc_sample))

original document: 
['Using', 'emerging', 'patterns', 'and', 'decision', 'trees', 'in', 'rare-class', 'classification:The', 'problem', 'of', 'classifying', 'rarely', 'occurring', 'cases', 'is', 'faced', 'in', 'many', 'real', 'life', 'applications.', 'The', 'scarcity', 'of', 'the', 'rare', 'cases', 'makes', 'it', 'difficult', 'to', 'classify', 'them', 'correctly', 'using', 'traditional', 'classifiers.', 'In', 'this', 'paper,', 'we', 'propose', 'a', 'new', 'approach', 'to', 'use', 'emerging', 'patterns', '(EPs)', 'and', 'decision', 'trees', '(DTs)', 'in', 'rare-class', 'classification', '(EPDT).', 'EPs', 'are', 'those', 'itemsets', 'whose', 'supports', 'in', 'one', 'class', 'are', 'significantly', 'higher', 'than', 'their', 'supports', 'in', 'the', 'other', 'classes.', 'EPDT', 'employs', 'the', 'power', 'of', 'EPs', 'to', 'improve', 'the', 'quality', 'of', 'rare-case', 'classification.', 'To', 'achieve', 'this', 'aim,', 'we', 'first', 'introduce', 'the', 'idea', 'of', 'generating', 'new'

In [8]:
# Run the preprocessing pipeline over every document text.
processed_docs = documents['text'].apply(preprocess)

In [43]:
# Inspect ten preprocessed documents (positions 10 through 19).
processed_docs.iloc[10:20]

10    [efficient, consumer, response, survey, austra...
11    [binary, interpolative, cod, effective, index,...
12    [empirical, evaluation, cod, methods, multi, s...
13    [fast, algorithm, meld, splay, tree, springer,...
14    [efficient, object, orient, program, prolog, d...
15    [optimal, dynamic, multi, attribute, hash, ran...
16    [determinism, functional, languages, introduct...
17    [efficient, computation, query, stratify, data...
18    [share, groundness, dependencies, logic, progr...
19    [linear, arboricity, linear, arboricity, regul...
Name: text, dtype: object

### Bag of words on the dataset

In [9]:
# Build the token <-> id mapping from the preprocessed documents.
dictionary = gensim.corpora.Dictionary(documents=processed_docs)

In [58]:
# Print the first 21 (id, token) pairs of the dictionary.
for count, (k, v) in enumerate(dictionary.items(), start=1):
    print(k, v)
    if count > 20:
        break

0 accompani
1 action
2 address
3 advantag
4 algorithm
5 allow
6 anim
7 avail
8 call
9 captur
10 complex
11 content
12 control
13 coordin
14 correspond
15 data
16 descript
17 detail
18 differ
19 dofferem
20 dynam


In [10]:
# Prune the vocabulary in place using the document-frequency thresholds below.
# NOTE: this mutates `dictionary`, so re-running the cell filters it again.
dictionary.filter_extremes(
    no_below=15,
    no_above=0.5,
    keep_n=100000,
)

In [11]:
# Convert each preprocessed document into its bag-of-words vector.
bow_corpus = list(map(dictionary.doc2bow, processed_docs))

In [16]:
bow_doc_200 = bow_corpus[200]

# Each entry is a (token id, count) pair; look the token up for display.
for token_id, token_count in bow_doc_200:
    print("Word {} (\"{}\") appears {} time.".format(token_id,
                                                     dictionary[token_id],
                                                     token_count))

Word 52 ("support") appears 2 time.
Word 82 ("improv") appears 1 time.
Word 89 ("method") appears 1 time.
Word 125 ("power") appears 1 time.
Word 134 ("applic") appears 1 time.
Word 143 ("ieee") appears 1 time.
Word 153 ("problem") appears 1 time.
Word 154 ("propos") appears 1 time.
Word 169 ("experi") appears 1 time.
Word 179 ("make") appears 1 time.
Word 192 ("achiev") appears 1 time.
Word 225 ("generat") appears 1 time.
Word 227 ("idea") appears 1 time.
Word 244 ("signific") appears 1 time.
Word 313 ("difficult") appears 1 time.
Word 368 ("import") appears 1 time.
Word 370 ("introduc") appears 1 time.
Word 382 ("class") appears 6 time.
Word 407 ("emerg") appears 2 time.
Word 414 ("tree") appears 2 time.
Word 416 ("correct") appears 1 time.
Word 460 ("approach") appears 1 time.
Word 473 ("decis") appears 2 time.
Word 557 ("occur") appears 1 time.
Word 582 ("pattern") appears 2 time.
Word 624 ("case") appears 3 time.
Word 630 ("qualiti") appears 1 time.
Word 644 ("instanc") appears 2 

### TF-IDF

In [12]:
from gensim import corpora, models

# Fit a TF-IDF model on the bag-of-words corpus.
tfidf = models.TfidfModel(corpus=bow_corpus)

In [13]:
# Apply the TF-IDF model to the whole bag-of-words corpus.
corpus_tfidf = tfidf[bow_corpus]

In [14]:
from pprint import pprint

# Show only the first TF-IDF weighted document (nothing if the corpus is empty).
first_tfidf_doc = next(iter(corpus_tfidf), None)
if first_tfidf_doc is not None:
    pprint(first_tfidf_doc)

[(0, 0.126376808266365),
 (1, 0.07787565055114672),
 (2, 0.0505504023030399),
 (3, 0.07096209915928124),
 (4, 0.1748251902363385),
 (5, 0.04688443567726373),
 (6, 0.4359943602628918),
 (7, 0.05340127524193944),
 (8, 0.061323191701547604),
 (9, 0.06634824688343606),
 (10, 0.15057628619104355),
 (11, 0.14691563847129344),
 (12, 0.11439273585975188),
 (13, 0.09021621346066433),
 (14, 0.16542145878952677),
 (15, 0.04763464920893705),
 (16, 0.09262477331378463),
 (17, 0.07998269523208633),
 (18, 0.032805413275128516),
 (19, 0.0522628878867586),
 (20, 0.09326819024973004),
 (21, 0.03370998865714734),
 (22, 0.11400664596594824),
 (23, 0.20564612977768015),
 (24, 0.23875104747687836),
 (25, 0.10655828035125481),
 (26, 0.10185786188827148),
 (27, 0.11566174420557522),
 (28, 0.08111274960634392),
 (29, 0.08318625106719792),
 (30, 0.1078647813944015),
 (31, 0.052939184698755956),
 (32, 0.2512147198997714),
 (33, 0.06634824688343606),
 (34, 0.1514966249528634),
 (35, 0.10233543592587072),
 (36, 0.

### Running LDA using Bag of Words

In [15]:
# Train LDA on raw bag-of-words counts. An explicit random_state makes the
# cell idempotent: without it the result depends on how much of the global
# NumPy RNG state earlier (possibly out-of-order) cells have consumed.
lda_model = gensim.models.LdaMulticore(
    bow_corpus,
    num_topics=20,
    id2word=dictionary,
    passes=2,
    workers=2,
    random_state=2019,
)

In [73]:
# List every topic of the bag-of-words model with its 50 top-weighted words.
for topic_id, topic_words in lda_model.print_topics(num_topics=-1, num_words=50):
    print('Topic: {} \nWords: {}'.format(topic_id, topic_words))

Topic: 0 
Words: 0.013*"data" + 0.012*"approach" + 0.010*"detect" + 0.010*"event" + 0.009*"base" + 0.009*"time" + 0.008*"program" + 0.008*"analysi" + 0.008*"propos" + 0.008*"applic" + 0.007*"comput" + 0.007*"method" + 0.007*"constraint" + 0.007*"techniqu" + 0.006*"result" + 0.006*"algorithm" + 0.005*"evalu" + 0.005*"problem" + 0.005*"queri" + 0.005*"process" + 0.005*"chang" + 0.005*"stream" + 0.005*"pattern" + 0.005*"execut" + 0.004*"task" + 0.004*"effici" + 0.004*"effect" + 0.004*"larg" + 0.004*"springer" + 0.004*"word" + 0.004*"differ" + 0.004*"model" + 0.004*"inform" + 0.004*"present" + 0.004*"logic" + 0.004*"mine" + 0.004*"graph" + 0.004*"real" + 0.004*"support" + 0.004*"system" + 0.004*"perform" + 0.003*"exist" + 0.003*"relat" + 0.003*"number" + 0.003*"depend" + 0.003*"domain" + 0.003*"import" + 0.003*"abstract" + 0.003*"high" + 0.003*"signific"
Topic: 1 
Words: 0.010*"propos" + 0.009*"agent" + 0.009*"method" + 0.008*"document" + 0.008*"similar" + 0.008*"game" + 0.008*"term" + 0.0

Cool! Can you distinguish different topics using the words in each topic and their corresponding weights?

### Running LDA using TF-IDF

In [16]:
# Train LDA on the TF-IDF weighted corpus. The explicit random_state keeps the
# topic numbering stable across re-runs; the hand-written `topic_keyword`
# table and the stored `topics` list are only meaningful for one fixed set
# of topics.
lda_model_tfidf = gensim.models.LdaMulticore(
    corpus_tfidf,
    num_topics=15,
    id2word=dictionary,
    passes=2,
    workers=4,
    random_state=2019,
)

In [17]:
topics = []
# Print each TF-IDF topic followed by a blank line.
for topic_id, topic_words in lda_model_tfidf.print_topics(-1):
    print('Topic: {} Word: {}'.format(topic_id, topic_words), end='\n\n')

Topic: 0 Word: 0.005*"cloud" + 0.005*"topic" + 0.005*"data" + 0.004*"network" + 0.004*"document" + 0.004*"user" + 0.004*"energi" + 0.004*"model" + 0.003*"search" + 0.003*"file"

Topic: 1 Word: 0.005*"interact" + 0.004*"cluster" + 0.004*"model" + 0.004*"algorithm" + 0.004*"data" + 0.004*"network" + 0.004*"process" + 0.004*"peer" + 0.003*"social" + 0.003*"method"

Topic: 2 Word: 0.007*"process" + 0.005*"queri" + 0.005*"model" + 0.004*"data" + 0.004*"search" + 0.004*"index" + 0.004*"array" + 0.003*"databas" + 0.003*"user" + 0.003*"mobil"

Topic: 3 Word: 0.008*"cloud" + 0.005*"agent" + 0.005*"energi" + 0.004*"comput" + 0.004*"model" + 0.004*"resourc" + 0.004*"virtual" + 0.004*"public" + 0.004*"applic" + 0.003*"vote"

Topic: 4 Word: 0.010*"queri" + 0.004*"trajectori" + 0.004*"locat" + 0.004*"network" + 0.004*"data" + 0.004*"graph" + 0.004*"model" + 0.004*"user" + 0.004*"measur" + 0.003*"algorithm"

Topic: 5 Word: 0.006*"data" + 0.005*"detect" + 0.005*"stream" + 0.004*"anomali" + 0.004*"netw

In [18]:
# Record every TF-IDF topic as {"topic_id", "topic_ref"} while printing it.
topics = []
for topic_id, topic_words in lda_model_tfidf.print_topics(num_topics=-1, num_words=10):
    topics.append(dict(topic_id=topic_id, topic_ref=topic_words))
    print('Topic: {} Word: {}'.format(topic_id, topic_words))

Topic: 0 Word: 0.005*"cloud" + 0.005*"topic" + 0.005*"data" + 0.004*"network" + 0.004*"document" + 0.004*"user" + 0.004*"energi" + 0.004*"model" + 0.003*"search" + 0.003*"file"
Topic: 1 Word: 0.005*"interact" + 0.004*"cluster" + 0.004*"model" + 0.004*"algorithm" + 0.004*"data" + 0.004*"network" + 0.004*"process" + 0.004*"peer" + 0.003*"social" + 0.003*"method"
Topic: 2 Word: 0.007*"process" + 0.005*"queri" + 0.005*"model" + 0.004*"data" + 0.004*"search" + 0.004*"index" + 0.004*"array" + 0.003*"databas" + 0.003*"user" + 0.003*"mobil"
Topic: 3 Word: 0.008*"cloud" + 0.005*"agent" + 0.005*"energi" + 0.004*"comput" + 0.004*"model" + 0.004*"resourc" + 0.004*"virtual" + 0.004*"public" + 0.004*"applic" + 0.003*"vote"
Topic: 4 Word: 0.010*"queri" + 0.004*"trajectori" + 0.004*"locat" + 0.004*"network" + 0.004*"data" + 0.004*"graph" + 0.004*"model" + 0.004*"user" + 0.004*"measur" + 0.003*"algorithm"
Topic: 5 Word: 0.006*"data" + 0.005*"detect" + 0.005*"stream" + 0.004*"anomali" + 0.004*"network" 

{'topic_id': 0, 'topic_ref': '0.007*"sequenc" + 0.004*"cloud" + 0.004*"network" + 0.004*"data" + 0.004*"sensor" + 0.004*"model" + 0.004*"cluster" + 0.003*"word" + 0.003*"method" + 0.003*"protein"'}
{'topic_id': 1, 'topic_ref': '0.005*"technolog" + 0.005*"research" + 0.004*"social" + 0.004*"health" + 0.004*"inform" + 0.004*"design" + 0.004*"data" + 0.004*"secur" + 0.004*"interact" + 0.004*"digit"'}
{'topic_id': 2, 'topic_ref': '0.006*"data" + 0.005*"detect" + 0.005*"imag" + 0.005*"method" + 0.004*"cloud" + 0.004*"cluster" + 0.004*"model" + 0.004*"featur" + 0.004*"retin" + 0.004*"learn"'}
{'topic_id': 3, 'topic_ref': '0.006*"cloud" + 0.005*"servic" + 0.005*"energi" + 0.005*"data" + 0.005*"network" + 0.004*"applic" + 0.004*"resourc" + 0.004*"mobil" + 0.004*"comput" + 0.003*"time"'}
{'topic_id': 4, 'topic_ref': '0.008*"queri" + 0.006*"document" + 0.005*"model" + 0.005*"user" + 0.004*"process" + 0.004*"locat" + 0.004*"algorithm" + 0.004*"collect" + 0.003*"data" + 0.003*"retriev"', 'topic_na

### Classification of the topics

### Performance evaluation by classifying a sample document using the LDA Bag of Words model

In [19]:
def getTopic(i):
    """Print document ``i`` and its TF-IDF LDA topics, highest score first."""
    print(docText(documents, i))
    ranked = sorted(lda_model_tfidf[bow_corpus[i]], key=lambda pair: pair[1], reverse=True)
    for topic_no, score in ranked:
        print(score, lda_model_tfidf.print_topic(topic_no))

In [55]:

def docText(documents, index):
    """Return the first-column value of the first row whose 'index' equals ``index``.

    Raises IndexError when no row matches.
    """
    matching = documents.loc[documents['index'] == index]
    return matching.iloc[0, 0]

def getTopicId(i):
    """Return the TF-IDF LDA topic ids of document ``i``, most probable first.

    The model already yields (topic id, score) pairs, so the ids are used
    directly. The previous implementation rendered each topic to a string and
    searched the global ``topics`` list for an identical string, which
    returned nothing (or the wrong ids) whenever ``topics`` had been built
    from a different training run, and failed if that cell had not been run.

    NOTE(review): ``lda_model_tfidf`` is trained on ``corpus_tfidf`` but is
    queried here with raw bag-of-words counts — confirm this is intended.
    """
    doc_topics = lda_model_tfidf[bow_corpus[i]]
    ranked = sorted(doc_topics, key=lambda pair: pair[1], reverse=True)
    # int() keeps the ids plain Python ints so they serialize cleanly to JSON.
    return [int(topic_no) for topic_no, _score in ranked]

# topic_table = {}
# for i in range(0,len(documents)):
#     topic_ids = getTopicId(i)
#     for j in topic_ids:
#         if j not in topic_table:
#             topic_table[j] = list()
#         else:
#             topic_table[j].append(i)
    

# Example: topic ids for document 500.
getTopicId(500)

Supporting grid-based clinical trials in Scotland : A computational infrastructure to underpin complex clinical trials and medical population studies is highly desirable. This should allow access to a range of distributed clinical data sets; support the efficient processing and analysis of the data obtained; have security at its heart; and ensure that authorized individuals are able to see privileged data and no more. Each clinical trial has its own requirements on data sets and how they are used; hence a reusable and flexible framework offers many advantages. The MRC funded Virtual Organisations for Trials and Epidemiological Studies (VOTES) is a collaborative project involving several UK universities specifically to explore this space. This article presents the experiences of developing the Scottish component of this nationwide infrastructure, by the National e-Science Centre (NeSC) based at the University of Glasgow, and the issues inherent in accessing and using the clinical data s

[6, 10, 3]

In [30]:
# Keyword lists keyed by topic id (0-14).
topic_keyword = {
    0: ['sensor', 'mutation', 'protein', 'sensor network', 'word sense', 'data', 'mining'],
    1: ['clustering', 'interaction', 'community', 'exertion', 'gesture', 'process model', 'graph', 'measure'],
    2: ['health', 'security', 'social', 'digital election', 'information', 'e-voting', 'internet technology', 'data privacy'],
    3: ['cloud', 'public display', 'agent', 'game play', 'data center', 'energy consumption'],
    4: ['query', 'document', 'location', 'data compression', 'search', 'model', 'tree', 'poi'],
    5: ['complementary pair', 'arithmetic', 'correlation', 'optimal'],
    6: ['code', 'security', 'clustering', 'coding', 'trajectory', 'privacy', 'attack', 'dependency', 'minimum redundancy', 'redundancy'],
    7: ['game agent', 'mobile', 'technology', 'interaction', 'display', 'social', 'workshop', 'device', 'feedback', 'gesture'],
    8: ['process', 'process model', 'graph', 'health', 'model', 'older', 'adoption', 'child', 'adult', 'older adult', 'clustering', 'protein', 'display', 'event', 'measure'],
    9: ['agent', 'meta', 'xcp', 'max min', 'max', 'min', 'equilibrium', 'template', 'behaviour', 'pedestrian', 'fairness', 'meta model', 'groundness', 'fuzzy', 'fuzzy rule'],
    10: ['resource', 'grid', 'cloud cluster', 'service', 'scheduling', 'optical', 'packet', 'energy', 'optical network', 'network', 'agent', 'allocation', 'core', 'size'],
    11: ['word', 'string', 'scheme', 'deep', 'event', 'file', 'lexicon', 'query', 'forum', 'database', 'binary', 'index', 'retrieval', 'constraint', 'statement'],
    12: ['document', 'compression', 'metric', 'query', 'word relevance', 'text retrieval', 'judgment', 'human', 'visual', 'collection', 'contrast'],
    13: ['event', 'model', 'cloud', 'graph', 'behavioral', 'event log', 'program', 'resource', 'path', 'relation check'],
    14: ['feedback', 'surgical', 'gene', 'feature', 'bone', 'selection', 'feature selection', 'classifier', 'xc', 'training', 'temporal bone', 'simulator'],
}


In [28]:
def getPaperID(index):
    """Return the paper id of the row whose 'index' equals ``index``.

    Reads the third column of the first matching row of the global
    ``documents`` frame and drops its final character.
    # NOTE(review): presumably the stored id carries one trailing character
    # that must be stripped — confirm against the CSV.
    """
    matching_rows = documents[documents['index'] == index]
    raw_id = matching_rows.values[0][2]
    return raw_id[:-1]

In [57]:
def getTopicInfo(index, max_topics=3):
    """Return at most ``max_topics`` topic ids for document ``index``.

    Ids come from ``getTopicId`` and keep its ordering. The earlier version
    only truncated when more than three ids were present and then kept just
    two, so a document with three topics kept three while one with four kept
    two; the cap is now applied uniformly.
    """
    topic_ids = getTopicId(index)
    return topic_ids[:max_topics]



Comments on "On a novel unsupervised competitive learning algorithm for scalar quantization" : This note proposes an efficient alternative to a recently proposed neural network for designing scalar quantizers. It also points out that the performance measure used is of limited applicability. © 1995 IEEE.


[10, 1, 9]

In [136]:
# from collections import Counter
# keywords_topic = {}
# for i in range(0,15):
#     keywords_topic[i] = Counter()

import couchdb
import csv
import os

# Take the server URL (which embeds the credentials) from the COUCHDB_URL
# environment variable so secrets need not be committed with the notebook.
# The fallback keeps the previous local-development default working.
couch = couchdb.Server(os.environ.get("COUCHDB_URL", "http://admin:password@localhost:5984"))
try:
    database = couch["paperinfo_scopus"]
except couchdb.ResourceNotFound:
    # Continuing would leave `database` undefined and make every later
    # addTopic() call fail with an unrelated NameError, so report and stop.
    print("wrong db name")
    raise
    
def addTopic(index):
    """Store the topic ids of document ``index`` on its CouchDB record.

    Looks the record up by the paper id from ``getPaperID`` and writes the
    result of ``getTopicInfo`` to its "topic_ids" field. Rows without a
    matching CouchDB record are reported and skipped instead of aborting the
    whole batch with a TypeError on ``None``.
    """
    paper_id = getPaperID(index)
    doc = database.get(paper_id)
    if doc is None:
        print("no CouchDB document for paper id {!r} (row {})".format(paper_id, index))
        return
    doc["topic_ids"] = getTopicInfo(index)
    database.save(doc)
    
# Write topic ids back for every document row.
for row_number in range(len(documents)):
    addTopic(row_number)


In [131]:
# Give every recorded topic a fresh, empty "topic_name" list.
for topic_entry in topics:
    topic_entry["topic_name"] = []

In [66]:
# Show the bag-of-words vector of document 100.
print(bow_corpus[100])

[(15, 1), (42, 2), (48, 2), (67, 1), (84, 1), (96, 1), (103, 1), (143, 1), (160, 1), (193, 1), (199, 1), (204, 2), (262, 3), (312, 1), (317, 1), (348, 2), (349, 1), (414, 1), (435, 4), (436, 1), (456, 1), (483, 1), (545, 1), (678, 1), (864, 2), (918, 1), (939, 1), (940, 1), (968, 1), (970, 2), (1081, 1), (1210, 1), (1211, 1), (1212, 1), (1213, 1)]


In [76]:
# Topics of document 1 under the bag-of-words model, highest score first.
ranked_bow_topics = sorted(lda_model[bow_corpus[1]], key=lambda pair: pair[1], reverse=True)
for topic_no, score in ranked_bow_topics:
    topic_words = lda_model.print_topic(topic_no, 10)
    print("\nScore: {}\t \nTopic: {}".format(score, topic_words))


Score: 0.38644173741340637	 
Topic: 0.022*"inform" + 0.015*"user" + 0.011*"data" + 0.009*"provid" + 0.009*"research" + 0.008*"evalu" + 0.008*"base" + 0.007*"network" + 0.007*"collect" + 0.007*"search"

Score: 0.3388003408908844	 
Topic: 0.019*"queri" + 0.018*"data" + 0.016*"process" + 0.014*"cloud" + 0.009*"propos" + 0.008*"time" + 0.008*"algorithm" + 0.008*"cost" + 0.008*"effici" + 0.007*"problem"

Score: 0.26342302560806274	 
Topic: 0.010*"propos" + 0.009*"agent" + 0.009*"method" + 0.008*"document" + 0.008*"similar" + 0.008*"game" + 0.008*"term" + 0.007*"effect" + 0.007*"model" + 0.007*"algorithm"


Our test document most likely belongs to the topic listed first, which has the highest score.

### Performance evaluation by classifying a sample document using the LDA TF-IDF model

In [31]:
# Topics of document 3981 under the TF-IDF model, highest score first.
# NOTE(review): the model is queried with raw bag-of-words counts although it
# was trained on corpus_tfidf — confirm this is intended.
ranked_tfidf_topics = sorted(lda_model_tfidf[bow_corpus[3981]], key=lambda pair: pair[1], reverse=True)
for topic_no, score in ranked_tfidf_topics:
    topic_words = lda_model_tfidf.print_topic(topic_no)
    print("\nScore: {}\t \nTopic: {}".format(score, topic_words))


Score: 0.5246488451957703	 
Topic: 0.004*"program" + 0.004*"secur" + 0.004*"agent" + 0.004*"model" + 0.004*"process" + 0.003*"user" + 0.003*"method" + 0.003*"inform" + 0.003*"technolog" + 0.003*"data"

Score: 0.3234825134277344	 
Topic: 0.009*"cloud" + 0.007*"servic" + 0.007*"resourc" + 0.005*"schedul" + 0.005*"workflow" + 0.004*"comput" + 0.004*"model" + 0.004*"cluster" + 0.004*"algorithm" + 0.004*"user"

Score: 0.1436990201473236	 
Topic: 0.005*"technolog" + 0.004*"user" + 0.004*"model" + 0.004*"social" + 0.004*"data" + 0.004*"cloud" + 0.004*"design" + 0.004*"anim" + 0.004*"process" + 0.003*"research"


Our test document most likely belongs to the topic listed first, which has the highest score.

### Testing the model on an unseen document

In [29]:
# unseen_document = "machine learning"
unseen_document = "The use of randomness in the designing of the digital devices has been discussed. Qualities of randomness such as unpredictability, indeterminacy and unexpectedness have been used as a creative resource to generate innovative , output. Randomness is a creative tool to inspire and generate innovative outputs that is a means to an end. The growth of digital interactivity has been accompanied by a increasing amount of interactive that express certain qualities of randomness during use. An emergent approach toward randomness is to allow users to interact directly with the randomness. Shuffle listening, which is an alternative listening mode offered by digital music players, is a more sophisticated approach, whereby application of randomness has publicly captured by imagination of many people. Considerations, in determining where a random feature can be used, should include the types of content, the domain and contexts where these digital devices are used"
# Preprocess the new text exactly like the training documents, then vectorize it.
unseen_tokens = preprocess(unseen_document)
bow_vector = dictionary.doc2bow(unseen_tokens)

# Report the inferred topics, highest score first, with five words each.
ranked_unseen_topics = sorted(lda_model_tfidf[bow_vector], key=lambda pair: pair[1], reverse=True)
for topic_no, score in ranked_unseen_topics:
    print("Score: {}\t Topic: {}".format(score, lda_model_tfidf.print_topic(topic_no, 5)))

Score: 0.3733212351799011	 Topic: 0.006*"mobile" + 0.005*"data" + 0.004*"social" + 0.004*"privacy" + 0.003*"research"
Score: 0.26307183504104614	 Topic: 0.006*"cloud" + 0.005*"process" + 0.005*"data" + 0.004*"model" + 0.004*"document"
Score: 0.16387544572353363	 Topic: 0.006*"game" + 0.005*"data" + 0.004*"design" + 0.003*"network" + 0.003*"mutations"
Score: 0.11993960291147232	 Topic: 0.005*"cluster" + 0.004*"model" + 0.004*"data" + 0.004*"program" + 0.003*"network"
Score: 0.06462649255990982	 Topic: 0.004*"data" + 0.003*"model" + 0.003*"network" + 0.003*"query" + 0.003*"process"
