In [43]:
from collections import Counter
import random

In [97]:
documents = [
    ["Hadoop", "Big Data", "HBase", "Java", "Spark", "Storm", "Cassandra"],
    ["NoSQL", "MongoDB", "Cassandra", "HBase", "Postgres"],
    ["Python", "scikit-learn", "scipy", "numpy", "statsmodels", "pandas"],
    ["R", "Python", "statistics", "regression", "probability"],
    ["machine learning", "regression", "decision trees", "libsvm"],
    ["Python", "R", "Java", "C++", "Haskell", "programming languages"],
    ["statistics", "probability", "mathematics", "theory"],
    ["machine learning", "scikit-learn", "Mahout", "neural networks"],
    ["neural networks", "deep learning", "Big Data", "artificial intelligence"],
    ["Hadoop", "Java", "MapReduce", "Big Data"],
    ["statistics", "R", "statsmodels"],
    ["C++", "deep learning", "artificial intelligence", "probability"],
    ["pandas", "R", "Python"],
    ["databases", "HBase", "Postgres", "MySQL", "MongoDB"],
    ["libsvm", "regression", "support vector machines"]
]

K=4

In [98]:
def sample_from(weights):
    sum_weights=sum(weights)
    rnd=sum_weights*random.random()
    for i, w in enumerate(weights):
        rnd-=w
        if rnd<=0:
            return i

In [99]:
# a list of Counters, one for each document
document_topic_counts= [Counter() for _ in documents]

In [100]:
#How many times each word is assigned to each topic:
topic_word_counts= [Counter() for _ in range(K)]

In [101]:
#number of words assigned to each topic
topic_counts = [0 for _ in range(K)]

In [102]:
document_lengths=list(map(len, documents))

In [103]:
document_lengths

[7, 5, 6, 5, 4, 6, 4, 4, 4, 4, 3, 4, 3, 5, 3]

In [104]:
distinct_words = [word for document in documents for word in document]

In [105]:
W = len(distinct_words)
W

67

In [106]:
D = len(documents)

In [107]:
def p_topic_given_document(topic, d, alpha=0.1):
        """the fraction of words in document _d_
    that are assigned to _topic_ (plus some smoothing)"""
        return (document_topic_counts[d][topic]+alpha)/(document_lengths[d]+K*alpha)
        

In [108]:
def p_word_given_topic(word, topic, beta=0.1):
    """the fraction of words assigned to _topic_
    that equal _word_ (plus some smoothing)"""
    return (topic_word_counts[topic][word]+beta)/(topic_counts[topic]+W*beta)

In [109]:
def topic_weight(d, word, k):
    """given a document and a word in that document,
    return the weight for the kth topic"""
    return p_topic_given_document(k, d)*p_word_given_topic(word, k)

In [110]:
def choose_new_topic(d, word):
    return sample_from([topic_weight(d, word, k) for k in range(K)])

In [111]:
#initiate
random.seed(0)
document_topics = [[random.randrange(K) for word in document] for document in documents]

In [112]:
for d in range(D):
    for word, topic in zip(documents[d], document_topics[d]):
        document_topic_counts[d][topic]+=1
        topic_counts[topic] += 1
        topic_word_counts[topic][word] += 1

In [113]:
for _ in range(1000):
    for d in range(D):
        for i, (word, topic) in enumerate(zip(documents[d], document_topics[d])):
            document_lengths[d]-=1
            document_topic_counts[d][topic]-=1
            topic_counts[topic]-=1
            topic_word_counts[topic][word]-=1
            
            new_topic=choose_new_topic(d, word)
            document_topics[d][i]=new_topic
            print(i)
            print("docu "+ str(d))
            print("topic "+str(topic))
            print("word "+str(word))
            print("new topic "+str(new_topic))
            
            document_lengths[d]+=1
            document_topic_counts[d][new_topic]+=1
            topic_counts[new_topic]+=1
            topic_word_counts[new_topic][word]+=1
            

0
docu 0
topic 3
word Hadoop
new topic 0
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 0
word HBase
new topic 2
3
docu 0
topic 2
word Java
new topic 0
4
docu 0
topic 3
word Spark
new topic 0
5
docu 0
topic 3
word Storm
new topic 0
6
docu 0
topic 2
word Cassandra
new topic 2
0
docu 1
topic 3
word NoSQL
new topic 2
1
docu 1
topic 2
word MongoDB
new topic 1
2
docu 1
topic 1
word Cassandra
new topic 2
3
docu 1
topic 1
word HBase
new topic 2
4
docu 1
topic 2
word Postgres
new topic 1
0
docu 2
topic 1
word Python
new topic 2
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 1
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 3
word statistics
new topic 2
3
docu 3
topic 0
word regression
new topic 2
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 3
word machine learni

new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 0
3
docu 8
topic 1
word artificial intelligence
new topic 3
0
docu 9
topic 0
word Hadoop
new topic 0
1
docu 9
topic 0
word Java
new topic 0
2
docu 9
topic 0
word MapReduce
new topic 0
3
docu 9
topic 0
word Big Data
new topic 0
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 3
word C++
new topic 3
1
docu 11
topic 3
word deep learning
new topic 3
2
docu 11
topic 3
word artificial intelligence
new topic 3
3
docu 11
topic 3
word probability
new topic 3
0
docu 12
topic 2
word pandas
new topic 2
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 2
word Python
new topic 2
0
docu 13
topic 1
word databases
new topic 1
1
docu 13
topic 1
word HBase
new topic 1
2
docu 13
topic 1
word Postgres
new topic 1
3
docu 13
topic 1
word MySQL
new topic 1
4
docu

docu 1
topic 1
word Cassandra
new topic 1
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 3
3
docu 3
topic 2
word regression
new topic 3
4
docu 3
topic 3
word probability
new topic 3
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 2
3
docu 5
topic 3
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 0
word pro

2
docu 2
topic 2
word scipy
new topic 1
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 3
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 2
word Java
new topic 2
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 2
0
docu 6
topic 3
word statistics
new topic 3
1
docu 6
topic 3
word probability
new topic 3
2
docu 6
topic 3
word mathematics
new topic 3
3
docu 6
topic 3
word theory
new topic 3
0
do

2
docu 5
topic 3
word Java
new topic 3
3
docu 5
topic 3
word C++
new topic 3
4
docu 5
topic 3
word Haskell
new topic 2
5
docu 5
topic 3
word programming languages
new topic 3
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 3
word neural networks
new topic 3
1
docu 8
topic 3
word deep learning
new topic 3
2
docu 8
topic 3
word Big Data
new topic 3
3
docu 8
topic 3
word artificial intelligence
new topic 3
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 3
2
docu 9
topic 1
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10

new topic 3
2
docu 0
topic 1
word HBase
new topic 1
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 1
word Cassandra
new topic 1
0
docu 1
topic 1
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 1
word Cassandra
new topic 1
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 0
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 0
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
top

2
docu 7
topic 3
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 3
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 0
word C++
new topic 1
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 1
0
docu 12
topic 2
word pandas
new topic 2
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 2
word Python
new topic 2
0
docu 13
topic 1
word databases
new topic 1
1
docu 13
topic 1
word HBase
new topic 1
2
docu 13
to

5
docu 5
topic 1
word programming languages
new topic 2
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 0
word mathematics
new topic 2
3
docu 6
topic 0
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 3
word neural networks
new topic 3
1
docu 8
topic 3
word deep learning
new topic 3
2
docu 8
topic 3
word Big Data
new topic 3
3
docu 8
topic 3
word artificial intelligence
new topic 3
0
docu 9
topic 2
word Hadoop
new topic 3
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 2
word C++
new topic 2
1
docu 11
topic 3
word deep learning
new t

new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 2
word Java
new topic 2
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 2
0
docu 6
topic 2
word statistics
new topic 3
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 3
word mathematics
new topic 3
3
docu 6
topic 3
word theory
new topic 3
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 1
2
docu 8
topic 3
word Big Data
new topic 1
3
docu 8
topic 2
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
doc

word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 0
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 2
word Java
new topic 2
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 0
word Haskell
new topic 2
5
docu 5
topic 0
word programming languages
new topic 2
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 1
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 1
word Mahout
new topic 1
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 3
word Big Data
new topic 2
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topi

topic 0
word pandas
new topic 2
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 2
word Python
new topic 2
0
docu 13
topic 1
word databases
new topic 2
1
docu 13
topic 1
word HBase
new topic 1
2
docu 13
topic 1
word Postgres
new topic 1
3
docu 13
topic 0
word MySQL
new topic 2
4
docu 13
topic 1
word MongoDB
new topic 1
0
docu 14
topic 3
word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 0
2
docu 14
topic 0
word support vector machines
new topic 0
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 1
word HBase
new topic 1
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 1
6
docu 0
topic 3
word Cassandra
new topic 1
0
docu 1
topic 1
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 3
word Cassandra
new topic 1
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 0
word Python
new

new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 2
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 1
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 3
word deep learning
new topic 1
2
docu 8
topic 3
word Big Data
new topic 3
3
docu 8
topic 3
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
doc

word support vector machines
new topic 0
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 1
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 1
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 2
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 0
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 2
word probability
new top

2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 0
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 0
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 3
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 2
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word p

0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 0
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 1
word statsmodels
new topic 1
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
ne

docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 1
word machine learning
new topic 0
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 1
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 0
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 2
word pandas
new topic 2
1
docu 12
topic 2
word R
new topic 2
2
d

topic 1
word regression
new topic 1
2
docu 14
topic 1
word support vector machines
new topic 3
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 1
5
docu 0
topic 1
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression

new topic 2
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 2
word Python
new topic 2
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 1
word libsvm
new topic 1
1
docu 14
topic 1
word regression
new topic 1
2
docu 14
topic 1
word support vector machines
new topic 1
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 0
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
to

1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 1
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 1
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 1
word probability
new topic 2
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 2
word regr

0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 1
word probability
new topic 1
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 2
word Java
new topic 3
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 3
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
d

new topic 2
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 1
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 3
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 0
word C++
new topic 1
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial int

word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 0
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 1
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new t

word Storm
new topic 0
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 0
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 0
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 0
word Python
new topic 1
1
docu 5
topic 1
word R
new t

topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 2
3
docu 5
topic 1
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 2
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 3
word theory
new topic 1
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 

topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 0
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 2
word probability
new topic 0
0
docu 4
topic 1
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 3
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 2
3
docu 5
topic 1
word C++
new topic 2
4
docu 5
topic 3
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 2
0
docu 6
topic 1
word statistics
new topic 0
1
docu 6
topic 

docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 2
word pandas
new topic 2
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 3
word Python
new topic 2
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 0
word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 0
2
docu 14
topic 0
word support vector machines
new topic 0
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 3
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3

docu 14
topic 0
word support vector machines
new topic 0
0
docu 0
topic 3
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 1
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word pro

new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 1
4
docu 3
topic 1
word probability
new topic 1
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 2
word Haskell
new topic 1
5
docu 5
topic 0
word programming languages
new topic 1
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 1
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 0
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 1
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 1
wor

2
docu 5
topic 0
word Java
new topic 1
3
docu 5
topic 0
word C++
new topic 1
4
docu 5
topic 0
word Haskell
new topic 0
5
docu 5
topic 0
word programming languages
new topic 0
0
docu 6
topic 2
word statistics
new topic 0
1
docu 6
topic 1
word probability
new topic 0
2
docu 6
topic 0
word mathematics
new topic 0
3
docu 6
topic 1
word theory
new topic 0
0
docu 7
topic 0
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10

docu 6
topic 0
word probability
new topic 2
2
docu 6
topic 0
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 3
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
ne

docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 0
word pandas
new topic 2
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 1
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
ne

word probability
new topic 0
2
docu 6
topic 0
word mathematics
new topic 0
3
docu 6
topic 0
word theory
new topic 0
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 1
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 3
word Big Data
new topic 3
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 0
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 1
0
doc

word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 3
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 0
1
docu 9
topic 2
word Java
new topic 0
2
docu 9
topic 0
word MapReduce
new topic 0
3
docu 9
topic 3
word Big Data
new topic 0
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 2
0
docu 12
topic 1
word pandas
new topic 1
1
docu 12
topic 2
word R
new topic 2
2
docu 12
topic 2
word Python
new topic 1
0
docu 13
topic 3

2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 0
word Java
new topic 0
4
docu 0
topic 3
word Spark
new topic 0
5
docu 0
topic 0
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 0
0
docu 1
topic 3
word NoSQL
new topic 1
1
docu 1
topic 3
word MongoDB
new topic 1
2
docu 1
topic 3
word Cassandra
new topic 1
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 1
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new topic 1
0
docu 3
topic 1
word R
new topic 1
1
docu 3
topic 1
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word de

word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topic 2
word machine learning
new topic 1
1
docu 7
topic 1
word scikit-learn
new topic 1
2
docu 7
topic 1
word Mahout
new topic 1
3
docu 7
topic 1
word neural networks
new topic 3
0
docu 8
topic 3
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 2
word Big Data
new topic 0
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 0
word Hadoop
new topic 0
1
docu 9
topic 0
word Java
new topic 0
2
docu 9
topic 0
word MapReduce
new topic 0
3
docu 9
topic 0
word Big Data
new topic 0
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
topic 1
word R
new topic 1
2
docu 10
topic 1
word statsmodels
new topic 1
0
docu 11
topic 1
word C++
new topic 2
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 1
word probability
new topic 2
0
docu 12
topic 1
word pandas
new topic 1
1
docu 12

3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 0
word Big Data
new topic 0
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 3
word Hadoop
new topic 0
1
docu 9
topic 0
word Java
new topic 0
2
docu 9
topic 0
word MapReduce
new topic 0
3
docu 9
topic 0
word Big Data
new topic 0
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
topic 1
word R
new topic 1
2
docu 10
topic 1
word statsmodels
new topic 1
0
docu 11
topic 2
word C++
new topic 1
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 2
word probability
new topic 1
0
docu 12
topic 1
word pandas
new topic 2
1
docu 12
topic 1
word R
new topic 1
2
docu 12
topic 1
word Python
new topic 1
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13

docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 1
word R
new topic 1
1
docu 3
topic 1
word Python
new topic 1
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 2
word regression
new topic 1
4
docu 3
topic 1
word probability
new topic 1
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 1
word Python
new topic 3
1
docu 5
topic 1
word R
new topic 3
2
docu 5
topic 3
word Java
new topic 0
3
docu 5
topic 1
word C++
new topic 3
4
docu 5
topic 3
word Haskell
new topic 3
5
docu 5
topic 3
word programming languages
new topic 3
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new t

new topic 0
3
docu 13
topic 0
word MySQL
new topic 0
4
docu 13
topic 0
word MongoDB
new topic 0
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 1
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 0
word Hadoop
new topic 0
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 0
word HBase
new topic 0
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 0
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 0
0
docu 1
topic 0
word NoSQL
new topic 0
1
docu 1
topic 0
word MongoDB
new topic 0
2
docu 1
topic 0
word Cassandra
new topic 0
3
docu 1
topic 0
word HBase
new topic 0
4
docu 1
topic 0
word Postgres
new topic 0
0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 3
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new topic 2
0
docu

topic 3
word Python
new topic 3
1
docu 2
topic 3
word scikit-learn
new topic 3
2
docu 2
topic 3
word scipy
new topic 3
3
docu 2
topic 3
word numpy
new topic 3
4
docu 2
topic 3
word statsmodels
new topic 3
5
docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 3
word R
new topic 3
1
docu 3
topic 3
word Python
new topic 3
2
docu 3
topic 3
word statistics
new topic 3
3
docu 3
topic 3
word regression
new topic 3
4
docu 3
topic 3
word probability
new topic 3
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 1
2
docu 5
topic 0
word Java
new topic 0
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 0
word Haskell
new topic 2
5
docu 5
topic 0
word programming languages
new topic 1
0
docu 6
topic 3
word statistics
new topic 3
1
docu 6
topic 3
word probability
new topic 3
2
docu 6
top

topic 3
word R
new topic 3
2
docu 5
topic 0
word Java
new topic 0
3
docu 5
topic 0
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 0
5
docu 5
topic 0
word programming languages
new topic 1
0
docu 6
topic 3
word statistics
new topic 3
1
docu 6
topic 3
word probability
new topic 3
2
docu 6
topic 3
word mathematics
new topic 3
3
docu 6
topic 3
word theory
new topic 3
0
docu 7
topic 1
word machine learning
new topic 1
1
docu 7
topic 3
word scikit-learn
new topic 3
2
docu 7
topic 1
word Mahout
new topic 3
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 3
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 0
word Hadoop
new topic 0
1
docu 9
topic 0
word Java
new topic 0
2
docu 9
topic 2
word MapReduce
new topic 0
3
docu 9
topic 0
word Big Data
new topic 0
0
docu 10
topic 3
word statistics
new topic 3
1
docu 10
topic 3
w

new topic 0
1
docu 1
topic 0
word MongoDB
new topic 0
2
docu 1
topic 0
word Cassandra
new topic 0
3
docu 1
topic 0
word HBase
new topic 0
4
docu 1
topic 0
word Postgres
new topic 0
0
docu 2
topic 3
word Python
new topic 3
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 3
4
docu 2
topic 3
word statsmodels
new topic 2
5
docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 3
word R
new topic 3
1
docu 3
topic 3
word Python
new topic 3
2
docu 3
topic 3
word statistics
new topic 3
3
docu 3
topic 1
word regression
new topic 3
4
docu 3
topic 3
word probability
new topic 3
0
docu 4
topic 2
word machine learning
new topic 1
1
docu 4
topic 1
word regression
new topic 1
2
docu 4
topic 1
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 3
word Python
new topic 3
1
docu 5
topic 3
word R
new topic 3
2
docu 5
topic 0
word Java
new topic 0
3
docu 5
topic 0
word C++
new topic 3
4
docu 5
to

topic 1
word neural networks
new topic 1
1
docu 8
topic 0
word deep learning
new topic 1
2
docu 8
topic 0
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 0
word Hadoop
new topic 0
1
docu 9
topic 2
word Java
new topic 2
2
docu 9
topic 0
word MapReduce
new topic 0
3
docu 9
topic 0
word Big Data
new topic 1
0
docu 10
topic 3
word statistics
new topic 3
1
docu 10
topic 3
word R
new topic 3
2
docu 10
topic 3
word statsmodels
new topic 3
0
docu 11
topic 2
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 3
word pandas
new topic 3
1
docu 12
topic 3
word R
new topic 3
2
docu 12
topic 0
word Python
new topic 3
0
docu 13
topic 0
word databases
new topic 0
1
docu 13
topic 0
word HBase
new topic 0
2
docu 13
topic 0
word Postgres
new topic 0
3
docu 13
topic 0
word MySQL
new topic 0
4
docu 13
topic 0
word Mong

word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 2
2
docu 14
topic 0
word support vector machines
new topic 2
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 2
word Big Data
new topic 2
2
docu 0
topic 2
word HBase
new topic 0
3
docu 0
topic 2
word Java
new topic 2
4
docu 0
topic 2
word Spark
new topic 2
5
docu 0
topic 2
word Storm
new topic 2
6
docu 0
topic 2
word Cassandra
new topic 0
0
docu 1
topic 0
word NoSQL
new topic 0
1
docu 1
topic 0
word MongoDB
new topic 0
2
docu 1
topic 0
word Cassandra
new topic 0
3
docu 1
topic 0
word HBase
new topic 0
4
docu 1
topic 0
word Postgres
new topic 0
0
docu 2
topic 3
word Python
new topic 3
1
docu 2
topic 3
word scikit-learn
new topic 3
2
docu 2
topic 1
word scipy
new topic 3
3
docu 2
topic 3
word numpy
new topic 3
4
docu 2
topic 3
word statsmodels
new topic 3
5
docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 3
word R
new topic 3
1
docu 3
topic 2
word Python
new topic 3
2
docu 3
topic 3
word statistics
new topic 

docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 2
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 3
word probability
new topic 0
0
docu 12
topic 2
word pandas
new topic 3
1
docu 12
topic 3
word R
new topic 3
2
docu 12
topic 3
word Python
new topic 3
0
docu 13
topic 1
word databases
new topic 1
1
docu 13
topic 1
word HBase
new topic 1
2
docu 13
topic 1
word Postgres
new topic 1
3
docu 13
topic 1
word MySQL
new topic 1
4
docu 13
topic 0
word MongoDB
new topic 1
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 1
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 0
word Hadoop
new topic 1
1
docu 0
topic 0
word Big Data
new topic 0
2
docu 0
topic 1
word HBase
new topic 1
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 0
5
docu 0
topic 1
word Storm
new topic 0
6
docu 0
topic 0
word Cassandra
new topic 0
0
docu 1
topic 1
word NoSQL
new topic 1

docu 13
topic 1
word MySQL
new topic 1
4
docu 13
topic 1
word MongoDB
new topic 1
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 0
word Hadoop
new topic 0
1
docu 0
topic 0
word Big Data
new topic 0
2
docu 0
topic 0
word HBase
new topic 1
3
docu 0
topic 0
word Java
new topic 0
4
docu 0
topic 0
word Spark
new topic 0
5
docu 0
topic 0
word Storm
new topic 1
6
docu 0
topic 0
word Cassandra
new topic 0
0
docu 1
topic 1
word NoSQL
new topic 0
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 1
word Cassandra
new topic 0
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 2
word Python
new topic 3
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 3
4
docu 2
topic 2
word statsmodels
new topic 3
5
docu 2
topic 2
word pandas
new topic 3
0
docu 3
topic 3
wor

docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 3
0
docu 12
topic 3
word pandas
new topic 3
1
docu 12
topic 3
word R
new topic 3
2
docu 12
topic 3
word Python
new topic 3
0
docu 13
topic 1
word databases
new topic 1
1
docu 13
topic 1
word HBase
new topic 1
2
docu 13
topic 1
word Postgres
new topic 1
3
docu 13
topic 1
word MySQL
new topic 1
4
docu 13
topic 1
word MongoDB
new topic 1
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 0
word Hadoop
new topic 0
1
docu 0
topic 0
word Big Data
new topic 0
2
docu 0
topic 0
word HBase
new topic 0
3
docu 0
topic 0
word Java
new topic 0
4
docu 0
topic 0
word Spark
new topic 0
5
docu 0
topic 0
word Storm
new topic 1
6
docu 0
topic 0
word Cassandra
new topic 0
0
docu 1
topic 1
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 0
word Cassandra
new topic 1
3

word Storm
new topic 0
6
docu 0
topic 0
word Cassandra
new topic 1
0
docu 1
topic 1
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 1
word Cassandra
new topic 1
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 3
word Python
new topic 3
1
docu 2
topic 3
word scikit-learn
new topic 3
2
docu 2
topic 3
word scipy
new topic 3
3
docu 2
topic 3
word numpy
new topic 3
4
docu 2
topic 3
word statsmodels
new topic 3
5
docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 3
word R
new topic 3
1
docu 3
topic 3
word Python
new topic 3
2
docu 3
topic 3
word statistics
new topic 3
3
docu 3
topic 3
word regression
new topic 3
4
docu 3
topic 3
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 3
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 3
word Python
new topic 1
1
docu 5
topic 3
word R
new t

topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 0
word Hadoop
new topic 0
1
docu 0
topic 0
word Big Data
new topic 0
2
docu 0
topic 1
word HBase
new topic 1
3
docu 0
topic 0
word Java
new topic 0
4
docu 0
topic 1
word Spark
new topic 0
5
docu 0
topic 0
word Storm
new topic 0
6
docu 0
topic 1
word Cassandra
new topic 1
0
docu 1
topic 2
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 1
2
docu 1
topic 1
word Cassandra
new topic 1
3
docu 1
topic 1
word HBase
new topic 1
4
docu 1
topic 1
word Postgres
new topic 1
0
docu 2
topic 3
word Python
new topic 3
1
docu 2
topic 3
word scikit-learn
new topic 3
2
docu 2
topic 3
word scipy
new topic 3
3
docu 2
topic 3
word numpy
new topic 3
4
docu 2
topic 3
word statsmodels
new topic 3
5
docu 2
topic 3
word pandas
new topic 3
0
docu 3
topic 3
word R
new topic 3
1
docu 3
topic 3
word Python
new topic 3
2
docu 3
topic 3
word statistics
ne

new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 2
word Big Data
new topic 2
3
docu 8
topic 2
word artificial intelligence
new topic 0
0
docu 9
topic 2
word Hadoop
new topic 1
1
docu 9
topic 0
word Java
new topic 2
2
docu 9
topic 2
word MapReduce
new topic 1
3
docu 9
topic 2
word Big Data
new topic 2
0
docu 10
topic 3
word statistics
new topic 3
1
docu 10
topic 3
word R
new topic 3
2
docu 10
topic 3
word statsmodels
new topic 3
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 3
word probability
new topic 3
0
docu 12
topic 3
word pandas
new topic 3
1
docu 12
topic 3
word R
new topic 3
2
docu 12
topic 3
word Python
new topic 3
0
docu 13
topic 1
word databases
new to

new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 1
word Big Data
new topic 2
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 3
word statistics
new topic 3
1
docu 10
topic 3
word R
new topic 3
2
docu 10
topic 3
word statsmodels
new topic 0
0
docu 11
topic 1
word C++
new topic 0
1
docu 11
topic 2
word deep learning
new topic 0
2
docu 11
topic 2
word artificial intelligence
new topic 0
3
docu 11
topic 3
word probability
new topic 3
0
docu 12
topic 3
word pandas
new topic 0
1
docu 12
topic 3
word R
new topic 3
2
docu 12
topic 3
word Python
new topic 3
0
docu 13
topic 3
word databases
new to

word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 0
word Java
new topic 1
3
docu 5
topic 0
word C++
new topic 0
4
docu 5
topic 0
word Haskell
new topic 0
5
docu 5
topic 0
word programming languages
new topic 1
0
docu 6
topic 3
word statistics
new topic 3
1
docu 6
topic 3
word probability
new topic 3
2
docu 6
topic 3
word mathematics
new topic 3
3
docu 6
topic 3
word theory
new topic 1
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word J

new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 1
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 1
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topi

new topic 0
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 3
1
docu 8
topic 0
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 1
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python


word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 3
word scikit-learn
new topic 2
2
docu 2
topic 0
word scipy
new topic 2
3
docu 2
topic 0
word numpy
new topic 2
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 2
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 0
0
docu 6
topic 0
word statis

new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 0
1
docu 2
topic 2
word scikit-learn
new topic 3
2
docu 2
topic 2
word scipy
new topic 3
3
docu 2
topic 2
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 2
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 1
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 3
word C++
new topic 3
4
docu 5
to

docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 1
word HBase
new topic 1
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 1
5
docu 0
topic 1
word Storm
new topic 1
6
docu 0
topic 1
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 1
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 2
word statistics
new topic 0
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regres

docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Dat

3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 1
1
docu 9
topic 1
word Java
new topic 1
2
docu 9
topic 1
word MapReduce
new topic 1
3
docu 9
topic 1
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 1
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 3
word regression
new topic 2
2
docu 14
topic 2
word suppo

2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 1
word Hadoop
new topic 3
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 1
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 0
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word p

topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 1
6
docu 

topic 0
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 2
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 3
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 3
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word 

word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3


word artificial intelligence
new topic 2
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
topic 1
word R
new topic 1
2
docu 10
topic 0
word statsmodels
new topic 1
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 1
word R
new topic 1
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 2
4
docu 13
topic 3
word MongoDB
new topic 2
0
docu 14
topic 0
word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 0
2
docu 14
topic 0
word support vector machine

word pandas
new topic 0
0
docu 3
topic 1
word R
new topic 1
1
docu 3
topic 1
word Python
new topic 1
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 1
word probability
new topic 1
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 1
word Python
new topic 1
1
docu 5
topic 1
word R
new topic 1
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 1
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 2
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7

topic 1
word Python
new topic 1
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 0
word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 0
2
docu 14
topic 0
word support vector machines
new topic 0
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 2
word Big Data
new topic 2
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 2
word Java
new topic 2
4
docu 0
topic 2
word Spark
new topic 2
5
docu 0
topic 3
word Storm
new topic 2
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 3
word scikit-learn
new topic 0
2
docu 2
topic 1
word s

new topic 1
1
docu 6
topic 2
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 2
word theory
new topic 1
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 2
word Big Data
new topic 2
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 2
word Hadoop
new topic 2
1
docu 9
topic 2
word Java
new topic 2
2
docu 9
topic 2
word MapReduce
new topic 2
3
docu 9
topic 2
word Big Data
new topic 2
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
topic 1
word R
new topic 1
2
docu 10
topic 1
word statsmodels
new topic 1
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 1
word 

docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 1
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new topic 1
0
docu 3
topic 1
word R
new topic 1
1
docu 3
topic 1
word Python
new topic 1
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 1
word Python
new topic 1
1
docu 5
topic 1
word R
new topic 1
2
docu 5
topic 2
word Java
new topic 0
3
docu 5
topic 2
word C++
new topic 0
4
docu 5
topic 0
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 0
0
docu 6
topic 1
word statistics
new topic 0
1
docu 6
topic 0
word probability
new topic 0
2
docu 6
topic 1
word mathematics
new topic 0
3
docu 6
topic 0
word theory
new topic 0
0
docu

0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 1
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new topic 1
0
docu 3
topic 1
word R
new topic 1
1
docu 3
topic 1
word Python
new topic 1
2
docu 3
topic 1
word statistics
new topic 0
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 0
word machine learning
new topic 3
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 1
word Python
new topic 1
1
docu 5
topic 1
word R
new topic 1
2
docu 5
topic 1
word Java
new topic 1
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 1
0
docu 6
topic 0
word statistics
new topic 0
1
docu 6
topic 0
word probability
new topic 0
2
d

3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 0
word Big Data
new topic 2
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 2
word Hadoop
new topic 2
1
docu 9
topic 2
word Java
new topic 2
2
docu 9
topic 2
word MapReduce
new topic 2
3
docu 9
topic 2
word Big Data
new topic 2
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
topic 1
word R
new topic 1
2
docu 10
topic 1
word statsmodels
new topic 1
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 1
word pandas
new topic 1
1
docu 12
topic 1
word R
new topic 1
2
docu 12
topic 1
word Python
new topic 1
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13

new topic 2
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 0
word probability
new topic 2
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 0
word regression
new topic 0
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 0
word libsvm
new topic 0
0
docu 5
topic 1
word Python
new topic 1
1
docu 5
topic 1
word R
new topic 1
2
docu 5
topic 2
word Java
new topic 2
3
docu 5
topic 1
word C++
new topic 2
4
docu 5
topic 1
word Haskell
new topic 1
5
docu 5
topic 1
word programming languages
new topic 1
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 0
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 1
0
docu 8
topic 2
word neural networks
new topic 1
1
docu 8
topic 0
word deep learning
new to

new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 0
1
docu 7
topic 0
word scikit-learn
new topic 3
2
docu 7
topic 0
word Mahout
new topic 0
3
docu 7
topic 0
word neural networks
new topic 0
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
word deep learning
new topic 0
2
docu 8
topic 2
word Big Data
new topic 0
3
docu 8
topic 0
word artificial intelligence
new topic 0
0
docu 9
topic 2
word Hadoop
new topic 2
1
docu 9
topic 2
word Java
new topic 2
2
docu 9
topic 2
word MapReduce
new topic 2
3
docu 9
topic 2
word Big Data
new topic 2
0
docu 10
topic 2
word statistics
new topic 2
1
docu 10
topic 2
word R
new topic 2
2
docu 10
topic 2
word statsmodels
new topic 2
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
new topic 0
3
docu 11
topic 2
word probability
new topic 0
0
docu 12
topic 1
word pandas
new topic 1
1
docu 12
topic 2
word R
n

word support vector machines
new topic 1
0
docu 0
topic 1
word Hadoop
new topic 0
1
docu 0
topic 3
word Big Data
new topic 0
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 0
5
docu 0
topic 3
word Storm
new topic 0
6
docu 0
topic 1
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 2
word Python
new topic 2
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 2
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 1
word regression
new topic 1
4
docu 3
topic 2
word probability
new top

word scikit-learn
new topic 0
2
docu 2
topic 2
word scipy
new topic 0
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 0
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 1
word machine learning
new topic 1
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 0
word Java
new topic 0
3
docu 5
topic 0
word C++
new topic 0
4
docu 5
topic 0
word Haskell
new topic 2
5
docu 5
topic 0
word programming languages
new topic 0
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 

topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 0
word machine learning
new topic 2
1
docu 4
topic 1
word regression
new topic 2
2
docu 4
topic 0
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 0
word Java
new topic 0
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 0
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 2
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu 7
topic 0
word machine learning
new topic 3
1
docu 7
topic 1
word scikit-learn
new topic 1
2
docu 7
topic 0
word Mahout
new topic 1
3
docu 7
topic 0
word neural networks
new topic 1
0
docu 8
topic 0
word neural networks
new topic 0
1
docu 8
topic 0
w

docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 2
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 2
word pandas
new topic 2
0
docu 3
topic 2
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 1
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 0
word machine learning
new topic 0
1
docu 4
topic 1
word regression
new topic 1
2
docu 4
topic 0
word decision trees
new topic 0
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 2
word Python
new topic 2
1
docu 5
topic 2
word R
new topic 2
2
docu 5
topic 2
word Java
new topic 2
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 2
5
docu 5
topic 2
word programming languages
new topic 0
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2
3
docu 6
topic 2
word theory
new topic 2
0
docu

word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 2
3
docu 3
topic 2
word regression
new topic 2
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 1
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 2
word Java
new to

0
docu 14
topic 2
word libsvm
new topic 0
1
docu 14
topic 0
word regression
new topic 0
2
docu 14
topic 0
word support vector machines
new topic 0
0
docu 0
topic 1
word Hadoop
new topic 1
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 1
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 1
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word sta

new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 3
word support vector machines
new topic 2
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 2
4
docu 0
topic 2
word Spark
new topic 2
5
docu 0
topic 1
word Storm
new topic 2
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 2
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 1
3
d

new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 3
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 1
word Python
new topic 0
1
docu 5
topic 1
word R
new topic 0
2
docu 5
topic 1
word Java
new topic 3
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 1
word Haskell
new topic 2
5
docu 5
topic 1
word programming languages
new topic 0
0
docu 6
topic 0
word statistics
new topic 0
1
docu 6
topic 0
word probability
new topic 0
2
docu 6
topic 0
word mathematics
new topic 0
3
docu 6
topic 0
word theory
new topic 0
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 2
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new to

topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 0
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 3
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3


word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 3
word libsvm
new topic 3
1
docu 14
topic 3
word regression
new topic 3
2
docu 14
topic 3
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 1
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 2
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 1
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new 

docu 5
topic 3
word Java
new topic 3
3
docu 5
topic 0
word C++
new topic 0
4
docu 5
topic 3
word Haskell
new topic 3
5
docu 5
topic 3
word programming languages
new topic 0
0
docu 6
topic 0
word statistics
new topic 0
1
docu 6
topic 0
word probability
new topic 0
2
docu 6
topic 2
word mathematics
new topic 0
3
docu 6
topic 0
word theory
new topic 0
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 1
word scikit-learn
new topic 1
2
docu 7
topic 2
word Mahout
new topic 1
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 2
1
docu 8
topic 2
word deep learning
new topic 2
2
docu 8
topic 2
word Big Data
new topic 2
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
t

word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 1
6
docu 0
topic 1
word Cassandra
new topic 1
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 1
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 2
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 3
3
docu 2
topic 0
word numpy
new topic 1
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 

topic 1
word machine learning
new topic 2
1
docu 7
topic 0
word scikit-learn
new topic 1
2
docu 7
topic 2
word Mahout
new topic 3
3
docu 7
topic 1
word neural networks
new topic 1
0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 0
word Hadoop
new topic 3
1
docu 9
topic 0
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 0
word deep learning
new topic 0
2
docu 11
topic 0
word artificial intelligence
new topic 3
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 1
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13

topic 1
word probability
new topic 1
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 2
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 2
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgr

docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 0
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 0
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 0
word scikit-learn
new topic 2
2
docu 7
topic 0
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 0
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 1
word C++
new topic 1
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
ne

topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 2
1
docu 1
topic 3
word MongoDB
new topic 2
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 2
word numpy
new topic 0
4
docu 2
topic 0
word statsmod

1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 2
1
docu 0
topic 1
word Big Data
new topic 1
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 1
word Spark
new topic 3
5
docu 0
topic 1
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word s

0
docu 8
topic 1
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 3
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 1
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3


new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 3
word regression
new topic 2
4
docu 3
topic 3
word probability
new topic 1
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 0
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 3
word Java
new topic 1
3
docu 5
topic 0
word C++
new topic 0
4
docu 5
topic 0
word Haskell
new topic 0
5
docu 5
topic 0
word programming languages
new topic 0
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 2
2
docu 7
topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 1
0
docu

topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 1
word Python
new topic 0
2
docu 3
topic 1
word statistics
new topic 1
3
docu 3
topic 1
word regression
new topic 1
4
docu 3
topic 1
word probability
new topic 1
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 2
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 0
3
docu 4
topic 2
word libsvm
new topic 2
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 3
word Java
new topic 3
3
docu 5
topic 0
word C++
new topic 3
4
docu 5
topic 3
word Haskell
new topic 3
5
docu 5
topic 3
word programming languages
new topic 0
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topi

docu 5
topic 0
word R
new topic 0
2
docu 5
topic 3
word Java
new topic 3
3
docu 5
topic 0
word C++
new topic 3
4
docu 5
topic 0
word Haskell
new topic 0
5
docu 5
topic 3
word programming languages
new topic 0
0
docu 6
topic 1
word statistics
new topic 1
1
docu 6
topic 1
word probability
new topic 1
2
docu 6
topic 1
word mathematics
new topic 1
3
docu 6
topic 1
word theory
new topic 1
0
docu 7
topic 2
word machine learning
new topic 2
1
docu 7
topic 2
word scikit-learn
new topic 1
2
docu 7
topic 1
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 2
word neural networks
new topic 1
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 1
word Big Data
new topic 1
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 1
word statistics
new topic 1
1
docu 10
to

new topic 1
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 0
word regression
new topic 0
4
docu 3
topic 1
word probability
new topic 0
0
docu 4
topic 2
word ma

topic 2
word Mahout
new topic 2
3
docu 7
topic 2
word neural networks
new topic 2
0
docu 8
topic 1
word neural networks
new topic 2
1
docu 8
topic 1
word deep learning
new topic 1
2
docu 8
topic 3
word Big Data
new topic 2
3
docu 8
topic 1
word artificial intelligence
new topic 1
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 2
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 1
word C++
new topic 0
1
docu 11
topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
wor

topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 2
word deep learning
new topic 2
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 0
1
docu 13
topic 3
word HBase
new topic 0
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 1
word libsvm
new topic 1
1
docu 14
topic 1
word regression
new topic 1
2
docu 14
topic 1
word support vector machines
new topic 1
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new t

topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 1
word regression
new topic 1
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 1
word machine learning
new topic 1
1
docu 4
topic 1
word regression
new topic 1
2
docu 4
topic 1
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 0
word 

docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 2
word statistics
new topic 2
3
docu 3
topic 1
word regression
new topic 0
4
docu 3
topic 2
word probability
new topic 2
0
docu 4
topic 1
word machine learning
new topic 1
1
docu 4
topic 1
word regression
new topic 1
2
docu 4
topic 1
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 2
word Java
new topic 0
3
docu 5
topic 2
word C++
new topic 2
4
docu 5
topic 2
word Haskell
new topic 0
5
docu 5
topic 0
word programming languages
new topic 2
0
docu 6
topic 2
word statistics
new topic 2
1
docu 6
topic 2
word probability
new topic 2
2
docu 6
topic 2
word mathematics
new topic 2


docu 8
topic 2
word neural networks
new topic 3
1
docu 8
topic 2
word deep learning
new topic 3
2
docu 8
topic 3
word Big Data
new topic 3
3
docu 8
topic 2
word artificial intelligence
new topic 2
0
docu 9
topic 3
word Hadoop
new topic 3
1
docu 9
topic 3
word Java
new topic 3
2
docu 9
topic 3
word MapReduce
new topic 3
3
docu 9
topic 3
word Big Data
new topic 3
0
docu 10
topic 0
word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 2
word C++
new topic 2
1
docu 11
topic 2
word deep learning
new topic 3
2
docu 11
topic 2
word artificial intelligence
new topic 2
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
wo

docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 1
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 1
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 1
0
docu 2
topic 2
word Python
new topic 0
1
docu 2
topic 2
word scikit-learn
new topic 2
2
docu 2
topic 2
word scipy
new topic 2
3
docu 2
topic 2
word numpy
new topic 2
4
docu 2
topic 2
word statsmodels
new topic 0
5
docu 2
topic 2
word pandas
new topic 1
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 1
3
docu 3
topic 0
word regression
new topic 1
4
docu 3
topic 0
word probability
new topic 1
0
docu 4
topic 2
word machine learning
new topic 2
1
docu 4
topic 0
word regression
new topic 2
2
docu 4
topic 2
word decision trees
new topic 2
3
docu 4
topic 2
word libsvm
new topic 1
0
docu 5
topic 0
word Python
new topic 0
1
docu 5
topic 0
word R
new topic 0
2
docu 5
topic 3
w

new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 1
word support vector machines
new topic 0
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 1
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 2
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 1
2
docu 3
topi

word statistics
new topic 0
1
docu 10
topic 0
word R
new topic 0
2
docu 10
topic 0
word statsmodels
new topic 0
0
docu 11
topic 0
word C++
new topic 0
1
docu 11
topic 2
word deep learning
new topic 1
2
docu 11
topic 2
word artificial intelligence
new topic 0
3
docu 11
topic 0
word probability
new topic 0
0
docu 12
topic 0
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0


new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 2
word libsvm
new topic 2
1
docu 14
topic 2
word regression
new topic 2
2
docu 14
topic 2
word support vector machines
new topic 2
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 2
word Big Data
new topic 2
2
docu 0
topic 1
word HBase
new topic 3
3
docu 0
topic 1
word Java
new topic 1
4
docu 0
topic 2
word Spark
new topic 1
5
docu 0
topic 1
word Storm
new topic 1
6
docu 0
topic 1
word Cassandra
new topic 1
0
docu 1
topic 3
word NoSQL
new topic 1
1
docu 1
topic 1
word MongoDB
new topic 3
2
docu 1
topic 1
word Cassandra
new topic 1
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 3
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new topic 0
3
docu 2
topic 0
word numpy
new topic 0
4
docu 2
topic 0
word statsmodels
new topic 0
5
docu 2
topic 0
word pandas
new topic 0
0
docu

word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 1
word libsvm
new topic 1
1
docu 14
topic 1
word regression
new topic 1
2
docu 14
topic 1
word support vector machines
new topic 1
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 2
word Big Data
new topic 2
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 2
word Java
new topic 2
4
docu 0
topic 2
word Spark
new topic 2
5
docu 0
topic 2
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 0
word Python
new topic 0
1
docu 2
topic 0
word scikit-learn
new topic 0
2
docu 2
topic 0
word scipy
new

topic 1
word deep learning
new topic 1
2
docu 11
topic 1
word artificial intelligence
new topic 1
3
docu 11
topic 1
word probability
new topic 1
0
docu 12
topic 2
word pandas
new topic 0
1
docu 12
topic 0
word R
new topic 0
2
docu 12
topic 0
word Python
new topic 0
0
docu 13
topic 3
word databases
new topic 3
1
docu 13
topic 3
word HBase
new topic 3
2
docu 13
topic 3
word Postgres
new topic 3
3
docu 13
topic 3
word MySQL
new topic 3
4
docu 13
topic 3
word MongoDB
new topic 3
0
docu 14
topic 1
word libsvm
new topic 1
1
docu 14
topic 1
word regression
new topic 1
2
docu 14
topic 2
word support vector machines
new topic 1
0
docu 0
topic 3
word Hadoop
new topic 3
1
docu 0
topic 3
word Big Data
new topic 3
2
docu 0
topic 3
word HBase
new topic 3
3
docu 0
topic 3
word Java
new topic 3
4
docu 0
topic 3
word Spark
new topic 3
5
docu 0
topic 3
word Storm
new topic 3
6
docu 0
topic 3
word Cassandra
new topic 3
0
docu 1
topic 3
word NoSQL
new topic 1
1
docu 1
topic 3
word MongoDB
new topic 1
2
do

5
docu 2
topic 0
word pandas
new topic 0
0
docu 3
topic 0
word R
new topic 0
1
docu 3
topic 0
word Python
new topic 0
2
docu 3
topic 0
word statistics
new topic 0
3
docu 3
topic 0
word regression
new topic 1
4
docu 3
topic 0
word probability
new topic 0
0
docu 4
topic 2
word machine learning
new topic 1
1
docu 4
topic 1
word regression
new topic 1
2
docu 4
topic 1
word decision trees
new topic 1
3
docu 4
topic 1
word libsvm
new topic 1
0
docu 5
topic 0
word Python
new topic 3
1
docu 5
topic 3
word R
new topic 1
2
docu 5
topic 3
word Java
new topic 3
3
docu 5
topic 1
word C++
new topic 1
4
docu 5
topic 3
word Haskell
new topic 1
5
docu 5
topic 3
word programming languages
new topic 1
0
docu 6
topic 0
word statistics
new topic 0
1
docu 6
topic 0
word probability
new topic 0
2
docu 6
topic 0
word mathematics
new topic 0
3
docu 6
topic 0
word theory
new topic 0
0
docu 7
topic 2
word machine learning
new topic 1
1
docu 7
topic 2
word scikit-learn
new topic 1
2
docu 7
topic 2
word Mahout
new

2
docu 14
topic 1
word support vector machines
new topic 1
0
docu 0
topic 2
word Hadoop
new topic 2
1
docu 0
topic 3
word Big Data
new topic 2
2
docu 0
topic 3
word HBase
new topic 2
3
docu 0
topic 2
word Java
new topic 2
4
docu 0
topic 2
word Spark
new topic 2
5
docu 0
topic 2
word Storm
new topic 2
6
docu 0
topic 1
word Cassandra
new topic 2
0
docu 1
topic 3
word NoSQL
new topic 3
1
docu 1
topic 3
word MongoDB
new topic 3
2
docu 1
topic 3
word Cassandra
new topic 3
3
docu 1
topic 3
word HBase
new topic 3
4
docu 1
topic 3
word Postgres
new topic 3
0
docu 2
topic 1
word Python
new topic 1
1
docu 2
topic 1
word scikit-learn
new topic 1
2
docu 2
topic 1
word scipy
new topic 1
3
docu 2
topic 1
word numpy
new topic 3
4
docu 2
topic 1
word statsmodels
new topic 1
5
docu 2
topic 1
word pandas
new topic 2
0
docu 3
topic 1
word R
new topic 2
1
docu 3
topic 2
word Python
new topic 2
2
docu 3
topic 2
word statistics
new topic 0
3
docu 3
topic 1
word regression
new topic 0
4
docu 3
topic 2
word p

In [114]:
topic_word_counts

[Counter({'Big Data': 1,
          'C++': 2,
          'Cassandra': 0,
          'HBase': 0,
          'Hadoop': 0,
          'Haskell': 0,
          'Java': 0,
          'Mahout': 0,
          'MapReduce': 0,
          'MongoDB': 0,
          'MySQL': 0,
          'NoSQL': 0,
          'Postgres': 0,
          'Python': 0,
          'R': 0,
          'Spark': 0,
          'Storm': 0,
          'artificial intelligence': 2,
          'databases': 0,
          'decision trees': 1,
          'deep learning': 2,
          'libsvm': 0,
          'machine learning': 2,
          'mathematics': 0,
          'neural networks': 2,
          'numpy': 0,
          'pandas': 0,
          'probability': 1,
          'programming languages': 1,
          'regression': 0,
          'scikit-learn': 0,
          'scipy': 0,
          'statistics': 0,
          'statsmodels': 0,
          'support vector machines': 0,
          'theory': 0}),
 Counter({'Big Data': 0,
          'C++': 0,
          'Cass