In [7]:
corpus = [
    ["apple", "banana", "apple", "fruit", "fruit", "banana"],
    ["dog", "cat", "dog", "animal", "pet", "cat"],
    ["banana", "fruit", "apple", "orange", "fruit", "banana"]
]


In [8]:
def doc_topic(no_docs:int , no_topics : int ):
    table = {f"doc{i}" : [0]*no_topics for i in range(no_docs)}
    return table

In [9]:
def word_topic(no_topics:int , words : list ):
    table = {f"{word}" : [0]*no_topics for word in words}
    return table

In [10]:
no_topics = 2
topic_list = [ i for i in range(no_topics)]

In [11]:
import random

In [12]:
assignment = [ [ 0 for word in doc] for doc in corpus]
assignment

[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]

In [13]:
def initialize(corpus , topic_list):

    for i,doc in enumerate(corpus):
        for j ,word in enumerate(doc) :

            assignment[i][j] = random.choice(topic_list)

    print(assignment)

In [14]:
def fill_tables(word_topic_table , doc_topic_table):

    for i , doc in enumerate(corpus):
        for j ,word in enumerate(doc) :

            topic = assignment[i][j]
            
            word_topic_table[word][topic] += 1

            doc_topic_table[f"doc{i}"][topic] += 1

    print(doc_topic_table)
    print(word_topic_table)

In [15]:
vocab = list(set([word for doc in corpus for word in doc ]))
vocab

['banana', 'fruit', 'dog', 'apple', 'cat', 'orange', 'animal', 'pet']

In [16]:
word_topic_table = word_topic(2 ,vocab)
doc_topic_table = doc_topic(3,2)
doc_topic_table

{'doc0': [0, 0], 'doc1': [0, 0], 'doc2': [0, 0]}

In [17]:
initialize(corpus , topic_list)

[[1, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 1], [0, 1, 0, 0, 1, 1]]


In [18]:
fill_tables(word_topic_table ,doc_topic_table)

{'doc0': [5, 1], 'doc1': [3, 3], 'doc2': [3, 3]}
{'banana': [3, 1], 'fruit': [2, 2], 'dog': [1, 1], 'apple': [2, 1], 'cat': [1, 1], 'orange': [1, 0], 'animal': [0, 1], 'pet': [1, 0]}


In [19]:
def normalize(prob : list):
    total = sum(prob)
    prob_list = [ i/total for i in prob]
    return prob_list

In [20]:
def sample(choice:list , weight:list):
    #Normalize
    weight = normalize(weight)
    choice = random.choices(choice , weights = weight)
    return int(choice[0]) , weight

In [21]:
def iteration(alpha = 0.1 , beta=0.01, k = no_topics):
    
    for i , doc in enumerate(corpus):
        for j , word in enumerate(doc):

            current_topic = assignment[i][j]

            # Decreament step
            word_topic_table[word][current_topic] -= 1

            doc_topic_table[f"doc{i}"][current_topic]-= 1

            # temporary list to store probability of word getting reassigned a topic
            probability_list = []
            # Re-assigning word to topic
            for topic in range(no_topics):
                
                # Values
                # count of word 'w' in topic 'k'
                n_wk = word_topic_table[word][topic]
                
                #total number of words in topic k
                n_k = sum([doc_topic_table[document][topic] for document in doc_topic_table]) 
                
                # count of topic k in document d
                n_kd = doc_topic_table[f"doc{i}"][topic]
                
                # total words in document d
                n_d = len(doc)
                
                # total number of words in corpus
                v = len(vocab)
                
                # formula
                probability = (n_wk + beta)/(n_k + v * beta) * (n_kd + alpha)/(n_d + k *alpha)
                probability_list.append(probability)
                
            # Normalization and sampling
            sampled_topic , prob = sample(topic_list , weight=probability_list)

            # adding
            word_topic_table[word][sampled_topic] += 1
            doc_topic_table[f"doc{i}"][sampled_topic] += 1

            # re-assigning
            assignment[i][j] = sampled_topic

            # fill the probability table
            print(prob)
            probability_table[word] = prob

In [24]:
for _ in range(100):
    iteration(k=no_topics)

[0.9998222569010201, 0.00017774309897980597]
[0.9823614598812581, 0.01763854011874192]
[0.9998222569010201, 0.00017774309897980597]
[0.9336094983598231, 0.06639050164017701]
[0.9336094983598231, 0.06639050164017701]
[0.838969670930371, 0.16103032906962905]
[0.004688865930223317, 0.9953111340697767]
[0.0023582377532425773, 0.9976417622467574]
[0.0023582377532425773, 0.9976417622467574]
[0.19273137305924193, 0.807268626940758]
[0.02155950232690664, 0.9784404976730934]
[0.0002181163396042582, 0.9997818836603958]
[0.6574607866224853, 0.3425392133775147]
[0.32643144928571816, 0.6735685507142818]
[0.9899269249481557, 0.010073075051844332]
[0.3283759025757086, 0.6716240974242913]
[0.32643144928571816, 0.6735685507142818]
[0.6574607866224853, 0.3425392133775147]
[0.999063347443256, 0.0009366525567439709]
[0.9135001247042398, 0.08649987529576021]
[0.999063347443256, 0.0009366525567439709]
[0.017324553681467656, 0.9826754463185323]
[0.017324553681467656, 0.9826754463185323]
[0.8426111574078821, 

In [23]:
probability_table = { word : [0]*no_topics for word in vocab}
probability_table

{'banana': [0, 0],
 'fruit': [0, 0],
 'dog': [0, 0],
 'apple': [0, 0],
 'cat': [0, 0],
 'orange': [0, 0],
 'animal': [0, 0],
 'pet': [0, 0]}

In [25]:
print(probability_table)

{'banana': [0.9998813007551757, 0.000118699244824263], 'fruit': [0.9998813007551757, 0.000118699244824263], 'dog': [8.163375497917696e-05, 0.9999183662450208], 'apple': [0.9998222569010201, 0.00017774309897980597], 'cat': [8.163375497917696e-05, 0.9999183662450208], 'orange': [0.9655000622742558, 0.03449993772574418], 'animal': [0.008178247150492627, 0.9918217528495074], 'pet': [0.008178247150492627, 0.9918217528495074]}


In [26]:
def make_prob():
    new_table = {f"topic{i}" : [0]*len(vocab) for i in range(no_topics)}
    for i , word in enumerate(probability_table):
        for j , prob in enumerate(probability_table[word]):
                new_table[f"topic{j}"][i] = f"{prob}*{word}"
    return new_table

In [28]:
def show_topics():
    show_table = make_prob()
            
    for topic in show_table:
        print("Topic \n : ")
        print(show_table[topic])
        print("\n")
        print("------------+------------")

In [29]:
show_topics()

Topic 
 : 
['0.9998813007551757*banana', '0.9998813007551757*fruit', '8.163375497917696e-05*dog', '0.9998222569010201*apple', '8.163375497917696e-05*cat', '0.9655000622742558*orange', '0.008178247150492627*animal', '0.008178247150492627*pet']


------------+------------
Topic 
 : 
['0.000118699244824263*banana', '0.000118699244824263*fruit', '0.9999183662450208*dog', '0.00017774309897980597*apple', '0.9999183662450208*cat', '0.03449993772574418*orange', '0.9918217528495074*animal', '0.9918217528495074*pet']


------------+------------
