In [46]:
# Algorithm for autocompletion of sentences:

import random
from collections import Counter
 
# NextWordFrequency counts how often each word immediately follows the given
# phrase in the corpus: for a k-word phrase matched at index i, it tallies the word at index i + k.
 
def NextWordFrequency(corpus, sentence):
    """Count the words that immediately follow ``sentence`` in ``corpus``.

    Matching is case-insensitive. ``sentence`` is normalised by splitting on
    whitespace and re-joining with single spaces, so leading, trailing or
    repeated whitespace in the phrase does not prevent a match.

    Args:
        corpus: Sequence of word tokens (strings) in document order.
        sentence: Phrase to search for, as whitespace-separated words.

    Returns:
        A plain ``dict`` mapping each following word (spelled as it appears
        in the corpus) to the number of times it follows the phrase. Empty
        when the phrase never occurs with a word after it.
    """
    phrase_words = sentence.split()
    sen_len = len(phrase_words)

    # Normalise once, outside the loop: every corpus window is joined with
    # single spaces, so the target must use the same spacing to compare equal.
    target = ' '.join(phrase_words).lower()

    # The range stops early enough that corpus[i + sen_len] always exists.
    followers = [
        corpus[i + sen_len]
        for i in range(len(corpus) - sen_len)
        if ' '.join(corpus[i:i + sen_len]).lower() == target
    ]

    return dict(Counter(followers))

 
# This function CumDistFn calculates the CDF (Cumulative Distribution Function) value of each word in the
# Counter dictionary.
 
def CumDistFn(d):
    """Replace each frequency in ``d`` with its cumulative probability.

    Words are processed in the dictionary's iteration order. The value stored
    for the i-th word is the sum of the frequencies of words 1..i divided by
    the sum of all frequencies, i.e. the CDF at that word.

    Args:
        d: Mapping of word -> frequency. It is modified in place.

    Returns:
        The same dictionary object ``d``, now mapping word -> CDF value.
        An empty dictionary is returned unchanged.

    Raises:
        ZeroDivisionError: If ``d`` is non-empty and its frequencies sum to 0.
    """
    total = sum(d.values())
    running = 0

    for word, freq in d.items():
        # Accumulate raw frequencies and divide once per word, instead of
        # summing per-word PMFs: adding rounded fractions drifts (ten equal
        # counts end at 0.9999999999999999), whereas running / total makes
        # the final entry exactly 1.0 for integer counts.
        running += freq
        d[word] = running / total

    # Returning the cdf dictionary
    return d
 
def main(sent, x, n):
    """Extend ``sent`` with words sampled from the corpus and print the result.

    The corpus is read from the hard-coded text files on every call. Starting
    from ``sent``, the next word is drawn at random in proportion to how often
    each candidate follows the current context in the corpus. After the first
    step the context is only the previously chosen word.

    Args:
        sent: Starting word or phrase.
        x: Not used by this function; kept so existing callers still work.
        n: Target number of whitespace-separated words in the printed sentence.

    Returns:
        None. Prints "Completed sentence: ..." on success. Returns without
        printing if at any step the corpus has no word following the context.
    """
    all_corpus = []

    # Reading multiple text files
    files = ["file1test.txt"]
    for file in files:
        with open(file, 'r', encoding="utf8") as f:
            all_corpus.extend(f.read().split())

    pieces = [sent]
    word_count = len(sent.split())
    current_word = sent

    while word_count < n:
        func_out = NextWordFrequency(all_corpus, current_word)

        if not func_out:
            return  # If unable to complete sentence, exit function without printing

        cdf_dict = CumDistFn(func_out)
        rand = random.uniform(0, 1)

        # Pick the first word whose CDF value reaches the random draw.
        # If rounding leaves the last CDF value just below the draw, the loop
        # finishes without a break and next_word is the last candidate, so a
        # word is always chosen (never unbound, never a stale previous word).
        for next_word, cdf in cdf_dict.items():
            if rand <= cdf:
                break

        pieces.append(next_word)
        word_count += 1  # corpus tokens come from split(), so each is one word
        current_word = next_word

    print("Completed sentence:", ' '.join(pieces).strip())


if __name__ == '__main__':
    # Load the prompts: one starting word or phrase per line of the input file.
    with open('test_word.txt', 'r', encoding="utf8") as prompt_file:
        prompts = prompt_file.read().splitlines()

    # Complete every prompt in turn. Each printed sentence is built up to
    # 10 words in total, counting the words of the prompt itself.
    for prompt in prompts:
        main(sent=prompt, x=len(prompt), n=10)



Completed sentence: apple a versatile fruit that can be eaten on their
