# Needed imports

In [1]:
import nltk
import os
import random

In [4]:
# Download NLTK's 'punkt' package, needed by the nltk.word_tokenize calls
# further down; when the package is already installed this only reports that
# it is up to date.
# NOTE(review): recent NLTK releases appear to look up 'punkt_tab' instead of
# 'punkt' for word_tokenize — confirm against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\abdal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Data Preprocessing

In [30]:
# Folder holding the raw text files and the maximum number of files sampled from it.
dataset_path = "./data"
max_files = 300

# os.listdir returns entries in arbitrary order, so sort them to make the
# sample reproducible, and keep regular files only so that a stray
# sub-directory (e.g. .ipynb_checkpoints) cannot crash open().
files = sorted(
    name for name in os.listdir(dataset_path)
    if os.path.isfile(os.path.join(dataset_path, name))
)

# Slicing (rather than indexing up to a fixed count) also works when the
# folder contains fewer than max_files files.
file_texts = []
for filename in files[:max_files]:
    with open(os.path.join(dataset_path, filename), 'r', encoding='utf-8') as file:
        file_texts.append(file.read())

# Join with a space so the last word of one file never fuses with the first
# word of the next one.
data_text = ' '.join(file_texts)
# str.split() with no argument splits on any whitespace run (including \n and
# \r), so this single step normalises all whitespace to single spaces.
data_text = ' '.join(data_text.split())
data_text = data_text.lower() # Convert all text to lowercase
tokens = nltk.word_tokenize(data_text)
print("Sample of size: ", len(tokens), "is used")

Sample of size:  232465 is used


In [22]:
# Count how often each run of three consecutive tokens occurs.
trigrams = {}
for trigram in zip(tokens, tokens[1:], tokens[2:]):
    if trigram in trigrams:
        trigrams[trigram] += 1
    else:
        trigrams[trigram] = 1

# Rebuild the dict so iteration yields the most frequent trigrams first
# (the sort is stable, so equally frequent trigrams keep first-seen order).
ranked_trigrams = sorted(trigrams.items(), key=lambda pair: pair[1], reverse=True)
trigrams = dict(ranked_trigrams)

# Define a function to suggest the next word based on the last two words of the input sentence
def suggest_next_word(sentence):
    """Suggest candidate next words for ``sentence`` using the trigram counts.

    The sentence is lower-cased and tokenized with ``nltk.word_tokenize``.

    * No tokens: nothing can be predicted, so an empty list is returned.
    * One token: the candidates are the words that directly follow that token
      in the corpus, ranked by how often the pair occurs.
    * Two or more tokens: the candidates are the third words of every trigram
      whose first two words equal the last two tokens, in the iteration order
      of the module-level ``trigrams`` dict (most frequent first).

    Parameters
    ----------
    sentence : str
        Text typed so far.

    Returns
    -------
    list of str
        Candidate next words without duplicates, best candidate first.
    """
    words = nltk.word_tokenize(sentence.lower())
    if not words:
        return []

    if len(words) == 1:
        only_word = words[0]
        # The word right after the typed one is the trigram's SECOND element.
        # Many trigrams share the same first two words, so their counts are
        # summed per follower; this also removes duplicates.
        follower_counts = {}
        for (first, second, _third), count in trigrams.items():
            if first == only_word:
                follower_counts[second] = follower_counts.get(second, 0) + count
        # Stable sort: equally frequent followers keep first-seen order.
        return sorted(follower_counts, key=follower_counts.get, reverse=True)

    last_two_words = tuple(words[-2:])
    return [k[2] for k in trigrams if k[:2] == last_two_words]

# GUI

In [27]:
import tkinter as tk

# Top-level window of the demo
root = tk.Tk()
root.title("Auto-filling model using trigram")

# Every widget gets the same padding inside its grid cell
cell_padding = {"padx": 5, "pady": 5}

# Row 0: prompt label plus the entry the user types into
entry_label = tk.Label(root, text="Enter text:")
entry_label.grid(row=0, column=0, **cell_padding)
entry = tk.Entry(root, width=50)
entry.grid(row=0, column=1, **cell_padding)

# Row 1: caption plus the list box that displays the suggestions
suggestions_label = tk.Label(root, text="Suggestions:")
suggestions_label.grid(row=1, column=0, **cell_padding)
listbox = tk.Listbox(root, height=5, width=50)
listbox.grid(row=1, column=1, **cell_padding)

# define function to update suggestions list
def update_suggestions(event):
    """Refresh the suggestion list box when the Return key triggered the event.

    Events for any other key are ignored. Otherwise the current entry text is
    passed to ``suggest_next_word``, the list box is cleared, and up to five
    suggestions are inserted; when there are none, a single "--" placeholder
    row is inserted instead.
    """
    if event.keysym != "Return":
        return

    top_suggestions = suggest_next_word(entry.get())[:5]
    listbox.delete(0, tk.END)
    # An empty result falls back to the one-item placeholder list
    for row_text in top_suggestions or ["--"]:
        listbox.insert(tk.END, row_text)

# bind entry widget to update_suggestions function: the callback is invoked on
# every key release inside the entry, and update_suggestions itself ignores
# every key except Return
entry.bind("<KeyRelease>", update_suggestions)

# start the Tk event loop; this call does not return until the window is closed
root.mainloop()