In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import regex as re

In [2]:
def file_to_sentence_list(file_path):
    """Load a UTF-8 text file and return its sentences as a list of strings.

    The text is cut at every run of whitespace that directly follows a
    '.', '!' or '?'. Each piece is stripped and empty pieces are dropped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The list of sentences, or an empty list when the file is missing
        or cannot be decoded as UTF-8 (a message is printed in both cases).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' does not exist.")
        return []
    except UnicodeDecodeError as e:
        print(f"Encoding error: {e}")
        return []

    # The lookbehind keeps the closing punctuation attached to its sentence.
    sentences = []
    for piece in re.split(r'(?<=[.!?])\s+', raw_text):
        trimmed = piece.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences

# Location of the training corpus
file_path = 'pizza.txt'

# Corpus split into sentences (empty list if the file could not be read)
text_data = file_to_sentence_list(file_path)

if text_data:
    # Build the vocabulary from the sentences. The +1 makes room for index 0,
    # which the word index does not use and which serves as the padding value.
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(text_data)
    total_words = 1 + len(tokenizer.word_index)

    # Every prefix of length >= 2 of every sentence becomes one training sample:
    # all tokens but the last are the input, the last token is the target.
    input_sequences = [
        token_list[:end]
        for token_list in tokenizer.texts_to_sequences(text_data)
        for end in range(2, len(token_list) + 1)
    ]

    # Left-pad all samples to the length of the longest one
    max_sequence_len = max(map(len, input_sequences))
    input_sequences = np.array(
        pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
    )

    # Predictors are everything except the last column; the last column is the
    # label, converted to a one-hot vector over the vocabulary.
    X = input_sequences[:, :-1]
    y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

    print("Text data successfully processed!")
else:
    print("No valid text data to process.")


Text data successfully processed!


In [None]:
# Build, train and save the model.
# Run this cell only once; after the .keras file has been written, comment the cell out.
model = Sequential([
    # Maps each word index to a 10-dimensional vector
    Embedding(total_words, 10, input_length=max_sequence_len - 1),
    LSTM(128),
    # One softmax output per vocabulary entry
    Dense(total_words, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the (prefix, next word) pairs prepared above
model.fit(X, y, epochs=100, verbose=1)

# Write the trained model to disk so later cells can load it
model.save("Next_Word_Prediction_with_Deep_Learning_in_NLP.keras")

In [3]:
# Restore the trained model from disk
model = load_model('Next_Word_Prediction_with_Deep_Learning_in_NLP.keras')

# The model's input is one token shorter than a full training sequence,
# so the full length is recovered from the input shape.
max_sequence_len = model.input_shape[1] + 1

# Prompt to extend and the number of words to append to it
seed_text = "how are"
next_words = 1

for _ in range(next_words):
    # Encode the prompt with the tokenizer fitted in the data-preparation cell
    # and left-pad it to the length the model expects.
    token_list = pad_sequences(
        tokenizer.texts_to_sequences([seed_text]),
        maxlen=max_sequence_len - 1,
        padding='pre',
    )
    # Probability of every vocabulary entry being the next word
    predicted_probs = model.predict(token_list, verbose=0)
    # Most probable index mapped back to its word ('' when the index has no word)
    best_index = np.argmax(predicted_probs)
    predicted_word = tokenizer.index_word.get(best_index, '')
    # Grow the prompt so the next iteration sees the new word
    seed_text = f"{seed_text} {predicted_word}"

print(predicted_word)
print("Next predicted words:", seed_text)


you
Next predicted words: how are you


## Text editor with integrated word prediction

In [4]:
import tkinter as tk
from tkinter import font
from tkinter import filedialog, messagebox
from docx import Document
from docx.shared import Pt
import numpy as np

In [7]:
# Text shown in the suggestion label until the first prediction replaces it
suggested_word = "example"

# Load the saved model. The tokenizer is NOT loaded here: the functions below
# use the `tokenizer` object that must already exist in the kernel.
model = load_model('Next_Word_Prediction_with_Deep_Learning_in_NLP.keras')

# Full sequence length = model input length + 1 (the word being predicted)
max_sequence_len = 1 + model.input_shape[1]

def toggle_formatting(tag, font_style=None):
    """Add or remove a formatting tag on the currently selected text.

    If the first selected character already carries ``tag`` the tag is
    removed from the selection; otherwise it is added and configured with
    the current font family, size 12 and ``font_style``.

    Args:
        tag: Name of the text tag to toggle (e.g. "bold").
        font_style: Style entry used in the tag's font tuple.

    A warning dialog is shown when Tk raises an error, which is what
    happens when nothing is selected.
    """
    try:
        # Resolving the selection marks raises TclError when there is no selection.
        sel_start = text.index("sel.first")
        sel_end = text.index("sel.last")

        already_applied = tag in text.tag_names(sel_start)
        if not already_applied:
            text.tag_add(tag, sel_start, sel_end)
            text.tag_configure(tag, font=(current_font.get(), 12, font_style))
        else:
            text.tag_remove(tag, sel_start, sel_end)
    except tk.TclError:
        messagebox.showwarning("Warning", "No text selected to apply formatting!")

def change_font(event=None):
    """Apply the font family held in ``current_font`` to the text widget.

    The widget-level font is set to that family at size 12, so it affects
    the widget as a whole rather than only the selection. ``event`` is
    accepted so the function can serve as a callback; it is not read.
    """
    text.config(font=(current_font.get(), 12))

def toggle_case(case_type):
    """Rewrite the selected text in upper or lower case.

    Args:
        case_type: "uppercase" converts the selection to upper case; any
            other value converts it to lower case.

    The selection is deleted and the converted text is inserted where the
    selection began. A warning dialog is shown when Tk raises an error,
    which is what happens when nothing is selected.
    """
    try:
        # Resolve the selection once; this raises TclError without a selection.
        sel_start = text.index("sel.first")
        sel_end = text.index("sel.last")

        original = text.get(sel_start, sel_end)
        if case_type == "uppercase":
            converted = original.upper()
        else:
            converted = original.lower()

        text.delete(sel_start, sel_end)
        text.insert(sel_start, converted)
    except tk.TclError:
        messagebox.showwarning("Warning", "No text selected to toggle case!")

def _docx_runs_by_paragraph(dump_items, style_tags=("bold", "italic", "underline")):
    """Group a ``Text.dump`` stream into paragraphs of (text, active style tags) runs.

    Args:
        dump_items: Iterable of ``(key, value, index)`` triples where key is
            "tagon", "tagoff" or "text" (other keys are ignored).
        style_tags: Tag names that count as formatting; all other tags
            (for example the selection tag) are dropped from the result.

    Returns:
        A list with one entry per paragraph; each entry is a list of
        ``(text, frozenset_of_style_tags)`` tuples. Every newline in the
        stream starts a new paragraph, so the list is never empty.
    """
    wanted = frozenset(style_tags)
    paragraphs = [[]]
    active = set()
    for key, value, _index in dump_items:
        if key == "tagon":
            active.add(value)
        elif key == "tagoff":
            active.discard(value)
        elif key == "text":
            for position, piece in enumerate(value.split("\n")):
                if position:
                    # Each "\n" inside the segment closes the current paragraph.
                    paragraphs.append([])
                if piece:
                    paragraphs[-1].append((piece, frozenset(active & wanted)))
    return paragraphs


def save_as_docx():
    """Save the editor content to a DOCX file, preserving inline formatting.

    The file name typed into ``file_name_entry`` is required and is offered
    as the initial name in a "save as" dialog. Each line of the editor
    becomes one paragraph. Within a line, every span with its own
    combination of the "bold", "italic" and "underline" tags becomes a
    separate run, so formatting applied to part of a line is written to
    exactly that part. All runs use the current font family at 12pt.

    Any exception raised while building or writing the document is
    reported in an error dialog instead of propagating.
    """
    file_name = file_name_entry.get()
    if not file_name:
        messagebox.showwarning("Warning", "Please enter a file name before saving!")
        return

    try:
        file_path = filedialog.asksaveasfilename(
            defaultextension=".docx", initialfile=file_name, filetypes=[("Word Files", "*.docx")]
        )
        if file_path:
            doc = Document()

            # Font family currently selected in the toolbar
            selected_font = current_font.get()

            # "end-1c" leaves out the newline Tk always keeps at the end of the widget.
            # The dump lists text segments interleaved with tag on/off markers, which
            # is what lets formatting be tracked per span instead of per line.
            dump_items = text.dump("1.0", "end-1c", tag=True, text=True)

            for runs in _docx_runs_by_paragraph(dump_items):
                paragraph = doc.add_paragraph()
                for run_text, styles in runs:
                    run = paragraph.add_run(run_text)

                    # Only switch a property on; leaving it untouched keeps the style default.
                    if "bold" in styles:
                        run.bold = True
                    if "italic" in styles:
                        run.italic = True
                    if "underline" in styles:
                        run.underline = True

                    run.font.name = selected_font
                    run.font.size = Pt(12)  # Set the font size to 12pt

            doc.save(file_path)
            messagebox.showinfo("Success", "File saved successfully as DOCX!")
    except Exception as e:
        messagebox.showerror("Error", f"Could not save file: {e}")

def toggle_theme():
    """Switch the editor between its light and dark colour schemes.

    The theme button's caption decides the direction: when it reads
    'Dark Mode' the light colours are applied and the caption becomes
    "Light Mode"; otherwise the dark colours are applied and the caption
    becomes "Dark Mode". The colours go to the text area, the root window,
    the toolbar and every widget inside the toolbar.
    """
    if theme_button.cget("text") == 'Dark Mode':
        background, foreground, caption = "white", "black", "Light Mode"
    else:
        background, foreground, caption = "black", "white", "Dark Mode"

    # The insertion cursor takes the foreground colour so it stays visible.
    text.config(bg=background, fg=foreground, insertbackground=foreground)
    root.config(bg=background)
    toolbar.config(bg=background)
    for child in toolbar.winfo_children():
        child.config(bg=background, fg=foreground)
    theme_button.config(text=caption)

def get_suggestion(last_words):
    """Predict the word that follows ``last_words`` and show it as the suggestion.

    Args:
        last_words: Sequence of words; they are joined with single spaces
            before being encoded.

    The prediction is stored in the module-level ``suggested_word`` (an
    empty string when the most probable index has no word) and written
    into the suggestion label.
    """
    global suggested_word

    # Encode the phrase and left-pad it to the model's input length
    phrase = " ".join(last_words)
    encoded = tokenizer.texts_to_sequences([phrase])
    model_input = pad_sequences(encoded, maxlen=max_sequence_len - 1, padding='pre')

    # Take the most probable vocabulary index and map it back to a word
    probabilities = model.predict(model_input, verbose=0)
    suggested_word = tokenizer.index_word.get(np.argmax(probabilities), '')

    # Show the new suggestion to the user
    suggestion_label.config(text=f"Suggested word (Press Tab): {suggested_word}")

def update_last_words(event=None):
    """Request a suggestion based on the last (up to five) words in the editor.

    The whole editor content is split on whitespace. Nothing happens when
    it contains no words. ``event`` is accepted so the function can be
    bound to a Tk event; it is not read.
    """
    words = text.get("1.0", "end").split()
    if not words:
        return
    # A [-5:] slice already yields every word when there are five or fewer.
    get_suggestion(words[-5:])

def insert_suggestion(event=None):
    """Insert the suggested word at the cursor and stop further key handling.

    The current ``suggested_word`` followed by a space is inserted at the
    insertion cursor.

    Returns:
        The string "break". When this function is used as an event
        handler, that return value tells Tk not to run the remaining
        bindings for the event; for <Tab> on a Text widget this prevents
        the default binding from adding a literal tab character after the
        inserted word.
    """
    text.insert(tk.INSERT, f"{suggested_word} ")
    return "break"

def _pack_toolbar_item(widget, side):
    """Pack ``widget`` on the given side of the toolbar with padx=5 and return it."""
    widget.pack(side=side, padx=5)
    return widget


# Top-level application window
root = tk.Tk()
root.title("Enhanced Text Editor")
root.geometry("1000x600")

# Font family chosen in the toolbar; read by the formatting and save functions
current_font = tk.StringVar(value="Arial")

# Toolbar strip across the top of the window
toolbar = tk.Frame(root)
toolbar.pack(fill="x", padx=5, pady=5)

# Left side: font picker followed by the formatting buttons
font_label = _pack_toolbar_item(tk.Label(toolbar, text="Font:"), "left")
font_menu = _pack_toolbar_item(
    tk.OptionMenu(toolbar, current_font, *font.families(), command=change_font),
    "left",
)
bold_btn = _pack_toolbar_item(
    tk.Button(toolbar, text="Bold", command=lambda: toggle_formatting("bold", "bold")),
    "left",
)
italic_btn = _pack_toolbar_item(
    tk.Button(toolbar, text="Italic", command=lambda: toggle_formatting("italic", "italic")),
    "left",
)
underline_btn = _pack_toolbar_item(
    tk.Button(toolbar, text="Underline", command=lambda: toggle_formatting("underline", "underline")),
    "left",
)
uppercase_btn = _pack_toolbar_item(
    tk.Button(toolbar, text="Toggle Case", command=lambda: toggle_case("uppercase")),
    "left",
)

# Right side: file name field, save button and theme switch (packed right to left)
file_name_label = _pack_toolbar_item(tk.Label(toolbar, text="File Name:"), "right")
file_name_entry = _pack_toolbar_item(tk.Entry(toolbar, width=20), "right")
save_docx_btn = _pack_toolbar_item(
    tk.Button(toolbar, text="Save as DOCX", command=save_as_docx), "right"
)
theme_button = _pack_toolbar_item(
    tk.Button(toolbar, text="Change Mode", command=toggle_theme), "right"
)

# Label that displays the current word suggestion
suggestion_label = tk.Label(
    root,
    text=f"Suggested word (Press Tab): {suggested_word}",
    font=("Arial", 10),
    fg="blue",
)
suggestion_label.pack(fill="x")

# Main editing area
text = tk.Text(root, wrap="word", font=("Arial", 12))
text.pack(expand=1, fill="both", padx=10, pady=10)

# Refresh the suggestion after every key release; Tab inserts the suggestion
text.bind("<KeyRelease>", update_last_words)
text.bind("<Tab>", insert_suggestion)

# Hand control to Tk; this call blocks until the window is closed
root.mainloop()
