In [None]:
!pip -q install contractions emoji pyspellchecker

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
import json
import random
import pandas as pd
import numpy as np

In [None]:
import nltk
from nltk.data import find
from nltk import download
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from nltk.tokenize.treebank import TreebankWordDetokenizer
import contractions
import unicodedata
from bs4 import BeautifulSoup
import emoji
import re
from spellchecker import SpellChecker
import unittest

class Preprocessor:
    '''
    Text-cleaning pipeline assembled from small, individually callable steps.

    Every public method performs one transformation. String steps take and
    return ``str``; token steps take and return ``list[str]``;
    ``tokenize_sentence`` converts a string into tokens and
    ``detokenize_sentence`` converts tokens back into a string.
    :meth:`clean` chains the steps by name.
    '''

    # NLTK resources looked up (and downloaded when missing) by ``__init__``.
    resources = [
        'tokenizers/punkt',
        'corpora/stopwords',
        'corpora/wordnet',
        'taggers/averaged_perceptron_tagger'
    ]

    # Steps applied by ``clean`` when the caller does not pass ``steps``.
    _DEFAULT_STEPS = (
        'translate_emojis_to_text',
        'lower_sentence',
        'remove_nonascii_diacritic',
        'remove_emails',
        'clean_html',
        'remove_url',
        'replace_repeated_chars',
        'expand_sentence',
        'remove_possessives',
        'remove_extra_space',
        'tokenize_sentence',
        'check_sentence_spelling',
        'remove_stop_words',
        'lemm_sentence',
    )

    # Every step name ``clean`` is willing to dispatch to (a method of the same name).
    _AVAILABLE_STEPS = frozenset(_DEFAULT_STEPS) | {
        'detokenize_sentence',
        'remove_emojis',
        'remove_emoticons',
        'remove_non_alphabetic',
    }

    # First letter of a Penn Treebank tag -> WordNet POS code; anything else is a noun.
    _WORDNET_POS = {'N': 'n', 'V': 'v', 'J': 'a', 'R': 'r'}

    # scheme or "www." + (host name | dotted IPv4) + optional path.
    # Raw string, literal dot after "www", and '-' placed last in the path class so it
    # is a literal hyphen instead of forming an accidental '/'..'_' range.
    _URL_PATTERN = re.compile(
        r"((http://|https://|ftp://)|(www\.))+"
        r"(([a-zA-Z0-9.-]+\.[a-zA-Z]{2,63})"
        r"|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))"
        r"(/[a-zA-Z0-9%:/_?.'~=&#+@;-]*)?"
    )

    # Western-style emoticons. The alternation must not contain an empty branch:
    # an empty branch always succeeds, which makes every later branch unreachable.
    # Branches that end in a letter/digit or start with one carry a lookaround so
    # they do not bite into ordinary words ("note:see", "hello:)", "<30").
    _EMOTICON_PATTERN = re.compile(
        r":\)+|:-\)+|;\)+"                               # :)  :-)  ;)
        r"|:-D+|:D+|;-D+|xD+"                            # :-D :D ;-D xD
        r"|:-\(+|:\(+"                                   # :-( :(
        r"|:-/+|:/+"                                     # :-/ :/
        r"|:-O+(?!\w)|:O+(?!\w)"                         # :-O :O
        r"|:-\*+|:\*+"                                   # :-* :*
        r"|<3+(?!\d)"                                    # <3
        r"|:P+(?!\w)|:-P+(?!\w)|;P+(?!\w)|;-P+(?!\w)"    # :P :-P ;P ;-P
        r"|:S+(?!\w)"                                    # :S
        r"|>:O+(?!\w)"                                   # >:O
        r"|(?<!\w)8\)+|(?<!\w)B-\)+|(?<!\w)O:\)+",       # 8) B-) O:)
        flags=re.IGNORECASE,
    )

    _EMOJI_PATTERN = re.compile("["
                u"\U0001F600-\U0001F64F"  # emoticons
                u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                u"\U0001F680-\U0001F6FF"  # transport & map symbols
                u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                u"\U00002500-\U00002BEF"  # chinese char
                u"\U00002700-\U000027BF"  # Dingbats
                u"\U00002702-\U000027B0"
                u"\U000024C2-\U0001F251"
                u"\U0001f926-\U0001f937"
                u"\U00010000-\U0010ffff"
                u"\u2640-\u2642"
                u"\u2600-\u2B55"
                u"\u200d"
                u"\u23cf"
                u"\u23e9"
                u"\u231a"
                u"\u3030"
                "]+", flags=re.UNICODE)

    def __init__(self) -> None:
        '''
        Make sure the required NLTK resources are present, then build the
        stop-word set and the lemmatizer used by the token-level steps.
        '''
        for resource in self.resources:
            if self._resource_available(resource):
                print(f"{resource} is already downloaded.")
            else:
                print(f"{resource} not found. Downloading...")
                download(resource.split('/')[1])

        # Stopword removal
        self.stop_words = set(stopwords.words('english'))
        # Initialize the WordNet lemmatizer
        self.lemmatizer = WordNetLemmatizer()

    @staticmethod
    def _resource_available(resource: str) -> bool:
        '''
        Tell whether an NLTK resource is already installed.

        A resource can be present only as an unextracted archive (for example
        ``corpora/wordnet.zip``); looking up the bare name then raises
        ``LookupError`` although nothing needs downloading, so the ``.zip``
        spelling is tried as well.
        :param resource: Resource path such as ``'corpora/wordnet'``.
        :type resource: str
        :return: True when either spelling can be located.
        :rtype: bool
        '''
        for candidate in (resource, f"{resource}.zip"):
            try:
                find(candidate)
            except LookupError:
                continue
            return True
        return False

    def lower_sentence(self, sentence: str) -> str:
        '''
        Lowercase the sentence.
        :param sentence: The sentence to lowercase.
        :type sentence: str
        :return: The lowercased sentence
        :rtype: str
        '''
        return sentence.lower()

    def remove_emails(self, sentence: str) -> str:
        '''
        Remove emails from the sentence.

        Any whitespace-delimited token containing ``@`` is dropped together
        with one following whitespace character.
        :param sentence: The sentence to remove emails from.
        :type sentence: str
        :return: The sentence without emails.
        :rtype: str
        '''
        return re.sub(r"\S*@\S*\s?", "", sentence)

    def remove_nonascii_diacritic(self, sentence: str) -> str:
        '''
        Remove diacritics and any remaining non-ASCII characters.

        The text is NFKD-normalized so accented letters split into a base
        letter plus combining marks; encoding to ASCII with ``ignore`` then
        discards everything that is not ASCII.
        :param sentence: The sentence to remove diacritics from.
        :type sentence: str
        :return: The ASCII-only sentence.
        :rtype: str
        '''
        decomposed = unicodedata.normalize("NFKD", sentence)
        return decomposed.encode("ascii", "ignore").decode("utf-8", "ignore")

    def clean_html(self, sentence: str) -> str:
        '''
        Remove HTML tags from the sentence.
        :param sentence: The sentence to remove HTML tags from.
        :type sentence: str
        :return: The sentence without HTML tags.
        :rtype: str
        '''
        return BeautifulSoup(sentence, "html.parser").get_text()

    def replace_repeated_chars(self, sentence: str) -> str:
        '''
        Collapse runs of the same punctuation mark.

        Only ``,``, ``!``, ``?`` and ``.`` are affected; repeated letters are
        left alone.
        :param sentence: The sentence to replace repeated characters in.
        :type sentence: str
        :return: The sentence with replaced repeated characters.
        :rtype: str
        '''
        return re.sub(r'([,!?.])\1+', r'\1', sentence)

    def translate_emojis_to_text(self, sentence: str) -> str:
        '''
        Translate emojis in the sentence to text.

        The sentence is processed one code point at a time: each code point
        that ``emoji.is_emoji`` recognises is replaced by its ``demojize`` name
        with the surrounding colons stripped and underscores turned into
        spaces. No separator is inserted around the replacement.
        :param sentence: The sentence to translate emojis to text.
        :type sentence: str
        :return: The sentence with translated emojis to text.
        :rtype: str
        '''
        return ''.join(
            emoji.demojize(char)[1:-1].replace('_', ' ') if emoji.is_emoji(char) else char
            for char in sentence
        )

    def expand_sentence(self, sentence: str) -> str:
        '''
        Expand the contractions in the sentence.
        :param sentence: The sentence to expand contractions in.
        :type sentence: str
        :return: The sentence with expanded contractions.
        :rtype: str
        '''
        return contractions.fix(sentence)

    def remove_url(self, sentence: str) -> str:
        '''
        Remove URLs from the sentence.

        A URL must start with ``http://``, ``https://``, ``ftp://`` or
        ``www.`` and is followed by a host name or dotted IPv4 address and an
        optional path/query part (hyphens included).
        :param sentence: The sentence to remove URLs from.
        :type sentence: str
        :return: The sentence without URLs.
        :rtype: str
        '''
        return self._URL_PATTERN.sub('', sentence)

    def remove_possessives(self, sentence: str) -> str:
        '''
        Strip possessives from the sentence.

        Plain substring replacement: ``'s`` / ``’s`` are deleted and a
        trailing ``s'`` / ``s’`` loses its apostrophe, wherever they occur.
        :param sentence: The sentence to strip possessives from.
        :type sentence: str
        :return: The sentence without possessives.
        :rtype: str
        '''
        return (
            sentence
            .replace("'s", '')
            .replace('’s', '')
            .replace('s’', 's')
            .replace("s'", 's')
        )

    def remove_extra_space(self, sentence: str) -> str:
        '''
        Remove extra spaces from the sentence.

        Every run of whitespace becomes one space and leading/trailing
        whitespace is trimmed.
        :param sentence: The sentence to remove extra spaces from.
        :type sentence: str
        :return: The sentence without extra spaces.
        :rtype: str
        '''
        return re.sub(r'\s+', ' ', sentence).strip()

    def check_sentence_spelling(self, sentence: list[str]) -> list[str]:
        '''
        Check the spelling of the words in the sentence.

        Each non-empty token is replaced by the spell checker's correction;
        when no correction is offered the token is kept unchanged.
        :param sentence: The tokens to spell-check.
        :type sentence: list[str]
        :return: The tokens with corrected spelling.
        :rtype: list[str]
        '''
        # Build the checker once per instance instead of on every call.
        spell = getattr(self, '_spell', None)
        if spell is None:
            spell = self._spell = SpellChecker()

        corrected_sentence = []
        for word in sentence:
            correction = spell.correction(word) if word != '' else None
            corrected_sentence.append(word if correction is None else correction)
        return corrected_sentence

    def tokenize_sentence(self, sentence: str) -> list[str]:
        '''
        Tokenize the sentence.
        :param sentence: The sentence to tokenize.
        :type sentence: str
        :return: The tokenized sentence.
        :rtype: list[str]
        '''
        return nltk.word_tokenize(sentence)

    def remove_stop_words(self, sentence: list[str]) -> list[str]:
        '''
        Remove stop words from the sentence.

        The comparison is an exact, case-sensitive membership test against
        ``self.stop_words``.
        :param sentence: The sentence to remove stop words from.
        :type sentence: list[str]
        :return: The sentence without stop words.
        :rtype: list[str]
        '''
        return [word for word in sentence if word not in self.stop_words]

    def lemm_sentence(self, sentence: list[str]) -> list[str]:
        '''
        Lemmatize the sentence.

        Tokens are POS-tagged first; the first letter of each tag selects the
        WordNet part of speech (noun, verb, adjective, adverb), defaulting to
        noun for every other tag.
        :param sentence: The sentence to lemmatize.
        :type sentence: list[str]
        :return: The lemmatized sentence.
        :rtype: list[str]
        '''
        return [
            self.lemmatizer.lemmatize(word, pos=self._WORDNET_POS.get(tag[:1], 'n'))
            for word, tag in pos_tag(sentence)
        ]

    def detokenize_sentence(self, sentence: list[str]) -> str:
        '''
        Detokenize the sentence.
        :param sentence: The sentence to detokenize.
        :type sentence: list[str]
        :return: The detokenized sentence.
        :rtype: str
        '''
        return TreebankWordDetokenizer().detokenize(sentence)

    def remove_emojis(self, text: str) -> str:
        '''
        Removes emojis (e.g. 😃, 🚀) from the given text.

        NOTE: the character class includes the ranges U+24C2-U+1F251 and
        U+10000-U+10FFFF, so in practice *every* character at or above U+24C2
        is removed, not only emojis.
        :param text: The text to strip.
        :type text: str
        :return: Text without emojis.
        :rtype: str
        '''
        return self._EMOJI_PATTERN.sub(r'', text)

    def remove_emoticons(self, text: str) -> str:
        '''
        Removes ASCII emoticons such as ``:)``, ``:-(``, ``:D``, ``<3``, ``:P``
        from the given text (case-insensitive).
        :param text: The text to strip.
        :type text: str
        :return: Text without emoticons.
        :rtype: str
        '''
        return self._EMOTICON_PATTERN.sub('', text)

    def remove_non_alphabetic(self, text: str) -> str:
        '''
        Replaces every run of non-word characters with a single space.

        Despite the name, digits and underscores are word characters for
        ``\\W`` and are therefore kept.
        :param text: The text to clean.
        :type text: str
        :return: Text with non-word runs replaced by spaces.
        :rtype: str
        '''
        return re.sub(r'\W+', ' ', text)

    def clean(self, line: str, steps: list[str] = None, empty: str ='Normal') -> list[str]:
        '''
        Clean the line and return it as a list of tokens
        :param line: the line to clean
        :type line: str
        :param steps: names of the steps to apply, in order; ``None`` selects
            the default pipeline. Names that are not known steps are ignored.
        :type steps: list[str]
        :param empty: placeholder returned (as the only element) when cleaning
            leaves nothing behind.
        :type empty: str
        :return: the cleaned line as a list of tokens; when no tokenizing step
            ran, a one-element list holding the cleaned string.
        :rtype: list
        '''
        if steps is None:
            steps = self._DEFAULT_STEPS

        result = line
        for step in steps:
            # Unknown names are skipped on purpose (best-effort pipeline).
            if step in self._AVAILABLE_STEPS:
                result = getattr(self, step)(result)

        # Without a tokenizing step the result is still a string: wrap it so the
        # return type is always a list. An empty string counts as "nothing left"
        # so the ``empty`` placeholder applies to it as well.
        if isinstance(result, str):
            result = [result] if result else []

        if len(result) == 0:
            return [empty]

        return result

def test() -> None:
    '''
    Build and run the unit tests for ``Preprocessor`` with a text test runner.

    The test case class is defined locally so that merely defining ``test``
    has no side effects; results are printed by ``unittest.TextTestRunner``.
    '''
    class TestPreprocessor(unittest.TestCase):

        @classmethod
        def setUpClass(cls):
            # Constructing a Preprocessor checks (and possibly downloads) the NLTK
            # resources, so build one instance for the whole class rather than
            # one per test method.
            cls.preprocessor = Preprocessor()

        def test_lower_sentence(self):
            self.assertEqual(self.preprocessor.lower_sentence("HELLO WORLD"), "hello world")

        def test_remove_emails(self):
            self.assertEqual(self.preprocessor.remove_emails("Contact me at test@example.com"), "Contact me at ")

        def test_remove_nonascii_diacritic(self):
            self.assertEqual(self.preprocessor.remove_nonascii_diacritic("café"), "cafe")

        def test_clean_html(self):
            self.assertEqual(self.preprocessor.clean_html("<p>Hello, world!</p>"), "Hello, world!")

        def test_replace_repeated_chars(self):
            self.assertEqual(self.preprocessor.replace_repeated_chars("Heeellooo!!!!"), "Heeellooo!")

        def test_translate_emojis_to_text(self):
            self.assertEqual(self.preprocessor.translate_emojis_to_text("Hello 😊"), "Hello smiling face with smiling eyes")

        def test_expand_sentence(self):
            self.assertEqual(self.preprocessor.expand_sentence("can't won't"), "cannot will not")

        def test_remove_url(self):
            self.assertEqual(self.preprocessor.remove_url("Check http://example.com"), "Check ")

        def test_remove_possessives(self):
            self.assertEqual(self.preprocessor.remove_possessives("John's car"), "John car")

        def test_remove_extra_space(self):
            self.assertEqual(self.preprocessor.remove_extra_space("This  is   a test"), "This is a test")

        def test_tokenize_sentence(self):
            self.assertEqual(self.preprocessor.tokenize_sentence("This is a test."), ['This', 'is', 'a', 'test', '.'])

        def test_check_sentence_spelling(self):
            self.assertEqual(self.preprocessor.check_sentence_spelling(['This', 'is', 'a', 'tst']), ['This', 'is', 'a', 'test'])

        def test_remove_stop_words(self):
            self.assertEqual(self.preprocessor.remove_stop_words(['This', 'is', 'a', 'test']), ['This', 'test'])

        def test_lemm_sentence(self):
            self.assertEqual(self.preprocessor.lemm_sentence(['running', 'jumps', 'easily']), ['run', 'jump', 'easily'])

        def test_clean_with_default_steps(self):
            test_line = "This is a test line with an email@example.com and a link http://example.com 😊"
            cleaned_line = self.preprocessor.clean(test_line)
            self.assertEqual(cleaned_line, ['test', 'line', 'link', 'smile', 'face', 'smile', 'eye'])

        def test_clean_with_custom_steps(self):
            test_line = "This is a test line with an email@example.com and a url http://example.com"
            steps = ['lower_sentence', 'remove_emails', 'remove_url', 'tokenize_sentence']
            cleaned_line = self.preprocessor.clean(test_line, steps=steps)
            self.assertEqual(cleaned_line, ['this', 'is', 'a', 'test', 'line', 'with', 'an', 'and', 'a', 'url'])

    # Load every test method of the local class and run it
    suite = unittest.TestLoader().loadTestsFromTestCase(TestPreprocessor)
    unittest.TextTestRunner().run(suite)


# Smoke test followed by the unit tests. In a notebook kernel `__name__` is
# '__main__', so this runs every time the cell is executed.
if __name__ == '__main__':
    print('test 1: Running a simple test case...')
    # Run the default cleaning pipeline on one sentence and show the tokens.
    preprocessor = Preprocessor()
    line = "This is a sample sentence."
    cleaned_line = preprocessor.clean(line)
    print(cleaned_line)
    print('test 2: Running The Unit test...')
    # Call the test function to run the tests
    test()
    print('Exit...')

test 1: Running a simple test case...
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['sample', 'sentence', '.']
test 2: Running The Unit test...
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  return BeautifulSoup(sentence, "html.parser").get_text()


tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.


.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
.[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_dat

tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.
tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordne

In [None]:
# Load the intents file. The JSON document is expected to have a top-level
# "intents" key holding a list of records.
with open('/content/input.json', 'r') as f:
    data = json.load(f)

# One row per intent; the "patterns" and "responses" columns are read further
# down when the training pairs are built.
df = pd.DataFrame(data['intents'])

In [None]:
# Cleaning steps used for both the training text and the live user input.
# Only string-level steps are enabled, so `Preprocessor.clean` returns a
# one-element list containing the cleaned sentence (no tokens).
steps = [
    'translate_emojis_to_text',
    'lower_sentence',
    'remove_nonascii_diacritic',
    'remove_emails',
    'clean_html',
    'remove_url',
    'replace_repeated_chars',
    'expand_sentence',
    'remove_possessives',
    'remove_extra_space',
    # Token-level steps are disabled for this experiment:
    # 'tokenize_sentence',
    # 'remove_stop_words',
    # 'detokenize_sentence'
]

# Shared preprocessor instance used by the cells below.
preprocessor = Preprocessor()

tokenizers/punkt is already downloaded.
corpora/stopwords is already downloaded.
corpora/wordnet not found. Downloading...
taggers/averaged_perceptron_tagger is already downloaded.


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
def map_tag_pattern(df, tag_col, text_col, res_col):
    '''
    Build parallel lists of cleaned patterns and cleaned responses.

    For every row of ``df`` and every pattern in that row, the pattern is
    cleaned and paired with one response picked at random from the same row
    (also cleaned). Cleaning uses the module-level ``preprocessor`` and
    ``steps``; an empty result is represented by ``''``.

    :param df: frame with one row per intent.
    :param tag_col: name of the tag column (currently not used).
    :param text_col: name of the column holding each intent's list of patterns.
    :param res_col: name of the column holding each intent's list of responses.
    :return: ``(patterns, responses)`` — two lists of equal length.
    '''
    cleaned_patterns = []
    cleaned_responses = []

    for _, row in df.iterrows():
        patterns = row[text_col]
        responses = row[res_col]
        for pattern in patterns:
            # First element: with string-only steps `clean` returns a 1-item list.
            cleaned_patterns.append(preprocessor.clean(pattern, steps, '')[0])
            picked = random.choice(responses)
            cleaned_responses.append(preprocessor.clean(picked, steps, '')[0])

    return cleaned_patterns, cleaned_responses


# Build the training pairs: cleaned patterns as inputs, cleaned responses as labels.
train_data, train_labels = map_tag_pattern(df, "tag", "patterns", "responses")

In [None]:
# Sanity check: both lists must have the same length (one label per pattern).
print(len(train_data), len(train_labels))

266 266


In [None]:
# for item in zip(train_data, train_labels):
#   print(item)

('hi', 'hi there. how are you feeling today?')
('hey', 'hello there. tell me how are you feeling today?')
('is anyone there?', 'hi there. what brings you here today?')
('hi there', 'hi there. how are you feeling today?')
('hello', 'hi there. what brings you here today?')
('hey there', 'hi there. how are you feeling today?')
('how do you', 'hello there. glad to see you are back. what is going on in your world right now?')
('hola', 'hello there. glad to see you are back. what is going on in your world right now?')
('bonjour', 'hi there. what brings you here today?')
('konnichiwa', 'hi there. how are you feeling today?')
('guten tag', 'hello there. tell me how are you feeling today?')
('ola', 'great to see you. how do you feel currently?')
('how are you?', 'hi, good thank you, how are you? please tell me your genisys user')
('hi how are you?', 'hi, how are you? i am great thanks! please tell me your genisys user')
('hello how are you?', 'hi, i am great, how are you? please tell me your ge

In [None]:
# Encoding the labels using LabelEncoder: every distinct response string in
# `train_labels` becomes one integer class id.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(train_labels)

# Tokenizing the training data: fit the vocabulary on the cleaned patterns,
# convert each pattern to a sequence of word indices, then pad the sequences
# into a 2-D array (its second dimension is reused later as the model's
# input length).
tokenizer = keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(train_data)
train_sequences = tokenizer.texts_to_sequences(train_data)
train_sequences = keras.preprocessing.sequence.pad_sequences(train_sequences)

In [None]:
# Defining the Sequential model: embedding -> flatten -> dense -> softmax over
# the response classes.
model = keras.models.Sequential()

# Adding an Embedding layer
# input_dim is the vocabulary size + 1 (the tokenizer's word_index holds no entry for 0).
model.add(keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1,
                                 output_dim=100,
                                 input_length=train_sequences.shape[1]))

# Adding a Flatten layer
model.add(keras.layers.Flatten())

# Adding a Dense layer with ReLU activation
model.add(keras.layers.Dense(64, activation='relu'))

# Adding the output layer with softmax activation
# One output unit per distinct encoded label.
model.add(keras.layers.Dense(len(np.unique(encoded_labels)), activation='softmax'))

# Compiling the model
# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Training the model
# NOTE(review): all data is used for fitting; no validation split is passed,
# so the reported accuracy is training accuracy only.
model.fit(train_sequences, encoded_labels, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x78b6b3142bc0>

In [None]:
# Function to generate response based on the input text
def generate_response(text):
    """
    Return the response string the trained model predicts for ``text``.

    Relies on the notebook-level ``tokenizer``, ``train_sequences``, ``model``
    and ``label_encoder`` created by the training cells.

    :param text: already-cleaned user message.
    :return: the decoded label (a response string) with the highest predicted
        probability.
    """
    # Same encoding as at training time: word indices, padded to the training width.
    token_ids = tokenizer.texts_to_sequences([text])
    padded_ids = keras.preprocessing.sequence.pad_sequences(
        token_ids, maxlen=train_sequences.shape[1]
    )

    class_scores = model.predict(padded_ids)
    best_class = np.argmax(class_scores)

    # Map the winning class id back to its original response text.
    return label_encoder.inverse_transform([best_class])[0]

In [None]:
# Running an interactive loop for user input: read a message, clean it with the
# same steps used for training, and print the predicted response.
while True:
    user_input = input("Input: (press 'q' to quit) ")

    # Check the message that was just read (the original tested an undefined
    # name, which raised NameError on the first iteration). Surrounding
    # whitespace is ignored so " q " also quits.
    if user_input.strip().lower() == "q":
        print("Response: Exiting.....")
        break

    # Apply the training-time cleaning steps; with string-only steps `clean`
    # returns a one-element list, hence the [0].
    cleaned_input = preprocessor.clean(user_input, steps, '')[0]

    # Generating and printing the response
    response = generate_response(cleaned_input)
    print("Response:", response)

Enter a message: hello
ChatBot:  hi there. what brings you here today?
Enter a message: how is it going
ChatBot:  knock knock.
Enter a message: how are you
ChatBot:  hi, good thank you, how are you? please tell me your genisys user
Enter a message: good morning
ChatBot:  good morning. i hope you had a good night sleep. how are you feeling today?
Enter a message: good night
ChatBot:  good night. sweet dreams.
Enter a message: i'm feeling sad
ChatBot:  fine, sorry to disturb you
Enter a message: what is your name
ChatBot:  call me pandora
Enter a message: thanks hany
ChatBot:  any time!


KeyboardInterrupt: Interrupted by user