Importing Libraries

In [1]:
import spacy
import nltk
import string
import random
import warnings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#suppressing warnings
warnings.filterwarnings("ignore")

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

#downloading necessary nltk packages
#nltk.download('punkt') #Tokenizatiom
#nltk.download('wordnet') #Lemmatization
#nltk.download('omw-1.4') #Open multillingual wordnet


Opening and reading a file

In [2]:
f = open('Output.txt', 'r', errors = 'ignore')
raw_doc = f.read()


Lowercasing

In [3]:
raw_doc = raw_doc.lower()


Tokenization

In [4]:
#Doc into sentences and words
#sentence_tokens = nltk.sent_tokenize(raw_doc)

# Tokenizing the document into sentences
sentence_tokens = list(nlp(raw_doc).sents)

word_tokens = nltk.word_tokenize(raw_doc)


Lemmatization

In [5]:
#Initializing Wordnet lemmatizer
from nltk.stem import WordNetLemmatizer
lemmer = nltk.stem.WordNetLemmatizer()

#function to lemmatize tokens
def LemTokens(sentence_tokens):
    return [lemmer.lemmatize(token) for token in sentence_tokens]

Remove Punctuations

In [6]:
remove_punc_dict = dict((ord(punct), None) for punct in string.punctuation)
print(string.punctuation)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


Normalization of text after Lemmatization

In [7]:
def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punc_dict)))

Bot Greeting Configuration

In [8]:
#pre-defined greetings and responses
greet_inputs = ('hello', 'hi', 'yo', 'hey', 'wassum')
greet_responses = ('hi', 'hey!', 'hey there', 'hola! ?como estas?', 'Konnichiwa')

#Function to check if the input sentence is a greeting and generate a random response
def greet(sentence):
    for word in sentence.split():
        if word.lower() in greet_inputs:
            return random.choice(greet_responses)

TFIDF & Cosine-Similarity (term frequency-inverse document frequency)

In [9]:
def response(user_response):
    robo1_response = ''
    sentence_tokens.append(user_response)
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english', token_pattern = r'(?u)\b\w\w+\b')
    #tfidf = TfidfVec.fit_transform(sentence_tokens + word_tokens)  # Include word_tokens from output.txt
    tfidf = TfidfVec.fit_transform(sentence_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]  #Index of the most similar element
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if req_tfidf == 0 or idx >= len(sentence_tokens):
        robo1_response = robo1_response + "I am sorry, I am unable to understand you"
        return robo1_response
    else:
        robo1_response = robo1_response + sentence_tokens[idx] + word_tokens[idx]
        return robo1_response

# Main execution part
flag = True
print('Bot: Hello, I am your personal chatbot, What can I help you with?')
while flag:
    user_input = input('You: ')
    user_input = user_input.lower()
    if user_input != 'bye':
        if user_input == 'thank you' or user_input == 'thanks':
            flag = False
            print('Bot: You are welcome')
        else:
            if greet(user_input) is not None:
                print('Bot:', greet(user_input))
            else:
                # If the user input is not a greeting, then generate a response using TF-IDF
                word_tokens += nltk.word_tokenize(user_input)
                final_words = list(set(word_tokens))
                bot_response = response(user_input)
                print('Bot:', bot_response[0] if isinstance(bot_response, list) else bot_response)
                sentence_tokens.remove(user_input)
    else:
        flag = False
        print('Bot: Goodbye!!')

# Read questions from output.txt
with open('Output.txt', 'r', errors='ignore') as f:
    questions = f.readlines()

# Process each question and generate responses
for question in questions:
    question = question.strip()  # Remove leading/trailing whitespace
    bot_response = response(question)
    print('Question:', question)
    print('Bot:', bot_response)


Bot: Hello, I am your personal chatbot, What can I help you with?


You:  wassum


Bot: Konnichiwa


You:  what is the wikipedia about?


Bot: natural language processing

    article
    talk

    read
    edit
    view history

tools

from wikipedia, the free encyclopedia
for other uses, see nlp.natural


You:  what can you tell me about nlp?


Bot: book generation
    not an nlp task proper but an extension of natural language generation and other nlp tasks is the creation of full-fledged books.insights


You:  


Bot: I am sorry, I am unable to understand you


You:  end


Bot: only the introduction of hidden markov models, applied to part-of-speech tagging, announced the end of the old rule-based approach.(


You:  thanks


Bot: You are welcome
Question: Natural language processing
Bot: this article is about natural language processing done by computers.language
Question: 
Bot: I am sorry, I am unable to understand you
Question: Article
Bot: this article is about natural language processing done by computers.language
Question: Talk
Bot: natural language processing

    article
    talk

    read
    edit
    view history

tools

from wikipedia, the free encyclopedia
for other uses, see nlp.natural
Question: 
Bot: I am sorry, I am unable to understand you
Question: Read
Bot: natural language processing

    article
    talk

    read
    edit
    view history

tools

from wikipedia, the free encyclopedia
for other uses, see nlp.natural
Question: Edit
Bot: natural language processing

    article
    talk

    read
    edit
    view history

tools

from wikipedia, the free encyclopedia
for other uses, see nlp.natural
Question: View history
Bot: natural language processing

    article
    talk

    read
   

In [1]:
import spacy
import random
import warnings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Suppressing warnings
warnings.filterwarnings("ignore")

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

# Reading the document
with open('output.txt', 'r', errors='ignore') as f:
    raw_doc = f.read().lower()

# Tokenizing the document into sentences
sentence_tokens = list(nlp(raw_doc).sents)

# Function to preprocess text
def preprocess_text(text):
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if not token.is_punct and not token.is_stop]
    return tokens

# Pre-defined greetings and responses
greet_inputs = ('hello', 'hi', 'wassup', 'hey', 'howdy', 'greetings')
greet_responses = ('hi', 'hey!', 'hey there!', 'hola', 'hello!', 'hi there!')

# Function to check if the input sentence is a greeting and generate a random response
def greet(sentence):
    for word in sentence.split():
        if word.lower() in greet_inputs:
            return random.choice(greet_responses)

# Function to generate a response to the user input using TF-IDF and cosine similarity
def response(user_response):
    robo1_response = ''
    sentence_tokens.append(nlp(user_response))
    TfidfVec = TfidfVectorizer(tokenizer=preprocess_text, stop_words='english')
    tfidf = TfidfVec.fit_transform([str(sent) for sent in sentence_tokens])
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]  # Index of the most similar element
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if req_tfidf == 0:
        robo1_response = "I am sorry, I am unable to understand you."
    else:
        robo1_response = str(sentence_tokens[idx])
    sentence_tokens.pop()
    return robo1_response

# Main execution part
def chatbot():
    flag = True
    print('Bot: Hello, I am TAM chatbot. How can I help you?')
    while flag:
        user_response = input('You: ').lower()
        if user_response != 'bye':
            if user_response in ['thank you', 'thanks']:
                flag = False
                print('Bot: You are welcome!')
            else:
                if greet(user_response):
                    print('Bot:', greet(user_response))
                else:
                    bot_response = response(user_response)
                    print('Bot:', bot_response)
        else:
            flag = False
            print('Bot: Goodbye!')

if __name__ == "__main__":
    chatbot()



Bot: Hello, I am TAM chatbot. How can I help you?


You:  Hi


Bot: hello!


You:  What are you trained for?


Bot: document ai
    a document ai platform sits on top of the nlp technology enabling users with no prior experience of artificial intelligence, machine learning or nlp to quickly train a computer to extract the specific data they need from different document types.


You:  on what topics are you confident?


Bot: recognizing textual entailment
    given two text fragments, determine if one being true entails the other, entails the other's negation, or allows the other to be either true or false.[28]

topic segmentation and recognition
    given a chunk of text, separate it into segments each of which is devoted to a topic, and identify the topic of the segment.




You:  end


Bot: 40+ languages; 2018: 60+/100+ languages)
    elimination of symbolic representations (rule-based over supervised towards weakly supervised methods, representation learning and end-to-end systems)


You:  bye


Bot: Goodbye!


In [1]:
'''
import spacy
import nltk
import string
import random
import warnings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Suppressing warnings
warnings.filterwarnings("ignore")

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

# Initialize the question-answering pipeline with a specific model
qa_pipeline = pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad')

# Initialize the language generation pipeline
gen_pipeline = pipeline('text-generation', model='gpt2')

# Reading the document
with open('Output.txt', 'r', errors='ignore') as f:
    raw_doc = f.read().lower()

# Tokenizing the document into sentences
sentence_tokens = list(nlp(raw_doc).sents)
word_tokens = nltk.word_tokenize(raw_doc)

# Function to preprocess text
def preprocess_text(text):
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if not token.is_punct and not token.is_stop]
    return tokens

# Pre-defined greetings and responses
greet_inputs = ('hello', 'hi', 'yo', 'hey', 'wassup')
greet_responses = ('hi', 'hey!', 'hey there', 'hola! ¿cómo estás?', 'Konnichiwa')

# Function to check if the input sentence is a greeting and generate a random response
def greet(sentence):
    for word in sentence.split():
        if word.lower() in greet_inputs:
            return random.choice(greet_responses)

class Chatbot:
    def __init__(self):
        self.context = []
        self.greet_inputs = greet_inputs
        self.greet_responses = greet_responses
        self.qa_pipeline = qa_pipeline
        self.gen_pipeline = gen_pipeline
    
    def greet(self, sentence):
        for word in sentence.split():
            if word.lower() in self.greet_inputs:
                return random.choice(self.greet_responses)
    
    def response(self, user_response):
        self.context.append(user_response)
        context_text = " ".join([str(sent) for sent in sentence_tokens])
        response_text = self.generate_response(user_response, context_text)
        self.context.pop()
        return response_text
    
    def generate_response(self, user_response, context):
        response = self.qa_pipeline(question=user_response, context=context)
        if response['score'] > 0.5 and len(response['answer']) > 10:
            return response['answer']
        else:
            # Fallback to text generation model for more detailed responses
            gen_response = self.gen_pipeline(user_response, max_length=50)[0]['generated_text']
            return gen_response
    
    def run(self):
        flag = True
        print('Bot: Hello, I am your personal chatbot. What can I help you with?')
        while flag:
            user_response = input('You: ').lower()
            if user_response != 'bye':
                if user_response in ['thank you', 'thanks']:
                    flag = False
                    print('Bot: You are welcome!')
                else:
                    if self.greet(user_response):
                        print('Bot:', self.greet(user_response))
                    else:
                        bot_response = self.response(user_response)
                        print('Bot:', bot_response)
            else:
                flag = False
                print('Bot: Goodbye!')

if __name__ == "__main__":
    chatbot = Chatbot()
    chatbot.run()

# Read questions from output.txt and process them
with open('Output.txt', 'r', errors='ignore') as f:
    questions = f.readlines()

# Process each question and generate responses
for question in questions:
    question = question.strip()  # Remove leading/trailing whitespace
    bot_response = chatbot.response(question)
    print('Question:', question)
    print('Bot:', bot_response)

'''

