In [1]:
import PyPDF2
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
import string
from collections import Counter
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModel

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

from transformers import BertTokenizer, BertForQuestionAnswering
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        num_pages = len(pdf_reader.pages)
        for page_num in range(num_pages):
            page = pdf_reader.pages[page_num]
            text += page.extract_text()
    return text

In [3]:
def preprocess_text(text):
    # Convert text to lowercase
    text = text.lower()
    
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    
    # Custom stop word list (excluding important concepts from Bhagavad Gita)
    custom_stop_words = set(stopwords.words('english')) \
        - {'krishna', 'karma', 'dharma', 'arjuna', 'bhagavad'}  # Add more as needed

    # Tokenize the text
    tokens = word_tokenize(text)
    
    # Remove stop words and perform stemming
    stemmer = PorterStemmer()
    filtered_tokens = [stemmer.stem(word) for word in tokens if word not in custom_stop_words]
    
    # Join tokens back into a single string
    preprocessed_text = ' '.join(filtered_tokens)
    
    return preprocessed_text


In [4]:
def divide_into_sentences(text):
    # Split text into sentences using NLTK sentence tokenizer
    sentences = nltk.sent_tokenize(text)
    return sentences

In [5]:
def preprocess_input(text):
    # Remove extra whitespaces and special characters
    processed_text = re.sub(r'[^\w\s]', '', text.strip())
    return processed_text.lower()

In [6]:
def identify_questions(sentences):
    # Define a list of keywords
    keywords = ['what', 'who', 'when', 'where', 'why', 'how', 'explain', 'describe']

    # Initialize a list to store potential questions
    potential_questions = []

    # Iterate through each sentence and check for keywords
    for sentence in sentences:
        # Tokenize the sentence to check the first word
        first_word = sentence.split()[0].lower()
        # Check if the first word of the sentence is a keyword
        if first_word in keywords:
            potential_questions.append(sentence)

    return potential_questions


In [7]:
def compute_tfidf_vectors(texts):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(texts)
    return tfidf_matrix, vectorizer

In [8]:
def weighted_cosine_similarity(query_vector, document_vectors, query_weights):
    # Compute dot product between query vector and document vectors
    dot_products = np.dot(query_vector * query_weights, document_vectors.T)

    # Compute L2 norm of query vector
    query_norm = np.linalg.norm(query_vector * query_weights)

    # Check if document vectors are empty or have zero dimensions
    if document_vectors.ndim > 1 and document_vectors.shape[0] > 0:
        # Compute L2 norm of document vectors
        document_norms = np.linalg.norm(document_vectors, axis=1)
    else:
        # If document vectors are empty, set document norms to zeros
        document_norms = np.zeros(1)

    # Compute cosine similarity scores
    cosine_similarities = dot_products / (query_norm * (document_norms + 1e-9))

    return cosine_similarities

In [9]:
def search_answers(question, preprocessed_text, min_word_count=4, max_answer_length=100, use_dot_product=False, top_k=3):
    # Preprocess the question
    preprocessed_question = preprocess_input(question)

    # Compute TF-IDF vectors
    sentence_list = preprocessed_text.split('.')  # Split preprocessed text into sentences
    tfidf_matrix, vectorizer = compute_tfidf_vectors([preprocessed_question] + sentence_list)
    query_vector = tfidf_matrix[0]
    document_vectors = tfidf_matrix[1:]

    # Check if document vectors are empty or have zero dimensions
    if document_vectors.ndim > 1 and document_vectors.shape[0] > 0:
        # Candidate answers based on word count threshold
        candidate_answers = []
        for i, sentence in enumerate(sentence_list):
            if len(sentence.split()) >= min_word_count:
                if use_dot_product:
                    # Option 1: Use dot product for similarity (without dimensionality reduction)
                    similarity = np.dot(query_vector, document_vectors[i])
                else:
                    # Option 2: No similarity calculation, filter by word count only
                    similarity = 1  # Placeholder value since not used for filtering
                candidate_answers.append((sentence, similarity))
        
        # Return limited answer snippets (sorted by similarity for option 1)
        if use_dot_product:
            candidate_answers.sort(key=lambda x: x[1], reverse=True)  # Sort by descending similarity
        
        # Select top-k answers
        top_answers = candidate_answers[:top_k]
        
        answers = []
        for sentence, _ in top_answers:
            # Limit answer length to max_answer_length words and add ellipsis (...) if truncated
            answer_snippet = sentence.strip()[:max_answer_length] + "..." if len(sentence) > max_answer_length else sentence.strip()
            # Ensure that the answer is limited to a single sentence
            answer_snippet = answer_snippet.split('.')[0]  # Take the first sentence
            answers.append(answer_snippet)
    else:
        answers = ["I couldn't find a direct answer to your question in the Bhagavad Gita."]
    
    return answers


In [10]:
def generate_response(user_input, sentences, preprocessed_text):
    # Preprocess user input
    preprocessed_input = preprocess_input(user_input)

    # Generate response
    if preprocessed_input.lower() == 'exit':
        response = "Thank you for chatting with the Bhagavad Gita Chatbot. Farewell!"
    else:
        # Identify potential questions in user input
        questions = identify_questions(sentences)
        if questions:
            # Attempt to answer the first identified question
            answer = search_answers(questions[0], preprocessed_text)
            response = f"Here's what I found about {questions[0]} in the Bhagavad Gita:\n {answer}"
        else:
            response = "I couldn't find any questions in your input. However, feel free to ask me anything about the Bhagavad Gita!"

    return response


In [11]:
def chatbot():
    print("Welcome to the Bhagavad Gita Chatbot!")
    print("You can ask me anything about the Bhagavad Gita, and I'll do my best to find answers within the text or identify potential questions in your input.")
    print("Type 'exit' to end the conversation.")
    
    pdf_path = 'Bhagavad Gita.pdf'
    text = extract_text_from_pdf(pdf_path)
    preprocessed_text = preprocess_text(text)
    sentences = nltk.sent_tokenize(text)

    while True:
        user_input = input("\nYou: ").strip()

        # Generate response
        response = generate_response(user_input, sentences, preprocessed_text)
        print("\nBhagavad Gita Chatbot:", response)

        # Check if the user wants to exit
        if response.lower() == "thank you for chatting with the bhagavad gita chatbot. farewell!":
            break


In [12]:
# Run the chatbot
if __name__ == "__main__":
    chatbot()

Welcome to the Bhagavad Gita Chatbot!
You can ask me anything about the Bhagavad Gita, and I'll do my best to find answers within the text or identify potential questions in your input.
Type 'exit' to end the conversation.



You:  who is krishna



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  a



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  a



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  ss



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  

You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']

Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  s



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



You:  



Bhagavad Gita Chatbot: Here's what I found about Why	it	should	be	so	appealing	to	the	Western	mind
is	an	interesting	question. in the Bhagavad Gita:
 ['free download ebook direct reproduct origin bona fide person approv bless srila prabhupada ebook mad']



KeyboardInterrupt

