In [30]:
import spacy
import wikipedia
import random
# Load spaCy's small English pipeline ("en_core_web_sm") once; `nlp` is the
# shared parser that process_question() calls on every incoming question.
nlp = spacy.load("en_core_web_sm")

In [31]:
# Answer templates keyed by question type ("who", "what", "when", "where").
# Every template is filled with str.format() using *named* fields only:
#   {subject}     - what the question is about
#   {description} - the retrieved fact / summary text
#   {location}    - where the subject is (used by the "where" templates)
# Do not use a bare "{}" here: the formatter passes keyword arguments, so a
# positional placeholder raises IndexError at answer time.
answer_patterns = {
    "who": [
        "The person you asked about is {description}.",
        "{subject} is known for {description}.",
        "{subject} is famous for {description}."
    ],
    "what": [
        "{subject} is {description}.",
        "{description} is related to {subject}.",
        "In brief, {subject} {description}."
    ],
    "when": [
        "{subject} was built {description}.",
        "{description} marks the construction of {subject}.",
        "{subject} dates back to {description}."
    ],
    "where": [
        "{subject} is located in {location}.",
        "{subject} can be found in {location}.",
        "{subject} is situated in {location}."
    ]
}


In [32]:
# Process the question and determine its question type.

def process_question(question):
    """Classify a question and build a search query from its keywords.

    Args:
        question: The question text as a string.

    Returns:
        A ``(question_type, search_query)`` tuple.

        * ``question_type`` is ``'who'``, ``'what'``, ``'when'`` or ``'where'``
          when the first token of the question is one of those words
          (case-insensitive); otherwise ``None``.
        * ``search_query`` is the extracted keywords joined by single spaces:
          named entities first, then the remaining nouns / proper nouns in
          question order. It is ``''`` when nothing was extracted.
    """
    # Strip first so leading whitespace does not become the "first word".
    doc = nlp(question.strip())

    # An empty question yields an empty Doc; indexing doc[0] would raise.
    if len(doc) == 0:
        return None, ""

    # Determine question type based on the first word
    question_type = None
    first_word = doc[0].text.lower()
    if first_word in ['who', 'what', 'when', 'where']:
        question_type = first_word

    # Entities (persons, organisations, places, ...) come first so that a
    # multi-word name stays intact in the query.
    keywords = []
    for ent in doc.ents:
        if ent.text not in keywords:
            keywords.append(ent.text)

    # Tokens that belong to an entity are already represented by the entity
    # text; adding them again would duplicate words in the query
    # (e.g. "United States of America United States America").
    entity_token_ids = {token.i for ent in doc.ents for token in ent}
    for token in doc:
        if token.i in entity_token_ids:
            continue
        if token.pos_ in ['NOUN', 'PROPN'] and token.text not in keywords:
            keywords.append(token.text)

    search_query = ' '.join(keywords)
    return question_type, search_query

# The function returns two values:
#   1) "question type": a string identifying the kind of question ('who', 'what', 'when', 'where') or None.
#   2) "search query": generated from the keywords extracted from the question; it is used for further processing.

' Here the outputs returned are "question type" : it is a string which determines the question type.(like who, what, when, where or none)\nsecond one is "search query" :geneerated from the extracted keywords in the question. ,this can be used for further processing'

In [33]:
# Search Wikipedia for the question and retrieve an answer based on the detected question type.
# This step accesses an external information source to retrieve answers.
# It uses the `wikipedia` library to talk to the Wikipedia API and fetch the information.

## This function takes question string as input and returns answer fetched from wikipedia or appropriate message if it is unable to fetch any answer.
def answer_question(question):
    """Answer a question with the first sentence of the matching Wikipedia page.

    Args:
        question: The question text as a string.

    Returns:
        The one-sentence Wikipedia summary for the search query derived from
        the question, or a human-readable message when

        * the question does not start with who / what / when / where,
        * no keywords could be extracted from it,
        * the query is ambiguous (the candidate titles are listed),
        * no page matches the query, or
        * the Wikipedia library reports any other error.
    """
    question_type, search_query = process_question(question)
    if not question_type:
        return "Sorry, I don't know the answer for that type of question."

    if search_query.strip() == "":
        return "Please provide a valid search query."

    try:
        return wikipedia.summary(search_query, sentences=1)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Ambiguous search query. Possible options: {', '.join(e.options)}"
    except wikipedia.exceptions.PageError:
        return "Sorry, Page not found"
    except wikipedia.exceptions.WikipediaException as e:
        # Base class of the library's errors (timeouts, API errors, ...).
        # Must stay after the two specific handlers above, which subclass it.
        # Returning a message keeps the interactive loop alive.
        return f"Sorry, Wikipedia could not be queried: {e}"
    
## The function then tries to retrieve a summary of the Wikipedia page matching the query using "wikipedia.summary".
## If the search query is ambiguous, the function returns the list of possible options [DisambiguationError].
## Finally, if no relevant page is found, it returns a "page not found" message [PageError].

In [34]:
#Retrieve the answer specifically using written answer patterns

def extract_answer_using_patterns(question):
    """Answer a question by filling a randomly chosen answer pattern.

    The question is classified with ``process_question``, a one-sentence
    summary is fetched from Wikipedia for the extracted keywords, and the
    result is rendered through one of the templates in ``answer_patterns``
    for that question type.

    Args:
        question: The question text as a string.

    Returns:
        The formatted answer string, or a human-readable message when the
        question type is unknown, has no patterns, no subject could be
        extracted, or the Wikipedia lookup fails.
    """
    question_type, search_query = process_question(question)
    if not question_type:
        return "Sorry, I don't know the answer."

    if question_type not in answer_patterns:
        return "Sorry, I don't have answer patterns for that question type."

    # Leading question phrases to drop from the query if they are present.
    # (str.split() cannot take alternatives joined with "|", so each phrase
    # is checked explicitly, case-insensitively.)
    question_prefixes = {
        "who": ("who is", "who was"),
        "what": ("what is", "what did"),
        "when": ("when was", "when did"),
        "where": ("where is",),
    }
    subject = search_query.strip()
    lowered = subject.lower()
    for prefix in question_prefixes.get(question_type, ()):
        if lowered.startswith(prefix):
            subject = subject[len(prefix):]
            break
    subject = subject.strip().strip("?").strip()

    if not subject:
        return "Please provide a valid search query."

    # Fetch the text that fills the {description} / {location} fields.
    try:
        page_summary = wikipedia.summary(subject, sentences=1)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Ambiguous search query. Possible options: {', '.join(e.options)}"
    except wikipedia.exceptions.PageError:
        return "Sorry, Page not found"
    except wikipedia.exceptions.WikipediaException as e:
        return f"Sorry, Wikipedia could not be queried: {e}"

    # Pick one of the predefined templates for this question type.
    answer_pattern = random.choice(answer_patterns[question_type])

    # str.format() ignores unused arguments, so every field any template may
    # reference is supplied:
    #   * the positional argument serves a bare "{}" placeholder,
    #   * `location` serves the "where" templates; there is no dedicated
    #     location extraction, so the summary text stands in for it.
    return answer_pattern.format(
        subject,
        subject=subject,
        description=page_summary,
        location=page_summary,
    )
## The function then extracts the subject from the question (e.g. a person's name, a location, etc.).
## It fills the selected answer pattern with the extracted subject and any additional information available.

In [35]:
# Show the banner once, then answer questions until the user asks to leave.
print("THIS IS A QA-SYSTEM created by Team#05.\n"
      "It will try to answer questions that start with Who, What, When, or Where.\n"
      "Enter 'exit' to leave the program.")

## Loops interactively
while True:
    try:
        # Strip so that " exit " and accidental padding are handled.
        user_input = input("Question :").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave cleanly
        # instead of ending the cell with a traceback.
        print("\nThank you! Goodbye.")
        break

    # Nothing typed: ask again rather than sending an empty question on.
    if not user_input:
        continue

    if user_input.lower() == "exit":
        print("Thank you! Goodbye.")
        break

    print(" Answer :", answer_question(user_input))


THIS IS A QA-SYSTEM created by Team#05.
It will try to answer questions that start with Who, What, When, or Where.
Enter 'exit' to leave the program.
Question :what is natural language processing?
 Answer : Natural language processing (NLP) is an interdisciplinary subfield of computer science and linguistics.
Question :What is meant by Serendipity?
 Answer : Serendipity is an unplanned fortunate discovery.
Question :what is population?
 Answer : Population is the term typically used to refer to the number of people in a single area.
Question :what is an adjective?
 Answer : An adjective (abbreviated adj.) is a word that describes or defines a noun or noun phrase.
Question :where is Charminar located? 
 Answer : The Charminar (lit. 'four minarets') is a monument located in Hyderabad, Telangana, India.
Question :where is United States of America?
 Answer : The United States of America (USA or U.S.A.), commonly known as the United States (US or U.S.) or America, is a country primarily loc