# In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize
from collections import defaultdict

# Stop-word sets keyed by language, filled lazily so the corpus is read at
# most once per language instead of on every preprocess_text() call.
_STOP_WORDS_CACHE = {}


def _stop_words(language='english'):
    """Return the NLTK stop-word set for ``language``, loading it only once."""
    if language not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE[language] = frozenset(stopwords.words(language))
    return _STOP_WORDS_CACHE[language]


def preprocess_text(text):
    """Turn raw text into a list of normalized content tokens.

    The text is tokenized with ``word_tokenize``; every token is lowercased,
    tokens that are English stop words or that contain no alphanumeric
    character (pure punctuation such as ``?`` or ``,``) are dropped, and the
    remaining tokens are lemmatized with ``WordNetLemmatizer``.

    Args:
        text: The string to preprocess.

    Returns:
        A list of lowercase, lemmatized tokens in their original order.
        Duplicates are kept.
    """
    stop_words = _stop_words('english')
    lemmatizer = WordNetLemmatizer()

    normalized_tokens = []
    for word in word_tokenize(text):
        # Lowercase up front so that the returned tokens compare equal
        # regardless of capitalization ("University" vs. "university").
        lowered = word.lower()
        if lowered in stop_words:
            continue
        # Punctuation-only tokens carry no meaning and would otherwise be
        # counted as matches between a question and a sentence.
        if not any(char.isalnum() for char in lowered):
            continue
        normalized_tokens.append(lemmatizer.lemmatize(lowered))

    return normalized_tokens

def find_answer(
    question,
    context,
    *,
    fallback="Sorry, I couldn't find an answer to that.",
):
    """Return the sentence of ``context`` that best matches ``question``.

    The context is split into sentences with ``sent_tokenize``. Each sentence
    is scored by the number of preprocessed question tokens that also occur
    among the sentence's preprocessed tokens (a token repeated in the
    question counts once per repetition). The highest-scoring sentence wins;
    on a tie the sentence that appears first in the context is returned.

    Args:
        question: The user's question as a string.
        context: The passage to search for an answer.
        fallback: Value returned when no sentence shares any token with the
            question (including when the question or context is empty).

    Returns:
        The most relevant sentence from ``context``, or ``fallback`` when
        nothing matches.
    """
    question_tokens = preprocess_text(question)

    # Maps sentence text -> overlap score; only sentences with at least one
    # matching token are recorded.
    sentence_relevance = defaultdict(int)

    for sentence in sent_tokenize(context):
        # A set makes each membership test O(1) instead of scanning a list.
        sentence_tokens = set(preprocess_text(sentence))
        overlap = sum(1 for token in question_tokens if token in sentence_tokens)
        if overlap:
            sentence_relevance[sentence] += overlap

    # max() raises ValueError on an empty mapping, so handle "no match"
    # explicitly rather than crashing the caller.
    if not sentence_relevance:
        return fallback

    return max(sentence_relevance, key=sentence_relevance.get)

# Load the SQuAD dataset
from datasets import load_dataset
dataset = load_dataset('squad')

# Use the passage of the first training example as the only text that
# questions are answered from.
context = dataset['train'][0]['context']

# Read-answer loop: typing "exit" (any capitalization) ends the session.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C: leave quietly instead of with a traceback.
        print()
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing to search for; prompt again.
        continue
    answer = find_answer(user_input, context)
    print("Chatbot:", answer)