
## Loading dependencies

In [3]:
import re
import pandas as pd
import numpy as np
import nltk

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lakha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lakha\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\lakha\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## Pre Processing Function

In [5]:
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()


def clean_text(text: str) -> str:   
    text = text.lower()
    text = re.sub(r"[^a-z\s]", "", text)
    tokens = nltk.word_tokenize(text)
#  we are usimg list comprehension for getting cleand list 
    cleaned = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token not in stop_words
    ]

    return " ".join(cleaned)

## Loading Data

In [7]:
df = pd.read_csv(r"C:\Users\lakha\Downloads\Question&ans (1).csv")

## Cleaning Data and VECTORIZE IT

In [9]:
df["clean_question"] = df["question"].apply(clean_text)
vectorizer = TfidfVectorizer()
question_vectors = vectorizer.fit_transform(df["clean_question"])

## Thresolds We can Change that 

In [11]:

HIGH_THRESHOLD = 0.60
MEDIUM_THRESHOLD = 0.40


## Chat Bot 

In [None]:
print("Q&A Bot Started (type 'exit' to quit)")

while True:
    user_input = input("\nAsk your question: ")

    if user_input.lower() == "exit":
        print("Bot stopped.")
        break

    # clean user input
    cleaned_input = clean_text(user_input)
    user_vector = vectorizer.transform([cleaned_input])

    # cosine similarity
    similarity_scores = cosine_similarity(
        user_vector, question_vectors
    )[0]

    best_index = np.argmax(similarity_scores)
    best_score = similarity_scores[best_index]

    print(f"\nSimilarity Score: {best_score:.2f}")

    #  DECISION LOGIC

    if best_score >= HIGH_THRESHOLD:
        print("\nAnswer:")
        print(df.iloc[best_index]["answer"])

    elif best_score >= MEDIUM_THRESHOLD:
        print("\nDid you mean one of these questions?")
        top_3 = similarity_scores.argsort()[-3:][::-1]

        for idx in top_3:
            print(f"- {df.iloc[idx]['question']}")

    else:
        print("\nPlease check the question and try again.")


Q&A Bot Started (type 'exit' to quit)



Ask your question:  what



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  what is



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  what is blood pressure?/



Similarity Score: 0.80

Answer:
High blood pressure occurs when the force of blood against artery walls is too strong.



Ask your question:  what is blood pressure?



Similarity Score: 0.80

Answer:
High blood pressure occurs when the force of blood against artery walls is too strong.



Ask your question:  hyper tensions



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  hyper tension



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  hyper



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  hypertension



Similarity Score: 1.00

Answer:
Hypertension is a medical condition in which blood pressure in the arteries is persistently high.



Ask your question:  what is noebjfb



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  ndksndk



Similarity Score: 0.00

Please check the question and try again.



Ask your question:  Can hypertension 



Similarity Score: 1.00

Answer:
Hypertension is a medical condition in which blood pressure in the arteries is persistently high.



Ask your question:  Can hypertension arew curesd



Similarity Score: 1.00

Answer:
Hypertension is a medical condition in which blood pressure in the arteries is persistently high.



Ask your question:  Can hypertension arew cured



Similarity Score: 1.00

Answer:
It can be controlled but not always cured.
