In [None]:
import pandas as pd
import unicodedata
import re
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import Image, display

In [None]:
# Normalize input text (handles Unicode/Nepali spacing)
def normalize_input(text):
    """Return a canonical, lowercase, single-spaced form of ``text``.

    Steps, in order:
      1. NFKC-normalize, folding compatibility characters (full-width
         letters, non-breaking spaces, ligatures, ...) into their plain
         equivalents.
      2. Lowercase.
      3. Collapse every run of whitespace into one space and trim both ends.

    NFKC must run *before* lowercasing and trimming because the
    normalization itself can introduce uppercase letters (e.g. the trade
    mark sign becomes "TM") and spaces (e.g. the spacing diaeresis becomes
    a space followed by a combining mark). Doing it last would leave those
    artifacts in the result.

    Args:
        text: Raw string to normalize. ``None`` is treated as an empty
            string.

    Returns:
        The normalized string (possibly empty).
    """
    if text is None:
        return ""
    text = unicodedata.normalize("NFKC", text)
    text = text.lower()
    # Collapse first, then strip: after collapsing, any leading/trailing
    # whitespace is at most a single plain space.
    return re.sub(r"\s+", " ", text).strip()

In [None]:
# Convert text to TF-IDF vectors for similarity computation.
#
# Tokenization note: in Python's `re`, `\w` matches only alphanumeric
# characters and "_". Devanagari vowel signs (matras), the virama and other
# combining marks are not alphanumeric, so a pattern such as r"\b\w+\b"
# breaks Nepali words apart into isolated consonants, which badly degrades
# TF-IDF matching. The character class below additionally accepts the
# Devanagari block (minus the danda / double danda punctuation at
# U+0964-U+0965) and ZWNJ/ZWJ (U+200C/U+200D), which Devanagari text uses to
# control conjunct forms. For text made only of `\w` characters the tokens
# are the same maximal runs as before, including single-character tokens.
#
# NOTE(review): `df` is expected to be defined by an earlier cell that is not
# visible here -- confirm it is loaded before this cell on a fresh kernel.
TOKEN_PATTERN = r"[\w\u0900-\u0963\u0966-\u097F\u200C\u200D]+"
vectorizer = TfidfVectorizer(analyzer="word", token_pattern=TOKEN_PATTERN)
tfidf_matrix = vectorizer.fit_transform(df["Combined_Question"])

In [None]:
# Look up the knowledge-base entry whose question is closest to the query
def get_best_answer(query, threshold=0.4):
    """Return the answer and image path of the best-matching question.

    The query is normalized with ``normalize_input``, projected into the
    fitted TF-IDF space, and compared against every row of ``tfidf_matrix``
    by cosine similarity.

    Args:
        query: Free-text user question.
        threshold: Minimum cosine similarity the top match must reach.

    Returns:
        ``(answer, image_path)`` taken from the "Answer" and "Image_path"
        columns of the best-matching row of ``df``, or ``(None, None)`` when
        the top similarity is below ``threshold``.
    """
    query_vec = vectorizer.transform([normalize_input(query)])
    similarities = cosine_similarity(query_vec, tfidf_matrix).ravel()
    top = similarities.argmax()
    if similarities[top] < threshold:
        return None, None
    # Positional lookup: matrix row i corresponds to the i-th row of df.
    match = df.iloc[top]
    return match["Answer"], match["Image_path"]

In [None]:
# Display image associated with answer if available
def show_image(image_path):
    """Render the image referenced by ``image_path`` inline, if it exists.

    ``image_path`` is interpreted relative to ``BASE_IMAGE_PATH``; a leading
    "Images/" prefix is dropped first. Nothing is displayed (and no error is
    raised) when the value is not a string, is blank, or does not resolve to
    a regular file.

    Args:
        image_path: Path as stored alongside the answer, e.g.
            "Images/diagram.png". Non-string values (``None``, NaN, ...) are
            ignored.

    Returns:
        None.
    """
    if not isinstance(image_path, str):
        return
    relative_path = image_path.strip()

    # Remove only a *leading* "Images/". A blanket str.replace would also
    # mangle names that merely contain the substring ("MyImages/a.png") or
    # nested "Images/" folders.
    prefix = "Images/"
    if relative_path.startswith(prefix):
        relative_path = relative_path[len(prefix):].strip()
    if not relative_path:
        return

    full_path = os.path.join(BASE_IMAGE_PATH, relative_path)
    # isfile rather than exists: a directory path would pass exists() and
    # then make Image(filename=...) raise.
    if os.path.isfile(full_path):
        display(Image(filename=full_path))