# In [None]:
# Search Engine Demo for Truyện Kiều

import sys
sys.path.append('..')
from src.preprocessor import KieuPreprocessor
from src.vectorizer import TfidfVectorizer
from src.search_engine import KieuSearchEngine

# Input files; both paths are relative to the current working directory.
STOPWORDS_PATH = '../data/vietnamese_stopwords.txt'
POEM_PATH = '../data/truyen_kieu.txt'

# Step 1: load the poem's verses and run them through the preprocessor.
preprocessor = KieuPreprocessor(stopwords_file=STOPWORDS_PATH)
verses = preprocessor.load_poem(POEM_PATH)
tokenized_verses = preprocessor.preprocess_all_verses(verses)

# Step 2: fit the TF-IDF vectorizer on the preprocessed verses and keep the
# resulting matrix for the search engine.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(tokenized_verses)

# Step 3: wire the preprocessor, fitted vectorizer, raw verses and matrix
# into the search engine used by the demo queries.
search_engine = KieuSearchEngine(preprocessor, vectorizer, verses, tfidf_matrix)

# Demo some searches: run a fixed list of sample queries and print the
# best-ranked verses for each one.
TOP_K = 5  # maximum number of verses requested per query

queries = [
    "vầng trăng",
    "hồng nhan",
    "kim trọng",
    "thúy kiều",
]

for query in queries:
    print(f"\nSearch results for '{query}':")
    results = search_engine.search(query, top_k=TOP_K)
    if not results:
        # Say so explicitly instead of leaving a header with nothing under it.
        print("No matching verses found.")
        continue
    # Each result is unpacked as (verse index, verse text, score); the index
    # is shown 1-based so it reads as a line number.
    for i, (idx, verse, score) in enumerate(results, 1):
        print(f"{i}. [Line {idx+1}] {verse} (score: {score:.4f})")

def interactive_search(engine, top_k=5, read_query=input):
    """Run a read-search-print loop until the user enters 'quit'.

    Args:
        engine: Object exposing ``search(query, top_k=...)`` that returns an
            iterable of ``(index, verse, score)`` tuples.
        top_k: Maximum number of results requested for each query.
        read_query: Callable that takes a prompt string and returns the
            user's query; defaults to the built-in ``input``.
    """
    print("\nInteractive Search Mode - Enter 'quit' to exit")
    while True:
        query = read_query("\nEnter your query: ")
        # Ignore surrounding whitespace and case so " Quit " also exits.
        if query.strip().lower() == 'quit':
            break

        results = engine.search(query, top_k=top_k)
        if not results:
            print("No matching verses found.")
            continue
        # The verse index is shown 1-based so it reads as a line number.
        for i, (idx, verse, score) in enumerate(results, 1):
            print(f"{i}. [Line {idx+1}] {verse} (score: {score:.4f})")


# Interactive demo - uncomment the call below to use
# interactive_search(search_engine)