In [None]:
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class StandardSearchEngine:
    """TF-IDF search engine over a collection of documents loaded from JSON.

    At construction time the documents are loaded, the ``"content"`` field of
    each one is vectorized with a ``TfidfVectorizer``, and the resulting
    matrix is kept in memory. Queries are ranked by cosine similarity against
    that matrix.

    Each document is indexed by key, so it is expected to be a mapping with
    at least ``"content"``, ``"title"``, ``"source"`` and ``"timestamp"``
    (presumably the file holds a JSON array of such objects -- verify against
    the data file).
    """

    def __init__(self, document_path):
        """Load the documents at *document_path* and build the TF-IDF index.

        Args:
            document_path: Path of the JSON file holding the documents.

        Raises:
            KeyError: If a document has no ``"content"`` key.
        """
        self.documents = self.load_documents(document_path)
        self.corpus = [doc["content"] for doc in self.documents]

        # English stop words are removed and text is lowercased so that
        # queries match regardless of case.
        self.vectorizer = TfidfVectorizer(
            stop_words="english",
            lowercase=True,
        )

        # Fit the vocabulary and vectorize the corpus once, up front, so each
        # search only has to transform the query.
        self.tfidf_matrix = self.vectorizer.fit_transform(self.corpus)

    def load_documents(self, path):
        """Return the parsed JSON content of the UTF-8 file at *path*."""
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def search(self, query, top_k=5):
        """Return the documents most similar to *query*, best match first.

        Args:
            query: Free-text query string.
            top_k: Maximum number of results. ``None`` returns every matching
                document; zero or a negative value returns no results.

        Returns:
            A list of dicts with the keys ``"title"``, ``"score"``,
            ``"source"`` and ``"timestamp"``. Only documents with a strictly
            positive similarity are included, so the list may be shorter than
            *top_k* (or empty). Documents with equal scores keep their
            original corpus order.
        """
        # A negative top_k would otherwise be interpreted as a slice from the
        # end ("all but the last |k|"), which is never what a caller wants.
        if top_k is not None and top_k <= 0:
            return []

        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.tfidf_matrix)[0]

        # Stable sort on the negated scores: descending order, with ties
        # resolved deterministically by document position.
        ranked_indices = np.argsort(-similarities, kind="stable")

        results = []
        for idx in ranked_indices[:top_k]:
            score = similarities[idx]
            # Scores are in descending order, so nothing after the first
            # non-positive one can match.
            if not score > 0:
                break
            doc = self.documents[idx]
            results.append({
                "title": doc["title"],
                "score": float(score),
                "source": doc["source"],
                "timestamp": doc["timestamp"],
            })

        return results


if __name__ == "__main__":
    # Interactive search loop: build the index once from data.json, then
    # answer queries until the user types 'exit' or closes the input stream.
    engine = StandardSearchEngine("data.json")

    while True:
        try:
            # Strip surrounding whitespace so inputs such as "exit " are
            # still recognized as the exit command.
            query = input("\nSearch Query (or type 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a
            # traceback.
            print()
            break

        if query.lower() == "exit":
            break
        if not query:
            # Nothing to search for; prompt again.
            continue

        results = engine.search(query)

        print("\nResults:")
        if not results:
            # Make an empty result set explicit instead of printing a bare
            # header with nothing under it.
            print("(no matching documents)")
        for res in results:
            print(f"- {res['title']} | Score: {res['score']:.3f}")



Search Query (or type 'exit'):  fire



Results:
