In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Instantiate the sentence-embedding model once at module level; the `model`
# global is what the later cells pass to encode_documents() and search().
# NOTE(review): 'all-MiniLM-L6-v2' is the model identifier handed to the
# constructor — presumably resolved/downloaded by the library on first use; confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
def load_documents(filepath):
    """Read a UTF-8 text file and return its non-blank lines.

    Each line of the file is treated as one document. Leading and trailing
    whitespace is stripped, and lines that are empty after stripping are
    discarded.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of stripped, non-empty strings in file order.
    """
    cleaned = []
    with open(filepath, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text:  # skip empty and whitespace-only lines
                cleaned.append(text)
    return cleaned

In [None]:
def encode_documents(documents, model):
    """Embed a collection of documents with the given model.

    Args:
        documents: The texts to embed; passed to ``model.encode`` unchanged.
        model: Any object exposing an ``encode(documents)`` method.

    Returns:
        Whatever ``model.encode(documents)`` returns.
    """
    return model.encode(documents)

In [None]:
def search(query, documents, doc_embeddings, model, top_k=1):
    """Return the documents most similar to ``query``, best match first.

    The query is embedded with ``model.encode`` and compared against
    ``doc_embeddings`` using cosine similarity; the highest-scoring
    documents are returned.

    Args:
        query: The query text.
        documents: Sequence of document texts.
        doc_embeddings: Embeddings of ``documents``.
            NOTE(review): assumed to hold one row per document, in the same
            order as ``documents`` — this is not validated here.
        model: Object exposing ``encode(list_of_texts)``.
        top_k: Maximum number of results to return. ``None`` returns every
            document, ranked. Zero or a negative value returns an empty list.

    Returns:
        A list of ``(document, similarity_score)`` tuples sorted by
        descending similarity, with at most ``top_k`` entries.
    """
    # Guard degenerate requests before doing any work:
    #  * a negative top_k would otherwise be used as a slice bound and
    #    silently drop results from the *end* of the ranking;
    #  * an empty corpus cannot be passed to cosine_similarity.
    if (top_k is not None and top_k <= 0) or len(documents) == 0:
        return []

    query_embedding = model.encode([query])
    # A single query yields a 1 x n_documents matrix; keep its only row.
    scores = cosine_similarity(query_embedding, doc_embeddings)[0]
    # argsort is ascending, so reverse it to rank best-first, then truncate.
    ranked = np.argsort(scores)[::-1][:top_k]
    return [(documents[i], scores[i]) for i in ranked]

In [None]:
# Demo driver: index the corpus file, prompt for one query, and print the
# best matches with their similarity scores.
# NOTE(review): inside a notebook kernel __name__ is "__main__", so this cell
# runs whenever it is executed — confirm that is the intent.
if __name__ == "__main__":
    # One document per non-blank line of the file (path is relative to the
    # current working directory).
    documents = load_documents('Document.txt')
    # Embed the whole corpus once; search() reuses these embeddings.
    doc_embeddings = encode_documents(documents, model)

    # input() blocks until the user types a query.
    query = input("Enter your query: ")
    results = search(query, documents, doc_embeddings, model, top_k=3)

    print("\nTop results:")
    # enumerate() is 0-based, so idx+1 gives a 1-based rank; the score is
    # printed with four decimal places.
    for idx, (doc, score) in enumerate(results):
        print(f"{idx+1}. {doc} (Similarity: {score:.4f})")