In [28]:
# ! pip install spacy

In [39]:
import json
from sentence_transformers import SentenceTransformer, util

# Semantic page retrieval: embed every page and the query with SBERT, rank the
# pages by cosine similarity, and keep the best matches that clear a threshold.

# Load a pre-trained SBERT model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load the JSON page data. The encoding is passed explicitly because the page
# text contains non-ASCII characters (e.g. em dashes) and the platform default
# encoding is not guaranteed to be UTF-8.
# NOTE(review): assumes the file was written as UTF-8 (the JSON default) -- confirm.
with open('../data/final_clean_with_keywords.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One text per page, taken from each page's 'original_text' field
documents = [page['original_text'] for page in data['pages']]

# User query
query = "Explain the process for a member of a State Legislative Assembly to take an oath and the rules regarding voting in the Assembly."

# Encode the documents and query into sentence embeddings
doc_embeddings = model.encode(documents)
query_embedding = model.encode(query)

# Compute cosine similarity scores between the query and documents;
# row 0 holds the single query's score against every document
cosine_similarities = util.cos_sim(query_embedding, doc_embeddings)

# Get indices of documents sorted by similarity score (best match first)
sorted_indices = cosine_similarities.argsort(descending=True).tolist()[0]

# Minimum similarity a page must reach to be reported
threshold = 0.5
# Maximum number of pages to report
top_k = 5

# Print the top relevant documents
print("Top relevant pages:")
top_5_res = []
for idx in sorted_indices[:top_k]:
    # Pull the score out of the tensor once instead of re-indexing it per use
    score = cosine_similarities[0][idx].item()
    if score < threshold:
        # Scores are in descending order, so no later page can pass either
        break
    print(f"Page Number: {data['pages'][idx]['page_number']}, Similarity: {score:.4f}")
    print(f"Cleaned Text: {documents[idx]}\n")
    top_5_res.append(documents[idx])
print(top_5_res)

Top relevant pages:
Page Number: 75, Similarity: 0.5863
Cleaned Text: 44 THE CONSTITUTION OF INDIA
(Part V.—The Union)
Conduct of Business
99. Oath or affirmation by members.—Every member of either House
of Parliament shall, before taking his seat, make and subscribe before the
President, or some person appointed in that behalf by him, an oath or
affirmation according to the form set out for the purpose in the Third Schedule.
100.Voting in Houses, power of Houses to act notwithstanding
vacancies and quorum.—(1)Save as otherwise provided in this Constitution,
all questions at any sitting of either House or joint sitting of the Houses shall be
determined by a majority of votes of the members present and voting, other
than the Speaker or person acting as Chairman or Speaker.
The Chairman or Speaker, or person acting as such, shall not vote in the
first instance, but shall have and exercise a casting vote in the case of an
equality of votes.
(2) Either House of Parliament shall have power 

In [37]:
# Number of pages collected in top_5_res, i.e. how many of the top-ranked pages
# met the similarity threshold in the retrieval cell.
# NOTE(review): this cell's recorded execution count (In [37]) is lower than the
# retrieval cell's (In [39]), so the displayed value may be stale -- re-run both
# cells in order to confirm.
len(top_5_res)

0