In [3]:
from sentence_transformers import SentenceTransformer
import faiss
import spacy

In [4]:
# Load the spaCy pipeline 'en_core_web_sm' once; `sentencize` reads this module-level `nlp`.
nlp = spacy.load('en_core_web_sm')

In [42]:
def sentencize(text):
    """Split ``text`` into sentences with the module-level spaCy pipeline ``nlp``.

    Args:
        text: Raw text to segment.

    Returns:
        A list of plain strings, one per item of ``nlp(text).sents``, in
        document order. No whitespace stripping is applied here.
    """
    # Convert each sentence span to a plain Python string.
    return [str(span) for span in nlp(text).sents]

In [43]:
def read_file(file_path, encoding="utf-8"):
    """Read a text file and return its lines.

    Args:
        file_path: Path of the file to read (anything ``open`` accepts).
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        list[str]: The file's lines in order, each keeping its trailing
        newline when one is present.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the contents are not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.readlines()

In [44]:
# NOTE(review): absolute, machine-specific path — the notebook only runs where this
# file exists; consider deriving it from a configurable data directory.
file_path = '/home/abhay/Desktop/health'

In [45]:
# Read the file's lines and flatten them into one space-separated string.
text = " ".join(read_file(file_path))

In [46]:
# Segment the full text into sentence strings with the spaCy-backed helper.
sentences = sentencize(text)

In [48]:
# Trim leading and trailing whitespace from every sentence string.
sentences = list(map(str.strip, sentences))

In [49]:
# Display the stripped sentence list for inspection.
sentences

['Health is one of the most valuable assets a person can have.',
 'Maintaining good health is essential for leading a fulfilling life, as it directly impacts our physical, mental, and emotional well-being.',
 'A balanced diet, regular exercise, and adequate sleep play crucial roles in keeping the body functioning optimally.',
 'Additionally, managing stress and maintaining positive social connections contribute significantly to overall health.',
 'When individuals prioritize their health, they enhance their quality of life and increase their longevity.',
 'Physical health is the foundation of a strong and active lifestyle.',
 'Engaging in regular physical activity, such as walking, running, or strength training, helps keep the heart, muscles, and bones in good condition.',
 'It also aids in weight management and reduces the risk of chronic diseases like diabetes, hypertension, and heart disease.',
 'Furthermore, adopting healthy eating habits, such as consuming fruits, vegetables, lean

In [50]:
# Name of the SentenceTransformer checkpoint used to embed both sentences and queries.
checkpoint = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"

# Instantiate the embedding model from that checkpoint.
model = SentenceTransformer(checkpoint)

In [51]:
# Embed every sentence in one call; normalize_embeddings=True requests normalized vectors.
sentence_embedding = model.encode(sentences, normalize_embeddings=True)

In [53]:
# Sanity check: one row per sentence, one column per embedding dimension.
sentence_embedding.shape

(22, 384)

In [55]:
# The second axis of the embedding matrix is the per-sentence vector length.
embedding_size = sentence_embedding.shape[1]

embedding_size

384

In [56]:
# Flat FAISS index that scores by inner product ("IP"), sized to the embedding dimension.
index = faiss.IndexFlatIP(embedding_size)

In [57]:
# Add all sentence embeddings to the index; later cells map returned positions
# back to `sentences`, so the row order here must match that list.
index.add(sentence_embedding)

In [58]:
# Number of vectors currently stored in the index.
index.ntotal

22

In [59]:
# Inspect the index's `is_trained` flag.
index.is_trained

True

In [60]:
# Wrapped in a list so the encoder receives a batch containing a single query.
query = ['What harmful habits should be avoided']

In [61]:
# Encode the query with the same model and normalization setting used for the sentences.
query_embedding = model.encode(query, normalize_embeddings=True)

In [64]:
k = 3  # number of nearest sentences to fetch

# Pass `k` itself rather than repeating the literal, so changing the value above
# actually changes how many neighbours the search returns.
scores, indices = index.search(query_embedding, k)

In [65]:
# Similarity scores returned by the search (one row per query).
scores

array([[0.6801988 , 0.42958993, 0.37373215]], dtype=float32)

In [80]:
# Index positions of the retrieved vectors (one row per query).
indices

array([[16, 18,  4]])

In [84]:
# First returned hit for the first (only) query, mapped back to its sentence.
sentences[indices[0][0]]

'Avoiding harmful habits like smoking, excessive alcohol consumption, and substance abuse can also significantly lower health risks.'

In [95]:
# Collect the retrieved sentences in the order the search returned them. Iterate
# over the search output itself rather than range(k), so the cell stays correct
# even if `k` was edited after the search ran.
# FAISS pads missing neighbours with -1 when fewer vectors than requested are
# available; skip those so they do not silently select the last sentence.
results = [sentences[hit] for hit in indices[0] if hit >= 0]
" ".join(results)

'Avoiding harmful habits like smoking, excessive alcohol consumption, and substance abuse can also significantly lower health risks. Ultimately, a healthy lifestyle is about balance and consistency. When individuals prioritize their health, they enhance their quality of life and increase their longevity.'