In [1]:
from FlagEmbedding import FlagReranker, BGEM3FlagModel

reranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
embeddings = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 30 files: 100%|██████████| 30/30 [00:00<00:00, 215092.51it/s]
  colbert_state_dict = torch.load(os.path.join(model_dir, 'colbert_linear.pt'), map_location='cpu')
  sparse_state_dict = torch.load(os.path.join(model_dir, 'sparse_linear.pt'), map_location='cpu')


In [2]:
from qdrant_client import QdrantClient, models
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from utils.utils import convert_defaultdict, format_docs
from langchain_core.documents import Document
import numpy as np

In [3]:
client = QdrantClient("http://localhost:6333")
llm = ChatGroq(model="llama3-70b-8192")

In [4]:
prompt = ChatPromptTemplate.from_template("""Answer the question based on the provided context only. Try your best to provide the most accurate response.
<context>
{context}
</context>

Question: {input}
""")

chain = prompt | llm

In [5]:
query = "I'm 22 years old and I usually sleep at 2am. How can this affect my health?"

In [6]:
res = embeddings.encode([query], return_sparse=True, return_colbert_vecs=True, max_length=512)

In [7]:
result = client.query_points(
    "summary",
    prefetch=[
        models.Prefetch(
            query=res['dense_vecs'][0],
            using="dense",
            limit=20
        ),
        models.Prefetch(
            query=models.SparseVector(**convert_defaultdict(res['lexical_weights'][0])),
            using="sparse",
            limit=20
        ),
        models.Prefetch(
            query=res['colbert_vecs'][0],
            using='colbert',
            limit=20
        )
    ],
    query=models.FusionQuery(
        fusion=models.Fusion.RRF,
    ),
    limit=10
)

In [8]:
result.points

[ScoredPoint(id='e8c5e083-e663-4b19-8287-fc716c06c6ca', version=251, score=0.8666667, payload={'doc_id': 'e8c5e083-e663-4b19-8287-fc716c06c6ca', 'title': 'Title: The Global Problem of Insufficient Sleep and Its Serious Public Health Implications', 'content': 'This chunk of text discusses the recommended sleep duration for adults, the pathophysiology of insufficient sleep, contributing factors, and the manifestations of insufficient sleep on cognitive function and mood. The American Academy of Sleep Medicine, Sleep Research Society, and National Sleep Foundation recommend 7-9 hours of sleep per night for ideal sleep health. Insufficient sleep leads to changes in biomarkers, such as proinflammatory markers, and affects cholesterol metabolism and inflammatory responses. Factors contributing to insufficient sleep include gender, marital status, presence of children, biological or circadian disruption, genetic influences, lifestyle habits, and various types of stress. Insufficient sleep is 

In [9]:
scores = reranker.compute_score([[query, point.payload['content']] for point in result.points[:5]], max_length=8096, batch_size=8, normalize=True)
scores = np.array(scores)
scores

array([0.02052779, 0.0433658 , 0.04707418, 0.02556521, 0.02585878])

In [10]:
reranking = list(np.array(result.points)[scores.argsort()][::-1])

In [11]:
relevant_docs = []
for point in reranking:
    doc = client.scroll(
        collection_name="original",
        scroll_filter=models.Filter(
            must=[
                models.FieldCondition(
                    key="doc_id",
                    match=models.MatchValue(value=point.id)
                )
            ]
        )
    )
    temp_payload = doc[0][0].payload
    res_doc = Document(page_content=temp_payload['page_content'], metadata={'source':temp_payload['source'], 'doc_id': temp_payload['doc_id'], 'title': temp_payload['title']})
    relevant_docs.append(res_doc)

In [12]:
len(relevant_docs)

5

In [13]:
print(relevant_docs[1].page_content)

# Sleep’s role in the development and resolution of adolescent depression

## Abstract
Two adolescent mental health fields — sleep and depression — have advanced largely in parallel until about four years ago. Although sleep problems have been thought to be a symptom of adolescent depression, emerging evidence suggests that sleep difficulties arise before depression does. In this Review, we describe how the combination of adolescent sleep biology and psychology uniquely predispose adolescents to develop depression. We describe multiple pathways and contributors, including a delayed circadian rhythm, restricted sleep duration and greater opportunity for repetitive negative thinking while waiting for sleep. We match each contributor with evidence-based sleep interventions, including bright light therapy, exogenous melatonin and cognitive-behaviour therapy techniques. Such treatments improve sleep and alleviate depression symptoms, highlighting the utility of sleep treatment for comorbid 

In [14]:
relevant_docs[0].metadata

{'source': 'extracted/TÁC HẠI/Social jetlag in health and behavioral research  a systematic review.md',
 'doc_id': '8387bb97-b802-45cc-8575-f845e0ecbdeb',
 'title': 'Title: Social jetlag in health and behavioral research: a systematic review'}

In [15]:
context = format_docs(relevant_docs[:4])

In [16]:
response = chain.invoke({"context": context, "input": query})

In [17]:
print(response)

content="Based on the provided context, sleeping at 2am can have several negative effects on your health. Since you're 22 years old, you're considered an adolescent, and research suggests that adolescents who sleep late and sleep too little are at risk of various health problems.\n\nSome potential effects of your sleep pattern on your health include:\n\n1. **Depression**: Sleeping late and sleeping too little can increase the risk of depression. In fact, research suggests that sleep problems can arise before depression does.\n2. **Obesity and metabolic disorders**: Irregular sleep patterns can disrupt your body's natural rhythms, leading to weight gain, obesity, and metabolic disorders.\n3. **Cognitive performance and academic achievement**: Sleeping late can impair your cognitive abilities, leading to poor academic performance, and lower general cognitive ability.\n4. **Aggression and conduct problems**: Irregular sleep patterns have been linked to aggression, conduct problems, and de