source
https://www-llamaindex-ai.translate.goog/blog/boosting-rag-picking-the-best-embedding-reranker-models-42d079022e83?_x_tr_sl=auto&_x_tr_tl=vi&_x_tr_hl=vi&_x_tr_hist=true


In [1]:
import os
from langchain.document_loaders import TextLoader
from concurrent.futures import ThreadPoolExecutor

def load_multiple_text_files(directory):

    def load_file(file_path):
        loader = TextLoader(file_path)
        return loader.load() 
    documents = []
    txt_files = [f for f in os.listdir(directory) if f.endswith('.txt')]
    
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(load_file, [os.path.join(directory, f) for f in txt_files]))
    for result in results:
        documents.extend(result)  
    print(documents)
    return documents

directory_path = "./data" 
loaded_documents = load_multiple_text_files(directory_path)

print(f"Successfully loaded {len(loaded_documents)} documents.")



[Document(metadata={'source': './data/env_artical_1.txt'}, page_content="Article 1: Tackling Environmental Challenges in the 21st Century\nThe environment faces numerous challenges today, many of which have escalated due to human activities. The primary threats include climate change, deforestation, pollution, and biodiversity loss. These issues have complex and far-reaching consequences for the planet’s health and humanity's future.\n\nClimate Change is perhaps the most pressing issue. Global temperatures are rising due to the excessive emission of greenhouse gases from industrial activities, transportation, and agriculture. This warming leads to extreme weather patterns, sea-level rise, and the melting of polar ice caps. If unchecked, climate change could result in severe disruptions to food and water resources, increased natural disasters, and the displacement of millions of people.\n\nDeforestation is another critical challenge, driven by the demand for agricultural land, timber, a

In [None]:
# import os
# from langchain.document_loaders import TextLoader
# from datetime import datetime

# class EnhancedTextLoader(TextLoader):
#     def load_with_metadata(self):
#         # Load the text content
#         text_content = self.load()
#         # Extract metadata
#         metadata = {
#             'file_name': os.path.basename(self.file_path),
#             'file_size': os.path.getsize(self.file_path),
#             'modified_time': datetime.fromtimestamp(os.path.getmtime(self.file_path)).isoformat(),
#         }
#         text_content[0].metadata = metadata
#         print(text_content[0].metadata)
#         return text_content

# def load_multiple_text_files(directory):
#     documents = []
#     txt_files = [f for f in os.listdir(directory) if f.endswith('.txt')]
    
#     for file_name in txt_files:
#         loader = EnhancedTextLoader(os.path.join(directory, file_name))
#         doc_with_metadata = loader.load_with_metadata()
#         documents.append(doc_with_metadata)
#     print(documents)
#     return documents
# directory_path = "./data" 
# loaded_documents = load_multiple_text_files(directory_path)

# print(f"Successfully loaded {len(loaded_documents)} documents.")


In [2]:
len(loaded_documents)

3

In [3]:
MODEL_EMBEDDING = "thenlper/gte-large"
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name=MODEL_EMBEDDING)

  embeddings = HuggingFaceEmbeddings(model_name=MODEL_EMBEDDING)
  from tqdm.autonotebook import tqdm, trange


In [13]:
from langchain_qdrant import QdrantVectorStore
QDRANT_URL = "http://localhost:6333"
# qdrant = QdrantVectorStore.from_documents(
#     loaded_documents,
#     embeddings,
#     url=QDRANT_URL,
#     prefer_grpc=False,
#     collection_name="articles",
# )
qdrant = QdrantVectorStore.from_existing_collection(
    embedding=embeddings,
    collection_name="articles",
    prefer_grpc=False,
    url=QDRANT_URL,
)


In [None]:
# from langchain.vectorstores import Qdrant
# from qdrant_client import QdrantClient
# client = QdrantClient(
#     url=QDRANT_URL, prefer_grpc=False
#     )
# db = Qdrant(client=client,
#         embeddings=embeddings,
#         collection_name="articles")


In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from optimum.bettertransformer import BetterTransformer
import torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# model_rerank = AutoModelForSequenceClassification.from_pretrained('amberoad/bert-multilingual-passage-reranking-msmarco').to(device)
# model_rerank = BetterTransformer.transform(model_rerank)
# tokenizer_rerank = AutoTokenizer.from_pretrained('amberoad/bert-multilingual-passage-reranking-msmarco')
tokenizer_rerank = AutoTokenizer.from_pretrained("fluid-ai/bert-multilingual-passage-reranking-msmarco")
model_rerank = AutoModelForSequenceClassification.from_pretrained("fluid-ai/bert-multilingual-passage-reranking-msmarco").to(device)
# model_rerank = BetterTransformer.transform(model_rerank)
# Load model directly
# tokenizer_rerank = AutoTokenizer.from_pretrained("BAAI/bge-reranker-large")
# model_rerank = AutoModelForSequenceClassification.from_pretrained("BAAI/bge-reranker-large").to(device)

cuda


In [15]:
def rerank_resultdb(query):
    query = query.replace('\n', ' ')
    vectorstore = qdrant.as_retriever(search_kwargs={"k":15})
    docs = vectorstore.get_relevant_documents(query=query)
    # Remove duplicates based on page_content
    unique_documents = []
    seen_content = set()
    for doc in docs:
        if doc.page_content not in seen_content:
            seen_content.add(doc.page_content)
            unique_documents.append(doc)
    docs = unique_documents     
    candidates = [doc.page_content for doc in docs]
    
    queries = [query]*len(candidates)
    features = tokenizer_rerank(queries, candidates,  padding='longest',max_length=512, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        scores = model_rerank(**features).logits
        values, indices = torch.sum(scores, dim=1).sort()
        print(values)
    selected_docs = [docs[idx] for idx in indices]
    # Filter indices based on the score range
    filtered_indices = [idx for idx, value in zip(indices, values) if -0.5 <= value.item() <= 0.5]
    print(filtered_indices)
    # Select documents based on filtered indices
    selected_docs = [docs[idx] for idx in filtered_indices]
    results = [f"{doc.page_content} source: {doc.metadata}" for doc in selected_docs]
    return results

In [16]:
#query ="Article 1: Tackling Environmental Challenges in the 21st CenturyThe environment faces numerous challenges today, many of which have escalated due to human activities. The primary threats include climate change, deforestation, pollution, and biodiversity loss. These issues have complex and far-reaching consequences for the planet’s health and humanity's future.Climate Change is perhaps the most pressing issue. Global temperatures are rising due to the excessive emission of greenhouse gases from industrial activities, transportation, and agriculture. This warming leads to extreme weather patterns, sea-level rise, and the melting of polar ice caps. If unchecked, climate change could result in severe disruptions to food and water resources, increased natural disasters, and the displacement of millions of people.Deforestation is another critical challenge, driven by the demand for agricultural land, timber, and urban expansion. Forests play a vital role in absorbing carbon dioxide and supporting biodiversity. However, large-scale logging and land clearing not only reduce the Earth's capacity to regulate its climate but also threaten countless species with extinction.Pollution, particularly plastic and air pollution, continues to degrade the environment. Oceans are choking with millions of tons of plastic waste, harming marine life and contaminating the food chain. Similarly, air pollution in urban areas causes severe health issues for humans, from respiratory diseases to premature deaths.Biodiversity loss poses long-term risks. Ecosystems depend on a delicate balance of species, and the extinction of key species can lead to the collapse of entire ecosystems. The loss of biodiversity also diminishes the natural resources that humans rely on for food, medicine, and clean water.Addressing these environmental challenges requires a concerted global effort. Governments, businesses, and individuals must work together to reduce carbon emissions, protect natural habitats, and promote sustainable living. The future of our planet depends on immediate, large-scale action to reverse the damage we’ve caused and to build a more sustainable world for future generations."
#query="Climate change results from greenhouse gas emissions, leading to rising temperatures and extreme weather. Deforestation threatens forests' role in carbon absorption and species survival. Pollution, especially plastic and air pollution, harms ecosystems and human health. Biodiversity loss risks ecosystem stability and natural resource availability"
query= '''
The environment is facing critical challenges due to human activities, including climate change, deforestation, pollution, and biodiversity loss.

Climate change results from greenhouse gas emissions, leading to rising temperatures and extreme weather. Deforestation threatens forests' role in carbon absorption and species survival. Pollution, especially plastic and air pollution, harms ecosystems and human health. Biodiversity loss risks ecosystem stability and natural resource availability.

Addressing these issues requires global collaboration among governments, businesses, and individuals to reduce emissions, protect habitats, and promote sustainability for a healthier future.
'''
q='''
The prior month, Hurricane Lee, a major hurricane currently traveling up the Atlantic, meteorologists observed the third-fastest case of rapid intensification ever recorded. On September 7, wind speeds inside Lee more than doubled, boosting it from an 80 mph Cat 1 storm to a terrifying 165 mph Cat 5 storm.

Rapid intensification only crops up in a handful of tropical cyclones each year. However, a study published last year in Nature found that, within 240 miles of coastlines, rapidly intensifying storms are now significantly more common than they were 40 years ago. Examples include Hurricane Ian in 2022 and Hurricane Michael in 2018. The latter leapt from a Category 2 storm to Category 5 the day before making landfall in the Florida panhandle. It claimed dozens of lives and caused $25 billion in damage.
'''
q1= '''The article discusses significant environmental challenges caused by human activities, including climate change, deforestation, pollution, and biodiversity loss. Climate change, driven by greenhouse gas emissions, leads to extreme weather and resource disruptions. Deforestation, fueled by agriculture and urban expansion, threatens ecosystems and species. Pollution, particularly plastic and air pollution, harms marine life and human health. Biodiversity loss jeopardizes ecosystems and vital resources. Addressing these issues requires global cooperation among governments, businesses, and individuals to reduce emissions, protect habitats, and promote sustainable living for a healthier planet. Immediate action is crucial for future sustainability.'''
q2 ='''The private zoo was permitted to keep wild and rare animals for preservation. It currently holds over 3,000 wild animals of nearly 90 species, including Bengal tigers, white lions, bears, hippos, panthers, and zebras, on a total area of 23,000 square metres.

Between September 6 and October 1, 20 Bengal tigers and a black panther died at the site. Investigations by local authorities concluded that the animals died from the A/H5N1 virus infection from chicken served to them for meals.

My Quynh Zoo in the neighbouring Long An Province also reported the deaths of 27 tigers and three lions between early August and mid-September. Their samples also tested positive for the A/H5N1 virus, while the sources of infection have remained unclear'''
rerank_resultdb(query=query)

tensor([-0.2158, -0.1180,  1.0154], device='cuda:0')
[tensor(0, device='cuda:0'), tensor(1, device='cuda:0')]


  docs = vectorstore.get_relevant_documents(query=query)


["Article 1: Tackling Environmental Challenges in the 21st Century\nThe environment faces numerous challenges today, many of which have escalated due to human activities. The primary threats include climate change, deforestation, pollution, and biodiversity loss. These issues have complex and far-reaching consequences for the planet’s health and humanity's future.\n\nClimate Change is perhaps the most pressing issue. Global temperatures are rising due to the excessive emission of greenhouse gases from industrial activities, transportation, and agriculture. This warming leads to extreme weather patterns, sea-level rise, and the melting of polar ice caps. If unchecked, climate change could result in severe disruptions to food and water resources, increased natural disasters, and the displacement of millions of people.\n\nDeforestation is another critical challenge, driven by the demand for agricultural land, timber, and urban expansion. Forests play a vital role in absorbing carbon dioxi

In [12]:
# from langchain.llms import VLLM
# # llm = VLLM(
# #           model=GENERATE_MODEL_NAME,
# #           trust_remote_code=True,  # mandatory for hf models
# #           max_new_tokens=max_new_tokens,
# #             # temperature=1.0,
# #             # top_k=50,
# #             # top_p=0.9,
# #           top_k=10,
# #           top_p=0.95,
# #           temperature=0.4,
# #           dtype="half",
# #       )
# llm = VLLM(
#     model="mosaicml/mpt-7b",
#     trust_remote_code=True,  # mandatory for hf models
#     max_new_tokens=128,
#     top_k=10,
#     top_p=0.95,
#     temperature=0.8,
# )
# print(llm.invoke("What is the capital of France ?"))

  


INFO 10-08 07:48:05 llm_engine.py:226] Initializing an LLM engine (v0.6.1.dev238+ge2c6e0a82) with config: model='mosaicml/mpt-7b', speculative_config=None, tokenizer='mosaicml/mpt-7b', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=mosaicml/mpt-7b, use_v2_block_manager=False, num_scheduler_steps=1, multi_step_stream_outputs=False, enable_prefix_caching=Fa

  @torch.library.impl_abstract("xformers_flash::flash_fwd")
  @torch.library.impl_abstract("xformers_flash::flash_bwd")


ValidationError: 1 validation error for VLLM
  Value error, Bfloat16 is only supported on GPUs with compute capability of at least 8.0. Your Tesla P100-PCIE-16GB GPU has compute capability 6.0. You can use float16 instead by explicitly setting the`dtype` flag in CLI, for example: --dtype=half. [type=value_error, input_value={'model': 'mosaicml/mpt-7...gs': {}, 'client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error

In [17]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
QDRANT_URL = "http://localhost:6333"
class LLMServe:
    def __init__(self) -> None:
      self.query = None
      self.embeddings = self.load_embeddings()
      self.pipe = self.load_model_pipeline()
      self.prompt = self.load_prompt_template()
      self.rag_pipeline = None
    def load_embeddings(self):
      embeddings = HuggingFaceEmbeddings(model_name=MODEL_EMBEDDING)
      return embeddings

    def load_model_pipeline(self):
      llm = ChatOllama(
          model="llama3.2",
          top_k=10,
          top_p=0.95,
          temperature=0.01)
      
      return llm

    def load_prompt_template(self):
      prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are an AI assistant, based on the infomation I provide as follows {database} if database is none reply no database,   please give the exact information: article name, author, source if the information is not in the data i provided, please reply that this is no information",
            ),
            ("human","{input}")
        ]
    )
      return prompt

    def load_rag_pipeline(self,llm ,prompt,query):
      vector_results = rerank_resultdb(query)
      chain = prompt | llm
      result = chain.invoke({
          "database":vector_results,
          "input":query
      }).content
      return result

    def rag(self, query):
        self.query = query
        self.rag_pipeline = self.load_rag_pipeline(llm=self.pipe,                         
                                      prompt=self.prompt, query= self.query)
        return self.rag_pipeline

In [18]:
app = LLMServe()

In [19]:
q='''
The prior month, Hurricane Lee, a major hurricane currently traveling up the Atlantic, meteorologists observed the third-fastest case of rapid intensification ever recorded. On September 7, wind speeds inside Lee more than doubled, boosting it from an 80 mph Cat 1 storm to a terrifying 165 mph Cat 5 storm.

Rapid intensification only crops up in a handful of tropical cyclones each year. However, a study published last year in Nature found that, within 240 miles of coastlines, rapidly intensifying storms are now significantly more common than they were 40 years ago. Examples include Hurricane Ian in 2022 and Hurricane Michael in 2018. The latter leapt from a Category 2 storm to Category 5 the day before making landfall in the Florida panhandle. It claimed dozens of lives and caused $25 billion in damage.
'''
q2 ='''The private zoo was permitted to keep wild and rare animals for preservation. It currently holds over 3,000 wild animals of nearly 90 species, including Bengal tigers, white lions, bears, hippos, panthers, and zebras, on a total area of 23,000 square metres.

Between September 6 and October 1, 20 Bengal tigers and a black panther died at the site. Investigations by local authorities concluded that the animals died from the A/H5N1 virus infection from chicken served to them for meals.

My Quynh Zoo in the neighbouring Long An Province also reported the deaths of 27 tigers and three lions between early August and mid-September. Their samples also tested positive for the A/H5N1 virus, while the sources of infection have remained unclear'''
data = app.rag(q)
print(data)

tensor([0.8975, 0.9468, 1.1050], device='cuda:0')
[]
Here is the information you requested:

* Article Name: No specific article name provided
* Author: No author mentioned
* Source: Nature (study published last year)
* Additional Information: The study found that rapidly intensifying storms are now significantly more common within 240 miles of coastlines, compared to 40 years ago.


In [20]:
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from fastapi import FastAPI
from typing import Optional
origins = ["*"]
app_api = FastAPI()
app_api.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app_api.get("/")
def read_root():
    return "API RAG"

@app_api.get("/rag")
async def read_item( q: Optional[str] = None):
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a good theorist, please summarize this {input} for me.",
            ),
            ("human", "{input}"),
        ])
    
    if q:
        data = app.rag(q)
        print(data)
        res = {
            "result" : data,
            "source_documents":"NO"
        }
        return JSONResponse(content=jsonable_encoder(res))
    return None


In [21]:
import nest_asyncio
import uvicorn
nest_asyncio.apply()
uvicorn.run(app_api, port=8001)

INFO:     Started server process [714715]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)
