# Env

In [None]:
%pip install -q langchain langchain_community langchain_chroma langchain_experimental langchain-text-splitters
%pip install -q langchain-groq langchain_openai
%pip install -q langchain-huggingface
%pip install -qU langchain-qdrant
%pip install -qU qdrant_client
%pip install -qU langchain-core
%pip install -qU langchain-hub

In [None]:
import getpass
import os
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import PromptTemplate
from qdrant_client import QdrantClient
from qdrant_client.http import models
from sklearn.cluster import KMeans
from typing import Any
from collections import defaultdict
import numpy as np
from tiktoken import Encoding, encoding_for_model, get_encoding
from statistics import mean

# Credentials come from the process environment and are never hard-coded.
# Assigning `os.getenv(name)` straight back into `os.environ` raises TypeError
# when `name` is unset (environment values must be strings), so missing values
# are handled explicitly here.

# LangSmith tracing is optional: keep the configured value, otherwise disable it.
os.environ.setdefault("LANGCHAIN_TRACING_V2", "false")

# The OpenAI key is needed by the OpenAI embedding and chat models this notebook
# builds; prompt for it (without echoing) when it is not already exported.
if not os.getenv("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OPENAI_API_KEY: ")

# Optional integrations: already in the environment when set, so only report
# the ones that are missing instead of failing.
for _optional_key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY"):
    if not os.getenv(_optional_key):
        print(f"{_optional_key} is not set; features that need it will be unavailable.")

# Qdrant connection settings (each is None when the variable is unset).
qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")



# RAG

In [None]:
# LLMs and embedding model
# Embedding model; the end-to-end cell uses it to embed the query before the vector search.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
# Drafter model (gpt-4o-mini): produces an answer plus rationale for each document subset.
llm_drafter = ChatOpenAI(model="gpt-4o-mini", logprobs=True)
# Verifier model (gpt-4o): judges each draft. logprobs=True is what makes the token
# log-probabilities available in response_metadata, which the verification step reads.
llm_verifier = ChatOpenAI(model="gpt-4o", logprobs=True)

In [None]:
# Vector store
# Qdrant client built from the URL and API key read from the environment.
qdrant_client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key
)

# Name of the Qdrant collection that the end-to-end cell searches.
# TODO: this is an empty placeholder; set it to a real collection name before running.
collection_name = ""

### Multi-perspective sampling

In [None]:
def multi_perspective_sampling(
    k: int, retrieved_points: list[models.ScoredPoint], seed: int = 1399
) -> list[list[str]]:
    """Build diverse document subsets by drawing one document from every cluster.

    The vectors of ``retrieved_points`` are clustered with KMeans into ``k``
    groups. Subsets are then assembled by picking, without replacement, one
    point from each cluster, so every subset mixes documents from all clusters.
    The number of subsets equals the size of the smallest cluster.

    Args:
        k: Number of KMeans clusters to fit.
        retrieved_points: Search hits; each must expose ``.vector`` (used for
            clustering) and ``.payload`` (its ``"page_content"`` entry is returned).
        seed: Seed for both the KMeans initialisation and the sampling.

    Returns:
        One list per subset holding the ``"page_content"`` value of each chosen
        point (``None`` for a payload without that key), ordered by cluster label.

    Raises:
        ValueError: If ``retrieved_points`` is empty or any point has no vector.
    """
    if not retrieved_points:
        raise ValueError("retrieved_points is empty; there is nothing to cluster.")

    vectors = [point.vector for point in retrieved_points]
    if any(vector is None for vector in vectors):
        # Fail early with an actionable message instead of an opaque KMeans error.
        raise ValueError(
            "Every retrieved point must carry its vector; "
            "run the search with with_vectors=True."
        )

    # Generate clusters
    print(f"Finding {k} clusters.")
    algo: Any = KMeans(n_clusters=k, random_state=seed)
    labels = algo.fit_predict(X=vectors)  # array with one cluster label per point

    # Map each cluster label to the indices (into retrieved_points) of its members.
    cluster_dict: defaultdict[int, list[int]] = defaultdict(list)
    for index, label in enumerate(labels):
        cluster_dict[int(label)].append(index)
    print(f"Clusters distribution: {dict(cluster_dict)}")

    # M subsets: bounded by the smallest cluster, since each subset consumes
    # one member of every cluster.
    m: int = min(len(indices) for indices in cluster_dict.values())
    print(f"{m} document subsets will be created.")

    # A local legacy RandomState draws the same stream that np.random.seed(seed)
    # would, but leaves the process-wide NumPy RNG untouched.
    rng = np.random.RandomState(seed)
    subsets: list[list[str]] = []

    for _ in range(m):
        subset: list[int] = []
        # Visit clusters in ascending label order so the draws are reproducible.
        for label in sorted(cluster_dict):
            chosen_element = int(rng.choice(cluster_dict[label]))
            subset.append(chosen_element)
            # Remove the pick so no document is reused in a later subset.
            cluster_dict[label].remove(chosen_element)
        subsets.append(
            [retrieved_points[idx].payload.get("page_content") for idx in subset]
        )

    return subsets

### RAG drafting

In [None]:
# RAG draft
# Drafting prompt (written in Vietnamese): asks the model to answer {query} based on
# the supplied {evidence} texts and to also give the reasoning behind its answer.
prompt_draft = PromptTemplate.from_template("""Trả lời câu hỏi dựa trên các văn bản được cung cấp. Đồng thời cung cấp lý do đưa ra câu trả lời của bạn.
## Câu hỏi: {query}

## Văn bản cung cấp: {evidence}""")

# Structured-output schema for one draft: a rationale and a response, both strings.
# NOTE(review): the class docstring and the Field descriptions are presumably forwarded
# to the model as the schema description by with_structured_output, so they are runtime
# text and are deliberately left in Vietnamese — confirm before rewording them.
class RagDraftingResponse(BaseModel):
    """Câu trả lời là lý do vì sao đưa ra câu trả lời như thế"""
    # Reasoning the model gives for its answer.
    rationale: str = Field(description="Lý do đưa ra trả lời.")
    # The answer to the question.
    response: str = Field(description="Câu trả lời cho câu hỏi.")
# Drafting chain: fill the drafting prompt, then have the drafter model return a
# RagDraftingResponse. Because include_raw=True is passed, LangChain returns a dict
# with the keys "raw", "parsed" and "parsing_error" rather than the model instance,
# so the draft's fields are found under result["parsed"].
llm_drafter_chain = (
    prompt_draft
    | llm_drafter.with_structured_output(RagDraftingResponse, include_raw=True)
)

### RAG verifier

In [None]:
# RAG verify
# Verification prompt (written in Vietnamese): shows the question, a drafted answer and
# its rationale, then asks whether the rationale is good enough to support the answer,
# requesting a yes-or-no reply ("Có hoặc không").
prompt_verify = PromptTemplate.from_template("""## Câu hỏi: {query}

## Câu trả lời: {response}

## Lý do: {rationale}

Lý do có đủ tốt để hỗ trợ cho câu trả lời không? (Có hoặc không)""")

# Verification chain: fill the verification prompt and send it to the verifier model.
# No output parser is attached, so the chain yields the model's chat message as-is.
llm_verifier_chain = (
    prompt_verify
    | llm_verifier
)

def _is_affirmative(text: str) -> bool:
    """Return True when the reply is just "có" (yes), ignoring case, whitespace and punctuation."""
    return text.strip(" \t\r\n.,;:!?*\"'").lower() == "có"


def _mean_token_probability(message) -> float:
    """Return exp(mean token logprob) of a chat reply, or 0.0 when no logprobs are attached."""
    logprobs = (message.response_metadata or {}).get("logprobs") or {}
    tokens = logprobs.get("content") or []
    if not tokens:
        return 0.0
    return float(np.exp(mean(token["logprob"] for token in tokens)))


def rag_verifier_generator(subsets, query):
    """Draft an answer for every document subset and score it with the verifier.

    For each subset the drafter chain produces a response and a rationale; the
    verifier chain is then asked whether the rationale supports the response.
    A draft's score is the mean token probability of the verifier's reply when
    that reply is an affirmative "có", and 0.0 otherwise (a naive confidence
    estimate).

    Args:
        subsets: Iterable of document lists; each list is the evidence for one draft.
        query: The question being answered.

    Returns:
        A list of ``(response, p_yes)`` tuples, one per successfully parsed draft,
        in the order of ``subsets``.
    """
    rag_verifications: list[tuple[str, float]] = []
    for subset in subsets:
        # Number the documents so the drafter can tell them apart.
        evidence = "\n".join(
            f"[{idx}] {doc}" for idx, doc in enumerate(subset, start=1)
        )
        draft_output = llm_drafter_chain.invoke({"query": query, "evidence": evidence})

        # A structured-output chain built with include_raw=True returns a dict
        # whose "parsed" entry holds the model instance; without include_raw the
        # instance is returned directly. Accept both shapes.
        draft = draft_output.get("parsed") if isinstance(draft_output, dict) else draft_output
        if draft is None:
            print("Skipping a subset: the drafter output could not be parsed.")
            continue

        verdict = llm_verifier_chain.invoke(
            {"query": query, "response": draft.response, "rationale": draft.rationale}
        )
        p_yes: float = (
            _mean_token_probability(verdict)
            if _is_affirmative(verdict.content)
            else 0.0
        )  # Naive
        rag_verifications.append((draft.response, p_yes))

    return rag_verifications


### End-to-end

In [None]:
seed = 1234
query = ""  # TODO: set the question to ask; this is an empty placeholder
query_vector = embeddings.embed_query(query)
# with_vectors=True is required: the sampling step clusters the returned vectors.
docs = qdrant_client.search(
    collection_name=collection_name,
    query_vector=query_vector,
    with_vectors=True
)
subsets = multi_perspective_sampling(k=2, retrieved_points=docs, seed=seed)

# Draft and verify one answer per subset; each entry is an (answer, p_yes) tuple.
rag_verifications = rag_verifier_generator(subsets=subsets, query=query)
if not rag_verifications:
    raise ValueError("No drafts were produced, so there is no answer to select.")

# np.argmax needs a real sequence: a bare generator is treated as a single
# object and would always yield index 0.
best_answer: int = int(np.argmax([p_yes for _, p_yes in rag_verifications]))
rag_verifications[best_answer][0]