In [None]:
!pip install -q --force transformers==4.52.4
!pip install -U bitsandbytes
!pip install -q --force accelerate==1.7.0
!pip install -q --force langchain==0.3.25
!pip install -q --force langchainhub==0.1.21
!pip install -q --force langchain-chroma==0.2.4
!pip install -q --force langchain_experimental==0.3.4
!pip install -q --force langchain-community==0.3.24
!pip install -q --force langchain_huggingface==0.2.0
!pip install -q --force python-dotenv==1.1.0
!pip install -q --force pypdf
!pip install langchain_openai
!pip install langchain-google-genai
!pip install rank_bm25


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
contourpy 1.2.0 requires numpy<2.0,>=1.20, but you have numpy 2.3.4 which is incompatible.
datasets 3.6.0 requires fsspec[http]<=2025.3.0,>=2023.1.0, but you have fsspec 2025.10.0 which is incompatible.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.3.4 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.4 which is incompatible.
s3fs 2024.6.1 requires fsspec==2024.6.1.*, but you have fsspec 2025.10.0 which is incompatible.
scipy 1.13.1 requires numpy<2.3,>=1.22.4, but you have numpy 2.3.4 which is incompatible.
streamlit 1.37.1 requires packaging<25,>=20, but you have packaging 25.0 which is incompatible.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependenc

In [21]:
from dotenv import load_dotenv
import os

# Read a .env file (looked up by python-dotenv's default search) into the process environment.
load_dotenv()

# Both keys come back as None when the corresponding variable is not set.
googleAPIKey = os.environ.get('googleAPIKey')
gptkey = os.environ.get('myAPIKey')

In [22]:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoModelForSequenceClassification,AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from langchain import hub

from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings
import re

import torch
import json
import glob
import os


## Load file

In [3]:
# Root folder for all project data. The default is the original author's local path;
# set the CS431_DATA_DIR environment variable to run the notebook on another machine.
DATA_ROOT = os.environ.get("CS431_DATA_DIR", "C:\\uit_HK5\\CS431\\final_project\\data")

# NOTE: despite its name, `metadata_dir` is the path of the metadata JSON *file*.
metadata_dir = os.path.join(DATA_ROOT, "metadata.json")
transcript_dir = os.path.join(DATA_ROOT, "transcripts_final")
output_dir = os.path.join(DATA_ROOT, "semantic_chunks")
os.makedirs(output_dir, exist_ok=True)


In [6]:
from typing import Union

class Loader:
    """Load timestamped transcript files and pair them with video metadata."""

    @staticmethod
    def parse_transcript(file_path: str) -> tuple[str, list[dict], str]:
        """Parse one transcript file into running text plus a character-offset map.

        Lines are expected to look like ``H:MM:SS - H:MM:SS, spoken text``.
        Blank lines, lines containing the bracketed marker literal checked below
        (presumably the auto-caption music tag), and lines that do not match the
        expected shape are skipped.

        Args:
            file_path: Path to a UTF-8 ``.txt`` transcript.

        Returns:
            ``(full_text, position_map, filename)``. ``full_text`` is every kept
            segment joined by single spaces. ``position_map`` holds one dict per
            segment with ``start``, ``end``, ``text``, ``pos_start`` and
            ``pos_end``; the offsets index into ``full_text`` and ``pos_end``
            includes the separator space that follows the segment. ``filename``
            is the base name with ``.txt`` removed.
        """
        # Derived before the loop: it used to be assigned per line, so an empty or
        # fully-skipped file raised UnboundLocalError at the return statement.
        filename = os.path.basename(file_path).replace(".txt", "")
        pieces = []
        position_map = []
        cursor = 0  # length of the text accumulated so far

        with open(file_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line or "[√¢m nh·∫°c]" in line.lower():
                    continue
                match = re.match(r"(\d+:\d+:\d+)\s*-\s*(\d+:\d+:\d+),\s*(.+)", line)
                if not match:
                    continue
                start, end, text = match.groups()
                piece = text + " "
                position_map.append({
                    "start": start,
                    "end": end,
                    "text": text,
                    "pos_start": cursor,               # offset of the segment's first character
                    "pos_end": cursor + len(piece)     # offset just past the separator space
                })
                pieces.append(piece)
                cursor += len(piece)

        return "".join(pieces).strip(), position_map, filename

    @staticmethod
    def _read_videos(metadata_path: str) -> list:
        """Return the ``videos`` list stored in the metadata JSON file."""
        with open(metadata_path, "r", encoding="utf-8") as f:
            return json.load(f)["videos"]

    @staticmethod
    def _find_video(videos: list, filename: str) -> tuple[Union[str, None], Union[str, None]]:
        """Return ``(title, url)`` of the first entry whose ``video_id`` equals ``filename``."""
        return next(
            ((item["title"], item["url"]) for item in videos if item["video_id"] == filename),
            (None, None),
        )

    def map_metadata(self, metadata_path: str, filename: str) -> tuple[Union[str, None], Union[str, None]]:
        """Look up the title and URL of the video whose ``video_id`` is ``filename``.

        Args:
            metadata_path: Path to a JSON file with a top-level ``"videos"`` list.
            filename: Video id to look for (the transcript's base name).

        Returns:
            ``(title, url)`` of the first matching entry, or ``(None, None)``
            when no entry matches.
        """
        return Loader._find_video(Loader._read_videos(metadata_path), filename)

    def load_dir(self, transcript_dir: str, metadata_path: str) -> list[dict]:
        """Parse every ``*.txt`` transcript in a directory and attach its metadata.

        Args:
            transcript_dir: Directory that contains the transcript files.
            metadata_path: Path to the metadata JSON file.

        Returns:
            One dict per transcript with the keys ``full_text``, ``position_map``,
            ``filename``, ``title`` and ``url``, ordered by file path.
        """
        # Sorted so the output order does not depend on the filesystem.
        file_paths = sorted(glob.glob(os.path.join(transcript_dir, "*.txt")))
        data = []
        videos = None  # metadata is read once, and only if there is a transcript to label

        for file_path in file_paths:
            full_text, position_map, filename = self.parse_transcript(file_path)
            if videos is None:
                videos = Loader._read_videos(metadata_path)
            title, url = Loader._find_video(videos, filename)

            data.append({
                "full_text": full_text,
                "position_map": position_map,
                "filename": filename,
                "title": title,
                "url": url
            })

        return data
        

## Chunking

![image.png](attachment:image.png)

In [10]:

from pyparsing import line


class TranscriptChunker:
    """Split transcripts into semantic chunks and label each chunk with timestamps."""

    def __init__(self, open_api_key: str):
        """Build the embedding client, the semantic splitter and the transcript loader.

        Args:
            open_api_key: OpenAI API key passed to ``OpenAIEmbeddings``.
        """
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-3-large",
            openai_api_key=open_api_key
        )
        self.splitter = SemanticChunker(
            embeddings=self.embeddings,
            breakpoint_threshold_type="percentile",
            breakpoint_threshold_amount=85,
            min_chunk_size=300,

            add_start_index=True,  # chunk_dir reads "start_index" from each chunk's metadata
            buffer_size=1
        )
        self.loader = Loader()

    @staticmethod
    def _locate_chunk(full_text: str, chunk_text: str, reported_start, search_from: int):
        """Return the offset of ``chunk_text`` inside ``full_text``.

        The text is searched for from ``search_from`` onwards. When it cannot be
        found verbatim, the splitter-reported ``reported_start`` (possibly
        ``None``) is returned instead.
        """
        # NOTE(review): the splitter-reported index appears to drift when whitespace
        # between chunks is dropped — confirm against the installed SemanticChunker.
        found = full_text.find(chunk_text, search_from)
        return found if found != -1 else reported_start

    @staticmethod
    def _timestamps_for_span(position_map: list, start_index: int, end_index: int) -> tuple:
        """Return ``(start, end)`` timestamps of the segments overlapping a span.

        Both the span ``[start_index, end_index)`` and each segment's
        ``[pos_start, pos_end)`` are half-open, so a segment that merely touches
        the span at a boundary is not counted. Returns ``(None, None)`` when no
        segment overlaps.
        """
        overlapping = [
            seg for seg in position_map
            if seg["pos_end"] > start_index and seg["pos_start"] < end_index
        ]
        if not overlapping:
            return None, None
        return overlapping[0]["start"], overlapping[-1]["end"]

    def chunk_dir(self, transcript_dir: str, metadata_path: str, output_dir: str) -> list:
        """Chunk every transcript in a directory and save the chunks as JSON.

        Args:
            transcript_dir: Directory with the ``*.txt`` transcripts.
            metadata_path: Path to the metadata JSON file.
            output_dir: Directory that receives ``semantic_chunks.json``
                (created if missing).

        Returns:
            All chunk documents. Each chunk's metadata carries ``video_url``,
            ``filename``, ``title``, ``start_timestamp``, ``end_timestamp`` and a
            per-transcript ``chunk_id``.
        """
        data = self.loader.load_dir(transcript_dir, metadata_path)
        all_chunks = []

        for item in data:
            full_text = item["full_text"]
            position_map = item["position_map"]

            chunks = self.splitter.create_documents(
                texts=[full_text],
                metadatas=[{
                    "video_url": item["url"],
                    "filename": item["filename"],
                    "title": item["title"]
                }]
            )

            search_from = 0  # chunks come back in document order, so search moves forward only
            for i, chunk in enumerate(chunks):
                reported_start = chunk.metadata.pop("start_index", None)
                start_index = self._locate_chunk(
                    full_text, chunk.page_content, reported_start, search_from
                )

                if start_index is None:
                    start_ts, end_ts = None, None
                else:
                    end_index = start_index + len(chunk.page_content)
                    search_from = max(search_from, end_index)
                    start_ts, end_ts = self._timestamps_for_span(
                        position_map, start_index, end_index
                    )

                chunk.metadata["start_timestamp"] = start_ts
                chunk.metadata["end_timestamp"] = end_ts
                chunk.metadata["chunk_id"] = i
            all_chunks.extend(chunks)

        # Save all chunks to one JSON file.
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "semantic_chunks.json")
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump([{
                "page_content": chunk.page_content,
                "metadata": chunk.metadata
            } for chunk in all_chunks], f, ensure_ascii=False, indent=4)
        print(f"Saved {len(all_chunks)} chunks to {output_path}")
        return all_chunks


In [11]:
# Build the chunker with the OpenAI key, then chunk every transcript and write the JSON file.
splitter = TranscriptChunker(open_api_key=gptkey)
data = splitter.chunk_dir(
    transcript_dir=transcript_dir,
    metadata_path=metadata_dir,
    output_dir=output_dir,
)

Saved 461 chunks to C:\uit_HK5\CS431\final_project\data\semantic_chunks\semantic_chunks.json


In [12]:
# Sanity check: show text, timestamp range, URL and title of the first three chunks.
for chunk in data[:3]:
    chunk_meta = chunk.metadata
    print(chunk.page_content)
    print(chunk_meta["start_timestamp"], chunk_meta["end_timestamp"])
    print(chunk_meta["video_url"])
    print(chunk_meta["title"])
    print("-----")

Nh∆∞ v·∫≠y th√¨ trong ph·∫ßn s·ªë 2 n√†y th√¨ ch√∫ng ta ƒë√£ c√πng t√¨m hi·ªÉu v·ªÅ nh·ªØng ch·ªß ƒë·ªÅ sau. ƒê·∫ßu ti√™n l√† ch√∫ng ta t√¨m hi·ªÉu v·ªÅ maximum likelihood cho c√°i log c·ªßa PX. Ch√∫ng ta mong mu·ªën c√≥ ƒë∆∞·ª£c m·ªôt m√¥ h√¨nh ƒë·ªÉ t·∫°o ra m·ªôt c√°i ·∫£nh x gi·ªëng th·∫≠t, gi·ªëng v·ªõi l·∫°i c√°i Pdata. Th·∫ø th√¨ ƒë·ªÉ ƒë·∫°t ƒë∆∞·ª£c c√°i vi·ªác n√†y th√¨ c√°i likelihood c·ªßa log P n√†y ph·∫£i l√† l·ªõn nh·∫•t. V√† khi ƒë∆∞a c√°i log c·ªßa PX n√†y l√™n c·ª±c ƒë·∫°i th√¨ n√≥ s·∫Ω ƒë∆∞a ƒë·∫øn m·ªôt c√°i gi·∫£i ph√°p, ƒë√≥ l√† ch√∫ng ta s·∫Ω ƒë·∫©y c√°i ch·∫∑n d∆∞·ªõi c·ªßa log P. Th√¨ ƒë√≥ ch√≠nh l√† c√°i ELBO l√† evidence lower bound.
0:00:14 0:01:06
https://youtube.com/watch?v=--6FInuIyys
[CS315 - Ch∆∞∆°ng 3] Deep Generative Models (2) - T·ªïng k·∫øt
-----
ƒê·∫©y c√°i ELBO n√†y l√™n, maximum ELBO n√†y l√™n. V√† khi ch√∫ng ta maximum ELBO n√†y l√™n th√¨ ch√∫ng ta s·∫Ω c√≥ hai c√°i m√¥ h√¨nh, ƒë√≥ l√† VAE v√† m√¥ h√¨nh diffusion. V√† ƒë·ªëi v·ªõi c√°i m√¥ h√¨nh

In [17]:
# Ad-hoc retrieval check: run one query and print the text of the top hit.
# NOTE(review): `retriever` is not defined in any cell shown in this notebook —
# presumably left over from an earlier kernel session; confirm or switch to a defined retriever.
result = retriever.invoke("diffusion l√† g√¨")

print(result[0].page_content)

Ch√∫ng ta s·∫Ω c√πng ƒë·∫øn v·ªõi c√°c m√¥ h√¨nh t·∫°o sinh h·ªçc sau Deep Generated Model ph·∫ßn 2, m√¥ h√¨nh Diffusion. C√°c m√¥ h√¨nh t·∫°o sinh h√¨nh ·∫£nh ƒë·ªÅu c√≥ g·ªëc g√°c s·ª≠ d·ª•ng m√¥ h√¨nh ph√°t t√°n, m√¥ h√¨nh Diffusion Model. ƒê√¢y c√≥ th·ªÉ n√≥i l√† m·ªôt trong nh·ªØng m√¥ h√¨nh c√≥ t√≠nh ·ª©ng d·ª•ng r·∫•t cao do t·∫°o ra nh·ªØng ·∫£nh c√≥ ƒë·ªô ph√¢n gi·∫£i cao, ƒë·ªìng th·ªùi c√≥ th·ªÉ cho ch√∫ng ta can thi·ªáp v√† ƒëi·ªÅu h∆∞·ªõng n·ªôi dung c·ªßa t·∫•m ·∫£nh. V·∫≠y th√¨ √Ω t∆∞·ªüng c·ªßa Diffusion l√† g√¨ v√† c√°ch th·ª©c hu·∫•n luy·ªán ra sao, ch√∫ng ta s·∫Ω c√πng t√¨m hi·ªÉu trong b√†i ng√†y h√¥m nay. C√°c v·∫•n ƒë·ªÅ ch√≠nh khi ch√∫ng ta t√¨m hi·ªÉu m·ªôt m√¥ h√¨nh Diffusion Model, m√¥ h√¨nh ph√°t t√°n, ƒë√≥ l√† ch√∫ng ta s·∫Ω t√¨m hi·ªÉu v·ªÅ m√¥ h√¨nh t·∫°o sinh t·ªïng qu√°t. M√¥ h√¨nh t·∫°o sinh t·ªïng qu√°t n√†y s·∫Ω d·ª±a tr√™n l√Ω thuy·∫øt v·ªÅ x√°c su·∫•t th·ªëng k√™.


In [16]:
from sklearn.manifold import TSNE
import plotly.graph_objects as go
import numpy as np

# Fetch embeddings, documents and metadata from Chroma in a single call.
# NOTE(review): `vector_db` is created in a cell that appears later in this notebook;
# that cell must have been run first.
data = vector_db.get(include=["embeddings", "documents", "metadatas"])

vectors = np.array(data["embeddings"])
documents = data["documents"]
metadatas = data["metadatas"]

# Group points by video title; entries without metadata or title fall back to "unknown".
doc_types = [(m or {}).get("title", "unknown") for m in metadatas]
colors = ["blue" if t == "unknown" else "red" for t in doc_types]

# Reduce the embeddings to 2D for visualisation.
# NOTE(review): perplexity=1 is far below scikit-learn's default of 30 — confirm this is intentional.
tsne = TSNE(n_components=2, random_state=42, perplexity=1)
reduced_vectors = tsne.fit_transform(vectors)

# 2D scatter plot; hovering a point shows its title and the first 200 characters of text.
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=6, color=colors, opacity=0.8),
    text=[
        f"<b>Lo·∫°i:</b> {t}<br><b>VƒÉn b·∫£n:</b> {d[:200]}..."
        for t, d in zip(doc_types, documents)
    ],
    hoverinfo='text'
)])

fig.update_layout(
    title='üìä Ph√¢n b·ªë embedding trong Chroma Vector Store (2D)',
    xaxis_title='TSNE Dimension 1',
    yaxis_title='TSNE Dimension 2',
    width=900,
    height=700,
    margin=dict(r=20, b=10, l=10, t=40),
)

# Open the figure in the system browser.
fig.show(renderer="browser")


## Hybrid search: BM25 keyword retrieval + semantic vector search, with cross-encoder reranking

## Load db + vector retriever


In [23]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings


model_name = "BAAI/bge-m3"            # multilingual, lightweight, recommended (original author's note)

# Use the same CUDA-or-CPU fallback as the reranker cell, so this cell also runs without a GPU.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embedding = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": embedding_device})

# Open the Chroma store persisted under ../database; this cell adds no documents to it.
vector_db = Chroma(embedding_function=embedding, persist_directory="../database")
# MMR retriever: returns 40 results chosen from 80 fetched candidates.
vector_retriever = vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 40, "fetch_k": 80, "lambda_mult": 0.3})

Task exception was never retrieved
future: <Task finished name='Task-1' coro=<main_evaluation() done, defined at C:\Users\ADMIN\AppData\Local\Temp\ipykernel_6816\3633396511.py:93> exception=OpenAIError('The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable')>
Traceback (most recent call last):
  File "C:\Program Files\Python310\lib\asyncio\tasks.py", line 232, in __step
    result = coro.send(None)
  File "C:\Users\ADMIN\AppData\Local\Temp\ipykernel_6816\3633396511.py", line 243, in main_evaluation
    result = evaluate(
  File "d:\DAI_HOC\CS431\DoAn\Rag_QABot\venv310\lib\site-packages\ragas\_analytics.py", line 277, in wrapper
    result = func(*args, **kwargs)
  File "d:\DAI_HOC\CS431\DoAn\Rag_QABot\venv310\lib\site-packages\ragas\evaluation.py", line 461, in evaluate
    return run(_async_wrapper())
  File "d:\DAI_HOC\CS431\DoAn\Rag_QABot\venv310\lib\site-packages\ragas\async_utils.py", line 156, in run
  

## Reranker

In [24]:
import torch
from typing import List
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from langchain_huggingface import HuggingFaceEmbeddings

rerank_model_name = "BAAI/bge-reranker-base"
tok = AutoTokenizer.from_pretrained(rerank_model_name)

# Load the cross-encoder, move it to the GPU when one is available, then switch to eval mode.
_rerank_device = "cuda" if torch.cuda.is_available() else "cpu"
reranker = AutoModelForSequenceClassification.from_pretrained(rerank_model_name)
reranker.to(device=_rerank_device)
reranker.eval()


XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=

In [25]:
@torch.no_grad()
def batch_crossencoder_scores(q: str, texts: List[str], batch_size: int = 16, max_len: int = 512) -> List[float]:
    """Score each text against the query with the module-level cross-encoder.

    Args:
        q: Query string, paired with every text.
        texts: Candidate passages.
        batch_size: Number of (query, text) pairs per forward pass.
        max_len: Token limit per pair; longer pairs are truncated.

    Returns:
        One score per text, in input order (the model's logits with the last
        dimension squeezed — assumes a single-logit head; confirm for other models).
    """
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    collected = []
    for offset in range(0, len(texts), batch_size):
        passages = texts[offset : offset + batch_size]
        encoded = tok(
            [q] * len(passages),
            passages,
            padding=True,
            truncation=True,
            max_length=max_len,
            return_tensors="pt",
        )
        # Move every tensor of the encoded batch to the target device.
        on_device = {name: tensor.to(device=target_device) for name, tensor in encoded.items()}
        batch_logits = reranker(**on_device).logits.squeeze(-1)
        collected += batch_logits.tolist()
    return collected


def crossencoder_rerank(docs, query: str, top_k: int = 10):
    """Rerank documents with the cross-encoder and drop boilerplate passages.

    Args:
        docs: Candidate documents; each must expose ``page_content``.
        query: The user query the documents are scored against.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents in descending score order, skipping any whose
        text contains one of the ``BAD_HINTS`` phrases (case-insensitive).
        Returns an empty list when ``top_k`` is zero or negative.
    """
    # Without this guard the loop appended one document before checking the limit.
    if top_k <= 0:
        return []

    texts = [d.page_content for d in docs]
    scores = batch_crossencoder_scores(query, texts, batch_size=16, max_len=512)
    ranked = sorted(zip(docs, scores), key=lambda pair: pair[1], reverse=True)

    BAD_HINTS = ("C·∫£m ∆°n c√°c b·∫°n ƒë√£ xem", "ƒëƒÉng k√Ω k√™nh", "subscribe", "like v√† share")
    lowered_hints = tuple(h.lower() for h in BAD_HINTS)  # lower-cased once, not per document

    final_docs = []
    for d, _score in ranked:
        content = d.page_content.lower()
        if any(h in content for h in lowered_hints):
            continue
        final_docs.append(d)
        if len(final_docs) >= top_k:
            break
    return final_docs

In [26]:
## Smoke test for the reranker: retrieve candidates, rerank them, print the top 10.
query = "diffusion l√† g√¨"
# `invoke` replaces the deprecated `get_relevant_documents` and matches the other cells.
docs = vector_retriever.invoke(query)
reranked_docs = crossencoder_rerank(docs, query, top_k=10)

for i, d in enumerate(reranked_docs):
    m = d.metadata
    print(f"[{i}]  | {m.get('title', '')} | {m.get('video_url', '')}")
    print(f"    {m.get('start_timestamp', '?')} ‚Üí {m.get('end_timestamp', '?')}")
    # Collapse newlines and cap the preview at 300 characters.
    text = d.page_content.replace("\n", " ").strip()
    print("   " + (text[:300] + ("..." if len(text) > 300 else "")))
    print("-" * 80)


[0]  | [CS315 - Ch∆∞∆°ng 3] Deep Generative Models (2) - Part 1 | https://youtube.com/watch?v=SCNZncN1Hvk
    0:00:14 ‚Üí 0:01:11
   Ch√∫ng ta s·∫Ω c√πng ƒë·∫øn v·ªõi c√°c m√¥ h√¨nh t·∫°o sinh h·ªçc sau Deep Generated Model ph·∫ßn 2, m√¥ h√¨nh Diffusion. C√°c m√¥ h√¨nh t·∫°o sinh h√¨nh ·∫£nh ƒë·ªÅu c√≥ g·ªëc g√°c s·ª≠ d·ª•ng m√¥ h√¨nh ph√°t t√°n, m√¥ h√¨nh Diffusion Model. ƒê√¢y c√≥ th·ªÉ n√≥i l√† m·ªôt trong nh·ªØng m√¥ h√¨nh c√≥ t√≠nh ·ª©ng d·ª•ng r·∫•t cao do t·∫°o ra nh·ªØng ·∫£nh c√≥ ƒë·ªô ph√¢n gi·∫£i...
--------------------------------------------------------------------------------
[1]  | [CS315 - Ch∆∞∆°ng 0] Gi·ªõi thi·ªáu m√¥n h·ªçc (Ph·∫ßn 1) | https://youtube.com/watch?v=RU8d6QAuX0k
    0:04:07 ‚Üí 0:05:48
   ƒê√≥ ch√≠nh l√† m√¥ h√¨nh d·ª±a tr√™n x√°c su·∫•t v√† c·ª• th·ªÉ c·ªßa m·ªôt m√¥ h√¨nh d·ª±a tr√™n x√°c su·∫•t ƒë√≥ ch√≠nh l√† m√¥ h√¨nh khu·∫øch t√°n l√† Diffusion Model. Sau ƒë√≥ sang tu·∫ßn th·ª© 11 th√¨ ch√∫ng ta s·∫Ω c√πng t√¨m hi·ªÉu v·ªÅ nh·ªØng m√¥ h√¨nh h·ªçc 

## bm25

In [27]:
from rank_bm25 import BM25Okapi
from langchain.schema import Document
from langchain_core.runnables import RunnableLambda
from langchain.retrievers import EnsembleRetriever

# ===== 1. Rebuild Document objects from everything stored in Chroma =====
raw = vector_db.get(include=["documents", "metadatas"])
docs = []
for content, metadata in zip(raw["documents"], raw["metadatas"]):
    docs.append(Document(
        page_content=content,
        # Keep every stored field (title, end_timestamp, ...): format_doc and the
        # printing cells read them. The three defaults match the previous behaviour.
        metadata={
            "filename": "",
            "video_url": "",
            "start_timestamp": "",
            **(metadata or {}),
        }
    ))

# Private snapshot for BM25. Other cells rebind the global `docs`, which would
# otherwise leave the BM25 index pointing at the wrong list.
bm25_docs = list(docs)


def bm25_tokenize(text: str) -> list:
    """Lower-case whitespace tokenizer, applied identically to corpus and queries."""
    return text.lower().split()


# ===== 2. BM25 keyword retriever =====
if bm25_docs:
    corpus_tokens = [bm25_tokenize(doc.page_content) for doc in bm25_docs]
    bm25_model = BM25Okapi(corpus_tokens)

    def bm25_retriever(query, top_k=5):
        """Return up to ``top_k`` indexed documents, highest BM25 score first."""
        top_k = min(top_k, len(bm25_docs))  # never request more documents than exist
        if top_k <= 0:
            # `scores.argsort()[-0:]` would select every document, so stop here.
            return []
        scores = bm25_model.get_scores(bm25_tokenize(query))
        top_indices = scores.argsort()[-top_k:][::-1]
        return [bm25_docs[i] for i in top_indices]

    class BM25Retriever:
        """Minimal retriever-style wrapper around ``bm25_retriever`` with a fixed ``top_k``."""

        def __init__(self, top_k=30):
            self.top_k = top_k

        def get_relevant_documents(self, query: str):
            """Return the ``top_k`` best BM25 matches for ``query``."""
            return bm25_retriever(query, top_k=self.top_k)

    keyword_retriever = BM25Retriever(top_k=30)
    bm25_runnable = RunnableLambda(lambda x: keyword_retriever.get_relevant_documents(x))
else:
    # Fallback when the store holds no documents: always return an empty result.
    bm25_runnable = RunnableLambda(lambda x: [])



# ===== 3. Hybrid retriever: equal-weight ensemble of BM25 and vector search =====
hybrid_retriever = EnsembleRetriever(
    retrievers=[bm25_runnable, vector_retriever],
    weights=[0.5, 0.5]
)


## generate text

### Chọn mô hình (qwen 0.6b hoặc api gemini)

In [28]:
# 4-bit NF4 quantization settings.
# NOTE(review): `nf4_config` is built here but is not passed to `from_pretrained` below and is
# not referenced by any other cell shown — the model is therefore loaded unquantized; confirm intent.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

MODEL_NAME = "Qwen/Qwen3-0.6B"


# NOTE(review): the install cell pins transformers==4.52.4, which may expect `torch_dtype=`
# rather than `dtype=` — confirm against the installed version.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    dtype=torch.float16,   # use float16 on the GPU
    #device_map="auto"      # automatically place the model on the GPU
)
# Moved to the GPU explicitly; this line fails on a machine without CUDA.
model.to("cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True,trust_remote_code=True)



In [29]:
# Text-generation pipeline around the local model, capped at 256 new tokens per call.
model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    pad_token_id=tokenizer.eos_token_id,
    device=0  # GPU index 0 (the earlier comment said CPU; the cell output reports cuda:0)

)

# LangChain wrapper so the pipeline can be used as `llm` in chains.
llm = HuggingFacePipeline(pipeline=model_pipeline)


Device set to use cuda:0


## gemini 

In [30]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Rebinds `llm`: after this cell runs, chains use Gemini instead of the local HuggingFace pipeline.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",   # or gemini-1.5-pro, gemini-2.0-flash
    temperature=0.0,
    google_api_key=googleAPIKey  # key read from the environment in the first code cell
)

In [31]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableMap,RunnableLambda


# Structured answer schema for the RAG chain. It is handed to JsonOutputParser below, whose
# format instructions are injected into the prompt.
# Documented with # comments rather than a docstring so the generated schema stays unchanged.
class VideoAnswer(BaseModel):
    # Answer summarised in three sentences.
    text: str = Field(description="C√¢u tr·∫£ l·ªùi t√≥m t·∫Øt trong 3 c√¢u")
    # Name of the source transcript file.
    filename: str = Field(description="T√™n file transcript g·ªëc")
    # URL of the source video.
    video_url: str = Field(description="URL c·ªßa video g·ªëc")
    # Start time of the cited passage (HH:MM:SS).
    start_timestamp: str = Field(description="Th·ªùi ƒëi·ªÉm b·∫Øt ƒë·∫ßu (format: HH:MM:SS)")
    # End time of the cited passage (HH:MM:SS).
    end_timestamp: str = Field(description="Th·ªùi ƒëi·ªÉm k·∫øt th√∫c (format: HH:MM:SS)")
    # Confidence level: zero / low / medium / high.
    confidence: str = Field(description="ƒê·ªô tin c·∫≠y: zero/low/medium/high")

# Parser bound to the schema above; it supplies the format instructions used in the prompt.
parser = JsonOutputParser(pydantic_object=VideoAnswer)

# ===== Prompt =====
prompt = ChatPromptTemplate.from_template("""
D·ª±a v√†o transcript sau, tr·∫£ l·ªùi c√¢u h·ªèi c·ªßa ng∆∞·ªùi d√πng b·∫±ng ti·∫øng Vi·ªát.Ph·∫ßn t√≥m t·∫Øt n·ªôi dung th√¨ n√™n t√≥m t·∫Øt trong 3 c√¢u, 
d·ª±a v√†o c√°c ƒëo·∫°n transcript ƒë∆∞·ª£c cung c·∫•p v√† ch·ªâ ra ƒëo·∫°n video ch·ª©a th√¥ng tin ƒë√≥ (video url, th·ªùi ƒëi·ªÉm b·∫Øt ƒë·∫ßu v√† k·∫øt th√∫c).
ƒê·ªìng th·ªùi l√†m m∆∞·ª£t l·∫°i n·ªôi dung t√≥m t·∫Øt ƒë√≥
Khi tr√≠ch d·∫´n th√¥ng tin, **lu√¥n s·ª≠ d·ª•ng ƒë√∫ng [Video URL] v√† [Start] t·ª´ doc ch·ª©a n·ªôi dung ƒë√≥**.
N·∫øu kh√¥ng bi·∫øt c√¢u tr·∫£ l·ªùi th√¨ c·ª© tr·∫£ l·ªùi l√† t√¥i kh√¥ng bi·∫øt v√† ƒë·ªô tin c·∫≠y l√† zero
N·∫øu c√¢u h·ªèi kh√¥ng li√™n quan ƒë·∫øn n·ªôi dung video th√¨ tr·∫£ l·ªùi t√¥i ch·ªâ ƒë∆∞·ª£c hu·∫•n luy·ªán tr·∫£ l·ªùi c√°c c√¢u h·ªèi li√™n quan ƒë·∫øn n·ªôi dung video v√† ƒë·ªô tin c·∫≠y l√† zero
Kh√¥ng b·ªãa ra th√¥ng tin kh√¥ng c√≥ cƒÉn c·ª©, kh√¥ng tr·∫£ l·ªùi sai format
N·∫øu b·∫°n c·ª±c k·ª≥ ch·∫Øc ch·∫Øn v·ªÅ c√¢u tr·∫£ l·ªùi, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† high. N·∫øu b·∫°n kh√° ch·∫Øc ch·∫Øn, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† medium. N·∫øu b·∫°n kh√¥ng ch·∫Øc ch·∫Øn v·ªÅ c√¢u tr·∫£ l·ªùi, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† low.
ƒê·ªãnh d·∫°ng ƒë·∫ßu ra ph·∫£i tu√¢n theo JSON schema sau:
{format_instructions}
Transcript:
{context}

C√¢u h·ªèi: {question}
\nAnswer:                                          
""")

def format_doc(docs):
    """Render retrieved documents as labelled text blocks for the prompt context.

    Each document becomes five lines (video URL, filename, start, end, content);
    missing metadata fields render as empty strings. Blocks are separated by one
    blank line.
    """
    labelled_fields = (
        ("[Video URL]", "video_url"),
        ("[Filename]", "filename"),
        ("[Start]", "start_timestamp"),
        ("[End]", "end_timestamp"),
    )
    blocks = []
    for doc in docs:
        lines = [f"{label}: {doc.metadata.get(key, '')}" for label, key in labelled_fields]
        lines.append(f"[Content]: {doc.page_content}")
        blocks.append("\n".join(lines))
    return "\n\n".join(blocks)

def extract_json_from_output(output: str) -> str:
    """Return the text that follows the first ``Answer`` marker in ``output``.

    The prompt ends with ``Answer:``, so a model that echoes the prompt puts its
    reply after that marker. Everything after the first marker is returned with
    the marker's colon and surrounding whitespace removed. When the marker is
    absent, the whole output is returned stripped instead of raising.

    Args:
        output: Raw text produced by the language model.

    Returns:
        The answer portion of ``output``.
    """
    _, marker, tail = output.partition("Answer")
    if not marker:
        return output.strip()
    tail = tail.lstrip()
    # Drop the colon of "Answer:" so the result can start directly with the JSON payload.
    if tail.startswith(":"):
        tail = tail[1:]
    return tail.strip()
    
    # H√†m rerank l·∫•y docs v√† query
def rerank_with_query(docs_and_query) -> List[Document]:
    """Chain adapter: unpack a ``(docs, query)`` pair and return at most 10 reranked documents."""
    candidate_docs, user_query = docs_and_query
    return crossencoder_rerank(candidate_docs, user_query, top_k=10)

# ===== Build the RAG chain =====
# The input string is passed through as "question". For "context" it is sent to the vector
# retriever, the hits are reranked against the query and then formatted into labelled blocks.
rag_chain = (
    {
        "question": RunnablePassthrough(),
        "context": RunnableLambda(lambda query: (
            # `invoke` replaces the deprecated `get_relevant_documents`.
            vector_retriever.invoke(query),
            query
        ))
        | RunnableLambda(rerank_with_query)
        | RunnableLambda(format_doc)
    }
    | prompt.partial(format_instructions=parser.get_format_instructions())
    | llm
    # Post-processing is currently disabled, so the chain returns the raw LLM message.
    # The disabled steps would take the text after "Answer" and parse it as JSON:
    #| #extract_json_from_output
    #RunnableLambda(lambda x: extract_json_from_output(x))
    #|parser
)

In [32]:
import time
# Time one end-to-end RAG call. The measured interval also includes printing the answer,
# because `end` is taken after the print.
start = time.time()
result = rag_chain.invoke("T·∫°i sao vi·ªác t·ª± hu·∫•n luy·ªán m√¥ h√¨nh CLIP t·ª´ ƒë·∫ßu l√† kh√≥ kh·∫£ thi?")
print(result.content)
end = time.time()
# Elapsed wall-clock time in seconds.
print("Th·ªùi gian:", end - start)


```json
{
  "text": "Vi·ªác t·ª± hu·∫•n luy·ªán m√¥ h√¨nh CLIP t·ª´ ƒë·∫ßu l√† kh√¥ng kh·∫£ thi v√¨ n√≥ ƒë√≤i h·ªèi m·ªôt l∆∞·ª£ng d·ªØ li·ªáu c·ª±c k·ª≥ l·ªõn. Ngo√†i ra, qu√° tr√¨nh n√†y c√≤n y√™u c·∫ßu t√†i nguy√™n t√≠nh to√°n kh·ªïng l·ªì, kh·∫£ nƒÉng x·ª≠ l√Ω song song v√† c√°c GPU r·∫•t ƒë·∫Øt ti·ªÅn. Do ƒë√≥, gi·∫£i ph√°p kh·∫£ thi nh·∫•t l√† s·ª≠ d·ª•ng c√°c m√¥ h√¨nh CLIP ƒë√£ ƒë∆∞·ª£c ti·ªÅn hu·∫•n luy·ªán ƒë·ªÉ gi·∫£i quy·∫øt c√°c t√°c v·ª• hi·ªán c√≥.",
  "filename": "yPzXzbEhUW0",
  "video_url": "https://youtube.com/watch?v=yPzXzbEhUW0",
  "start_timestamp": "00:00:14",
  "end_timestamp": "00:01:03",
  "confidence": "high"
}
```
Th·ªùi gian: 38.36908531188965


## Model Evaluation

In [35]:
import os
import json
import random
import asyncio
import nest_asyncio
from typing import List, Dict
from datasets import Dataset

# --- Ragas & Datasets ---
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
)
from datasets import Dataset
nest_asyncio.apply()

# --- H√ÄM M·ªöI: T·∫°o b·ªô test t·ªïng h·ª£p t·ª´ Vector DB ---
async def generate_synthetic_test_set_from_db(llm: ChatGoogleGenerativeAI, db: Chroma, num_samples: int = 5, seed=None) -> List[Dict]:
    """Build a synthetic question/answer test set from chunks stored in Chroma.

    Chunks are read from ``db`` and filtered: only texts longer than 200
    characters that do not contain "subscribe" are kept. A random sample of
    them is drawn, and ``llm`` is asked to write one question plus reference
    answer per sampled chunk.

    Args:
        llm: Chat model used to write the question/answer pairs.
        db: Chroma store to sample chunks from.
        num_samples: Number of chunks to sample; reduced when fewer valid
            chunks exist.
        seed: Optional seed for reproducible sampling. ``None`` keeps the
            previous behaviour of drawing from the global ``random`` state.

    Returns:
        A list of dicts with ``question``, ``ground_truth`` and
        ``ground_truth_context`` (a one-element list holding the source chunk).
        Chunks whose LLM reply cannot be parsed are reported and skipped. An
        empty list is returned when the store cannot be read or holds no valid
        chunk.
    """

    def _parse_qa_json(raw_reply: str) -> dict:
        """Parse the JSON object in an LLM reply, tolerating code fences and surrounding prose."""
        cleaned = raw_reply.strip().replace("```json", "").replace("```", "")
        first, last = cleaned.find("{"), cleaned.rfind("}")
        if first != -1 and last > first:
            cleaned = cleaned[first:last + 1]
        return json.loads(cleaned)

    print("ƒêang l·∫•y chunk t·ª´ Vector DB ƒë·ªÉ t·∫°o b·ªô test...")
    try:
        # Fetch every chunk from the store.
        raw_data = db.get(include=["documents", "metadatas"])
        all_chunks_data = [
            {"page_content": doc, "metadata": meta}
            for doc, meta in zip(raw_data["documents"], raw_data["metadatas"])
        ]
    except Exception as e:
        print(f"L·ªñI: Kh√¥ng th·ªÉ l·∫•y d·ªØ li·ªáu t·ª´ Chroma DB: {e}")
        return []

    # Drop chunks that are too short or look like channel boilerplate.
    valid_chunks = [
        chunk for chunk in all_chunks_data
        if len(chunk["page_content"]) > 200 and "subscribe" not in chunk["page_content"].lower()
    ]

    if len(valid_chunks) < num_samples:
        print(f"C·∫£nh b√°o: Ch·ªâ t√¨m th·∫•y {len(valid_chunks)} chunk h·ª£p l·ªá. S·∫Ω d√πng t·∫•t c·∫£.")
        num_samples = len(valid_chunks)
        if num_samples == 0:
            print("L·ªñI: Kh√¥ng c√≥ chunk h·ª£p l·ªá ƒë·ªÉ t·∫°o b·ªô test.")
            return []

    # A dedicated generator is used only when a seed is given, so callers that
    # seed the global `random` module keep getting the same samples as before.
    sampler = random.Random(seed) if seed is not None else random
    sampled_chunks = sampler.sample(valid_chunks, num_samples)

    generation_prompt_template = ChatPromptTemplate.from_template(
        """
        B·∫°n l√† m·ªôt chuy√™n gia t·∫°o d·ªØ li·ªáu. D·ª±a v√†o NG·ªÆ C·∫¢NH (context) sau ƒë√¢y, h√£y t·∫°o ra 1 c·∫∑p (c√¢u h·ªèi, c√¢u tr·∫£ l·ªùi) m√† NG·ªÆ C·∫¢NH n√†y c√≥ th·ªÉ tr·∫£ l·ªùi tr·ª±c ti·∫øp.
        - C√¢u tr·∫£ l·ªùi (answer) ph·∫£i ƒë∆∞·ª£c r√∫t ra TR·ª∞C TI·∫æP t·ª´ NG·ªÆ C·∫¢NH.
        - C√¢u h·ªèi (question) ph·∫£i l√† c√¢u h·ªèi m√† m·ªôt ng∆∞·ªùi xem video s·∫Ω h·ªèi.
        - Tr·∫£ l·ªùi b·∫±ng ti·∫øng Vi·ªát.
        - Ch·ªâ tr·∫£ v·ªÅ 1 JSON object v·ªõi 2 key: "question" v√† "ground_truth".

        NG·ªÆ C·∫¢NH:
        {context}

        JSON:
        """
    )

    generation_chain = generation_prompt_template | llm

    test_set = []
    print(f"ƒêang d√πng LLM ƒë·ªÉ t·∫°o {num_samples} m·∫´u test...")
    for chunk_data in sampled_chunks:
        context = chunk_data["page_content"]
        try:
            response = await generation_chain.ainvoke({"context": context})
            data = _parse_qa_json(response.content)

            test_set.append({
                "question": data["question"],
                "ground_truth": data["ground_truth"],  # reference answer written by the LLM
                # Stored as a list (the original author's note says Ragas expects a list here).
                "ground_truth_context": [context]
            })
        except Exception as e:
            # Best effort: report the failing chunk and continue with the rest.
            print(f"L·ªói khi t·∫°o c√¢u h·ªèi cho chunk: {e}")
            print(f"Chunk l·ªói: {context[:100]}...")

    print(f"ƒê√£ t·∫°o xong {len(test_set)} m·∫´u test.")
    return test_set

# --- MAIN ENTRY POINT FOR RUNNING THE EVALUATION ---
async def main_evaluation(num_samples: int = 5, rerank_top_k: int = 10, ragas_max_workers: int = 2):
    """Run the RAG pipeline on a synthetic test set and score it with Ragas.

    The function relies on objects created by earlier notebook cells and read
    from module globals: ``llm``, ``vector_retriever``, ``bm25_runnable``,
    ``vector_db`` and ``embedding``. It also calls the helpers
    ``crossencoder_rerank``, ``format_doc``, ``VideoAnswer`` and
    ``generate_synthetic_test_set_from_db`` defined elsewhere in the notebook.

    Steps:
      1. Build a hybrid (BM25 + vector) retriever followed by a rerank step.
      2. Build a prompt | LLM generation chain.
      3. Ask the LLM to create ``num_samples`` question / ground-truth pairs.
      4. Run retrieval + generation for every question.
      5. Hand the collected rows to ``ragas.evaluate`` and print the scores.

    Args:
        num_samples: Number of synthetic test samples to request. A larger
            value (e.g. 20) gives more reliable scores at the cost of more
            LLM calls.
        rerank_top_k: Value passed as ``top_k`` to ``crossencoder_rerank``.
        ragas_max_workers: Upper bound on concurrent Ragas jobs. Kept small
            because a rate-limited judge LLM otherwise answers with HTTP 429
            and the affected metrics come back as NaN.

    Returns:
        None. All results are printed.
    """
    print("B·∫Øt ƒë·∫ßu qu√° tr√¨nh ƒë√°nh gi√° RAG...")

    # --- 1. Pick up the objects initialised by the previous cells ---
    try:
        required_names = ('llm', 'vector_retriever', 'bm25_runnable', 'embedding')
        if any(name not in globals() for name in required_names):
            print("L·ªñI: 'llm', 'vector_retriever', 'bm25_runnable', ho·∫∑c 'embedding' ch∆∞a ƒë∆∞·ª£c ƒë·ªãnh nghƒ©a.")
            print("H√£y ch·∫°y c√°c cell ·ªü tr√™n tr∆∞·ªõc khi ch·∫°y cell n√†y.")
            return

        # Local aliases (references, not copies) to keep the code below readable.
        current_llm = llm
        current_vector_retriever = vector_retriever
        current_bm25_runnable = bm25_runnable
        # 'vector_db' is not part of the explicit check above; a missing
        # definition surfaces as the NameError handled below.
        current_vector_db = vector_db
        current_embedding = embedding

    except NameError as e:
        print(f"L·ªñI: Thi·∫øu bi·∫øn. H√£y ch·∫°y c√°c cell ·ªü tr√™n tr∆∞·ªõc. L·ªói: {e}")
        return

    # --- 2. Define the two SEPARATE chains that are evaluated ---

    def rerank_with_query(docs_and_query) -> List[Document]:
        """Unpack a (docs, query) tuple and rerank the docs with the notebook's cross-encoder helper."""
        docs, query = docs_and_query
        return crossencoder_rerank(docs, query, top_k=rerank_top_k)

    # Chain 2a: RETRIEVER CHAIN (hybrid retrieval + rerank)
    hybrid_retriever = EnsembleRetriever(
        retrievers=[current_bm25_runnable, current_vector_retriever],
        weights=[0.5, 0.5]
    )

    retriever_chain = (
        # `invoke` replaces the deprecated `get_relevant_documents`.
        RunnableLambda(lambda query: (hybrid_retriever.invoke(query), query))
        | RunnableLambda(rerank_with_query)
    )

    # Chain 2b: GENERATION CHAIN (prompt + LLM)
    # The parser is only used to produce the format instructions; the raw LLM
    # message is parsed by hand further down.
    parser = JsonOutputParser(pydantic_object=VideoAnswer)

    generation_prompt = ChatPromptTemplate.from_template("""
    D·ª±a v√†o transcript sau, tr·∫£ l·ªùi c√¢u h·ªèi c·ªßa ng∆∞·ªùi d√πng b·∫±ng ti·∫øng Vi·ªát.Ph·∫ßn t√≥m t·∫Øt n·ªôi dung th√¨ n√™n t√≥m t·∫Øt trong 3 c√¢u, 
    d·ª±a v√†o c√°c ƒëo·∫°n transcript ƒë∆∞·ª£c cung c·∫•p v√† ch·ªâ ra ƒëo·∫°n video ch·ª©a th√¥ng tin ƒë√≥ (video url, th·ªùi ƒëi·ªÉm b·∫Øt ƒë·∫ßu v√† k·∫øt th√∫c).
    ƒê·ªìng th·ªùi l√†m m∆∞·ª£t l·∫°i n·ªôi dung t√≥m t·∫Øt ƒë√≥
    Khi tr√≠ch d·∫´n th√¥ng tin, **lu√¥n s·ª≠ d·ª•ng ƒë√∫ng [Video URL] v√† [Start] t·ª´ doc ch·ª©a n·ªôi dung ƒë√≥**.
    N·∫øu kh√¥ng bi·∫øt c√¢u tr·∫£ l·ªùi th√¨ c·ª© tr·∫£ l·ªùi l√† t√¥i kh√¥ng bi·∫øt v√† ƒë·ªô tin c·∫≠y l√† zero
    N·∫øu c√¢u h·ªèi kh√¥ng li√™n quan ƒë·∫øn n·ªôi dung video th√¨ tr·∫£ l·ªùi t√¥i ch·ªâ ƒë∆∞·ª£c hu·∫•n luy·ªán tr·∫£ l·ªùi c√°c c√¢u h·ªèi li√™n quan ƒë·∫øn n·ªôi dung video v√† ƒë·ªô tin c·∫≠y l√† zero
    Kh√¥ng b·ªãa ra th√¥ng tin kh√¥ng c√≥ cƒÉn c·ª©, kh√¥ng tr·∫£ l·ªùi sai format
    N·∫øu b·∫°n c·ª±c k·ª≥ ch·∫Øc ch·∫Øn v·ªÅ c√¢u tr·∫£ l·ªùi, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† high. N·∫øu b·∫°n kh√° ch·∫Øc ch·∫Øn, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† medium. N·∫øu b·∫°n kh√¥ng ch·∫Øc ch·∫Øn v·ªÅ c√¢u tr·∫£ l·ªùi, h√£y ƒë·∫∑t ƒë·ªô tin c·∫≠y l√† low.
    ƒê·ªãnh d·∫°ng ƒë·∫ßu ra ph·∫£i tu√¢n theo JSON schema sau:
    {format_instructions}
    Transcript:
    {context}

    C√¢u h·ªèi: {question}
    \nAnswer:
    """)

    generation_chain = (
        generation_prompt.partial(format_instructions=parser.get_format_instructions())
        | current_llm
    )

    # --- 3. Build the test set ---
    # The RAG LLM itself generates the question / ground-truth pairs from
    # chunks stored in the vector DB.
    test_set = await generate_synthetic_test_set_from_db(
        current_llm, current_vector_db, num_samples=num_samples
    )

    if not test_set:
        print("Kh√¥ng th·ªÉ t·∫°o test set. D·ª´ng ƒë√°nh gi√°.")
        return

    # --- 4. Run the pipeline and collect one row per test sample ---
    print("ƒêang ch·∫°y pipeline tr√™n b·ªô test set...")
    evaluation_data = []

    for i, item in enumerate(test_set):
        print(f"ƒêang x·ª≠ l√Ω m·∫´u {i+1}/{len(test_set)}: {item['question'][:50]}...")
        question = item['question']

        # 4a. Retrieve and rerank the contexts.
        retrieved_docs = await retriever_chain.ainvoke(question)
        contexts_list = [d.page_content for d in retrieved_docs]

        # 4b. Generate the answer from the formatted contexts.
        formatted_context_str = format_doc(retrieved_docs)
        response_msg = await generation_chain.ainvoke({
            "question": question,
            "context": formatted_context_str
        })

        try:
            # The LLM may wrap its JSON in a ```json ... ``` fence; strip the
            # fence markers before parsing and keep only the "text" field.
            json_str = response_msg.content.strip().replace("```json", "").replace("```", "")
            answer_json = json.loads(json_str)
            answer_text = answer_json.get("text", "")
        except Exception:
            # Fall back to the raw message when the payload is not valid JSON.
            answer_text = response_msg.content

        evaluation_data.append({
            "question": question,
            "answer": answer_text,
            "contexts": contexts_list,
            "ground_truth": item["ground_truth"]  # reference answer
        })

    print("ƒê√£ thu th·∫≠p xong d·ªØ li·ªáu. Chu·∫©n b·ªã cho Ragas...")

    # --- 5. Run Ragas ---
    if not evaluation_data:
        print("L·ªñI: Kh√¥ng c√≥ d·ªØ li·ªáu ƒë·ªÉ ƒë√°nh gi√°.")
        return

    metrics = [
        faithfulness,       # does the answer stick to the retrieved context?
        answer_relevancy,   # is the answer relevant to the question?
        context_precision,  # are the retrieved contexts relevant?
    ]

    # Add context_recall when the test set carries ground_truth_context.
    if "ground_truth_context" in test_set[0]:
        print("ƒê√£ ph√°t hi·ªán ground_truth_context, s·∫Ω ƒëo context_recall.")
        # Rows were appended in test-set order, so a positional zip is safe.
        for record, sample in zip(evaluation_data, test_set):
            record["ground_truth_context"] = sample["ground_truth_context"]
        metrics.append(context_recall)

    # Build the Hugging Face Dataset once, after every row has its final keys.
    dataset = Dataset.from_list(evaluation_data)

    print("ƒêang ch·∫°y Ragas evaluate... (S·ª≠ d·ª•ng Gemini l√†m Judge, vi·ªác n√†y c√≥ th·ªÉ m·∫•t v√†i ph√∫t)")

    # Imported here so the dependency is only needed once evaluation really runs.
    from ragas.run_config import RunConfig

    result = evaluate(
        dataset=dataset,
        metrics=metrics,
        llm=current_llm,               # judge LLM: the notebook's own model
        embeddings=current_embedding,  # embeddings: the notebook's own model
        # Throttle concurrency so a rate-limited judge is not flooded with
        # requests (429 responses lead to NaN metric values).
        run_config=RunConfig(max_workers=ragas_max_workers),
    )

    print("--- K·∫æT QU·∫¢ ƒê√ÅNH GI√Å RAGAS ---")
    print(result)

    # Render the per-sample results as a markdown table when possible.
    try:
        import pandas as pd
        df = result.to_pandas()
        print("\n--- B·∫£ng k·∫øt qu·∫£ chi ti·∫øt ---")
        print(df.to_markdown(index=False))
    except ImportError:
        print("\nC√†i 'pandas' v√† 'tabulate' ƒë·ªÉ xem b·∫£ng k·∫øt qu·∫£ ƒë·∫πp h∆°n.")

# --- Kick off the evaluation ---
print("Chu·∫©n b·ªã ch·∫°y ƒë√°nh gi√°...")


def run_evaluation_notebook():
    """Drive ``main_evaluation`` to completion on the current event loop.

    Inside a notebook the kernel's loop is usually already running, in which
    case ``run_until_complete`` raises ``RuntimeError`` unless the loop has
    been patched to allow re-entrancy. That situation is reported with a hint
    instead of a traceback; any other ``RuntimeError`` is re-raised unchanged.
    """
    coro = main_evaluation()
    try:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(coro)
    except RuntimeError as e:
        # The coroutine may never have been scheduled; closing it avoids a
        # "coroutine was never awaited" warning (no-op if it already finished).
        coro.close()
        message = str(e)
        # `asyncio.run` and `loop.run_until_complete` word the "loop is
        # already running" failure differently, so accept both.
        loop_is_busy = (
            "cannot be called from a running event loop" in message
            or "already running" in message
        )
        if not loop_is_busy:
            raise
        print("\nL·ªñI: V·∫´n g·∫∑p l·ªói asyncio.")
        print("H√£y th·ª≠ kh·ªüi ƒë·ªông l·∫°i kernel notebook v√† ch·∫°y l·∫°i cell n√†y.")


run_evaluation_notebook()
print("Ho√†n t·∫•t ƒë√°nh gi√°.")

Chu·∫©n b·ªã ch·∫°y ƒë√°nh gi√°...
B·∫Øt ƒë·∫ßu qu√° tr√¨nh ƒë√°nh gi√° RAG...
ƒêang l·∫•y chunk t·ª´ Vector DB ƒë·ªÉ t·∫°o b·ªô test...
ƒêang d√πng LLM ƒë·ªÉ t·∫°o 5 m·∫´u test...
ƒê√£ t·∫°o xong 5 m·∫´u test.
ƒêang ch·∫°y pipeline tr√™n b·ªô test set...
ƒêang x·ª≠ l√Ω m·∫´u 1/5: ƒê·ªãnh d·∫°ng c·∫ßn chu·∫©n b·ªã tr∆∞·ªõc cho ph·∫ßn vƒÉn b·∫£n l√† g...
ƒêang x·ª≠ l√Ω m·∫´u 2/5: M·ªôt v√≠ d·ª• v·ªÅ b√†i to√°n ph·ª©c t·∫°p m√† m·∫°ng Perceptron ...
ƒêang x·ª≠ l√Ω m·∫´u 3/5: M·ª•c ti√™u c·ªßa vi·ªác ph√°t tri·ªÉn m√¥ h√¨nh l√† g√¨?...
ƒêang x·ª≠ l√Ω m·∫´u 4/5: T·∫°i sao vi·ªác t·ª± hu·∫•n luy·ªán m√¥ h√¨nh CLIP l·∫°i kh√¥ng ...
ƒêang x·ª≠ l√Ω m·∫´u 5/5: M√¥ h√¨nh t·∫°o sinh gi√∫p √≠ch g√¨ trong vi·ªác hu·∫•n luy·ªán...
ƒê√£ thu th·∫≠p xong d·ªØ li·ªáu. Chu·∫©n b·ªã cho Ragas...
ƒê√£ ph√°t hi·ªán ground_truth_context, s·∫Ω ƒëo context_recall.
ƒêang ch·∫°y Ragas evaluate... (S·ª≠ d·ª•ng Gemini l√†m Judge, vi·ªác n√†y c√≥ th·ªÉ m·∫•t v√†i ph√∫t)


Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Retrying langchain_google_genai.chat_models._achat_with_retry.<locals>._achat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 10
Please retry in 51.496811783s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "g

--- K·∫æT QU·∫¢ ƒê√ÅNH GI√Å RAGAS ---
{'faithfulness': 0.8333, 'answer_relevancy': 0.9508, 'context_precision': nan, 'context_recall': 1.0000}

--- B·∫£ng k·∫øt qu·∫£ chi ti·∫øt ---
| user_input                                                                    | retrieved_contexts                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              