In [None]:
!pip install transformers tiktoken tqdm pandas numpy scikit-learn chardet



In [None]:
import os
import re
import json
import uuid
import pandas as pd
import numpy as np
from tqdm import tqdm
import tiktoken
import time
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
from google.colab import drive
import chardet

In [None]:
class SimpleManinRAG:
    """
    Simplified RAG system for Manim documentation that creates document embeddings
    and saves them to a CSV file.
    """

    def __init__(self,
                 docs_folder="manim_docs",
                 output_folder="manim_vectors",
                 embedding_model="sentence-transformers/all-MiniLM-L6-v2",
                 chunk_size=500,
                 chunk_overlap=100,
                 batch_size=16):
        """
        Initialize the RAG system.

        Args:
            docs_folder (str): Path to the folder containing Manim documentation
            output_folder (str): Folder to save the CSV vector file
            embedding_model (str): HuggingFace model to use for embeddings
            chunk_size (int): Size of text chunks in tokens for splitting documents
            chunk_overlap (int): Overlap between chunks in tokens
            batch_size (int): Batch size for embedding generation
        """
        self.docs_folder = docs_folder
        self.output_folder = output_folder
        self.embedding_model = embedding_model
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.batch_size = batch_size

        # Ensure output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # Initialize tokenizer for counting tokens
        self.enc = tiktoken.encoding_for_model("gpt-4")

        # Load embedding model
        print(f"Loading embedding model: {embedding_model}")
        self.tokenizer = AutoTokenizer.from_pretrained(embedding_model)
        self.model = AutoModel.from_pretrained(embedding_model)

        # Move model to GPU if available
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

        print(f"Model loaded on {self.device}")

    def _detect_encoding(self, file_path):
        """
        Detect the encoding of a file.

        Args:
            file_path (str): Path to the file

        Returns:
            str: Detected encoding
        """
        with open(file_path, 'rb') as f:
            result = chardet.detect(f.read())
        return result['encoding']

    def _load_file(self, file_path):
        """
        Load a file with proper encoding detection.

        Args:
            file_path (str): Path to the file

        Returns:
            str: File content
        """
        try:
            encoding = self._detect_encoding(file_path)
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            return ""

    def _split_text(self, text, file_path):
        """
        Split text into chunks based on token count with overlap.

        Args:
            text (str): Text to split
            file_path (str): Path to the source file for metadata

        Returns:
            list: List of document chunks with metadata
        """
        # Add file source to text
        text = f"Source: {file_path}\n\n{text}"

        # Tokenize the text
        tokens = self.enc.encode(text)
        chunks = []

        # Split the text into chunks based on token count
        for i in range(0, len(tokens), self.chunk_size - self.chunk_overlap):
            # Get token span for this chunk
            chunk_tokens = tokens[i:i + self.chunk_size]

            # Don't add tiny chunks at the end
            if len(chunk_tokens) < 50:  # Minimum 50 tokens per chunk
                continue

            # Decode tokens back to text
            chunk_text = self.enc.decode(chunk_tokens)

            chunks.append({
                "content": chunk_text,
                "source": file_path,
                "id": str(uuid.uuid4()),
                "token_count": len(chunk_tokens)
            })

        return chunks

    def _process_docs_folder(self):
        """
        Process all documents in the docs folder.

        Returns:
            list: List of all document chunks
        """
        all_chunks = []
        print(f"Processing documents in {self.docs_folder}")

        # Process both core and plugin docs
        for root, _, files in os.walk(self.docs_folder):
            for file in tqdm(files, desc="Processing files"):
                if file.endswith(('.md', '.py')):
                    file_path = os.path.join(root, file)

                    # Determine if this is core or plugin
                    if "plugin_docs" in root:
                        doc_type = os.path.basename(os.path.dirname(file_path))
                    else:
                        doc_type = "manim-core"

                    # Load and process the file
                    content = self._load_file(file_path)
                    if content:
                        chunks = self._split_text(content, file_path)
                        for chunk in chunks:
                            chunk["type"] = doc_type
                        all_chunks.extend(chunks)

        # Print stats
        token_lengths = [chunk["token_count"] for chunk in all_chunks]
        print(f"Total chunks: {len(all_chunks)}")
        print(f"Token length statistics: "
              f"Min: {min(token_lengths)}, Max: {max(token_lengths)}, "
              f"Mean: {sum(token_lengths) / len(token_lengths):.1f}")

        return all_chunks

    def _create_embeddings(self, chunks):
        """
        Create embeddings for all chunks.

        Args:
            chunks (list): List of document chunks

        Returns:
            list: List of chunks with embeddings
        """
        print("Generating embeddings")

        # Process in batches
        for i in tqdm(range(0, len(chunks), self.batch_size), desc="Embedding batches"):
            batch = chunks[i:i + self.batch_size]

            # Extract content for this batch
            texts = [chunk["content"] for chunk in batch]

            # Tokenize the texts
            encoded_input = self.tokenizer(
                texts,
                padding=True,
                truncation=True,
                max_length=512,
                return_tensors='pt'
            ).to(self.device)

            # Generate embeddings
            with torch.no_grad():
                model_output = self.model(**encoded_input)

            # Mean pooling
            attention_mask = encoded_input['attention_mask']
            token_embeddings = model_output.last_hidden_state

            # Calculate mean pooling
            input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
            sum_mask = input_mask_expanded.sum(1)
            sum_mask = torch.clamp(sum_mask, min=1e-9)

            # Calculate embeddings and normalize
            embeddings = (sum_embeddings / sum_mask).cpu().numpy()
            embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

            # Add embeddings to chunks
            for j, chunk in enumerate(batch):
                chunk["embedding"] = embeddings[j].tolist()

            # Sleep to avoid potential OOM issues
            if i % 50 == 0 and i > 0:
                time.sleep(1)

        return chunks

    def _chunks_to_dataframe(self, chunks):
        """
        Convert chunks to a DataFrame.

        Args:
            chunks (list): List of document chunks with embeddings

        Returns:
            pd.DataFrame: DataFrame with document chunks and embeddings
        """
        # Convert to DataFrame
        df = pd.DataFrame([{
            'id': chunk['id'],
            'content': chunk['content'],
            'source': chunk['source'],
            'type': chunk['type'],
            'token_count': chunk['token_count'],
            'embedding': json.dumps(chunk['embedding'])  # Store as JSON string
        } for chunk in chunks])

        return df

    def process_and_save(self):
        """
        Process documents and save embeddings to CSV.

        Returns:
            str: Path to the saved CSV file
        """
        # Process documents
        chunks = self._process_docs_folder()

        # Create embeddings
        chunks_with_embeddings = self._create_embeddings(chunks)

        # Convert to DataFrame
        df = self._chunks_to_dataframe(chunks_with_embeddings)

        # Save as CSV
        csv_path = os.path.join(self.output_folder, "manim_vectors_v2.csv")
        df.to_csv(csv_path, index=False)
        print(f"Saved vector store to {csv_path}")

        # Also save a JSON version
        json_path = os.path.join(self.output_folder, "manim_vectors_v2.json")
        df.to_json(json_path, orient="records")
        print(f"Saved JSON vector store to {json_path}")

        return csv_path

    def query_vectors(self, query, k=5):
        """
        Query the vector store.

        Args:
            query (str): Query string
            k (int): Number of results to return

        Returns:
            list: List of similar documents
        """
        # Load vectors
        csv_path = os.path.join(self.output_folder, "manim_vectors_v2.csv")
        df = pd.read_csv(csv_path)

        # Convert embedding strings back to arrays
        df['embedding_array'] = df['embedding'].apply(lambda x: np.array(json.loads(x)))

        # Generate query embedding
        encoded_input = self.tokenizer(
            query,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors='pt'
        ).to(self.device)

        with torch.no_grad():
            model_output = self.model(**encoded_input)

        # Mean pooling
        attention_mask = encoded_input['attention_mask']
        token_embeddings = model_output.last_hidden_state

        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)

        query_embedding = (sum_embeddings / sum_mask).cpu().numpy()
        query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)

        # Calculate similarity scores
        similarities = []
        for idx, row in df.iterrows():
            similarity = cosine_similarity(
                query_embedding,
                row['embedding_array'].reshape(1, -1)
            )[0][0]
            similarities.append((idx, similarity))

        # Sort by similarity
        similarities.sort(key=lambda x: x[1], reverse=True)

        # Get top k results
        results = []
        for idx, score in similarities[:k]:
            row = df.iloc[idx]
            results.append({
                'content': row['content'],
                'source': row['source'],
                'type': row['type'],
                'score': float(score)
            })

        return results

In [None]:

drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
rag = SimpleManinRAG(
    docs_folder="/content/drive/MyDrive/final-project/manim_docs",
    output_folder="/content/drive/MyDrive/manim_vectors",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    chunk_size=500,
    chunk_overlap=100
)

Loading embedding model: sentence-transformers/all-MiniLM-L6-v2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Model loaded on cuda


In [None]:
# Process and save
vectors_path = rag.process_and_save()
print(f"Vector store saved to: {vectors_path}")

Processing documents in /content/drive/MyDrive/final-project/manim_docs


Processing files: 0it [00:00, ?it/s]
Processing files: 100%|██████████| 1/1 [00:00<00:00, 10407.70it/s]
Processing files: 100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]
Processing files: 100%|██████████| 2/2 [00:01<00:00,  1.78it/s]
Processing files: 100%|██████████| 4/4 [00:02<00:00,  1.65it/s]
Processing files: 100%|██████████| 381/381 [00:12<00:00, 31.17it/s]
Processing files: 100%|██████████| 4/4 [00:02<00:00,  1.87it/s]
Processing files: 100%|██████████| 2/2 [00:00<00:00, 13729.31it/s]
Processing files: 100%|██████████| 5/5 [00:03<00:00,  1.40it/s]
Processing files: 100%|██████████| 15/15 [00:13<00:00,  1.09it/s]
Processing files: 100%|██████████| 3/3 [00:02<00:00,  1.21it/s]
Processing files: 100%|██████████| 3/3 [00:02<00:00,  1.32it/s]
Processing files: 100%|██████████| 5/5 [00:03<00:00,  1.32it/s]
Processing files: 100%|██████████| 26/26 [00:18<00:00,  1.37it/s]
Processing files: 100%|██████████| 3/3 [00:01<00:00,  1.94it/s]
Processing files: 100%|██████████| 7/7 [00:05<00:0

Total chunks: 3763
Token length statistics: Min: 50, Max: 500, Mean: 449.0
Generating embeddings


Embedding batches: 100%|██████████| 236/236 [00:31<00:00,  7.39it/s]


Saved vector store to /content/drive/MyDrive/manim_vectors/manim_vectors_v2.csv
Saved JSON vector store to /content/drive/MyDrive/manim_vectors/manim_vectors_v2.json
Vector store saved to: /content/drive/MyDrive/manim_vectors/manim_vectors_v2.csv


In [None]:
# Test querying
query = "how can I create and position text elements in a scene?"
results = rag.query_vectors(query, k=5)
print("\nTest query results:")
for i, result in enumerate(results):
    print(f"query: {query}")
    print(f"\nResult {i+1} (Score: {result['score']:.4f}):")
    print(f"Source: {result['source']}")
    print(f"Type: {result['type']}")
    print(f"Content snippet: {result['content']}...")


Test query results:
query: how can I create and position text elements in a scene?

Result 1 (Score: 0.5058):
Source: /content/drive/MyDrive/final-project/manim_docs/manim_docs/manim_core/source/mobject/text/text_mobject.py
Type: manim-core
Content snippet: Source: /content/drive/MyDrive/final-project/manim_docs/manim_docs/manim_core/source/mobject/text/text_mobject.py

"""Mobjects used for displaying (non-LaTeX) text.

.. note::
   Just as you can use :class:`~.Tex` and :class:`~.MathTex` (from the module :mod:`~.tex_mobject`)
   to insert LaTeX to your videos, you can use :class:`~.Text` to to add normal text.

.. important::

   See the corresponding tutorial :ref:`using-text-objects`, especially for information about fonts.


The simplest way to add text to your animations is to use the :class:`~.Text` class. It uses the Pango library to render text.
With Pango, you are also able to render non-English alphabets like `你好` or  `こんにちは` or `안녕하세요` or `مرحبا بالعالم`.

Examples
-------

In [None]:
test_cases = [
            {
                "query": "How to create a circle in Manim?",
                "expected_sources": ["circle", "shapes", "geometry"],
                "expected_content_keywords": ["Circle", "add", "play", "radius"],
                "category": "basic_shapes"
            },
            {
                "query": "What is Scene class in Manim?",
                "expected_sources": ["scene", "basics", "core"],
                "expected_content_keywords": ["Scene", "construct", "self", "class"],
                "category": "core_concepts"
            },
            {
                "query": "How to animate objects moving?",
                "expected_sources": ["animation", "transform", "movement"],
                "expected_content_keywords": ["animate", "move", "shift", "Transform"],
                "category": "animation"
            },
            {
                "query": "Create text and mathematical expressions",
                "expected_sources": ["text", "tex", "math", "latex"],
                "expected_content_keywords": ["Text", "Tex", "MathTex", "formula"],
                "category": "text_math"
            },
            {
                "query": "How to change colors of objects?",
                "expected_sources": ["color", "style", "appearance"],
                "expected_content_keywords": ["color", "set_color", "fill", "stroke"],
                "category": "styling"
            },
            {
                "query": "Camera controls and positioning",
                "expected_sources": ["camera", "frame", "view"],
                "expected_content_keywords": ["camera", "frame", "zoom", "position"],
                "category": "camera"
            },
            {
                "query": "Group objects together",
                "expected_sources": ["group", "vgroup", "collection"],
                "expected_content_keywords": ["Group", "VGroup", "add", "arrange"],
                "category": "grouping"
            },
            {
                "query": "3D objects and scenes",
                "expected_sources": ["3d", "three", "dimensional"],
                "expected_content_keywords": ["ThreeDScene", "3D", "axes", "surface"],
                "category": "3d"
            },
            {
                "query": "Plot graphs and functions",
                "expected_sources": ["plot", "graph", "axes", "function"],
                "expected_content_keywords": ["plot", "Axes", "graph", "function"],
                "category": "plotting"
            },
            {
                "query": "Configuration and settings",
                "expected_sources": ["config", "settings", "quality"],
                "expected_content_keywords": ["config", "quality", "resolution", "fps"],
                "category": "configuration"
            }
        ]


In [None]:
"""
Comprehensive Retrieval Evaluation với 100 test cases và visualization
"""

from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def evaluate_retrieval(rag_instance, test_cases, k=5):
    """
    Evaluates the retrieval performance based on a list of test cases.

    Args:
        rag_instance (SimpleManinRAG): An instance of the SimpleManinRAG class.
        test_cases (list): A list of dictionaries, where each dictionary
                          contains a "query" and potentially "expected_sources"
                          or "expected_content_keywords".
        k (int): The number of top results to consider for evaluation.

    Returns:
        dict: A dictionary containing evaluation metrics.
    """
    total_test_cases = len(test_cases)
    correct_retrievals = 0
    precision_scores = []
    recall_scores = []
    mrr_scores = []

    print(f"\n--- Evaluating Retrieval (considering top {k} results) ---")

    for i, test_case in enumerate(tqdm(test_cases, desc="Evaluating test cases")):
        query = test_case["query"]
        expected_sources = [src.lower() for src in test_case.get("expected_sources", [])]
        expected_keywords = [kw.lower() for kw in test_case.get("expected_content_keywords", [])]

        # Perform the retrieval
        retrieved_results = rag_instance.query_vectors(query, k=k)

        if not retrieved_results:
            precision_scores.append(0)
            recall_scores.append(0)
            mrr_scores.append(0)
            continue

        # Count relevant documents
        relevant_docs = 0
        first_relevant_rank = None

        for idx, result in enumerate(retrieved_results):
            source_match = any(es in result['source'].lower() for es in expected_sources)
            content_match = any(ek in result['content'].lower() for ek in expected_keywords)

            if source_match or content_match:
                relevant_docs += 1
                if first_relevant_rank is None:
                    first_relevant_rank = idx + 1

        precision = relevant_docs / k if k > 0 else 0
        total_expected = len(expected_sources) + len(expected_keywords)
        recall = relevant_docs / len(expected_sources) if expected_sources else (1 if relevant_docs > 0 else 0)
        mrr = 1.0 / first_relevant_rank if first_relevant_rank else 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        mrr_scores.append(mrr)

        if relevant_docs > 0:
            correct_retrievals += 1

    accuracy = (correct_retrievals / total_test_cases) if total_test_cases > 0 else 0
    avg_precision = np.mean(precision_scores) if precision_scores else 0
    avg_recall = np.mean(recall_scores) if recall_scores else 0
    avg_mrr = np.mean(mrr_scores) if mrr_scores else 0
    f1_score = 2 * (avg_precision * avg_recall) / (avg_precision + avg_recall) if (avg_precision + avg_recall) > 0 else 0

    return {
        "accuracy": accuracy,
        "hit_rate": accuracy,
        "precision_at_k": avg_precision,
        "recall_at_k": avg_recall,
        "mrr": avg_mrr,
        "f1_score": f1_score,
        "correct_retrievals": correct_retrievals,
        "total_test_cases": total_test_cases,
        "individual_precision": precision_scores,
        "individual_recall": recall_scores,
        "individual_mrr": mrr_scores
    }

def create_100_test_cases():
    """
    Tạo 100 test cases cho comprehensive evaluation của Manim RAG system.
    """
    test_cases = [
        # Category: basic_shapes (10)
        {"query": "How to create a circle?", "expected_sources": ["circle", "shapes"], "expected_content_keywords": ["Circle", "radius"], "category": "basic_shapes"},
        {"query": "Draw a square on the screen", "expected_sources": ["square", "shapes"], "expected_content_keywords": ["Square", "side_length"], "category": "basic_shapes"},
        {"query": "How to make a line segment?", "expected_sources": ["line", "geometry"], "expected_content_keywords": ["Line", "start", "end"], "category": "basic_shapes"},
        {"query": "Create a regular polygon with 5 sides", "expected_sources": ["polygon", "shapes"], "expected_content_keywords": ["Polygon", "n=5", "regularpolygon"], "category": "basic_shapes"},
        {"query": "What's the class for a dot?", "expected_sources": ["dot", "geometry"], "expected_content_keywords": ["Dot", "point"], "category": "basic_shapes"},
        {"query": "How to draw an arc?", "expected_sources": ["arc", "geometry"], "expected_content_keywords": ["Arc", "angle", "radius"], "category": "basic_shapes"},
        {"query": "Show an ellipse", "expected_sources": ["ellipse", "shapes"], "expected_content_keywords": ["Ellipse", "width", "height"], "category": "basic_shapes"},
        {"query": "Create a rectangle shape", "expected_sources": ["rectangle", "shapes"], "expected_content_keywords": ["Rectangle", "width", "height"], "category": "basic_shapes"},
        {"query": "How to use the Arrow mobject?", "expected_sources": ["arrow", "geometry"], "expected_content_keywords": ["Arrow", "tip", "buff"], "category": "basic_shapes"},
        {"query": "Make a curved arrow", "expected_sources": ["arrow", "arc"], "expected_content_keywords": ["CurvedArrow", "angle"], "category": "basic_shapes"},

        # Category: text_math (10)
        {"query": "Display text on screen", "expected_sources": ["text_mobject", "text"], "expected_content_keywords": ["Text", "font"], "category": "text_math"},
        {"query": "Render a LaTeX formula", "expected_sources": ["tex_mobject", "text"], "expected_content_keywords": ["Tex", "MathTex", "latex"], "category": "text_math"},
        {"query": "How to change font for Text?", "expected_sources": ["text_mobject", "font"], "expected_content_keywords": ["Text", "font="], "category": "text_math"},
        {"query": "Create a paragraph of text", "expected_sources": ["text_mobject", "paragraph"], "expected_content_keywords": ["Paragraph", "alignment"], "category": "text_math"},
        {"query": "Color parts of a MathTex object", "expected_sources": ["tex_mobject", "text"], "expected_content_keywords": ["set_color_by_tex", "t2c"], "category": "text_math"},
        {"query": "Using MarkupText for rich text formatting", "expected_sources": ["markup_text"], "expected_content_keywords": ["MarkupText", "<b>", "<i>"], "category": "text_math"},
        {"query": "How to align multiple Text objects?", "expected_sources": ["vgroup", "align"], "expected_content_keywords": ["arrange", "aligned_edge"], "category": "text_math"},
        {"query": "Display a matrix with MathTex", "expected_sources": ["matrix", "tex"], "expected_content_keywords": ["matrix", "amsmath"], "category": "text_math"},
        {"query": "What is the difference between Tex and MathTex?", "expected_sources": ["tex_mobject"], "expected_content_keywords": ["Tex", "MathTex", "inline"], "category": "text_math"},
        {"query": "Put a brace under a formula", "expected_sources": ["brace", "geometry"], "expected_content_keywords": ["Brace", "get_brace"], "category": "text_math"},

        # Category: animation (15)
        {"query": "How to move an object?", "expected_sources": ["animation", "transform"], "expected_content_keywords": ["animate", ".shift", "move_to"], "category": "animation"},
        {"query": "Fade in an object", "expected_sources": ["animation", "fade"], "expected_content_keywords": ["FadeIn"], "category": "animation"},
        {"query": "Animate the creation of a shape", "expected_sources": ["animation", "creation"], "expected_content_keywords": ["Create", "ShowCreation", "DrawBorderThenFill"], "category": "animation"},
        {"query": "How to rotate a mobject?", "expected_sources": ["animation", "rotation"], "expected_content_keywords": ["Rotate", "angle"], "category": "animation"},
        {"query": "What is Transform animation?", "expected_sources": ["animation", "transform"], "expected_content_keywords": ["Transform"], "category": "animation"},
        {"query": "Make an object disappear", "expected_sources": ["animation", "fade"], "expected_content_keywords": ["FadeOut"], "category": "animation"},
        {"query": "Animate text being written", "expected_sources": ["animation", "creation"], "expected_content_keywords": ["Write", "AddTextLetterByLetter"], "category": "animation"},
        {"query": "How to change animation speed?", "expected_sources": ["animation", "scene"], "expected_content_keywords": ["run_time"], "category": "animation"},
        {"query": "What are rate functions?", "expected_sources": ["animation", "rate_functions"], "expected_content_keywords": ["rate_func", "smooth", "linear"], "category": "animation"},
        {"query": "Group animations together", "expected_sources": ["animation_group"], "expected_content_keywords": ["AnimationGroup", "Succession"], "category": "animation"},
        {"query": "Transform one shape into another", "expected_sources": ["transform"], "expected_content_keywords": ["Transform", "ReplacementTransform"], "category": "animation"},
        {"query": "Indicate a point with a flashing circle", "expected_sources": ["indication"], "expected_content_keywords": ["FocusOn", "Indicate", "Flash"], "category": "animation"},
        {"query": "How does LaggedStart work?", "expected_sources": ["animation_group"], "expected_content_keywords": ["LaggedStart", "lag_ratio"], "category": "animation"},
        {"query": "Animate an object along a path", "expected_sources": ["animation", "movement"], "expected_content_keywords": ["MoveAlongPath"], "category": "animation"},
        {"query": "Uncreate an object", "expected_sources": ["animation", "creation"], "expected_content_keywords": ["Uncreate"], "category": "animation"},

        # Category: styling (10)
        {"query": "How to change color of an object?", "expected_sources": ["color", "style"], "expected_content_keywords": ["set_color", "color="], "category": "styling"},
        {"query": "Fill a shape with color", "expected_sources": ["color", "fill"], "expected_content_keywords": ["set_fill", "fill_color", "fill_opacity"], "category": "styling"},
        {"query": "Change the stroke width of a line", "expected_sources": ["stroke"], "expected_content_keywords": ["set_stroke", "stroke_width"], "category": "styling"},
        {"query": "How to make an object transparent?", "expected_sources": ["opacity", "style"], "expected_content_keywords": ["set_opacity", "opacity"], "category": "styling"},
        {"query": "Apply a gradient color", "expected_sources": ["color", "gradient"], "expected_content_keywords": ["set_color_by_gradient", "color_gradient"], "category": "styling"},
        {"query": "What colors are available?", "expected_sources": ["color", "constants"], "expected_content_keywords": ["manim.utils.color", "BLUE", "RED"], "category": "styling"},
        {"query": "Change background color of scene", "expected_sources": ["scene", "config"], "expected_content_keywords": ["background_color"], "category": "styling"},
        {"query": "Style the tip of an arrow", "expected_sources": ["arrow", "tip"], "expected_content_keywords": ["ArrowTip", "tip_shape"], "category": "styling"},
        {"query": "How to remove the stroke of a shape?", "expected_sources": ["stroke"], "expected_content_keywords": ["stroke_width=0"], "category": "styling"},
        {"query": "Animate a color change", "expected_sources": ["animation", "color"], "expected_content_keywords": ["animate.set_color", "Wiggle"], "category": "styling"},

        # Category: grouping (10)
        {"query": "Group objects together", "expected_sources": ["group", "vgroup"], "expected_content_keywords": ["Group", "VGroup"], "category": "grouping"},
        {"query": "Arrange mobjects in a line", "expected_sources": ["vgroup", "arrange"], "expected_content_keywords": ["arrange", "direction"], "category": "grouping"},
        {"query": "What is the difference between Group and VGroup?", "expected_sources": ["group", "vgroup"], "expected_content_keywords": ["Group", "VGroup", "Mobject"], "category": "grouping"},
        {"query": "Position an object next to another", "expected_sources": ["position", "vgroup"], "expected_content_keywords": ["next_to", "direction"], "category": "grouping"},
        {"query": "Select a submobject from a group", "expected_sources": ["group", "mobject"], "expected_content_keywords": ["submobjects", "group[0]"], "category": "grouping"},
        {"query": "How to add an object to a VGroup?", "expected_sources": ["vgroup"], "expected_content_keywords": ["VGroup.add"], "category": "grouping"},
        {"query": "Animate a group of objects at once", "expected_sources": ["group", "animation"], "expected_content_keywords": ["play(Animation(group))"], "category": "grouping"},
        {"query": "Create a grid of objects", "expected_sources": ["arrange_in_grid"], "expected_content_keywords": ["arrange_in_grid", "rows", "cols"], "category": "grouping"},
        {"query": "Distribute objects evenly", "expected_sources": ["arrange"], "expected_content_keywords": ["arrange", "buff"], "category": "grouping"},
        {"query": "How to ungroup objects?", "expected_sources": ["group"], "expected_content_keywords": ["group", "submobjects"], "category": "grouping"},

        # Category: 3d (10)
        {"query": "How to create a 3D scene?", "expected_sources": ["three_d_scene"], "expected_content_keywords": ["ThreeDScene"], "category": "3d"},
        {"query": "Make a sphere in 3D", "expected_sources": ["sphere", "three_d"], "expected_content_keywords": ["Sphere", "radius"], "category": "3d"},
        {"query": "Show a 3D cube", "expected_sources": ["cube", "three_d"], "expected_content_keywords": ["Cube", "side_length"], "category": "3d"},
        {"query": "Plot a 3D surface", "expected_sources": ["surface", "three_d"], "expected_content_keywords": ["Surface", "ParametricSurface"], "category": "3d"},
        {"query": "How to set up the camera in a 3D scene?", "expected_sources": ["camera", "three_d"], "expected_content_keywords": ["set_camera_orientation", "phi", "theta"], "category": "3d"},
        {"query": "Add lighting to a 3D scene", "expected_sources": ["light", "three_d"], "expected_content_keywords": ["add_light_source", "LightSource"], "category": "3d"},
        {"query": "What are 3D axes?", "expected_sources": ["axes", "three_d"], "expected_content_keywords": ["ThreeDAxes"], "category": "3d"},
        {"query": "Move camera in 3D animation", "expected_sources": ["camera", "animation"], "expected_content_keywords": ["move_camera", "frame.animate"], "category": "3d"},
        {"query": "Create a 3D vector", "expected_sources": ["vector", "three_d"], "expected_content_keywords": ["Arrow3D"], "category": "3d"},
        {"query": "What is a Voxel?", "expected_sources": ["voxel"], "expected_content_keywords": ["Voxel"], "category": "3d"},

        # Category: plotting (10)
        {"query": "Plot a function y = x^2", "expected_sources": ["plot", "axes"], "expected_content_keywords": ["plot", "Axes", "get_graph"], "category": "plotting"},
        {"query": "How to create a number line?", "expected_sources": ["number_line"], "expected_content_keywords": ["NumberLine", "x_range"], "category": "plotting"},
        {"query": "Draw a graph with nodes and edges", "expected_sources": ["graph"], "expected_content_keywords": ["Graph", "vertices", "edges"], "category": "plotting"},
        {"query": "Customize axes labels", "expected_sources": ["axes"], "expected_content_keywords": ["x_axis_config", "y_axis_config", "label_direction"], "category": "plotting"},
        {"query": "Plot a parametric function", "expected_sources": ["plot", "parametric"], "expected_content_keywords": ["ParametricFunction"], "category": "plotting"},
        {"query": "Get coordinates from a point on a graph", "expected_sources": ["axes", "coordinate"], "expected_content_keywords": ["input_to_graph_point", "point_to_coords"], "category": "plotting"},
        {"query": "Create a bar chart", "expected_sources": ["bar_chart", "graph"], "expected_content_keywords": ["BarChart", "values"], "category": "plotting"},
        {"query": "What is a NumberPlane?", "expected_sources": ["number_plane"], "expected_content_keywords": ["NumberPlane"], "category": "plotting"},
        {"query": "Add labels to graph points", "expected_sources": ["label", "axes"], "expected_content_keywords": ["get_T_label", "Dot"], "category": "plotting"},
        {"query": "Plot implicit function", "expected_sources": ["plot", "function"], "expected_content_keywords": ["ImplicitFunction"], "category": "plotting"},

        # Category: camera_and_scene (10)
        {"query": "How to control the camera?", "expected_sources": ["camera", "scene"], "expected_content_keywords": ["Camera", "frame"], "category": "camera_and_scene"},
        {"query": "Zoom in on an object", "expected_sources": ["camera", "animation"], "expected_content_keywords": ["frame.animate.scale", "frame.animate.move_to"], "category": "camera_and_scene"},
        {"query": "What is a Scene?", "expected_sources": ["scene"], "expected_content_keywords": ["Scene", "construct"], "category": "camera_and_scene"},
        {"query": "How to wait in an animation?", "expected_sources": ["scene"], "expected_content_keywords": ["self.wait"], "category": "camera_and_scene"},
        {"query": "Save the last frame as an image", "expected_sources": ["config", "scene"], "expected_content_keywords": ["save_last_frame", "-s"], "category": "camera_and_scene"},
        {"query": "What is the difference between Scene and ThreeDScene?", "expected_sources": ["scene", "three_d"], "expected_content_keywords": ["Scene", "ThreeDScene"], "category": "camera_and_scene"},
        {"query": "How to add and remove mobjects from a scene?", "expected_sources": ["scene"], "expected_content_keywords": ["self.add", "self.remove"], "category": "camera_and_scene"},
        {"query": "How does the MovingCameraScene work?", "expected_sources": ["moving_camera_scene"], "expected_content_keywords": ["MovingCameraScene"], "category": "camera_and_scene"},
        {"query": "Pan the camera across the scene", "expected_sources": ["camera", "animation"], "expected_content_keywords": ["frame.animate.shift"], "category": "camera_and_scene"},
        {"query": "Clear the screen during a scene", "expected_sources": ["scene"], "expected_content_keywords": ["self.clear"], "category": "camera_and_scene"},

        # Category: updaters_and_interaction (10)
        {"query": "How to make an object follow another?", "expected_sources": ["updater"], "expected_content_keywords": ["add_updater", "always_redraw"], "category": "updaters_and_interaction"},
        {"query": "What is a ValueTracker?", "expected_sources": ["value_tracker"], "expected_content_keywords": ["ValueTracker", "get_value"], "category": "updaters_and_interaction"},
        {"query": "Animate the change of a ValueTracker", "expected_sources": ["value_tracker", "animation"], "expected_content_keywords": ["tracker.animate.set_value"], "category": "updaters_and_interaction"},
        {"query": "Make a label that always shows the value of a number", "expected_sources": ["updater", "decimal_number"], "expected_content_keywords": ["always_redraw", "DecimalNumber"], "category": "updaters_and_interaction"},
        {"query": "How to remove an updater?", "expected_sources": ["updater"], "expected_content_keywords": ["remove_updater", "clear_updaters"], "category": "updaters_and_interaction"},
        {"query": "Dynamically update a graph based on a variable", "expected_sources": ["updater", "plot"], "expected_content_keywords": ["add_updater", "get_graph"], "category": "updaters_and_interaction"},
        {"query": "What does always_redraw do?", "expected_sources": ["updater"], "expected_content_keywords": ["always_redraw"], "category": "updaters_and_interaction"},
        {"query": "Create a tangent line that follows a curve", "expected_sources": ["updater", "tangent"], "expected_content_keywords": ["add_updater", "tangent_line"], "category": "updaters_and_interaction"},
        {"query": "Update mobject position based on time", "expected_sources": ["updater"], "expected_content_keywords": ["add_updater", "dt"], "category": "updaters_and_interaction"},
        {"query": "What is the difference between add_updater and always_redraw?", "expected_sources": ["updater"], "expected_content_keywords": ["add_updater", "always_redraw"], "category": "updaters_and_interaction"},

        # Category: config_and_customization (5)
        {"query": "How to change render quality?", "expected_sources": ["config"], "expected_content_keywords": ["quality", "pixel_width", "pixel_height"], "category": "config_and_customization"},
        {"query": "Set frame rate for video output", "expected_sources": ["config"], "expected_content_keywords": ["frame_rate", "fps"], "category": "config_and_customization"},
        {"query": "How to use a custom config file?", "expected_sources": ["config"], "expected_content_keywords": ["manim.cfg", "custom_config"], "category": "config_and_customization"},
        {"query": "How to disable caching?", "expected_sources": ["config", "cache"], "expected_content_keywords": ["--disable_caching"], "category": "config_and_customization"},
        {"query": "Change the directory for media output", "expected_sources": ["config"], "expected_content_keywords": ["media_dir"], "category": "config_and_customization"},

        # Category: advanced_and_plugins (5)
        {"query": "How to create my own plugin?", "expected_sources": ["plugin"], "expected_content_keywords": ["plugin", "ManimPango", "entry_points"], "category": "advanced_and_plugins"},
        {"query": "How to work with SVG images?", "expected_sources": ["svg"], "expected_content_keywords": ["SVGMobject"], "category": "advanced_and_plugins"},
        {"query": "What is VMobject?", "expected_sources": ["vmobject"], "expected_content_keywords": ["VMobject", "vectorized mobject"], "category": "advanced_and_plugins"},
        {"query": "How to add sound to a video?", "expected_sources": ["sound"], "expected_content_keywords": ["add_sound"], "category": "advanced_and_plugins"},
        {"query": "Using Manim with Jupyter notebooks", "expected_sources": ["jupyter"], "expected_content_keywords": ["%%manim", "jupyter"], "category": "advanced_and_plugins"},
    ]
    return test_cases

def run_evaluation_analysis(evaluation_results, test_cases):
    """
    Phân tích chi tiết kết quả evaluation.
    """
    print("\n--- Detailed Analysis ---")
    precision_scores = evaluation_results["individual_precision"]
    recall_scores = evaluation_results["individual_recall"]
    mrr_scores = evaluation_results["individual_mrr"]

    print(f"Precision@k distribution: Min: {min(precision_scores):.3f}, Max: {max(precision_scores):.3f}, Std: {np.std(precision_scores):.3f}")
    print(f"Recall@k distribution: Min: {min(recall_scores):.3f}, Max: {max(recall_scores):.3f}, Std: {np.std(recall_scores):.3f}")
    print(f"MRR distribution: Min: {min(mrr_scores):.3f}, Max: {max(mrr_scores):.3f}, Std: {np.std(mrr_scores):.3f}")

    zero_precision = sum(1 for score in precision_scores if score == 0)
    zero_recall = sum(1 for score in recall_scores if score == 0)
    zero_mrr = sum(1 for score in mrr_scores if score == 0)

    print(f"\nQueries with zero scores:")
    print(f"  Zero Precision: {zero_precision}/{len(precision_scores)}")
    print(f"  Zero Recall: {zero_recall}/{len(recall_scores)}")
    print(f"  Zero MRR: {zero_mrr}/{len(mrr_scores)}")

def plot_evaluation_results(evaluation_results, test_cases):
    """
    Trực quan hóa kết quả đánh giá bằng biểu đồ.
    """
    print("\n--- Visualizing Evaluation Results ---")

    # 1. Biểu đồ tổng quan
    overall_metrics = {
        'Accuracy': evaluation_results['accuracy'],
        'Precision@k': evaluation_results['precision_at_k'],
        'Recall@k': evaluation_results['recall_at_k'],
        'MRR': evaluation_results['mrr'],
        'F1 Score': evaluation_results['f1_score']
    }

    fig, ax = plt.subplots(figsize=(12, 6))
    bars = ax.bar(overall_metrics.keys(), overall_metrics.values(), 
                  color=['#4c72b0', '#55a868', '#c44e52', '#8172b2', '#ccb974'])
    ax.set_ylim(0, max(overall_metrics.values()) * 1.2)
    ax.set_ylabel('Score')
    ax.set_title('Overall RAG Retrieval Performance')
    ax.bar_label(bars, fmt='%.3f', padding=3)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    # 2. Biểu đồ theo từng danh mục
    df_results = pd.DataFrame({
        'category': [tc.get('category', 'uncategorized') for tc in test_cases],
        'precision': evaluation_results['individual_precision'],
        'recall': evaluation_results['individual_recall'],
        'mrr': evaluation_results['individual_mrr']
    })

    category_metrics = df_results.groupby('category').mean()[['precision', 'recall', 'mrr']]
    
    category_metrics.plot(kind='bar', figsize=(16, 8), width=0.8)
    plt.title('Average Metrics per Category')
    plt.ylabel('Average Score')
    plt.xlabel('Category')
    plt.xticks(rotation=45, ha='right')
    plt.legend(title='Metric')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

    # 3. Phân phối điểm MRR
    plt.figure(figsize=(12, 6))
    plt.hist(evaluation_results['individual_mrr'], bins=np.arange(0, 1.1, 0.1), 
             edgecolor='black', alpha=0.7, color='skyblue')
    plt.title('Distribution of Mean Reciprocal Rank (MRR) Scores')
    plt.xlabel('MRR Score')
    plt.ylabel('Number of Queries')
    plt.xticks(np.arange(0, 1.1, 0.1))
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

    # 4. Heatmap hiệu suất theo category
    category_counts = df_results.groupby('category').size()
    heatmap_data = category_metrics.copy()
    heatmap_data['count'] = category_counts

    plt.figure(figsize=(10, 8))
    sns.heatmap(category_metrics.T, annot=True, fmt='.3f', cmap='RdYlBu_r', 
                cbar_kws={'label': 'Score'})
    plt.title('Performance Heatmap by Category')
    plt.ylabel('Metrics')
    plt.xlabel('Categories')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

# ========================================
# MAIN EXECUTION CODE FOR COLAB
# ========================================

# Tạo 100 test cases
test_cases_100 = create_100_test_cases()

# Chạy evaluation (giả sử instance 'rag' đã được tạo ở cell trước)
print(f"Starting evaluation for {len(test_cases_100)} test cases...")
evaluation_results = evaluate_retrieval(rag, test_cases_100, k=5)

print("\n--- Evaluation Results Summary ---")
print(f"Total test cases: {evaluation_results['total_test_cases']}")
print(f"Correct retrievals (Hit Rate): {evaluation_results['accuracy']:.4f}")
print(f"Average Precision@5: {evaluation_results['precision_at_k']:.4f}")
print(f"Average Recall@5: {evaluation_results['recall_at_k']:.4f}")
print(f"Average MRR: {evaluation_results['mrr']:.4f}")
print(f"F1 Score: {evaluation_results['f1_score']:.4f}")

# Phân tích chi tiết và vẽ biểu đồ
run_evaluation_analysis(evaluation_results, test_cases_100)
plot_evaluation_results(evaluation_results, test_cases_100)

# In thông tin về từng test case (chỉ 10 đầu để không quá dài)
print("\n--- Sample Individual Query Results (first 10) ---")
for i, test_case in enumerate(test_cases_100[:10]):
    print(f"\nQuery {i+1} ({test_case.get('category', 'N/A')}): {test_case['query']}")
    print(f"  Precision: {evaluation_results['individual_precision'][i]:.3f}")
    print(f"  Recall: {evaluation_results['individual_recall'][i]:.3f}")
    print(f"  MRR: {evaluation_results['individual_mrr'][i]:.3f}")

# Lưu kết quả
import json
with open('evaluation_results_100.json', 'w') as f:
    results_to_save = {k: v for k, v in evaluation_results.items()
                      if isinstance(v, (int, float, str))}
    json.dump(results_to_save, f, indent=2)

print(f"\nEvaluation completed! Overall accuracy: {evaluation_results['accuracy']:.4f}")
print("Detailed analysis and plots are shown above.")

Starting evaluation...

--- Evaluating Retrieval (considering top 5 results) ---


Evaluating test cases: 100%|██████████| 10/10 [00:48<00:00,  4.84s/it]


--- Evaluation Results ---
Total test cases: 10
Correct retrievals (within top 5): 10
Accuracy (Hit Rate): 1.0000
Average Precision@5: 0.9400
Average Recall@5: 1.4500
Average MRR: 0.9500
F1 Score: 1.1406

--- Detailed Analysis ---
Precision@k distribution:
  Min: 0.600
  Max: 1.000
  Std: 0.128
Recall@k distribution:
  Min: 1.000
  Max: 1.667
  Std: 0.277
MRR distribution:
  Min: 0.500
  Max: 1.000
  Std: 0.150

Queries with zero scores:
  Zero Precision: 0/10
  Zero Recall: 0/10
  Zero MRR: 0/10

--- Individual Query Results ---

Query 1: How to create a circle in Manim?
  Precision: 1.000
  Recall: 1.667
  MRR: 1.000

Query 2: What is Scene class in Manim?
  Precision: 1.000
  Recall: 1.667
  MRR: 1.000

Query 3: How to animate objects moving?
  Precision: 1.000
  Recall: 1.667
  MRR: 1.000

Query 4: Create text and mathematical expressions
  Precision: 1.000
  Recall: 1.250
  MRR: 1.000

Query 5: How to change colors of objects?
  Precision: 1.000
  Recall: 1.667
  MRR: 1.000

Quer


