In [3]:
from dotenv import load_dotenv
from google import genai

# This cell comes before the cell that calls load_dotenv(), so on a fresh
# "Restart Kernel -> Run All" the client would be built before the .env file
# has been read. Load it here first; load_dotenv() is safe to call again later.
load_dotenv()

# Shared Gemini client used by query() and by the direct generate_content call
# further down. It is created without arguments, so credentials presumably come
# from the environment (NOTE(review): confirm which variable the .env defines).
client = genai.Client()


In [2]:
from semantic_router.splitters import RollingWindowSplitter
from semantic_router.utils.logger import logger
from semantic_router.encoders import HuggingFaceEncoder
import numpy as np
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
import os

from unstructured.partition.md import partition_md

# Read a local .env file into the process environment. The PINECONE_API_KEY
# looked up with os.getenv() further down is presumably defined there.
load_dotenv()

def process_mdx_advanced(file_path):
    """Partition a Markdown/MDX file and return its content as one string.

    Args:
        file_path: Path of the file; forwarded to ``partition_md`` as
            ``filename``.

    Returns:
        The string form of every partitioned element, with consecutive
        elements separated by a blank line.
    """
    # NOTE(review): the original comment stated that partition_md takes care of
    # the frontmatter and the Markdown structure automatically -- confirm that
    # frontmatter is really stripped and does not end up in the returned text.
    parsed_elements = partition_md(filename=file_path)

    # Merge the elements back into a single piece of text.
    return "\n\n".join(map(str, parsed_elements))


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Pinecone
from langchain_ollama import ChatOllama
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI

# Silence everything below WARNING on the semantic_router logger.
logger.setLevel("WARNING")

# Embedding model used to embed queries (and handed to the vector store below).
# trust_remote_code is switched on for this model repository; keep it only for
# repositories you trust.
# NOTE(review): normalisation is enabled because, per the original comment,
# Semantic Router normalises by default -- presumably the index was built with
# normalised vectors; confirm.
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1.5",
    model_kwargs=dict(trust_remote_code=True),
    encode_kwargs=dict(normalize_embeddings=True),
)

# Existing Pinecone index 'cp-rag'; the chunk text is stored under the
# "content" metadata key.
vector_store = PineconeVectorStore(
    index_name="cp-rag",
    embedding=embeddings,
    text_key="content",
    pinecone_api_key=os.environ.get("PINECONE_API_KEY"),
)

# Return the 5 nearest chunks per query. Search parameters must be passed to
# as_retriever() through `search_kwargs`; the previous `kwargs={"k": 5}` is not
# a recognised option, so the requested k never reached the similarity search.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Multi-query prompt: asks the model for five rephrasings of the user question,
# one per line. The text (including line breaks and trailing spaces) is kept
# exactly as before; only the literal is split into explicit pieces.
template = (
    "You are an AI language model assistant. Your task is to generate five \n"
    "different versions of the given user question to retrieve relevant documents from a vector \n"
    "database. By generating multiple perspectives on the user question, your goal is to help\n"
    "the user overcome some of the limitations of the distance-based similarity search. \n"
    "Provide these alternative questions separated by newlines. Original question: {question}"
)
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Chain: prompt -> Gemini -> plain string -> list of cleaned query lines.
generate_queries = (
    prompt_perspectives
    | ChatGoogleGenerativeAI(
        model="gemini-3-flash-preview",
        # NOTE(review): the original comment says Gemini 3.0+ defaults to 1.0 -- confirm.
        temperature=1.0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    | StrOutputParser()
    # Split the reply on newlines, trim each line, and drop lines that are
    # empty or consist only of digits (stray list numbers).
    | (lambda raw: [
        candidate
        for candidate in map(str.strip, raw.split("\n"))
        if candidate and not candidate.isnumeric()
    ])
)

<All keys matched successfully>


In [87]:
# Sanity check: print the length of the vector produced for one query
# (768 in the recorded output).
# NOTE(review): this presumably has to equal the dimension of the 'cp-rag'
# index -- confirm.
print(len(embeddings.embed_query("test")))

768


In [88]:
# Smoke-test the plain retriever (no query rewriting) with a single question,
# then print each hit's text followed by its metadata dict.
docs = retriever.invoke("What is Segment Tree?")
for res in docs:
    print(f"* {res.page_content} [{res.metadata}]")

* Resources Sparse Segment Tree is more commonly referred to as "Dynamic Segment Tree." Monkey and Apple-trees Solution We need to support two operations on a range: Count the number of red-ripe trees in a range (range sum) Set all trees in a range to red-ripe (range paint) We can use a segment tree with lazy propagation to solve this, but the query range is up to $10^9$, so we have to use a sparse segment tree. Luckily, lazy propagation still works here. [{'postchunk': 'Pointer Implementation Time Complexity: $\\mathcal{O}(Q\\log{N})$ #include <bits/stdc++.h> using namespace std; class SparseSegtree { private: struct Node { int freq = 0; int lazy = 0;', 'prechunk': "id: sparse-segtree redirects: - /plat/sparse-seg title: Sparse Segment Trees author: Andi Qu prerequisites: - RURQ description: 'Querying big ranges.' frequency: 1 In problems where the query range is at most something like $10^6$, a normal segment tree suffices. However, as soon as we move to bigger ranges ($10^{12}$ in s

In [5]:
def get_unique_union(documents: list[list]):
    """Merge several result lists into one list without repeated content.

    Args:
        documents: A list of lists of document objects; each object must
            expose a ``page_content`` attribute.

    Returns:
        A flat list holding, for every distinct ``page_content``, the first
        document that carried it, in order of first appearance.
    """
    first_seen = {}
    for group in documents:
        for candidate in group:
            # page_content is the identity key; setdefault keeps the earliest
            # document and ignores later ones with the same text.
            first_seen.setdefault(candidate.page_content, candidate)
    return [*first_seen.values()]

def retrieve_documents(question: str):
    """Retrieve de-duplicated documents for several rewrites of ``question``.

    Args:
        question: The user's question; passed to ``generate_queries`` under
            the ``"question"`` key.

    Returns:
        The list produced by ``get_unique_union`` from the per-query results.
    """
    # Step 1: ask the query-generation chain for a list of query strings and
    # echo them for inspection.
    rewritten_queries = generate_queries.invoke({"question": question})
    print(rewritten_queries)

    # Step 2: run the retriever once per query, giving one result list each.
    per_query_hits = retriever.map().invoke(rewritten_queries)

    # Step 3: flatten the result lists and drop duplicate chunks.
    return get_unique_union(per_query_hits)


In [6]:
# Run the multi-query retrieval end to end for one question and print every
# unique document that comes back.
docs = retrieve_documents("What is Binary Search?")
for doc in docs:
    print(doc)

ChatGoogleGenerativeAIError: Error calling model 'gemini-3-flash-preview' (RESOURCE_EXHAUSTED): 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. ', 'status': 'RESOURCE_EXHAUSTED'}}

In [None]:
# Dump the current `docs` list.
# NOTE(review): `docs` is a notebook-global assigned by two earlier cells; if
# the most recent of them raised, this prints the value left over from the
# older assignment.
print(docs)

[Document(id='chunk-11', metadata={'postchunk': "Example - Haybale Distribution Explanation Let's define a function $f(y)$ that evaluates the cost of transporting all the haybales if we deliver the haybales at point $y$. Can we show that this function is either unimodal or convex? An important fact to know when showing that a function is convex is that the sum of convex functions is convex. Here, $f(y)$ is the sum of all the individual cost functions for each barn. The cost function for a given barn is convex, so it turns out that $f(y)$ is also convex! Given that a convex function is unimodal, this allows us to ternary search on it.", 'prechunk': 'template <typename F> int find_min(int l, int r, const F &f) { while (l < r) { int m = (l + r) / 2; f(m) < f(m + 1) ? r = m : l = m + 1; } return l; } def find_min(l: int, r: int, f) -> int: while l < r: m = (l + r) // 2 if f(m) > f(m + 1): l = m + 1 else: r = m', 'title': 'le'}, page_content='return l The way the binary search approach work

In [None]:
def _format_context(docs):
    """Render retrieved documents as the context section of the prompt."""
    # Per document: separator, title, preceding chunk, the chunk itself and the
    # following chunk, each on its own line. Metadata fields that are missing
    # are rendered as empty text instead of raising KeyError.
    return "".join(
        f"\n--------\nTitle: {match.metadata.get('title', '')}"
        f"\n{match.metadata.get('prechunk', '')}"
        f"\n{match.page_content}"
        f"\n{match.metadata.get('postchunk', '')}"
        "\n"
        for match in docs
    )


def query(question: str, model: str = "gemini-2.0-flash"):
    """Answer ``question`` with Gemini, using retrieved chunks as context.

    The function retrieves documents with ``retrieve_documents``, builds a
    prompt from them, prints that prompt, sends it to the model through the
    notebook-level ``client`` and prints the text of the reply.

    Args:
        question: The user's question.
        model: Name of the Gemini model to call. Defaults to the model that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        None. The prompt and the answer are printed.

    Raises:
        Whatever ``retrieve_documents`` raises; retrieval happens outside the
        ``try`` block. Errors from the model call itself are caught and
        printed.
    """
    context = _format_context(retrieve_documents(question))

    # The prompt wording, indentation and line breaks are kept exactly as they
    # were; the literal is only split into pieces so every byte is explicit.
    full_prompt = (
        "\n"
        "    You are a extremely helpful assistant in Competittive Programming. \n"
        "    Use the info in the context to answer the question as much detailed as possible. \n"
        "    If there are any code, implement it fully and dont hide any line.\n"
        "\n"
        "    If you cannot answer, just say \"I don't know\"\n"
        "\n"
        "    Context:\n"
        f"    {context}\n"
        "\n"
        "    Question:\n"
        f"    {question}\n"
        "    "
    )

    print(full_prompt)

    try:
        response = client.models.generate_content(
            model=model,
            contents=full_prompt,
        )

        # Print the answer text rather than the whole response object, the same
        # way the direct generate_content cell in this notebook does.
        print(response.text)

    except Exception as e:
        # Best effort: report the failure (e.g. quota errors) and keep the
        # notebook running.
        print(f"Lỗi khi gọi model: {e}")

In [None]:
# Ask one question through the full pipeline (retrieval + generation).
query("What is Segment Tree?")

ChatGoogleGenerativeAIError: Error calling model 'gemini-2.0-flash' (RESOURCE_EXHAUSTED): 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.0-flash\nPlease retry in 22.019254984s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.0-flash'}}, {'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'model': 'gemini-2.0-flash', 'location': 'global'}}, {'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_input_token_count', 'quotaId': 'GenerateContentInputTokensPerModelPerMinute-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.0-flash'}}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '22s'}]}}

In [None]:
# Minimal direct call to the Gemini API, bypassing retrieval, to check that the
# client and this model name work; prints only the text of the reply.
response = client.models.generate_content(
    model="gemini-3-flash-preview", 
    contents="Hi"
)

print(response.text)

Hello! How can I help you today?


In [None]:
# Vietnamese question: "how to implement DP convex hull in C++. Please answer
# in Vietnamese."
query("cách cài đặt DP convex hull trong C++. Hãy trả lời bằng tiếng việt")

Cài đặt Dynamic Programming (DP) để tìm convex hull của một tập điểm trong C++ có thể được thực hiện như sau:

Tạo ra một mảng động để lưu trữ các điểm:
```c
vector<Point> points;
```
Sau đó, tạo ra một mảng để lưu trữ kết quả convex hull:
```c
vector<Point> hull;
```
Hàm `monotoneChain` sẽ được sử dụng để tính convex hull. Hàm này sẽ nhận vào một mảng các điểm và trả về một mảng chứa các điểm trong convex hull.

Here's the implementation of the `monotoneChain` function:
```c
vector<Point> monotoneChain(vector<Point>& P) {
  if (P.size() < 3) return P; // nếu số lượng điểm dưới 3, thì toàn bộ tập điểm là convex hull

  sort(P.begin(), P.end()); // sắp xếp các điểm theo trục x và y

  vector<Point> hull;
  Point last = P[0]; // lấy điểm đầu tiên trong mảng
  hull.push_back(last);

  for (int i = 1; i < P.size(); i++) {
    if (last.x > P[i].x) { // nếu điểm hiện tại nằm bên phải của điểm cuối cùng trong hull, thì có thể thêm điểm này vào hull
      last = P[i]; // cập nhật điểm cuối cùn