In [1]:
!pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
from access import Access

In [3]:
# Turn on LangChain tracing and point it at the api.smith.langchain.com endpoint.
# The API key comes from the local `Access` helper instead of being hardcoded here.
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        'LANGCHAIN_API_KEY': Access.LANGCHAIN_API_KEY,
    }
)

## Part 1: Overview

In [25]:
import bs4
from langchain_classic import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama, OllamaEmbeddings

# === Indexing ===
# Load the blog post, parsing only the elements that carry one of the listed
# CSS classes (post title, header, and content).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
# Keep the overview chunks in their own collection. Without an explicit
# collection_name, every Chroma.from_documents call in this notebook writes to
# the same default collection of the in-process client, so these
# 1000-character chunks would also surface in the retrievers built later.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OllamaEmbeddings(model="mxbai-embed-large"),
    collection_name="rag_overview",
)

retriever = vectorstore.as_retriever()

# === Retrieval and Generation ===

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOllama(
    model="llama3.1",
    temperature=0
)

# Post-processing
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by
        one blank line.
    """
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)

# Chain: the incoming question goes to the retriever (whose hits are joined by
# format_docs) and is also passed through unchanged under the "question" key.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is reward hacking if you had to explain it in a very simple way.")

"Reward hacking is when an agent interferes with the reward function to achieve its own goals, rather than following the intended objective. This can be done by manipulating the reward function directly or altering environmental information used for it. It's a broader concept that includes both environment/goal misspecification and reward tampering."

## Part 2: Indexing

In [85]:
# Toy question/document pair used by the tokenizer and embedding cells below.
question, document = (
    "What kinds of pets do I like?",
    "My favorite pet is a cat.",
)

### Tokenizer with Tiktoken

tiktoken is a fast, open-source tokenizer by OpenAI.
It converts a text string such as "tiktoken is great!", using an encoding such as "cl100k_base", into a list of tokens ["t", "ik", "token", " is", " great", "!"].

This is useful because GPT models see text in the form of tokens. Knowing the number of tokens in a string helps decide whether it is too long for a text model to process and how much an OpenAI API call will cost (usage is priced per token).

In [61]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count how many tokens ``string`` becomes under a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Name handed to ``tiktoken.get_encoding``,
            e.g. ``"cl100k_base"``.

    Returns:
        Length of the token list produced by encoding ``string``.
    """
    tokens = tiktoken.get_encoding(encoding_name).encode(string)
    return len(tokens)

# Count the tokens of the sample question under the "cl100k_base" encoding.
num_tokens_from_string(question, "cl100k_base")

# Illustrative only: we don't need this step, as Ollama models handle the tokenizer internally.

8

### Text Embedding Model

In Retrieval-Augmented Generation (RAG), embeddings are used for both the indexing and the retrieval steps. We can do this by creating an InMemoryVectorStore and calling as_retriever() on the vector store. Later, we can invoke the retriever and it will automatically return the matching document's content.

Under the hood, the vectorstore and retriever implementations are calling `embedding.embed_documents(...)` and `embedding.embed_query(...)` to create embeddings for the text(s) used in the `from_texts` and retrieve it with the `invoke` operations, respectively.

source: https://docs.langchain.com/oss/python/integrations/text_embedding/openai

In [86]:
from langchain_ollama import OllamaEmbeddings

# Embed the question and the document with the same model, then print the
# length of each resulting vector on its own line.
embedding = OllamaEmbeddings(model="mxbai-embed-large")
query_result = embedding.embed_query(question)
document_result = embedding.embed_query(document)
print(len(query_result), len(document_result), sep="\n")


1024
1024


### Cosine Similarity

Cosine similarity is recommended for OpenAI embeddings; a value of 1 means the two vectors point in exactly the same direction.

In [87]:
import numpy as np

# def cosine_similarity(vec1, vec2):
#     dot_product = np.dot(vec1, vec2)
#     norm_vec1 = np.linalg.norm(vec1)
#     norm_vec2 = np.linalg.norm(vec2)
    
#     return dot_product / (norm_vec1 * norm_vec2)

# Normalized
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Each vector is scaled to unit length and the dot product of the two
    unit vectors is returned.

    Args:
        vec1: Sequence or NumPy array of numbers.
        vec2: Sequence or NumPy array of numbers, same length as ``vec1``.

    Returns:
        A NumPy float: 1 for vectors pointing the same way, -1 for opposite
        directions. Returns 0.0 when either vector has zero norm, because
        the cosine is undefined there.

    Raises:
        ValueError: Propagated from ``np.dot`` when the shapes do not align.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    # A zero vector has no direction; dividing by its norm would silently
    # produce NaN (plus a RuntimeWarning), so report "no similarity" instead.
    if norm_a == 0 or norm_b == 0:
        return np.float64(0.0)
    return np.dot(a / norm_a, b / norm_b)


# Compare the question embedding with the document embedding computed above.
similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)

Cosine Similarity: 0.727433082327636


### Document Loader

We can use loaders other than WebBaseLoader to load from WhatsApp chats, PDFs, or cloud storage.

source: https://docs.langchain.com/oss/python/integrations/document_loaders

In [149]:
# === Indexing ===
# Same source article as the overview cell; only elements carrying one of the
# listed CSS classes are parsed.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)

docs = loader.load()

### Splitter

LangChain's recursive character text splitter is recommended for generic text. It is parameterized by a list of characters and tries to split on them in order until the chunks are small enough. The default list is ["\n\n", "\n", " ", ""]. This has the effect of trying to keep all paragraphs (and then sentences, and then words) together as long as possible, as those would generically seem to be the strongest semantically related pieces of text.

In [150]:
# === Split
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter built through from_tiktoken_encoder, so its size limits are
# presumably measured in tiktoken tokens (confirm against the library docs).
text_splitter_1 = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap=50)
splits_1 = text_splitter_1.split_documents(docs)

# Plain splitter with the same numeric settings, kept for side-by-side comparison.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
splits = text_splitter.split_documents(docs)


In [151]:
# Print the chunk at the same position from each splitter to compare their sizes.
index = 5
print(f"tiktoken tokenizer: \n{splits_1[index]}")
print(f"\n\n Regular Tokenizer: \n{splits[index]}")

tiktoken tokenizer: 
page_content='This form of reward shaping allows us to incorporate heuristics into the reward function to speed up learning without impacting the optimal policy.
Spurious Correlation#
Spurious correlation or shortcut learning (Geirhos et al. 2020) in classification task is a concept closely related to reward hacking. Spurious or shortcut features can cause a classifier to fail at learning and generalizing as intended. For example, a binary classifier for distinguishing wolves from huskies may overfit to the presence of a snowy background if all the wolf training images include snow (Ribeiro et al. 2024).


The model performs poorly on out-of-distribution (OOD) test sets if it overfits to shortcut features. (Image source: Geirhos et al. 2020)' metadata={'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/'}


 Regular Tokenizer: 
page_content='Most of the past work on this topic has been quite theoretical and focused on defining or demonstrating 

### Vector Stores for Indexing

A vector store stores embedded data and performs similarity search. It allows us to add documents to the store with `add_documents`, remove stored documents by ID with `delete`, and query for semantically similar documents with `similarity_search`.

source: https://docs.langchain.com/oss/python/integrations/vectorstores

In [152]:
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Embed every chunk in `splits` and index the vectors in a Chroma store.
vectorstore = Chroma.from_documents(splits, embedding=OllamaEmbeddings(model="mxbai-embed-large"))

# Expose the store through the retriever interface with its default search settings.
retriever = vectorstore.as_retriever()

## Part 3: Retrieval

In [153]:
# Index (Same as code block above but with little adjustment on the k)
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Reuse the vector store built in the previous cell instead of embedding
# `splits` a second time: another Chroma.from_documents call would write to the
# same default collection and add a duplicate copy of every chunk.
# k=1 limits each query to the single best-matching chunk.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [154]:
# Fetch the chunk most similar to the query (this retriever was built with k=1).
# NOTE(review): this rebinds `docs`, which previously held the loaded web page;
# re-running the splitter cell afterwards would split this result instead.
docs = retriever.invoke("What is Reward Shaping?")
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/'}, page_content='Reward shaping is a technique used to enrich the reward function, making it easier for the agent to learnâ€”for example, by providing denser rewards. However, a poorly design reward shaping mechanism can alter the trajectory of the optimal policy. Designing effective reward shaping mechanisms is')]

## Part 4: Generation

In [157]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Prompt: instructs the model to answer strictly from the supplied context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [158]:
# LLM: chat model used for generation (llama3.1 through ChatOllama, temperature 0).
llm = ChatOllama(model="llama3.1", temperature=0)

In [159]:
# Chain: pipe the filled-in prompt straight into the chat model.
chain = prompt | llm

In [160]:
# `docs` is the list returned by retriever.invoke above; it is passed as-is as the context value.
chain.invoke({"context": docs, "question": "What is reward shaping?"})

AIMessage(content='Reward shaping is a technique used to enrich the reward function, making it easier for an agent to learn. This is done by providing denser rewards.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-11-29T05:59:45.541895Z', 'done': True, 'done_reason': 'stop', 'total_duration': 17422204458, 'load_duration': 7207198083, 'prompt_eval_count': 114, 'prompt_eval_duration': 4577449875, 'eval_count': 31, 'eval_duration': 5344062375, 'logprobs': None, 'model_name': 'llama3.1', 'model_provider': 'ollama'}, id='lc_run--8a8c8249-0a5e-4286-b366-f334619434a3-0', usage_metadata={'input_tokens': 114, 'output_tokens': 31, 'total_tokens': 145})

In [161]:
from langchain_classic import hub
# Pull the shared "rlm/rag-prompt" template from the hub.
# NOTE(review): in the visible cells it is only displayed; the RAG chain below still uses `prompt`.
prompt_hub_rag = hub.pull("rlm/rag-prompt")

In [162]:
# Display the pulled template to inspect its wording and input variables.
prompt_hub_rag

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

### Rag Chains

In [164]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Build the RAG pipeline. Retrieved Document objects are flattened to plain
# text with format_docs (as in the Part 1 chain) so the prompt receives only
# the chunk text rather than the string form of the Document list.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is reward shaping?")

'Reward shaping is a technique used to enrich the reward function, making it easier for an agent to learn. This is done by providing denser rewards.'