In [1]:
!pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
from access import Access

In [3]:
# Tracing configuration read from the environment by LangChain:
# turn tracing on and set the endpoint it reports to.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)
# The API key comes from the local `access` module rather than being hardcoded here.
os.environ["LANGCHAIN_API_KEY"] = Access.LANGCHAIN_API_KEY

## Part 1: Overview

In [25]:
import bs4
from langchain_classic import hub
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama, OllamaEmbeddings

# === Indexing ===
# Load: fetch the blog post, parsing only elements that carry the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

# Split: chunk_size=1000 with chunk_overlap=200 between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed: build a Chroma vector store from the chunks using the
# "mxbai-embed-large" Ollama embedding model.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OllamaEmbeddings(model="mxbai-embed-large"),
)

# Expose the vector store through the retriever interface.
retriever = vectorstore.as_retriever()

# === Retrieval and Generation ===

# Prompt: pull the "rlm/rag-prompt" template from the hub.
prompt = hub.pull("rlm/rag-prompt")

# LLM: the "llama3.1" model through ChatOllama, with temperature set to 0.
llm = ChatOllama(model="llama3.1", temperature=0)

# Post-processing
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents joined by two newlines; an empty
        string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Chain: the input is passed through unchanged as "question" and is also sent
# to the retriever, whose documents are joined by format_docs into "context".
# The resulting mapping is piped through the prompt, the LLM and a string parser.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is reward hacking if you had to explain it in a very simple way.")

"Reward hacking is when an agent interferes with the reward function to achieve its own goals, rather than following the intended objective. This can be done by manipulating the reward function directly or altering environmental information used for it. It's a broader concept that includes both environment/goal misspecification and reward tampering."

## Part 2: Indexing

In [26]:
# Example document and a question to ask about it
document = "My favorite pet is a cat."
question = "What kinds of pets do I like?"

In [28]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens produced by encoding ``string``.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to look up,
            e.g. ``"cl100k_base"``.

    Returns:
        The length of the token list returned by the encoding.
    """
    return len(tiktoken.get_encoding(encoding_name).encode(string))

# Count the tokens in the sample question using the "cl100k_base" encoding.
num_tokens_from_string(question, "cl100k_base")

8

# tiktoken is a fast open-source tokenizer by OpenAI
# It converts a text string such as "tiktoken is great!" into a list of tokens such as ["t", "ik", "token", " is", " great", "!"], using an encoding such as "cl100k_base".

# This is useful because GPT models see text in the form of tokens. Knowing the number of tokens in a string helps you decide whether it is too long for a model to process and how much an OpenAI API call will cost (usage is priced per token).

In [None]:
from langchain_ollama import OllamaEmbeddings
