In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

In [4]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [3]:
from langchain_chroma import Chroma

db = Chroma(collection_name="rag-chroma",
            embedding_function=embedding_model,
            persist_directory='./chroma_db')

In [5]:
os.environ['USER_AGENT'] = 'myagent'

In [6]:
from langchain_community.document_loaders import WebBaseLoader

url = "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/"

docs = WebBaseLoader(url).load()

In [7]:
docs[0].metadata

{'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/',
 'title': "Reward Hacking in Reinforcement Learning | Lil'Log",
 'description': 'Reward hacking occurs when a reinforcement learning (RL) agent exploits flaws or ambiguities in the reward function to achieve high rewards, without genuinely learning or completing the intended task. Reward hacking exists because RL environments are often imperfect, and it is fundamentally challenging to accurately specify a reward function.\nWith the rise of language models generalizing to a broad spectrum of tasks and RLHF becomes a de facto method for alignment training, reward hacking in RL training of language models has become a critical practical challenge. Instances where the model learns to modify unit tests to pass coding tasks, or where responses contain biases that mimic a user’s preference, are pretty concerning and are likely one of the major blockers for real-world deployment of more autonomous use cases of AI model

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100, chunk_overlap=50
)

doc_chunks = text_splitter.split_documents(docs)

In [11]:
# print("Total number of documents inside chunks:", len(doc_chunks))
# print()
# for i, chunk in enumerate(doc_chunks, start=1):
#     print(f"Document {i} metadata: {chunk.metadata}")
#     print(f"Document {i} chunks: {chunk.page_content[:100]}")
#     if i == 5: break
#     print("-" * 100)

In [12]:
db.add_documents(doc_chunks)

['52a232ef-43e4-499b-ac6c-07b7fce262ec',
 '8218605d-2c44-42aa-81a6-18f519096964',
 '8721be9d-3430-4111-afe6-ca4e31108d7a',
 '8d554112-6248-479d-acd9-ab4d2c90fa65',
 '180277ea-ad4b-4424-8eca-0a51fe01771f',
 'ed792bcd-771f-4573-8b8d-57fef23ab8dd',
 '28e2731f-3cdf-4067-a259-cc842ca95b00',
 '9abeb8ad-d6a1-4253-a172-0a98a7b200ff',
 '9b1540f1-63e8-4494-aceb-6da3aba58a04',
 '9c916888-9365-49c6-bb90-05bb368a7a85',
 'ff3196ce-aff4-4d99-9ff7-4ae73cdf3dc7',
 '84da14ac-f59d-4f00-81ac-ce6d5a27a9e6',
 '48ba0178-d485-49b4-985e-f5e2221aea7a',
 'd960adf7-678a-47fb-b514-87519598e1e1',
 'e0ec7de5-ccee-416b-8941-2e744e28075b',
 '948b0e0f-06fc-4330-a6e0-4a7d5262a0af',
 '3c1ee260-887f-4769-9e2c-49ea0fac85af',
 'd6fe4466-95b6-49bf-80fb-2253967a762f',
 'd9983810-f5bb-4b46-8460-467faaaa52bd',
 '3aa671e6-326e-4c0e-9f33-e0c7ba9484ca',
 'bdb7baca-56df-408e-a4b7-ac9981324a85',
 '980a1d7b-adbd-4a12-9dbf-be406e00583e',
 '1c24211f-c3ad-45e8-8a65-7b0d1bcb57ea',
 '08f0521b-bb16-468c-a919-2728a2d21906',
 'ae68708d-6cd1-