In [1]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file (if one exists) into os.environ.
load_dotenv()

# Fail early with a readable message when the key is absent. The previous
# `os.environ[...] = os.getenv(...)` round-trip did nothing when the key was
# present and raised an opaque TypeError (environ values must be str) when it
# was missing.
if os.getenv("GOOGLE_API_KEY") is None:
    raise EnvironmentError(
        "GOOGLE_API_KEY is not set; define it in your .env file or shell environment."
    )

In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Identifier of the Google embedding model used to vectorize text.
# NOTE(review): the Qdrant collection in this notebook is created with
# size=768, so this model's output dimensionality must match — confirm
# before switching models.
EMBEDDING_MODEL_NAME = "models/embedding-001"

embedding_model = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [None]:
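# Run once if the Qdrant integration is not installed (use %pip so the
# package lands in this kernel's environment):
# %pip install -qU langchain-qdrant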

In [3]:
import os

# The Qdrant connection settings must already be present in the environment
# (e.g. loaded from .env). Verify them up front and report every missing name
# at once, instead of letting `os.environ[name] = None` fail with an
# uninformative TypeError.
_missing_qdrant_vars = [
    name for name in ("QDRANT_HOST", "QDRANT_API_KEY") if os.getenv(name) is None
]
if _missing_qdrant_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_qdrant_vars)
    )

In [6]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

# Connection settings are taken from the environment so no credentials
# appear in the notebook itself.
qdrant_location = os.getenv("QDRANT_HOST")
qdrant_api_key = os.getenv("QDRANT_API_KEY")

# The host is passed as the first positional argument, the key by keyword.
client = QdrantClient(qdrant_location, api_key=qdrant_api_key)

In [7]:
# Create the collection only when it does not exist yet, so this cell can be
# re-run (e.g. after a kernel restart) without failing on an already-existing
# "qdrant_db" collection.
# Vectors are 768-dimensional and compared with cosine distance; the size must
# equal the dimensionality of the embeddings stored in this collection.
if not client.collection_exists(collection_name="qdrant_db"):
    client.create_collection(
        collection_name="qdrant_db",
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

# Confirm the collection is available; displayed as the cell output.
client.collection_exists(collection_name="qdrant_db")

True

In [8]:
# Wrap the existing "qdrant_db" collection in a LangChain vector store that
# uses `embedding_model` for embeddings and `client` for all Qdrant calls.
vector_store = QdrantVectorStore(
    embedding=embedding_model,
    collection_name="qdrant_db",
    client=client,
)

In [9]:
# Set a USER_AGENT for this process.
# NOTE(review): presumably set before the web loader is imported so that
# outgoing requests are identified — confirm against the loader's docs.
os.environ["USER_AGENT"] = "myagent"

In [10]:
from langchain_community.document_loaders import WebBaseLoader

# Source article to index.
url = "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/"

# Build the loader first, then fetch; `docs` holds the loaded documents.
web_loader = WebBaseLoader(url)
docs = web_loader.load()

In [11]:
# Inspect the metadata attached to the first loaded document
# (the recorded output shows 'source', 'title' and 'description' keys).
first_doc = docs[0]
first_doc.metadata

{'source': 'https://lilianweng.github.io/posts/2024-11-28-reward-hacking/',
 'title': "Reward Hacking in Reinforcement Learning | Lil'Log",
 'description': 'Reward hacking occurs when a reinforcement learning (RL) agent exploits flaws or ambiguities in the reward function to achieve high rewards, without genuinely learning or completing the intended task. Reward hacking exists because RL environments are often imperfect, and it is fundamentally challenging to accurately specify a reward function.\nWith the rise of language models generalizing to a broad spectrum of tasks and RLHF becomes a de facto method for alignment training, reward hacking in RL training of language models has become a critical practical challenge. Instances where the model learns to modify unit tests to pass coding tasks, or where responses contain biases that mimic a user’s preference, are pretty concerning and are likely one of the major blockers for real-world deployment of more autonomous use cases of AI model

In [18]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings passed to the tiktoken-based constructor below.
CHUNK_SIZE = 100
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded documents into smaller chunks for embedding.
doc_chunks = text_splitter.split_documents(docs)

In [25]:
from uuid import uuid4

# One random UUID string per chunk; these become the ids passed to the
# vector store when the chunks are added.
uuids = [str(uuid4()) for _chunk in doc_chunks]

print("Total uuids:", len(uuids))
print("Few examples:")
# Last expression: preview the first ten ids as the cell output.
uuids[:10]

Total uuids: 200
Few examples:


['ba565260-979c-4cbf-a0b9-99a7d7d7fdf2',
 '93f398ca-3235-484d-8f2d-fd0bc85a632f',
 'f012f642-b9c1-41e8-9576-158cee987e09',
 'ff71db6d-5c17-4255-8c4c-f3d8918ed299',
 '664dec66-07a9-41fa-a1ee-5f5f7d84f9a4',
 '6475e061-982c-4a2e-b958-07e51dee551e',
 'a09036f2-eee9-43cf-b2cf-16aeae5786b3',
 'f0cd9247-3ce4-45bb-b9ce-6df476ff688a',
 '193c2723-f85d-4f7b-8f21-c05f8f24d2a2',
 '32b6bce9-dd1b-4197-98a3-199f1b95cc0b']

In [11]:
# Optional sanity check (disabled): uncomment to preview the first five chunks.
# print("Total number of documents inside chunks:", len(doc_chunks))
# print()
# for i, chunk in enumerate(doc_chunks, start=1):
#     print(f"Document {i} metadata: {chunk.metadata}")
#     print(f"Document {i} chunks: {chunk.page_content[:100]}")
#     if i == 5: break
#     print("-" * 100)

In [26]:
# Store every chunk in the vector store under its pre-generated id.
# The call's return value (the list of ids) is displayed as the cell output.
vector_store.add_documents(
    documents=doc_chunks,
    ids=uuids,
)

['ba565260-979c-4cbf-a0b9-99a7d7d7fdf2',
 '93f398ca-3235-484d-8f2d-fd0bc85a632f',
 'f012f642-b9c1-41e8-9576-158cee987e09',
 'ff71db6d-5c17-4255-8c4c-f3d8918ed299',
 '664dec66-07a9-41fa-a1ee-5f5f7d84f9a4',
 '6475e061-982c-4a2e-b958-07e51dee551e',
 'a09036f2-eee9-43cf-b2cf-16aeae5786b3',
 'f0cd9247-3ce4-45bb-b9ce-6df476ff688a',
 '193c2723-f85d-4f7b-8f21-c05f8f24d2a2',
 '32b6bce9-dd1b-4197-98a3-199f1b95cc0b',
 '09b3f240-f9e3-4720-bbd0-d5aa91c5fc2e',
 'c73964c3-2e3c-4066-a2fe-2b6bb5083e73',
 'a3c5fe7e-68a5-40b9-8a01-0a83c3fa4fd5',
 '94365194-9e52-44cc-94e7-dfdc6bf0e2c9',
 'b230c4f1-78b4-4812-a1f7-54cf9ad07fde',
 '20f13f8a-26ba-4c63-9400-5b4ab4a9fe0d',
 '6a3952d7-20c9-43c5-8969-f5f571df2a0e',
 'bd4c3abd-294d-4180-ab3d-e2e653ccb600',
 '45d76137-1695-4d7e-88db-31b5b73d81c6',
 'cf4825fa-73a0-4ac6-a12b-c9a16d78d0d1',
 'f7c1268d-172d-4cb8-a673-badf07e0b4aa',
 '0c575b88-8a82-4361-8fe2-e1221d5269cf',
 'cbbb4424-38b1-4cd1-a80d-38c37a8dbee5',
 '4c3c2922-0a3d-4999-953a-4f0cec406eff',
 'a696d6bc-c109-