# Setup

## Install necessary libraries

In [None]:
! pip install langchain_community tiktoken langchainhub langchain langchain-huggingface sentence_transformers chromadb  langchain-qdrant qdrant_client fastembed

## Insert API key


You will first need to get an API key from [Google AI Studio](https://aistudio.google.com/app/apikey).

In [None]:

import os
from getpass import getpass

# Option 1: paste the key directly (avoid committing it with the notebook).
# GOOGLE_API_KEY = ""  # add your GOOGLE API key here
# os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Option 2 (default): read the key from Colab's secret storage when running on Colab,
# and fall back to the environment / an interactive prompt everywhere else so the
# cell no longer crashes with ImportError outside Colab.
try:
    from google.colab import userdata
    from google.colab import drive  # not used by the cells shown here; import kept as in the original
except ImportError:
    # Not running on Colab: keep an already exported key, otherwise ask for it
    # without echoing it into the notebook output.
    if not os.environ.get("GOOGLE_API_KEY"):
        os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")
else:
    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# Indexing


## 1. Load Data

### From your files

In [None]:
!pip install unstructured
!pip install "unstructured[pdf]"

In [None]:
import os
from google.colab import files
import shutil

# Create the folder
# Folder that the DirectoryLoader cell below reads from.
folder_path = os.path.join("/content/", "uploaded_files")
os.makedirs(folder_path, exist_ok=True)  # Create if it doesn't exist

# Open the Colab upload widget; the result maps each uploaded file name to its bytes.
uploaded = files.upload()

# Move uploaded files to the folder.
# The files are resolved relative to the current working directory instead of a
# hard-coded "/content/", so the cell keeps working after a `%cd`.
# Only the file names are needed here, so the dict is iterated by key.
for filename in uploaded:
    source_path = os.path.abspath(filename)  # Path to uploaded file
    destination_path = os.path.join(folder_path, filename)
    shutil.move(source_path, destination_path)  # Move the file

In [None]:
from langchain_community.document_loaders import DirectoryLoader

# Load every file found in `folder_path` (the upload folder created above).
loader = DirectoryLoader(folder_path)
# `docs` is the input of the text-splitting step further down.
docs = loader.load()
# Display how many documents were loaded.
len(docs)

### From URLs

In [None]:
import bs4

from langchain_community.document_loaders import WebBaseLoader

# Load documents from the web page listed in `web_paths`.
# NOTE: this rebinds `docs`, replacing anything loaded by the "From your files" cell;
# run only the loading variant you want to index.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
)
docs = loader.load()



In [None]:
# Number of documents currently held in `docs`.
len(docs)

2

## 2. Use a Text Splitter to Split Documents

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 256 units with no overlap
# between consecutive chunks.
# NOTE(review): chunk_size is presumably measured in characters (the splitter's
# default length function) - confirm if a token-based limit is intended.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=0)
# `splits` is what gets embedded and stored in the vector store below.
splits = text_splitter.split_documents(docs)

In [None]:
# Number of chunks produced by the splitter.
len(splits)

241

## 3. Embed the documents and store them

### Load embedding model from Hugging Face

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
#sentence-transformers/all-MiniLM-L6-v2
# Single source of truth for the embedding model, so the embedder and the
# sequence-length probe below can never point at different models.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity
)
# NOTE(review): this instantiates the model a second time only to read its
# maximum sequence length; acceptable for a small model, wasteful for a large one.
print(f"Model's maximum sequence length: {SentenceTransformer(EMBEDDING_MODEL_NAME).max_seq_length}")

### Connect to the vector store locally, create a collection and add the documents

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

collection_name = "workshop_collection"

# On-disk Qdrant instance.
client = QdrantClient(
    path="/content/vector_store_folder"
    # Pass ":memory:" instead of `path=...` for fast and light-weight experiments.
)

# The on-disk store outlives the kernel, so on a second run the collection already
# exists and `create_collection` would raise. Drop the stale collection first so the
# cell always starts from an empty index and never stores the chunks twice.
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    # `size` must equal the embedding dimension of `embedding_model`
    # (384 for all-MiniLM-L6-v2); cosine distance matches the normalized embeddings.
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
    embedding=embedding_model
)
# Embed all chunks and write them to the collection.
vector_store.add_documents(documents=splits)


### Or connect to Qdrant Cloud collection with already indexed documents (hybrid search configuration)

In [None]:
from langchain_qdrant import FastEmbedSparse,QdrantVectorStore,RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
import os
from getpass import getpass

# Sparse (BM25) embeddings used for the keyword side of hybrid search.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Credentials must never be committed with the notebook: the API key is read from
# the QDRANT_API_KEY environment variable, or asked for interactively (without echo)
# when it is not set. The cluster URL can be overridden through QDRANT_URL.
qdrant_url = os.environ.get(
    "QDRANT_URL",
    "https://e7739953-b688-421a-837b-6016c3420745.europe-west3-0.gcp.cloud.qdrant.io:6333",
)
qdrant_api_key = os.environ.get("QDRANT_API_KEY") or getpass("Qdrant Cloud API key: ")

# NOTE: this rebinds `client` and `vector_store`, replacing the local store
# created in the previous cell.
client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
)
vector_store = QdrantVectorStore(
    client=client,
    collection_name="workshop_collection",
    embedding=embedding_model,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.HYBRID,  # combine dense and sparse retrieval
    vector_name="dense",
    sparse_vector_name="sparse",
)

### In case Qdrant doesn't work, use this:

In [None]:
# `Chroma` was never imported anywhere in the notebook, so this fallback cell raised
# NameError. Import it here, next to its only use.
from langchain_community.vectorstores import Chroma

# Fallback vector store: embed the chunks and index them with Chroma.
vector_store = Chroma.from_documents(documents=splits,
                                    embedding=embedding_model)

# Retrieval

In [None]:
# Question used by the retrieval and generation cells below
# (an alternative question is kept in the trailing comment).
user_query= "What are the three core parts of an agent?" #"Which system became the first AI to earn an IMO medal?"

In [None]:
# Fetch the 5 best matches for the query; each entry is read below as a
# (document, score) pair.
retrieved_docs = vector_store.similarity_search_with_score(query=user_query, k=5)

In [None]:
# Display the raw search results.
retrieved_docs

In [None]:
# Unpack the first search result into its document and its score, then show
# the text, the metadata and the score of that hit.
top_document, top_score = retrieved_docs[0]
print("Content:", top_document.page_content)
print("Metadata", top_document.metadata)
print("Similarity score", top_score)

## Create Retriever

In [None]:
# Wrap the vector store in a retriever that returns the 15 best matches per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 15})
# `get_relevant_documents` is deprecated and emits a warning; `invoke` is the
# supported entry point and is what the evaluation cell already uses.
# The result is stored under its own name so it does not overwrite `docs`
# (the loaded source documents that the splitter cell reads).
top_docs = retriever.invoke(user_query)
print(top_docs[0].page_content)

  docs = retriever.get_relevant_documents(user_query)


Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.
Environment information is present in a tree structure.


# Generation

## Define prompt

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate


# Prompt

# System message for the RAG chain; `{context}` is filled with the retrieved
# snippets and `{question}` (in the human message) with the user question.
# NOTE(review): the second sentence of the prompt is ambiguous ("answer from your
# knowledge but say that you do, else just say that you don't know") - consider
# rewording it so the model knows exactly when to fall back to its own knowledge.
system_prompt = """You are an assistant for question-answering tasks.
Given a user question and some retrieved article snippets, answer the user question.If the provided context doesn't contain the answer, answer from your knowledge but say that you do,else just say that you don't know, don't try to make up an answer.
Here are the retrieved article snippets :
{context}
"""
# Two-message chat prompt: system instructions followed by the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{question}"),
    ]
)


In [None]:
# Show the prompt template in a readable form.
prompt.pretty_print()



## Configure LLM


In [None]:
! pip install langchain-google-genai

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used both for answer generation and, later, as the evaluation judge.
# Presumably authenticates through the GOOGLE_API_KEY environment variable set in
# the setup section - confirm.
# NOTE(review): the selected model is a dated preview release; verify it is still
# served before running, or switch to one of the alternatives listed in the comment.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-preview-05-20",#"gemini-2.0-flash-lite",#"gemini-2.0-flash",#"gemini-2.5-flash-preview-04-17",#
    temperature=0,  # temperature 0 for the least random output
    # max_tokens=None,
    # timeout=None,
    # max_retries=2,
)

## Create chain and invoke


In [None]:
from langchain.schema.runnable import RunnablePassthrough

# Post-processing
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty input yields an empty string.
    """
    snippets = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(snippets)


# Chain
# RAG pipeline: the incoming question is sent to the retriever, whose documents are
# flattened by `format_docs` into `context`, and is also passed through unchanged as
# `question`; both fill the prompt, which goes to the LLM, and the reply is parsed
# into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the chain on the sample question.
response=rag_chain.invoke(user_query)

In [None]:
# Show the generated answer.
print(response)

## Evaluating the pipeline

In [None]:
! pip install ragas

In [None]:
from ragas import EvaluationDataset

# sample_queries = ["What are the three core parts of an agent?",
#              "Which hypothesis says a single reward can be enough for intelligence?",
#              "Which system became the first AI to earn an IMO medal?",
#              "What are the two basic multi-agent orchestration patterns?",
#              "List the three steps in the guardrail-setup heuristic."
#             ]

# #If you are not interested in the context_recall metric, you don’t need to provide the ground_truths information.
# expected_responses = ["Model, Tools, Instructions",
#                 "'Reward is Enough' hypothesis",
#                 "AlphaProof",
#                 "Manager pattern and Decentralised pattern"],
#                 "1 Focus on privacy & safety 2 Add guardrails for real-world edge cases 3 Tune for both security and user experience"
#                  ]
# #for sample website
# Evaluation questions for the sample website loaded in the "From URLs" cell.
sample_queries = ["What three components sit alongside the LLM “brain” in an autonomous agent system?",
             "What are the two main kinds of memory an agent maintains?",
             "Which prompting method tells the model to 'think step by step'?",
             "Which framework fuses reasoning traces with discrete actions inside an agent?",
             "Generative Agents score memories on three factors; name them."
            ]

# Expected answers, in the same order as `sample_queries`; they are stored under the
# `reference` key of each evaluation sample below.
# If you are not interested in the context_recall metric, you don’t need to provide these reference answers.
expected_responses = ["Planning, Memory, Tool use",
                "Short-term memory and Long-term memory",
                "Chain of Thought (CoT)",
                "ReAct",
                "Recency, Importance, Relevance"
                 ]

# `zip` silently drops the surplus items of the longer list, which would hide a
# question without a reference answer (or the other way round) - fail loudly instead.
if len(sample_queries) != len(expected_responses):
    raise ValueError(
        f"Got {len(sample_queries)} queries but {len(expected_responses)} expected responses"
    )

dataset = []

# NOTE(review): `rag_chain` must be the string-output chain defined in the
# "Generation" section; the "Citations" section rebinds the same name to a chain
# that returns a structured object.
for query, reference in zip(sample_queries, expected_responses):
    relevant_docs = retriever.invoke(query)
    # Loop-local name, so the notebook-level `response` shown earlier is not overwritten.
    answer = rag_chain.invoke(query)
    dataset.append(
        {
            "user_input": query,
            "retrieved_contexts": [rdoc.page_content for rdoc in relevant_docs],
            "response": answer,
            "reference": reference,
        }
    )

evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

# Reuse the generation model as the judge by wrapping it for ragas.
evaluator_llm = LangchainLLMWrapper(llm)

# Score the collected samples with three metrics: context recall, faithfulness
# and factual correctness.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm,
)



In [None]:
# Show the evaluation result.
print(result)


# Citations

## Change prompt

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate


# Prompt

# System message for the citation-aware chain. It rebinds `system_prompt`; the
# earlier `prompt` object is unaffected because it was already built.
# The dangling fragment "Cite inline each snippet with" was removed: it ended
# mid-sentence and the following sentence already specifies the citation format.
system_prompt = """You are an assistant for question-answering tasks.
Given a user question and some retrieved article snippets, answer the user question.If the provided context doesn't contain the answer, answer from your knowledge but say that you do,else just say that you don't know, don't try to make up an answer.
When using information from the retrieved articles, cite your sources using the format [source id].For example, if you're using information from source id 1, cite it as [1].
Only cite the most relevant sources that directly support your answer. The citations should be inline not in the end of your response.

Here are the retrieved article snippets:
{context}
"""
# Two-message chat prompt: system instructions followed by the user's question.
citations_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{question}"),
    ]
)


In [None]:
# Show the citation prompt defined just above (the cell previously printed the
# older `prompt` from the "Generation" section).
citations_prompt.pretty_print()



## Define Structured Output for the LLM

In [None]:
from pydantic import BaseModel, Field
from typing_extensions import List, TypedDict


# One supporting citation: which source it comes from and the quoted text.
# All three fields are required (`...` as the default).
# NOTE(review): the `description` strings are presumably passed to the model as part
# of the output schema, so they are prompt text - edit them with care.
class Citation(BaseModel):
    # Numeric id of the cited source.
    source_id: int = Field(
        ...,
        description="The integer ID of a SPECIFIC source which justifies the answer.",
    )
    # Human-readable name of the cited source.
    source_name: str = Field(
        ...,
        description="The name of the source which justifies the answer.",
    )
    # Text quoted from the cited source.
    quote: str = Field(
        ...,
        description="The VERBATIM quote from the specified source that justifies the answer.",
    )


# Structured output requested from the LLM: the answer text plus the list of
# citations backing it. Both fields are required.
# NOTE(review): the class docstring and the `description` strings are presumably
# sent to the model as part of the schema - treat them as prompt text.
class QuotedAnswer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Answer text, expected to contain the inline citations.
    answer: str = Field(
        ...,
        description="The answer to the user question, which is based only on the given sources,which are also cited inline",
    )
    # Citations that support the answer.
    citations: List[Citation] = Field(
        ..., description="Citations from the given sources that justify the answer."
    )

In [None]:
# Variant of `llm` configured to return `QuotedAnswer` objects.
structured_llm = llm.with_structured_output(QuotedAnswer)


## Create chain and invoke


In [None]:
from langchain.schema.runnable import RunnablePassthrough

# Post-processing
def format_docs(docs):
    """Render documents as numbered source blocks for the citation prompt.

    Each document becomes three lines: a 1-based ``Source ID``, the ``source``
    entry of its metadata (``Unknown Source`` when missing) and its text as
    ``Snippet``. Blocks are separated by a blank line.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a dict) and
            ``page_content`` (a string).

    Returns:
        The formatted context string; empty when ``docs`` is empty.
    """

    def render(source_id, document):
        # Build the three-line block for a single document.
        source = document.metadata.get('source', 'Unknown Source')
        return f"Source ID: {source_id}\nSource: {source}\nSnippet: {document.page_content}"

    return "\n\n".join(
        render(source_id, document)
        for source_id, document in enumerate(docs, start=1)
    )

# Chain
# Citation-aware pipeline. It rebinds `rag_chain`: retrieved documents are numbered
# by `format_docs`, inserted into `citations_prompt`, and the structured LLM returns
# an object with `answer` and `citations` instead of a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | citations_prompt
    | structured_llm
)

# Run the chain on the sample question.
response=rag_chain.invoke(user_query)

In [None]:
# Show the answer, then one line per citation.
print(response.answer)
for citation in response.citations:
    # Turn line breaks into spaces: deleting them outright glued together the
    # words on either side of each break.
    cleaned_quote = citation.quote.replace('\n', ' ')
    print(f"[{citation.source_id}] Quote: {cleaned_quote}. Source {citation.source_name}")