# Self-rag
I am following the notebook [here](https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag_local.ipynb).

In [19]:
# Notebook configuration.
import os  # imported here because this cell runs before the notebook's import cell

# Project root that holds the vector DB and the embedding-model cache.
# Set the OPEN_MODULAR_RAG_DIR environment variable to point the notebook at
# another checkout; when it is unset, the original author's path is used.
working_dir = os.environ.get(
    "OPEN_MODULAR_RAG_DIR",
    "/Users/pietromascheroni/open-modular-rag",
)
# Number of chunks the retriever returns per query.
K = 5

In [20]:
from dotenv import load_dotenv
import os
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from torch import cuda
from typing import Callable
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import chromadb

import pandas as pd
import re
import string

## Set up the retriever

In [21]:
# Hugging Face identifier of the sentence-embedding model.
embed_model_id = 'sentence-transformers/all-mpnet-base-v2'

# Use the currently selected CUDA device when one is available, else the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Embedding model wrapper; model files are cached under the working directory,
# and the same device is passed for both model loading and encoding.
embedding_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
    cache_folder=working_dir + '/emb_model',
)



In [22]:
# ChromaDB client persisted on disk under the working directory; the collection
# holding the indices of all documents is opened through it.
# (If this fails, try `pip uninstall chromadb` followed by `pip install chromadb`.)
chroma_client = chromadb.PersistentClient(path=f"{working_dir}/vectordb")

In [23]:
# Name under which the vector index is created and later referenced.
collection_name = "more_agents_paper_self_rag"

# Open the collection (creating it if it does not exist yet), requesting
# cosine distance as the similarity-search metric.
vectorstore = chroma_client.get_or_create_collection(
    name=collection_name,
    metadata={"hnsw:space": "cosine"},
)

In [24]:
# Sanity check: report how many chunks the collection currently holds.
n_chunks = vectorstore.count()
print(f"We have {n_chunks} chunks in the vector store")

We have 139 chunks in the vector store


In [25]:
# Wrap the existing Chroma collection as a LangChain vector store, reusing the
# same client, collection name, and embedding model.
langchain_chroma = Chroma(
    embedding_function=embedding_model,
    collection_name=collection_name,
    client=chroma_client,
)

# Confirm the wrapper sees the collection by counting its entries
# (read through the wrapper's private `_collection` attribute).
n_wrapped = langchain_chroma._collection.count()
print("There are", n_wrapped, "in the langchain-formatted collection")

There are 139 in the langchain-formatted collection


In [26]:
# Query used to smoke-test the retriever.
test_query = "LLM performance"

# Retriever over the LangChain-wrapped collection, returning K chunks per query.
# A metadata filter can be added to search_kwargs if needed,
# e.g. 'filter': {'Page': '1'}.
retriever = langchain_chroma.as_retriever(search_kwargs={'k': K})

# Kept as the last expression so the retrieved documents are displayed.
retriever.invoke(test_query)

[Document(page_content='task designed to isolate each one. Consider the task detailed below: To start the analysis, we select two datasets with increasing difficulty, i.e., GSM8K and MATH, to calculate the rela- tive performance gain. The relative performance gain η is given by: η = Pm−Ps where Pm and Ps are the perfor- mances (accuracy) with our method and a single LLM query, respectively. The results are shown in', metadata={'Last Modified': '2024-05-02T21:13:10', 'Page': '6', 'Source': '/Users/pietromascheroni/open-modular-rag/docs/2402.05120v1.pdf'}),
 Document(page_content='based on Table 2. Task Llama2-13B Llama2-70B GPT-3.5-Turbo GSM8K (easy) MATH (hard) 69 200 37 120 16 34 It is noteworthy that the relative performance gain is more substantial with increasing task difficulty. Specifically, we observe that within the same task, the smaller model, Llama2-13B, gains ranging from 28%-200%, but only 8%- 16% over GPT-3.5-Turbo. Moreover, the more challenging task MATH', metadata={'La

## LLMs

### Retrieval Grader

In [30]:
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_core.output_parsers import JsonOutputParser
from dotenv import load_dotenv
import os

# Load environment variables (via python-dotenv) and read the Groq API key.
# Note: GROQ_API_KEY is read here but not passed to ChatGroq explicitly in this cell.
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# LLM used for grading, configured with temperature 0.
llm = ChatGroq(temperature=0)

# Prompt asking for a yes/no relevance verdict as a JSON object with the single key 'score'.
prompt = PromptTemplate(
    input_variables=["question", "document"],
    template="""You are a grader assessing relevance of a retrieved document to a user question. \n 
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n
    If the document contains keywords related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.""",
)

# Chain: fill the prompt, call the LLM, parse the reply as JSON.
retrieval_grader = prompt | llm | JsonOutputParser()

# Smoke test: grade the second retrieved chunk (index 1) for a sample question.
question = "LLM performance"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
grader_inputs = {"question": question, "document": doc_txt}
print(retrieval_grader.invoke(grader_inputs))

{'score': 'yes'}
