In [1]:
!pip install langchain-groq



In [2]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [3]:
# Groq-hosted chat model used for the quick smoke test in the next cell.
# No API key is passed here, so credentials must come from outside the
# notebook (presumably the GROQ_API_KEY environment variable - confirm).
chat = ChatGroq(
    model_name="mixtral-8x7b-32768",
    temperature=0,
)

In [4]:
# Smoke test: send a trivial greeting to the Groq model and show the reply.
groq_reply = chat.invoke("HI")
groq_reply

AIMessage(content="Hello! How can I help you today? If you have any questions about a particular topic or just need some guidance on a problem, feel free to ask. I'm here to help!")

In [5]:
from langchain_openai import ChatOpenAI

In [6]:
# OpenAI chat model built entirely from library defaults (no model name,
# temperature, or key is passed). Credentials must therefore come from
# outside the notebook - presumably the OPENAI_API_KEY environment variable.
chat_openai = ChatOpenAI()

In [7]:
# Same greeting as the Groq smoke test, sent to the OpenAI model for comparison.
openai_reply = chat_openai.invoke("HI")
openai_reply

AIMessage(content='Hello! How can I assist you today?')

In [8]:
# Two-message chat prompt: a fixed system instruction plus a human turn whose
# entire content is the `{text}` template variable supplied at call time.
system = "You are a helpful assistant."
human = "{text}"
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", human),
    ]
)

In [9]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the source paper, relative to the notebook's working directory.
pdf_path = "./returning-structured-output-langchain/rag-intro-paper.pdf"

# LOAD the PDF, then SPLIT it into Document chunks. No splitter is passed,
# so the loader's default splitting behaviour applies.
loader = PyPDFLoader(pdf_path)
pdf_docs = loader.load_and_split()

# Show a compact summary instead of echoing every chunk: dumping the whole
# list bloats the saved notebook and buries the narrative. The slice is safe
# even if no chunks were produced.
print(f"Loaded {len(pdf_docs)} chunks from {pdf_path}")
pdf_docs[:1]


[Document(page_content='DEMONSTRATE –SEARCH –PREDICT :\nComposing retrieval and language models for knowledge-intensive NLP\nOmar Khattab1Keshav Santhanam1Xiang Lisa Li1David Hall1\nPercy Liang1Christopher Potts1Matei Zaharia1\nAbstract\nRetrieval-augmented in-context learning has\nemerged as a powerful approach for addressing\nknowledge-intensive tasks using frozen language\nmodels (LM) and retrieval models (RM). Exist-\ning work has combined these in simple “retrieve-\nthen-read” pipelines in which the RM retrieves\npassages that are inserted into the LM prompt.\nTo begin to fully realize the potential of frozen\nLMs and RMs, we propose DEMONSTRATE –\nSEARCH –PREDICT (DSP ), a framework that re-\nlies on passing natural language texts in sophisti-\ncated pipelines between an LM and an RM. DSP\ncan express high-level programs that bootstrap\npipeline-aware demonstrations, search for rele-\nvant passages, and generate grounded predictions,\nsystematically breaking down problems into sm

In [10]:
from langchain_openai import OpenAIEmbeddings
# EMBED: OpenAI embedding client built from library defaults; it is handed to
# the vector store below to embed the PDF chunks. The recorded output of this
# cell shows the default model resolved to 'text-embedding-ada-002'.
embeddings = OpenAIEmbeddings()
# Display the resolved configuration (the API key is masked in the repr).
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x127088750>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x1270594d0>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='https://api.openai.com/v1', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

In [11]:
# Import Chroma from langchain_community: `langchain.vectorstores` is the
# deprecated re-export path, and the retriever repr recorded below this cell
# already reports the class as langchain_community.vectorstores.chroma.Chroma.
from langchain_community.vectorstores import Chroma

# STORE: embed every PDF chunk and index it in a Chroma collection.
# NOTE(review): no persist directory or collection name is given; re-running
# this cell in the same kernel may add the same chunks again - confirm.
vectordb = Chroma.from_documents(pdf_docs, embedding=embeddings)

# Wrap the store in a retriever with default search settings. Only the last
# expression of a cell is displayed, so the former mid-cell bare `vectordb`
# line was a no-op and has been dropped.
retriever = vectordb.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x12707fc50>)

In [12]:
from langchain.chains import RetrievalQA

# Groq chat model that answers questions over the retrieved PDF chunks.
# The recorded repr below shows the zero temperature stored as 1e-08.
llm = ChatGroq(
    temperature=0.0,
    model="mixtral-8x7b-32768",
)

# RETRIEVE: question-answering chain that fetches chunks through `retriever`
# and passes them to `llm` as context (default prompt, visible in the repr).
pdf_qa = RetrievalQA.from_llm(
    llm=llm,
    retriever=retriever,
)
pdf_qa

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x127daca10>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x147770050>, temperature=1e-08, groq_api_key=SecretStr('**********'))), document_prompt=PromptTemplate(input_variables=['page_content'], template='Context:\n{page_content}'), document_variable_name='context'), retriever=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langc

In [13]:
# ANSWER: ask the chain for a definition of RAG grounded in the paper.
rag_definition_question = "What is a RAG?"
pdf_qa.invoke(rag_definition_question)

{'query': 'What is a RAG?',
 'result': 'A RAG (Retrieval-Augmented Generation) model is a type of language model that combines a retriever module and a generator module to perform a task. The retriever module retrieves relevant documents or passages from a large corpus of text, and the generator module then uses this context to generate a response. This approach allows the model to access a much larger amount of information than its internal parameters alone, which can improve its ability to perform knowledge-intensive tasks. RAG models have been shown to be effective for a variety of tasks, including question answering and dialogue systems.'}

In [14]:
# ANSWER: ask for the paper's RAG steps as a bullet list.
rag_steps_question = "What are the steps described in the paper for using RAG Write it in bullet points?"
pdf_qa.invoke(rag_steps_question)

{'query': 'What are the steps described in the paper for using RAG Write it in bullet points?',
 'result': 'Based on the provided context, the paper does not provide detailed steps for using RAG (Retrieval-augmented generation) in a bullet point format. However, I can provide you with a general idea of how RAG works based on the information given in the context:\n1. Given a question or a prompt, the Retrieval Model (RM) retrieves relevant passages from a large corpus.\n2. The Retrieved passages are then passed as context to the Language Model (LM).\n3. The LM generates a response based on the given question and the context provided by the retrieved passages.\n4. The generated response is then returned as the final answer.\n\nIt\'s important to note that the context mainly discusses the limitations of the "retrieve-then-read" approach and the potential benefits of more sophisticated interactions between the LM and RM. The paper "DEMONSTRATE –SEARCH –PREDICT : Composing retrieval and lan

In [15]:
# ANSWER: ask about the DSP framework that the paper introduces.
dsp_framework_question = "What is the DSP framework?"
pdf_qa.invoke(dsp_framework_question)

{'query': 'What is the DSP framework?',
 'result': 'The DSP (DEMONSTRATE –SEARCH –PREDICT) framework is a general approach for tackling complex problems effectively by combining the use of a language model (LM) and a retrieval model (RM) in a deliberate, task-aware manner. It is designed to deliver large empirical gains over existing in-context learning methods.\n\nThe DSP framework consists of three main stages:\n\n1. DEMONSTRATE: In this stage, the LM is used to generate exemplar queries that illustrate how to produce queries for questions in the training set. The RM is used to retrieve relevant passages that help the LM adapt to the domain and task.\n\n2. SEARCH: In this stage, the LM generates intermediate "hop" queries to find useful information for the input question. The RM retrieves the top-k most relevant text sequences for a given query.\n\n3. PREDICT: In this final stage, the LM generates the final answer to the input question based on the information gathered in the previou