Install
---

In [87]:
#!pip install --quiet langchain
#!pip install --quiet -U langchain-cohere
#!pip install --quiet langchain-community
#!pip install --quiet langchain-chroma
#!pip install unstructured
!pip install pypdf



DATA & RAG CHAIN
---

In [173]:
# keys
from google.colab import userdata
COHERE_API = userdata.get('COHERE_API')

In [174]:
# ChatCOHERE LLM
from langchain_cohere import ChatCohere

llm = ChatCohere(model="command-r-plus", cohere_api_key=COHERE_API)
llm.invoke("hi")

AIMessage(content='Hello! How can I help you today?', additional_kwargs={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'f6950442-204f-4f69-8d03-a311396ceb1f', 'token_count': {'input_tokens': 67, 'output_tokens': 9}}, response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'is_search_required': None, 'generation_id': 'f6950442-204f-4f69-8d03-a311396ceb1f', 'token_count': {'input_tokens': 67, 'output_tokens': 9}}, id='run-d950be36-8b7a-4b48-815c-1a8872a3d679-0')

In [193]:
# WEBPAGE DATA LOAD
from langchain_community.document_loaders import WebBaseLoader

#URL = "https://docs.cohere.com/docs/retrieval-augmented-generation-rag/"
#URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
URL = "https://docs.llamaindex.ai/en/stable/examples/cookbooks/llama3_cookbook_groq/"

# Load blog post
loader = WebBaseLoader(URL)
data = loader.load()
len(data)

1

In [165]:
# LOCAL PDF
from langchain_community.document_loaders import PyPDFLoader

PDF_PATH = "/content/Financial-Guide.pdf"

loader = PyPDFLoader(PDF_PATH)
pages = loader.load()
#pages = loader.load_and_split()
print("# of pages : ",len(pages))
print(pages[0])

# of pages :  44
page_content='The Basics of Financial Management\nfor Small-community Utilities\nRCAPRURAL COMMUNITY ASSISTANCE PARTNERSHIP\nan equal opportunity provider and employer\n' metadata={'source': '/content/Financial-Guide.pdf', 'page': 0}


In [177]:
# DATA SPLIT
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split to docs
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, add_start_index=True)
docs = splitter.split_documents(data)
print('number of docs : ',len(docs))
print(docs[0].metadata)
#print(docs[0].page_content)

number of docs :  119
{'source': 'https://docs.llamaindex.ai/en/stable/examples/cookbooks/llama3_cookbook_groq/', 'title': 'Llama3 Cookbook with Groq - LlamaIndex', 'language': 'en', 'start_index': 10}


In [178]:
# EMBEDDING
from langchain_cohere import CohereEmbeddings

embedding = CohereEmbeddings(model="embed-multilingual-v3.0", cohere_api_key=COHERE_API)
#embedding = CohereEmbeddings(model="embed-english-light-v3.0", api_key=COHERE_API)
embedding

CohereEmbeddings(client=<cohere.client.Client object at 0x7bb67d4a0520>, async_client=<cohere.client.AsyncClient object at 0x7bb67d4a2e30>, model='embed-multilingual-v3.0', truncate=None, cohere_api_key='qiXeDEpa9LzgLxETqRvyHHCCNAAbyBcvUK2F0Eqh', max_retries=3, request_timeout=None, user_agent='langchain:partner', base_url=None)

In [179]:
# CHROMA VectorDB
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x7bb67d3f25c0>

In [187]:
# RETRIEVER
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
retrieved_docs = retriever.invoke("What is Groq ?")

print(len(retrieved_docs))
print(retrieved_docs[1])

6
page_content='Llama3 Cookbook with Groq - LlamaIndex\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n          Skip to content\n        \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n            LlamaIndex\n          \n\n\n\n            \n              Llama3 Cookbook with Groq\n            \n          \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n            Initializing search\n          \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n          \n  \n    \n  \n  Home\n\n        \n\n\n\n          \n  \n    \n  \n  Learn\n\n        \n\n\n\n          \n  \n    \n  \n  Use Cases\n\n        \n\n\n\n          \n  \n    \n  \n  Examples\n\n        \n\n\n\n          \n  \n    \n  \n  Component Guides\n\n        \n\n\n\n          \n  \n    \n  \n  Advanced Topics\n\n        \n\n\n\n          \n  \n    \n  \n  API Reference\n\n        \n\n\n\n          \n  \n    \n  \n  Open-Source Community\n\n        \n\n\n\n          \n  \n    \n  \n  LlamaCloud\n\n        \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n    LlamaI

In [188]:
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "The following is a friendly conversation between a human and an asssistant."
    "The assistant is talkative and provides lots of specific details from its context\n"
    "Use the following pieces of retrieved context to answer "
    "the question.\n If you don't know the answer, say that you "
    "don't know. Don't make up answers \n"
    "Keep the answer detailed, professional and concise."
    "\nContext :\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
prompt.messages[0]

SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="You are an assistant for question-answering tasks. The following is a friendly conversation between a human and an asssistant.The assistant is talkative and provides lots of specific details from its context\nUse the following pieces of retrieved context to answer the question.\n If you don't know the answer, say that you don't know. Don't make up answers \nKeep the answer detailed, professional and concise.\nContext :\n{context}"))

In [189]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


QA_CHAIN = create_stuff_documents_chain(llm, prompt)
RAG_CHAIN = create_retrieval_chain(retriever, QA_CHAIN)

response = RAG_CHAIN.invoke({"input": "How to use groq ?"})
print("ANSWER : ",response["answer"])
print("SOURCES : ",response["context"])

ANSWER :  To use Groq, you need to follow these steps:
1. Install the required packages:
   ```python
   !pip install llama-index
   !pip install llama-index-llms-groq
   ```

2. Import the necessary modules and set up asynchronous processing:
   ```python
   import nest_asyncio
   nest_asyncio.apply()
   ```

3. Set the GROQ_API_KEY environment variable:
   ```python
   import os
   os.environ["GROQ_API_KEY"] = "<YOUR_GROQ_API_KEY>"
   ```

4. Initialize the Groq LLM:
   ```python
   from llama_index.llms.groq import Groq
   llm = Groq(model="llama3-8b-8192")
   ```

Replace `<YOUR_GROQ_API_KEY>` with your actual Groq API key.

With these steps, you can utilize the Groq LLM for various language modeling tasks, such as text generation, completion, and more. Make sure to refer to the Groq documentation and tutorials for further guidance on how to use their API and models effectively.
SOURCES :  [Document(page_content='Llama3 Cookbook with Groq - LlamaIndex\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\

---