In [None]:
## All together
!pip install chromadb
!pip install langchain
!pip install langchain-community
!pip install langchain-google-genai

Collecting chromadb
  Downloading chromadb-0.5.11-py3-none-any.whl.metadata (6.8 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.6.6-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.27.0-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_pr

# Step-1: Import the packages

In [None]:
import os
import time
from getpass import getpass

from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory
from langchain.prompts import PromptTemplate # Prompt template
from langchain.vectorstores import Chroma   # Store the vectors
from langchain.text_splitter import RecursiveCharacterTextSplitter # Chunks
from langchain.document_loaders import TextLoader  # Load the text
from langchain.chains import VectorDBQA,RetrievalQA, LLMChain # Chains and retrieval QA
from langchain.retrievers.multi_query import MultiQueryRetriever # Multi-query retriever
from langchain_google_genai import ChatGoogleGenerativeAI # GenAI chat model used to answer
from langchain_google_genai import GoogleGenerativeAIEmbeddings # GenAI model to convert text into vectors

## Step-2: Load the data

In [None]:
# Load documents
# Path of the source text; '/content' is presumably the Colab working directory,
# so change DATA_PATH when running elsewhere.
DATA_PATH = '/content/State_union.txt'

# The file contains non-ASCII punctuation (curly quotes), so decode it explicitly
# as UTF-8 instead of depending on the platform's default encoding.
loader = TextLoader(DATA_PATH, encoding='utf-8')
documents = loader.load()

# Step-3: Divide into chunks

In [None]:
# Cut the loaded documents into chunks: chunk_size=1000 with no overlap between
# neighbouring chunks. `texts` is what gets embedded later on.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

## Step-4: Set up the models

- An embedding model, which converts the text chunks into vectors

- A chat model, which generates the answer from the retrieved chunks

In [None]:
# Credentials: read the Google API key from the GOOGLE_API_KEY environment
# variable and fall back to an interactive prompt. Never paste the key into the
# notebook itself; a key that has ever been saved in a shared notebook must be
# treated as compromised and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# Set up embeddings
# `task_type` is intentionally not set: pinning it to "retrieval_query" would embed
# the indexed documents as if they were queries. Left unset, the library is expected
# to choose the document/query task type per call (verify against the installed
# langchain-google-genai version).
embeddings = GoogleGenerativeAIEmbeddings(
    model='models/embedding-001',
    google_api_key=GOOGLE_API_KEY,
)

# Apply the same BLOCK_LOW_AND_ABOVE threshold to all four harm categories.
safety_settings = {
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
}

# Chat model used both to answer questions and by the multi-query retriever.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.3,  # Adjust the temperature here
    safety_settings=safety_settings,
)



# Step-5: Embed the chunks and store them in the vector DB

In [None]:
# Build the Chroma vector store: every chunk in `texts` is embedded with
# `embeddings` and indexed for similarity search.
vectordb = Chroma.from_documents(
    texts,
    embedding=embeddings,
)

# Step-6: Make the Prompt Template

In [None]:
# Prompt text sent to the chat model. It has two parts: a safety preamble with a
# fixed refusal message, and the answering instructions. `{context}` and
# `{question}` are the two placeholders filled in when the chain runs.
prompt_template = """
## Safety and Respect Come First!

You are programmed to be a helpful and harmless AI. You will not answer requests that promote:

* **Harassment or Bullying:** Targeting individuals or groups with hateful or hurtful language.
* **Hate Speech:**  Content that attacks or demeans others based on race, ethnicity, religion, gender, sexual orientation, disability, or other protected characteristics.
* **Violence or Harm:**  Promoting or glorifying violence, illegal activities, or dangerous behavior.
* **Misinformation and Falsehoods:**  Spreading demonstrably false or misleading information.

**How to Use You:**

1. **Provide Context:** Give me background information on a topic.
2. **Ask Your Question:** Clearly state your question related to the provided context.

**Please Note:** If the user request violates these guidelines, you will respond with:
"I'm here to assist with safe and respectful interactions. Your query goes against my guidelines. Let's try something different that promotes a positive and inclusive environment."

##  Answering User Question:

Answer the question as precisely as possible using the provided context. The context can be from different topics. Please make sure the context is highly related to the question. If the answer is not in the context, you only say "answer is not in the context".

Context: \n {context}
Question: \n {question}
Answer:
"""


# Wrap the raw text in a PromptTemplate that declares its two input variables.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

In [None]:
# Display the PromptTemplate repr to confirm its input variables and template text.
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n## Safety and Respect Come First!\n\nYou are programmed to be a helpful and harmless AI. You will not answer requests that promote:\n\n* **Harassment or Bullying:** Targeting individuals or groups with hateful or hurtful language.\n* **Hate Speech:**  Content that attacks or demeans others based on race, ethnicity, religion, gender, sexual orientation, disability, or other protected characteristics.\n* **Violence or Harm:**  Promoting or glorifying violence, illegal activities, or dangerous behavior.\n* **Misinformation and Falsehoods:**  Spreading demonstrably false or misleading information.\n\n**How to Use You:**\n\n1. **Provide Context:** Give me background information on a topic.\n2. **Ask Your Question:** Clearly state your question related to the provided context.\n\n**Please Note:** If the user request violates these guidelines, you will respond with:\n"I\'m here to assist 

# Step-7: Create the QA chains

In [None]:
# Create the QA
# Plain vector-store retriever; search_kwargs asks for k=5 chunks per search.
base_retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# MultiQueryRetriever built from the chat model on top of the base retriever.
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=chat_model,
    retriever=base_retriever,
)

# "stuff" chain that uses the custom prompt; source documents are returned
# alongside the answer so the retrieved chunks can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever_from_llm,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
# Ask a question. The chain's input key is "query", so the question is passed as a
# dict entry. A bare `{"..."}` literal is a Python *set*, which would hand the chain
# a set object instead of the question string.
question = "What did the president say about Ketanji Brown Jackson?"
response = qa_chain.invoke({"query": question})

# `invoke` returns only after the answer is ready, so no sleep is needed.
# Print just the answer; the retrieved chunks remain in response["source_documents"].
print(response["result"])

{'query': {'What did the president say about Ketanji Brown Jackson?'}, 'result': 'Answer:\nOne of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n', 'source_documents': [Document(metadata={'source': '/content/State_union.txt'}, page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\n\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling.\n\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers.\n\nWe’re putting in place dedicated immigration judges so families f

In [None]:
# List the top-level keys of the chain's response dict.
response.keys()

dict_keys(['query', 'result', 'source_documents'])