In [None]:
## All together
!pip install chromadb # is used to save vectors Database
!pip install langchain
!pip install langchain-community
!pip install langchain-google-genai

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.10-py3-none-any.whl.metadata (3.6 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading langchain_google_genai-2.0.10-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Installing collected packages: filetype, langchain-google-genai
Successfully installed filetype-1.2.0 langchain-google-genai-2.0.10


**Step-1: Import the packages**

In [None]:
import os  # Read the Google API key from the environment

from langchain.prompts import PromptTemplate # Prompt template
from langchain.vectorstores import Chroma   # Store the vectors
from langchain.text_splitter import RecursiveCharacterTextSplitter # Chunks
from langchain.document_loaders import TextLoader  # Load the text
from langchain.chains import VectorDBQA,RetrievalQA, LLMChain # Chains and Retrival ans
from langchain.retrievers.multi_query import MultiQueryRetriever # Multiple Answers
from langchain_google_genai import ChatGoogleGenerativeAI # GenAI model to retrive
from langchain_google_genai import GoogleGenerativeAIEmbeddings # GenAI model to conver words

**Step-2: Load the data**

In [None]:
# Read the source text file into LangChain documents.
source_path = '/content/State_union (1).txt'
loader = TextLoader(source_path)
documents = loader.load()

**Step-3: Divide into chunks**

In [None]:
# Break the loaded documents into chunks of at most 1000 characters,
# with no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

**Step-4: Set Up the Models**

- One is the embedding model
- One is the chat model

In [None]:
# Set up the embedding model used to vectorise text.
# The API key is read from the GOOGLE_API_KEY environment variable instead of
# being hardcoded: a key committed in a notebook is exposed to every reader
# and must be treated as compromised (revoke it and issue a new one).
# A missing variable raises KeyError here, before any API call is attempted.
# NOTE(review): task_type is fixed to "retrieval_query" for every call made
# through this object, including the document embeddings built for the vector
# store — confirm that this is intended rather than a document task type.
embeddings=GoogleGenerativeAIEmbeddings(
    model='models/embedding-001',
    google_api_key=os.environ["GOOGLE_API_KEY"],
    task_type="retrieval_query"
)

In [None]:
from google.generativeai.types.safety_types import HarmBlockThreshold, HarmCategory

# Apply the BLOCK_LOW_AND_ABOVE threshold to each of the four harm categories.
safety_settings = {
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT : HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,

}

# Chat model used for query rewriting and answer generation.
# The API key comes from the GOOGLE_API_KEY environment variable rather than a
# literal in the notebook, so the credential is never saved or shared with the
# file. A missing variable raises KeyError before any request is made.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.3,
    safety_settings=safety_settings
)


**Step-5: Get the Embeddings store in VectorDB**

In [None]:
# Embed every chunk with the embedding model and index the vectors in Chroma.
vectordb = Chroma.from_documents(
    embedding=embeddings,
    documents=texts,
)

**Step-6: Create the prompt**

In [None]:
# Instruction text for the answering step. It has two placeholders,
# {context} and {question}, which are substituted when the prompt is formatted.
prompt_template="""
## Safety and Respect Come First!

You are programmed to be a helpful and harmless AI. You will not answer requests that promote:

* **Harassment or Bullying:** Targeting individuals or groups with hateful or hurtful language.
* **Hate Speech:**  Content that attacks or demeans others based on race, ethnicity, religion, gender, sexual orientation, disability, or other protected characteristics.
* **Violence or Harm:**  Promoting or glorifying violence, illegal activities, or dangerous behavior.
* **Misinformation and Falsehoods:**  Spreading demonstrably false or misleading information.

**How to Use You:**

1. **Provide Context:** Give me background information on a topic.
2. **Ask Your Question:** Clearly state your question related to the provided context.

**Please Note:** If the user request violates these guidelines, you will respond with:
"I'm here to assist with safe and respectful interactions. Your query goes against my guidelines. Let's try something different that promotes a positive and inclusive environment."

##  Answering User Question:

Answer the question as precisely as possible using the provided context. The context can be from different topics. Please make sure the context is highly related to the question. If the answer is not in the context, you only say "answer is not in the context".

Context: \n {context}
Question: \n {question}
Answer:
"""

# Wrap the raw text in a PromptTemplate, declaring its two input variables.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)

**Step-7: Create the QA chain**

In [None]:
# Build the question-answering pipeline.
# 1) Plain vector-store retriever, configured with search_kwargs k=5.
base_retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# 2) Wrap it in a MultiQueryRetriever driven by the chat model.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=chat_model,
)

# 3) "stuff" chain using the custom prompt; the retrieved source documents
#    are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever_from_llm,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
# Ask the chain a question.
# The input must be a dict keyed by "query". The previous `{"..."}` form was a
# Python *set* literal, so the chain received a set rather than the question
# string (visible in the saved output as 'query': {'...'}).
response=qa_chain.invoke({"query": "what did the president say about ketanji Brown Jacksor?"})
print(response)

{'query': {'what did the president say about ketanji Brown Jacksor?'}, 'result': "The president said he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to the Supreme Court four days prior.  He described her as one of the nation’s top legal minds, a former top litigator in private practice, a former federal public defender, and someone from a family of public school educators and police officers.  He noted she's received broad support, including from the Fraternal Order of Police and former judges appointed by both Democrats and Republicans.", 'source_documents': [Document(metadata={'source': '/content/State_union (1).txt'}, page_content='In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections.\n\nWe cannot let this happen.\n\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our el