### RAG Basics
##### Boilerplate code

In [None]:
import langchain
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Read variables from a local .env file (if present) so the keys below can be
# looked up through the process environment instead of being hardcoded.
load_dotenv()

google_api_key = os.getenv("GOOGLE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Gemini chat model; temperature 0 and a 200-token cap on each reply.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=google_api_key,
    temperature=0,
    max_tokens=200,
)

# OpenAI chat model, also at temperature 0.
openai_llm = ChatOpenAI(
    model="gpt-4",
    api_key=openai_api_key,
    temperature=0,
)


In [None]:
# Baseline without retrieval: ask the bare chat models directly (uncomment to run).
# openai_llm.invoke("Who is Dahaa in coolie movie 2025 tamil?").content
# print(openai_llm.invoke("What is your knowledge cutoff point? 2023?").content)
# print(google_llm.invoke("What is your knowledge cutoff point? 2023?").content)

As an AI, I don't have a specific knowledge cutoff point. My knowledge is constantly updated as long as I'm connected to the internet. However, please note that the accuracy and timeliness of my information may depend on the sources from which it is derived.
I am a large language model, trained by Google.

My knowledge cutoff is **early 2023**. This means I may not have information about events or developments that occurred after that time.


##### 1. Loading the document

In [57]:
# Load the source document (splitting and embedding happen in the next steps)

from langchain_community.document_loaders import TextLoader

# The source text contains non-ASCII punctuation (curly quotes), so the
# encoding is pinned instead of relying on the platform's default locale.
# NOTE(review): assumes the file is saved as UTF-8 - confirm.
loader = TextLoader("./docs_for_rag/coolie_large.txt", encoding="utf-8")

# load() returns a list of Document objects (page_content + metadata).
documents = loader.load()

print(documents)

[Document(metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='Devaraj “Deva” runs a boarding house where he takes care of his late friend Rajasekhar’s daughter Preethi and although Rajasekhar’s sudden death is officially blamed on a heart attack Deva immediately suspects foul play and begins to investigate uncovering a dangerous criminal syndicate led by Simon and his men Dayalan (Dayal) and Kalyani who not only smuggle gold and luxury goods but also secretly kill people using a special cremation-chair device to dispose of bodies with Dayal even murdering an undercover policeman disguised as a coolie while Preethi who knows how the device works becomes a direct target of their sinister operations forcing Deva to step in to protect her gradually revealing shocking truths about the gang including Kalyani’s hidden identity Preethi’s actual relation as Deva’s daughter and Simon’s past connections to Deva’s own history recalling that years ago Deva was a union leader in Ma

##### 2. Splitting the documents (also called chunking)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pprint import pprint

# Chunks are capped at 500 characters, with 50 characters shared between
# neighbouring chunks so text cut at a boundary appears in both.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)

chunks = text_splitter.split_documents(documents)

# Show how many chunks were produced, then the chunks themselves.
print(len(chunks))
pprint(chunks)

5
[Document(metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='Devaraj “Deva” runs a boarding house where he takes care of his late friend Rajasekhar’s daughter Preethi and although Rajasekhar’s sudden death is officially blamed on a heart attack Deva immediately suspects foul play and begins to investigate uncovering a dangerous criminal syndicate led by Simon and his men Dayalan (Dayal) and Kalyani who not only smuggle gold and luxury goods but also secretly kill people using a special cremation-chair device to dispose of bodies with Dayal even murdering'),
 Document(metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='to dispose of bodies with Dayal even murdering an undercover policeman disguised as a coolie while Preethi who knows how the device works becomes a direct target of their sinister operations forcing Deva to step in to protect her gradually revealing shocking truths about the gang including Kalyani’s hidden identity Preethi’s actual r

##### 3. Embedding the chunks

In [63]:
from langchain_openai import OpenAIEmbeddings

# No key is passed explicitly here.
# NOTE(review): presumably the client reads OPENAI_API_KEY from the environment - confirm.
embeddings = OpenAIEmbeddings()

# Embed only the raw text of each chunk; one vector comes back per chunk.
chunk_texts = [chunk.page_content for chunk in chunks]
doc_vectors = embeddings.embed_documents(chunk_texts)

# Print the first chunk's vector as a sanity check.
print(doc_vectors[0])

[0.033937811851501465, -0.007642795331776142, -0.010086317546665668, -0.032960403710603714, -0.03154858946800232, 0.039395011961460114, -0.012923519127070904, -0.009156421758234501, -0.018136367201805115, -0.008837406523525715, 0.010520721785724163, 0.022059578448534012, 0.012760616838932037, -0.006227588281035423, 0.008498027920722961, -0.021883100271224976, 0.038716256618499756, -0.011029789224267006, 0.03450796753168106, -0.018774397671222687, -0.028209108859300613, -0.0012955759884789586, -0.004279558081179857, 0.02732672542333603, -0.0009536524885334074, 0.019507454708218575, -0.0009188662515953183, -0.008009323850274086, 0.0073509300127625465, 0.011864659376442432, 0.0009248053538613021, -0.02089211717247963, 0.018136367201805115, -0.018665796145796776, -0.012950669042766094, 0.013181446120142937, -0.02086496725678444, 0.01024921890348196, -0.011932534165680408, 0.009237872436642647, 0.006465153302997351, 0.007323779631406069, -0.004255801439285278, -0.03749449551105499, 0.006767

##### 4. Storing the chunks in a vector database

In [64]:
from langchain_community.vectorstores import FAISS

# Build a FAISS index from the chunks using the embeddings model.
vectorstore = FAISS.from_documents(chunks, embeddings)

# Look up the chunks most similar to the query
# (asimilarity_search is the async variant of this call).
results = vectorstore.similarity_search("who is dahaa?")
print(results)

[Document(id='47f7f45e-3d84-4459-94bd-87b39ff396c6', metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='network led by a mysterious figure named Daaha suggesting that the story is far from over and that further conflicts and revelations may unfold in the future.'), Document(id='4841b9b9-e1a8-41d7-9954-debb3685bad0', metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='As the story races toward its climax Deva confronts Simon and Dayal in a series of intense confrontations rescues Preethi from imminent danger and dismantles the syndicate resulting in Simon’s death Dayal’s capture and the workers reclaiming their dignity while the emotional reunion between Deva and Preethi is bittersweet as her full awareness of his identity remains incomplete and the movie closes with a foreboding hint of a larger global criminal network led by a mysterious figure named Daaha'), Document(id='16c24504-d9d6-4afb-b097-bc9e71cb7fb2', metadata={'source': './docs_for_rag/co

##### 5. Making the vectorstore runnable

In [None]:
# Expose the vector store as a retriever, i.e. a runnable with .invoke()
# that can be composed into a chain.
retriever = vectorstore.as_retriever()

print(retriever.invoke("Who is dahaa?"))

[Document(id='47f7f45e-3d84-4459-94bd-87b39ff396c6', metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='network led by a mysterious figure named Daaha suggesting that the story is far from over and that further conflicts and revelations may unfold in the future.'), Document(id='4841b9b9-e1a8-41d7-9954-debb3685bad0', metadata={'source': './docs_for_rag/coolie_large.txt'}, page_content='As the story races toward its climax Deva confronts Simon and Dayal in a series of intense confrontations rescues Preethi from imminent danger and dismantles the syndicate resulting in Simon’s death Dayal’s capture and the workers reclaiming their dignity while the emotional reunion between Deva and Preethi is bittersweet as her full awareness of his identity remains incomplete and the movie closes with a foreboding hint of a larger global criminal network led by a mysterious figure named Daaha'), Document(id='16c24504-d9d6-4afb-b097-bc9e71cb7fb2', metadata={'source': './docs_for_rag/co

##### 6. Feeding the retrieved context to the LLM and getting a response

In [68]:
from langchain_core.runnables import RunnablePassthrough

# Create RAG chain using LCEL
def format_docs(docs):
    """Join the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values, in order, separated by a blank line.
        An empty input yields an empty string.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)

# Prompt with two slots: {context} (retrieved text) and {question} (user query).
# The indentation inside the triple-quoted string is part of the prompt text.
prompt_template = ChatPromptTemplate.from_template("""
        Use the following context to answer the question:
        Context: {context}
        Question: {question}

        Answer:""")


# The input question fans out to both dict entries:
#   - "context": retriever fetches matching chunks, then format_docs joins
#     their page_content into plain text. Without format_docs the prompt
#     would receive the raw list of Document objects (ids, metadata and all).
#   - "question": forwarded unchanged.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | google_llm
    | StrOutputParser()
)

print(chain.invoke("Who is Dahaa in coolie movie 2025 tamil?"))
# print(chain.invoke("தாஹா என்பவன் யார்?"))
# print(chain.invoke("Which file holds this info?"))

Based on the provided context, Daaha is a mysterious figure who leads a larger global criminal network. The movie's ending hints at this network, suggesting that the story is not over and further conflicts and revelations may occur.
