In [14]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_chroma import Chroma
from dotenv import load_dotenv

load_dotenv()
# 3.1 Initialize the Embedding Model
# NOTE: The model and task type are crucial for retrieval accuracy.
embedding = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",  # A strong general-purpose model
    task_type="retrieval_document" # Optimizes the embedding for document search
)

In [11]:
# ! pip install -U langchain-chroma

In [12]:
v1 = Chroma(
    persist_directory='../data/chroma/',
    embedding_function=embedding
)
print(v1._collection.count())


418


In [8]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [21]:
template = """User: {input}
Make sure your renponse is concise and clear and do not exceed three sentences.
"""

class llmPipeline():
    def __init__(self, model="gemini-2.5-flash", prompt_template=template):
        self.llm = ChatGoogleGenerativeAI(model=model)
        self.prompt = ChatPromptTemplate.from_template(prompt_template)
        self.chain = self.prompt | self.llm | StrOutputParser()

    def invoke(self, input_dict={}):
        try:
            response = self.chain.invoke(input_dict)
            return response
        except Exception as e:
            return f"API Test Failed. Ensure the GOOGLE_API_KEY is correctly set in your .env file. Error: {e}"
a = llmPipeline()

result = a.invoke({"input": "what is the beginign of time?"})
print(result)

The scientific consensus is that time began with the Big Bang, approximately 13.8 billion years ago. Before this event, the concepts of space and time as we understand them did not exist. There is no "before" the Big Bang in a temporal sense, as time itself emerged with the universe.


In [22]:
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

In [25]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)

# Run chain
from langchain.chains import RetrievalQA
question = "Summarize the context of the documents in three sentences."
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=v1.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


result = qa_chain({"query": question})
result["result"]

"The context describes a technician feeling unjustly accused for efficiently doing his job, while another scene features a person interacting with a policeman to ensure a subsidy is not cut off, sending greetings to the Governor. The policeman, nearing the end of his shift, is focused on paperwork and suggests a follow-up meeting. Separately, a reflective passage discusses gaining knowledge from books and food, recognizing the temporary nature of one's stay. thanks for asking!"

In [None]:
s1 = "..data/text_files/kf100"

s = s1.endswith()
s

<function str.endswith>