In [22]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())


In [23]:
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader

In [24]:
loader = TextLoader("./data/AgenticAILLM.txt")
loaded_document = loader.load()

In [25]:
loaded_document

[Document(metadata={'source': './data/AgenticAILLM.txt'}, page_content='# Large Language Models in 2025 and Agentic AI:  Predictions and Potential\n\nThis document explores the anticipated state of Large Language Models (LLMs) in 2025 and the emerging field of Agentic AI. It covers potential advancements, challenges, and societal impacts.\n\n## I. Large Language Models in 2025:  Beyond the Hype\n\n**A. Expected Advancements:**\n\n1.  **Improved Contextual Understanding:**  LLMs will likely exhibit significantly improved contextual understanding, moving beyond simple pattern matching to a more nuanced grasp of meaning, intent, and implicit information.  This includes better handling of:\n    *   Ambiguity:  Resolving ambiguous phrases and sentences more accurately.\n    *   Long-Range Dependencies:  Maintaining coherence and consistency over very long texts (e.g., entire books or complex conversations).\n    *   Common Sense Reasoning:  Demonstrating improved common sense and world know

In [26]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks_of_text = text_splitter.split_documents(loaded_document)

In [27]:
len(chunks_of_text)

11

In [28]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vector_db = FAISS.from_documents(chunks_of_text, embeddings)

In [29]:
vector_db

<langchain_community.vectorstores.faiss.FAISS at 0x1c654d608c0>

In [30]:
retriever = vector_db.as_retriever()

In [31]:
response = retriever.invoke("what are the Challenges and Considerations")

In [34]:
response

[Document(metadata={'source': './data/AgenticAILLM.txt'}, page_content='1.  **Bias and Fairness:**  Addressing biases inherent in training data remains a critical challenge.  Efforts will focus on:\n    *   Developing better techniques for detecting and mitigating bias.\n    *   Creating more diverse and representative training datasets.\n    *   Establishing ethical guidelines for LLM development and deployment.\n\n2.  **Truthfulness and Factuality:**  Ensuring that LLMs generate accurate and truthful information remains a significant hurdle.  This involves:\n    *   Developing methods for grounding LLMs in external knowledge sources (e.g., knowledge graphs, databases).\n    *   Improving techniques for fact-checking and verification.\n    *   Distinguishing between factual claims and opinions.'),
 Document(metadata={'source': './data/AgenticAILLM.txt'}, page_content='3.  **Explainability and Interpretability:**  Understanding how LLMs arrive at their outputs (the "black box" problem)

In [35]:
len(response)

4

In [36]:
retriever = vector_db.as_retriever(search_kwargs={"k": 3})

In [38]:
response = retriever.invoke("what are the Challenges and Considerations")

In [39]:
response

[Document(metadata={'source': './data/AgenticAILLM.txt'}, page_content='1.  **Bias and Fairness:**  Addressing biases inherent in training data remains a critical challenge.  Efforts will focus on:\n    *   Developing better techniques for detecting and mitigating bias.\n    *   Creating more diverse and representative training datasets.\n    *   Establishing ethical guidelines for LLM development and deployment.\n\n2.  **Truthfulness and Factuality:**  Ensuring that LLMs generate accurate and truthful information remains a significant hurdle.  This involves:\n    *   Developing methods for grounding LLMs in external knowledge sources (e.g., knowledge graphs, databases).\n    *   Improving techniques for fact-checking and verification.\n    *   Distinguishing between factual claims and opinions.'),
 Document(metadata={'source': './data/AgenticAILLM.txt'}, page_content='3.  **Explainability and Interpretability:**  Understanding how LLMs arrive at their outputs (the "black box" problem)

## Simple use with LCEL

In [40]:
from langchain_google_genai import ChatGoogleGenerativeAI



from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [41]:
template = """Answer the question based only on the following context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

model = ChatGoogleGenerativeAI(model="gemini-pro")


In [42]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])


chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)


In [43]:
response = chain.invoke("what are the Challenges and Considerations")

In [44]:
response

'**Challenges and Considerations for LLM Development and Deployment:**\n\n* **Bias and Fairness:**\n    * Detecting and mitigating bias in training data\n    * Creating diverse and representative datasets\n    * Establishing ethical guidelines\n\n* **Truthfulness and Factuality:**\n    * Grounding LLMs in external knowledge sources\n    * Fact-checking and verification\n    * Distinguishing factual claims from opinions\n\n* **Explainability and Interpretability:**\n    * Visualizing and interpreting LLM decision-making processes\n    * Creating transparent and explainable architectures\n\n* **Security and Misuse:**\n    * Detecting and preventing LLM misuse\n    * Establishing legal and regulatory frameworks\n\n* **Data Privacy:**\n    * Protecting user data used for training and interaction\n    * Employing techniques like federated learning, differential privacy, and secure multi-party computation'

In [45]:
type(response)

str