In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Find a .env file and load it; the return value of load_dotenv is discarded.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Read the Gemini API key from the environment (None when the variable is unset).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Settings for the Gemini chat model, gathered in one place so they are easy to tune.
chat_model_settings = {
    "model": "gemini-2.0-flash-lite",
    "google_api_key": GEMINI_API_KEY,
    "temperature": 0.2,
    "max_output_tokens": 4096,
}
chatModel = ChatGoogleGenerativeAI(**chat_model_settings)

In [3]:
from langchain_community.document_loaders import TextLoader

# Pin the encoding explicitly: without it TextLoader falls back to the platform's
# default encoding, which can mis-decode non-ASCII characters (for example em
# dashes) on systems whose default is not UTF-8.
# NOTE(review): assumes data/langchain.txt is saved as UTF-8 — confirm.
loader = TextLoader("data/langchain.txt", encoding="utf-8")

# Read the file into a list of Document objects and show it as the cell output.
loaded_data = loader.load()
loaded_data

[Document(metadata={'source': 'data/langchain.txt'}, page_content="What is LangChain?\nLangChain is an open source framework for building applications based on large language models (LLMs). LLMs are large deep-learning models pre-trained on large amounts of data that can generate responses to user queries—for example, answering questions or creating images from text-based prompts. LangChain provides tools and abstractions to improve the customization, accuracy, and relevancy of the information the models generate. For example, developers can use LangChain components to build new prompt chains or customize existing templates. LangChain also includes components that allow LLMs to access new data sets without retraining.\n\nRead about Large Language Models (LLMs)\n\nWhy is LangChain important?\nLLMs excel at responding to prompts in a general context, but struggle in a specific domain they were never trained on. Prompts are queries people use to seek responses from an LLM. For example, 

In [4]:
from langchain.text_splitter import CharacterTextSplitter

# Character-based splitter: chunks of up to 1000 characters (measured with len),
# with 200 characters of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)

# Split the loaded documents and show the resulting chunks as the cell output.
chunks_of_texts = text_splitter.split_documents(loaded_data)
chunks_of_texts

[Document(metadata={'source': 'data/langchain.txt'}, page_content='What is LangChain?\nLangChain is an open source framework for building applications based on large language models (LLMs). LLMs are large deep-learning models pre-trained on large amounts of data that can generate responses to user queries—for example, answering questions or creating images from text-based prompts. LangChain provides tools and abstractions to improve the customization, accuracy, and relevancy of the information the models generate. For example, developers can use LangChain components to build new prompt chains or customize existing templates. LangChain also includes components that allow LLMs to access new data sets without retraining.\n\nRead about Large Language Models (LLMs)'),
 Document(metadata={'source': 'data/langchain.txt'}, page_content="Read about Large Language Models (LLMs)\n\nWhy is LangChain important?\nLLMs excel at responding to prompts in a general context, but struggle in a specific 

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Gemini embedding model, authenticated with the same API key as the chat model.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GEMINI_API_KEY,
    model="models/gemini-embedding-001",
)

In [6]:
from langchain_community.vectorstores import Chroma

# Build a Chroma vector store from the text chunks using the embedding model above.
vector_db = Chroma.from_documents(chunks_of_texts, embedding=embeddings)

In [7]:
# Echo the vector store object as the cell output.
vector_db

<langchain_community.vectorstores.chroma.Chroma at 0x28ecb44a420>

In [8]:
# Similarity-search retriever over the vector store, limited to k=3 results per query.
retriever = vector_db.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)


## LCEL (LangChain Expression Language)

In [9]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [16]:
# Two-message chat prompt: a fixed system instruction, then a human turn with
# placeholders for the retrieved context and the user's question.
system_message = (
    "system",
    "You are a helpful assistant that answers questions based on the provided context.",
)
human_message = ("human", "{context}\n\nQuestion: {question}")

prompts = ChatPromptTemplate.from_messages([system_message, human_message])


In [17]:
def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = (doc.page_content for doc in docs)
    return "\n\n".join(page_texts)

In [18]:
# Inputs for the prompt: "context" is the retriever output run through
# format_docs, "question" is the chain input passed through unchanged.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Pipeline: build inputs -> fill the prompt -> call the chat model -> parse to str.
chain = rag_inputs | prompts | chatModel | StrOutputParser()

In [19]:
# Run the chain on a sample question and show the answer as the cell output.
question = "What are the advantages of LangChain?"
response = chain.invoke(question)
response

'Based on the provided text, here are the advantages of LangChain:\n\n*   **Repurposing Language Models:** Allows organizations to adapt LLMs for specific applications without retraining or fine-tuning. It enables the creation of applications that use proprietary information to augment model responses, like summarizing internal documents. This is achieved through Retrieval Augmented Generation (RAG) workflows, which improve response accuracy and reduce model hallucination.\n*   **Simplifying AI Development:** Abstracts the complexity of data source integrations and prompt refining, allowing developers to customize sequences and build complex applications quickly. It reduces development time by providing templates and libraries.\n*   **Developer Support:** It is open-source and supported by an active community, providing developers with tools to connect language models with external data sources. Organizations can use it for free and receive support from other developers.\n*   **Enablin