In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

_ = load_dotenv(find_dotenv())
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
chatModel = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite",
    google_api_key=GEMINI_API_KEY,
    temperature=0.2,
    max_output_tokens=4096
)

In [3]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("data/langchain.txt")
load_data = loader.load()

In [5]:
loaded_data = load_data[0].page_content
loaded_data

"What is LangChain?\nLangChain is an open source framework for building applications based on large language models (LLMs). LLMs are large deep-learning models pre-trained on large amounts of data that can generate responses to user queriesâ€”for example, answering questions or creating images from text-based prompts. LangChain provides tools and abstractions to improve the customization, accuracy, and relevancy of the information the models generate. For example, developers can use LangChain components to build new prompt chains or customize existing templates. LangChain also includes components that allow LLMs to access new data sets without retraining.\n\nRead about Large Language Models (LLMs)\n\nWhy is LangChain important?\nLLMs excel at responding to prompts in a general context, but struggle in a specific domain they were never trained on. Prompts are queries people use to seek responses from an LLM. For example, an LLM can provide an answer to how much a computer costs by provi

## Splitter

In [6]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex= False,
)

In [7]:
texts = text_splitter.create_documents([loaded_data])
texts

[Document(page_content='What is LangChain?\nLangChain is an open source framework for building applications based on large language models (LLMs). LLMs are large deep-learning models pre-trained on large amounts of data that can generate responses to user queriesâ€”for example, answering questions or creating images from text-based prompts. LangChain provides tools and abstractions to improve the customization, accuracy, and relevancy of the information the models generate. For example, developers can use LangChain components to build new prompt chains or customize existing templates. LangChain also includes components that allow LLMs to access new data sets without retraining.\n\nRead about Large Language Models (LLMs)'),
 Document(page_content="Read about Large Language Models (LLMs)\n\nWhy is LangChain important?\nLLMs excel at responding to prompts in a general context, but struggle in a specific domain they were never trained on. Prompts are queries people use to seek responses fr

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter2 = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", " ", ""],
)

In [9]:
texts2 = text_splitter2.create_documents([loaded_data])
texts2

[Document(page_content='What is LangChain?\nLangChain is an open source framework for building applications based on large language models (LLMs). LLMs are large deep-learning models pre-trained on large amounts of data that can generate responses to user queriesâ€”for example, answering questions or creating images from text-based prompts. LangChain provides tools and abstractions to improve the customization, accuracy, and relevancy of the information the models generate. For example, developers can use LangChain components to build new prompt chains or customize existing templates. LangChain also includes components that allow LLMs to access new data sets without retraining.\n\nRead about Large Language Models (LLMs)'),
 Document(page_content="Read about Large Language Models (LLMs)\n\nWhy is LangChain important?\nLLMs excel at responding to prompts in a general context, but struggle in a specific domain they were never trained on. Prompts are queries people use to seek responses fr

## Embedding

In [13]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(
  model="models/gemini-embedding-001", 
  google_api_key=GEMINI_API_KEY
)
res = embeddings.embed_query("What's our Q1 revenue?")
res

[-0.03483394905924797,
 0.015264326706528664,
 -0.009936011396348476,
 -0.08367152512073517,
 -0.011280737817287445,
 0.019322767853736877,
 0.007235697004944086,
 -0.02023247629404068,
 -0.04337852820754051,
 0.0014303997159004211,
 0.016797518357634544,
 -0.017435407266020775,
 -0.016660084947943687,
 0.019680600613355637,
 0.11402520537376404,
 -0.011607675813138485,
 0.01964283362030983,
 -0.019136322662234306,
 -0.0015197325265035033,
 0.007405140902847052,
 -0.0041225203312933445,
 0.019905684515833855,
 0.029392894357442856,
 -0.00011375194299034774,
 0.010637006722390652,
 -0.025488853454589844,
 -0.0011877428041771054,
 -0.01818552240729332,
 0.0227174311876297,
 -0.0053983693942427635,
 -0.000539258646313101,
 -0.04067695513367653,
 -0.006042566150426865,
 0.0005785937537439167,
 0.006890262942761183,
 0.004361587110906839,
 -0.0013504556845873594,
 0.0014623762108385563,
 -0.023247357457876205,
 0.006964950356632471,
 -0.01595117524266243,
 0.010770643129944801,
 -0.00317730