In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_astradb import AstraDBVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.schema import Document
from langgraph.graph import START, StateGraph, END
from typing import Literal, List
from typing_extensions import TypedDict
from dotenv import load_dotenv
import os

In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Credentials and connection settings; each name is None when its variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
ASTRA_API_KEY = os.environ.get("ASTRA_API_KEY")
DB_ENDPOINT = os.environ.get("DB_ENDPOINT")
DB_ID = os.environ.get("DB_ID")

In [5]:
# Gemini chat model, authenticated with GOOGLE_API_KEY.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=1,
    api_key=GOOGLE_API_KEY,
)
# Quick sanity prompt; prints the text content of the reply.
print(llm.invoke("What is capital of India").content)

The capital of India is **New Delhi**. 



In [7]:
def pdf_2_document(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``. Only entries
            matching the ``*.pdf`` glob are picked up, and each one is read
            through ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(path=data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [13]:
# Load every PDF under the relative "data/" directory.
docs = pdf_2_document(data="data/")

In [15]:
def text_2_chunks(docs, chunk_size=500, chunk_overlap=100):
    """Split documents into overlapping chunks.

    Builds a ``RecursiveCharacterTextSplitter`` with the given settings and
    runs ``split_documents`` on ``docs``.

    Args:
        docs: Documents to split; passed unchanged to ``split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to 500.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 100.

    Returns:
        The chunks produced by the splitter's ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [16]:
# Split the loaded documents into chunks.
chunks = text_2_chunks(docs=docs)
# Trailing expression so the cell displays how many chunks were produced.
len(chunks)

2306