In [1]:
import os

from dotenv import load_dotenv

# Run load_dotenv() up front so the os.getenv(...) lookups in later cells
# see whatever it loads into the environment.
load_dotenv()

In [2]:
# Non-secret LangSmith settings.
LANGSMITH_TRACING = "true"
LANGSMITH_ENDPOINT = "https://api.smith.langchain.com"
LANGSMITH_PROJECT = "pdf-rag-model"

# API keys are read from the process environment (None when the variable is unset),
# so no credential is ever written into the notebook itself.
LANGSMITH_API_KEY = os.environ.get("langsmith_api_key")
GOOGLE_API_KEY = os.environ.get("google_api_key")

In [4]:
# Export the LangSmith settings under the LANGCHAIN_* environment variable names.
# os.environ only accepts str values, so a missing key (os.getenv returned None)
# would otherwise surface as an opaque TypeError on assignment; fail with an
# actionable message instead.
if not LANGSMITH_API_KEY:
    raise RuntimeError(
        "LangSmith API key not found: define 'langsmith_api_key' in the "
        "environment or in the .env file before running this cell."
    )

os.environ["LANGCHAIN_TRACING_V2"] = LANGSMITH_TRACING
os.environ["LANGCHAIN_ENDPOINT"] = LANGSMITH_ENDPOINT
os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
os.environ["LANGCHAIN_PROJECT"] = LANGSMITH_PROJECT

In [5]:
# Export the Google API key for the Gemini clients created below.
# Assigning None to os.environ raises a bare TypeError, so check explicitly and
# tell the reader which variable is missing.
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "Google API key not found: define 'google_api_key' in the "
        "environment or in the .env file before running this cell."
    )

os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [6]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client; passed to the Chroma vector store as its embedding function.
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [8]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used by the question-answering chain built later in the notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    convert_system_message_to_human=True,
)

In [9]:
# Smoke test: one round trip to confirm the chat model responds.
greeting = model.invoke("hi")
print(greeting.content)

Hi there! How can I help you today?


In [10]:
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import MessagesPlaceholder

In [28]:
from langchain_community.document_loaders import TextLoader

# Load book.txt as UTF-8 text.
loader = TextLoader("book.txt", encoding="utf-8")
documents = loader.load()

# Split the loaded documents into overlapping chunks (size 200, overlap 40).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=40,
)
docs = text_splitter.split_documents(documents)

In [29]:
# Preview only the first few chunks; displaying the whole list floods the notebook output.
docs[:5]

[Document(metadata={'source': 'book.txt'}, page_content='We have kicked off the Fresh Thinking Entrepreneurial Challenge for 2024 and are excited to have you grow with us. If you have not seen it yet, the full list of all 12 books we will be reading this'),
 Document(metadata={'source': 'book.txt'}, page_content='of all 12 books we will be reading this year can be found here.'),
 Document(metadata={'source': 'book.txt'}, page_content='The first book we are going to review, for our February challenge book, is GOOD TO GREAT by Jim Collins.'),
 Document(metadata={'source': 'book.txt'}, page_content='Jim Collins’ influential business book, Good to Great: Why Some Companies Make the Leap… and Others Don’t, opens a new window, was released in 2001. The creation of this book was the product of an'),
 Document(metadata={'source': 'book.txt'}, page_content='of this book was the product of an extensive study project in which Collins and his colleagues examined a number of outstanding businesses 

In [30]:
from uuid import uuid4

# Index the chunks in a collection that is unique to this execution of the cell.
# Without an explicit collection name, re-running the cell in the same kernel
# appears to append to the same default collection: the similarity-search output
# below shows identical chunks stored under different ids. A fresh name keeps
# every run's index limited to the current `docs`.
vectorestore = Chroma.from_documents(
    documents=docs,
    embedding=gemini_embeddings,
    collection_name=f"book-{uuid4().hex}",
)

In [43]:
query = "Chapter 6"
# Bind the hits to their own name. Reusing `docs` would overwrite the split chunks
# that Chroma.from_documents(documents=docs, ...) indexes, so re-running that cell
# afterwards would index these search results instead of the book.
chapter_hits = vectorestore.similarity_search(query, k=20)
chapter_hits

[Document(id='d23a5316-3579-4294-a093-e01962210b6e', metadata={'source': 'book.txt'}, page_content='chapter.'),
 Document(id='48a69b97-5050-4af5-838c-1dce6617dfb5', metadata={'source': 'book.txt'}, page_content='chapter.'),
 Document(id='29cd7944-ac20-44ff-9705-4283a80c0d9c', metadata={'source': 'book.txt'}, page_content='Chapter 4: Confront the Brutal Facts (Yet Never Lose Faith)'),
 Document(id='4db246d8-285c-4e53-8442-9fbb84d45fe9', metadata={'source': 'book.txt'}, page_content='Chapter 4: Confront the Brutal Facts (Yet Never Lose Faith)'),
 Document(id='970ca799-0687-4145-8e8e-186cc08061cf', metadata={'source': 'book.txt'}, page_content='the first chapter. Collins notes that many companies, despite their strong performance, never reach'),
 Document(id='e421f12c-08d6-47e1-a9af-27245507eaff', metadata={'source': 'book.txt'}, page_content='a man who endured years of captivity during the Vietnam War, in this chapter. The paradox entails'),
 Document(id='8272d141-b896-45db-b1e0-656a0fc1

In [31]:
# Expose the vector store through the retriever interface. No search options are
# passed here, so the retriever runs with its defaults.
retriever = vectorestore.as_retriever()

In [32]:
# Display the retriever's repr to inspect how it is configured.
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000022A3C39BE90>, search_kwargs={})

In [33]:
# System instructions for the QA model. The fragments are concatenated verbatim;
# the trailing {context} placeholder is where the retrieved chunks are inserted.
system_prompt = "".join(
    [
        "You are an assistant for question answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, just say that you don't know. ",
        "Use three sentences maximum and keep the answer concise.",
        "\n\n",
        "{context}",
    ]
)

In [34]:
# Two-message prompt: the system message carries the instructions plus the
# {context} placeholder; the human message carries the question via {input}.
chat_prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [35]:
# Chain that fills chat_prompt with the retrieved documents and sends it to the model.
question_answering_chain = create_stuff_documents_chain(
    llm=model,
    prompt=chat_prompt,
)

In [None]:
# Build the retrieval chain only. create_retrieval_chain returns a runnable chain,
# not a result dict, so subscripting the chain itself (e.g. chain['answer']) fails.
# Results are obtained in later cells via rag_chain.invoke({"input": ...}).
rag_chain = create_retrieval_chain(retriever, question_answering_chain)

In [None]:
# Ask about a specific chapter and show only the generated answer.
result = rag_chain.invoke(dict(input="What is Chapter 4 about?"))
result["answer"]

{'input': 'What is Chapter 4 about?',
 'context': [Document(id='29cd7944-ac20-44ff-9705-4283a80c0d9c', metadata={'source': 'book.txt'}, page_content='Chapter 4: Confront the Brutal Facts (Yet Never Lose Faith)'),
  Document(id='4db246d8-285c-4e53-8442-9fbb84d45fe9', metadata={'source': 'book.txt'}, page_content='Chapter 4: Confront the Brutal Facts (Yet Never Lose Faith)'),
  Document(id='8272d141-b896-45db-b1e0-656a0fc177ed', metadata={'source': 'book.txt'}, page_content='Collins addresses “the Stockdale Paradox,” a theory that has the name of Admiral James Stockdale, a man who endured years of captivity during the Vietnam War, in this chapter. The paradox entails'),
  Document(id='ffe71a72-e0a0-4a43-aed3-3a6ff00a9b46', metadata={'source': 'book.txt'}, page_content='in this chapter. The paradox entails facing the unflinching reality of a circumstance but simultaneously remaining confident in ultimate achievement. Collins shows how successful businesses have a')],
 'answer': 'Chapter 4

In [None]:
# Ask for the book's conclusion and show only the generated answer.
result = rag_chain.invoke(dict(input="What is the conclusion?"))
result["answer"]

{'input': 'What is the conclusion?',
 'context': [Document(id='aa06744c-5b32-4d99-8f01-cf44bb65cb12', metadata={'source': 'book.txt'}, page_content='The idea of Level 5 Leadership is one of the study’s primary conclusions. Collins characterizes'),
  Document(id='88d1d528-6e1d-42f6-8064-77f60d6a9e11', metadata={'source': 'book.txt'}, page_content='The idea of Level 5 Leadership is one of the study’s primary conclusions. Collins characterizes Level 5 leaders as possessing a special blend of individual humility and organizational will. These'),
  Document(id='ef2e8a0f-b991-404b-995f-7a7b7a0b2f0d', metadata={'source': 'book.txt'}, page_content='Conclusion'),
  Document(id='9303847e-ade8-4e23-ad2d-e37b19e6f682', metadata={'source': 'book.txt'}, page_content='In the conclusion, Collins recaps the key principles of greatness outlined in the book, emphasizing')],
 'answer': 'The conclusion emphasizes the key principles of greatness outlined in the book.  A primary concept is Level 5 Leadership

In [None]:
# Ask for the key-concepts summary and show only the generated answer.
result = rag_chain.invoke(dict(input="What is the Summary of Key Concepts?"))
result["answer"]

{'input': 'What is the Summary of Key Concepts?',
 'context': [Document(id='c29edf31-9ba6-4fd2-8744-49f9030eaae6', metadata={'source': 'book.txt'}, page_content='In the conclusion, Collins recaps the key principles of greatness outlined in the book, emphasizing the importance of Level 5 Leadership, the Hedgehog Concept, and the Flywheel Effect. He concludes'),
  Document(id='4a8d2644-94ad-498d-b5b2-3fe2dad6cf7d', metadata={'source': 'book.txt'}, page_content='The last chapter covers the process of going from a decent firm to a great one and maintaining greatness over time. Collins emphasizes the need for enduring core beliefs and concepts that stand the'),
  Document(id='1454f732-403b-40da-8770-f89b843294d4', metadata={'source': 'book.txt'}, page_content='maintain extraordinary levels of performance. We will go into the major ideas and significant'),
  Document(id='d9aa70e5-c805-47bd-aba8-3495abe0035c', metadata={'source': 'book.txt'}, page_content='greatness over time. Collins emphasi

In [None]:
# Ask about the Technology Accelerators topic and show only the generated answer.
result = rag_chain.invoke(dict(input="What is mentioned about Technology Accelerators?"))
result["answer"]

{'input': 'What is mentioned about Technology Accelerators?',
 'context': [Document(id='7a7e6126-2d76-4e1a-9072-c93e016b2ebd', metadata={'source': 'book.txt'}, page_content='Technology Accelerators: Rather than being a force for greatness, technology is considered a tool'),
  Document(id='55a02e3e-15a3-43b2-83ab-6290df27ca31', metadata={'source': 'book.txt'}, page_content='Technology Accelerators: Rather than being a force for greatness, technology is considered a tool to enhance core qualities and methods.'),
  Document(id='a92a7bb9-6479-4bc9-952e-8634313b4294', metadata={'source': 'book.txt'}, page_content='Chapter 7: Technology Accelerators'),
  Document(id='a6eef6cc-9c43-4291-aca8-46103f88b476', metadata={'source': 'book.txt'}, page_content='Chapter 7: Technology Accelerators')],
 'answer': 'Technology accelerators are discussed in Chapter 7.  They are considered tools that enhance core qualities and methods, rather than a force for greatness on their own.  The provided context foc

In [None]:
# Re-ask the key-concepts question (same input as an earlier cell) and show the answer.
# NOTE(review): this repeats an earlier query; confirm whether the duplicate is intentional.
result = rag_chain.invoke(dict(input="What is the Summary of Key Concepts?"))
result["answer"]