In [52]:
%load_ext autoreload
%autoreload 2

def warn(*args, **kwargs):
       pass
import warnings
warnings.warn = warn
warnings.filterwarnings('ignore')

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.prompts import PromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_classic.chains import create_retrieval_chain, ConversationalRetrievalChain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.memory import ConversationBufferMemory
from langchain_google_genai import GoogleGenerativeAI, ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings as GoogleEmbedder
from langchain_classic.embeddings import HuggingFaceEmbeddings
from langchain.agents import create_agent
from langchain.agents.middleware import dynamic_prompt, ModelRequest

import pprint
import os
import wget
from dotenv import load_dotenv
load_dotenv()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


True

In [4]:
api_key = os.getenv('GEMINI_API_KEY')

In [37]:
model_id = 'gemini-2.0-flash-lite'

model = GoogleGenerativeAI(
       model=model_id,
       api_key=api_key,
       temperature=0.5,
       max_retries=2,
)


### Downloading the file

In [7]:
filename = 'pracdoc.txt'
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/XVnuuEg94sAE4S_xAsGxBA.txt'

wget.download(url=url, out=filename)

'pracdoc (1).txt'

In [32]:
loader = TextLoader(filename)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, add_start_index=True)
chunks = text_splitter.split_documents(documents)
print(len(chunks))

43


In [54]:
embedder = GoogleEmbedder(model='models/text-embedding-004', google_api_key=api_key)
docsearch = Chroma.from_documents(documents=chunks, embedding=embedder)

In [55]:
@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
       last_query = request.state['messages'][-1].text
       retrieved_docs = docsearch.similarity_search(last_query)

       docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

       system_message = (
              "You are a helpful assistant. Use the following context in your response:"
              f"\n\n{docs_content}"
       )

       return system_message

agent = create_agent(model=model, tools=[], middleware=[prompt_with_context])

In [56]:
query = "Summarize the document"
for step in agent.stream(
       {'messages': [{'role': 'user', 'content': query}]},
       stream_mode='values',
):
       step['messages'][-1].pretty_print()


Summarize the document

This document discusses several key issues:

*   **COVID-19:** It acknowledges the impact of COVID-19 and announces a shift towards more normal routines due to progress made, including new mask guidelines from the CDC.
*   **Ukraine:** It addresses the Russian invasion of Ukraine, expressing support for the Ukrainian people and condemning the actions of the Russian dictator. The document highlights actions taken by the US, including sanctions and the release of oil reserves.
*   **Domestic Policy:** It touches on domestic issues such as gun violence, advocating for measures like universal background checks, banning assault weapons, and repealing the liability shield for gun manufacturers. It also addresses voting rights, expressing concern over new laws that suppress and subvert elections.
