In [6]:
import chardet

with open("speech.txt", "rb") as f:
    raw = f.read()
print(chardet.detect(raw))


{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}


In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter

loader=TextLoader("speech.txt", encoding='utf-8')
documents=loader.load()
text_splitter=CharacterTextSplitter(chunk_size=200, chunk_overlap=30)
docs=text_splitter.split_documents(documents)

Created a chunk of size 443, which is longer than the specified 200
Created a chunk of size 330, which is longer than the specified 200
Created a chunk of size 316, which is longer than the specified 200
Created a chunk of size 339, which is longer than the specified 200
Created a chunk of size 298, which is longer than the specified 200


In [2]:
docs

[Document(metadata={'source': 'speech.txt'}, page_content='Today, we stand at the threshold of a new era in artificial intelligence—one defined not by passive assistants that wait for our commands, but by proactive agents that think ahead, learn our preferences, and execute complex tasks on our behalf. At Google I/O 2025, Sundar Pichai and his team unveiled a sweeping vision for these “agentic” AI capabilities, signaling that traditional search and simple chatbots are soon to be relics of the past'),
 Document(metadata={'source': 'speech.txt'}, page_content='One of the marquee announcements was Agent Mode in the Gemini app, a paradigm shift from reactive to directive AI. Instead of merely answering your questions, Gemini can now autonomously plan your day, book meetings, and coordinate multi-step projects—all tailored to your personal data housed in Gmail, Calendar, Maps, and beyond'),
 Document(metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Marine

In [3]:
embeddings=OllamaEmbeddings(model="llama2:7b")
db=FAISS.from_documents(docs, embeddings)
db

  embeddings=OllamaEmbeddings(model="llama2:7b")


<langchain_community.vectorstores.faiss.FAISS at 0x262dc61c6d0>

In [4]:
### querying
query="Describe Project Mariner’s capabilities and use cases as explained in the speech."
docs=db.similarity_search(query)
docs


[Document(id='87ee1e2f-f4e9-41a0-a17c-9ce5a76fc7ba', metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'),
 Document(id='7f79e636-ddc5-41a7-ae57-4d9649a3d112', metadata={'source': 'speech.txt'}, page_content='In closing, Google’s new agent AI initiative represents more than incremental improvements—it’s a fundamental reimagining of how we interact with technology. By shifting from reactive queries to proactive, goal-driven assistance, Google is empowering individuals and organizations to achieve more with less effort. As these agents become ever more capable and integrated into our digital lives, the possibilities for innovation are limitless. Thank you.'),
 Document(id='7095d

In [14]:
docs[0].page_content

'Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'

In [15]:
##Retriever

In [16]:
retriever=db.as_retriever()

In [17]:
retriever.invoke(query)

[Document(id='afaded0b-de5f-4016-8fd3-5bcb9fc7ea81', metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'),
 Document(id='877f7345-19a1-473b-bd65-4bd2adaf3f3b', metadata={'source': 'speech.txt'}, page_content='In closing, Google’s new agent AI initiative represents more than incremental improvements—it’s a fundamental reimagining of how we interact with technology. By shifting from reactive queries to proactive, goal-driven assistance, Google is empowering individuals and organizations to achieve more with less effort. As these agents become ever more capable and integrated into our digital lives, the possibilities for innovation are limitless. Thank you.'),
 Document(id='ed3ce

In [18]:
### similarity search with score
docs_and_score=db.similarity_search_with_score(query)
docs_and_score

[(Document(id='afaded0b-de5f-4016-8fd3-5bcb9fc7ea81', metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'),
  25508.309),
 (Document(id='877f7345-19a1-473b-bd65-4bd2adaf3f3b', metadata={'source': 'speech.txt'}, page_content='In closing, Google’s new agent AI initiative represents more than incremental improvements—it’s a fundamental reimagining of how we interact with technology. By shifting from reactive queries to proactive, goal-driven assistance, Google is empowering individuals and organizations to achieve more with less effort. As these agents become ever more capable and integrated into our digital lives, the possibilities for innovation are limitless. Thank you.'),
  2

In [19]:
embedding_vector=embeddings.embed_query(query)
query

'Describe Project Mariner’s capabilities and use cases as explained in the speech.'

In [24]:
docs_score=db.similarity_search_by_vector(embedding_vector)
docs_score

[Document(id='afaded0b-de5f-4016-8fd3-5bcb9fc7ea81', metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'),
 Document(id='877f7345-19a1-473b-bd65-4bd2adaf3f3b', metadata={'source': 'speech.txt'}, page_content='In closing, Google’s new agent AI initiative represents more than incremental improvements—it’s a fundamental reimagining of how we interact with technology. By shifting from reactive queries to proactive, goal-driven assistance, Google is empowering individuals and organizations to achieve more with less effort. As these agents become ever more capable and integrated into our digital lives, the possibilities for innovation are limitless. Thank you.'),
 Document(id='ed3ce

In [25]:
###saving and load

In [26]:
db.save_local("faiss_index")

In [28]:
new_df=FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
docs=new_df.similarity_search(query)

In [29]:
docs

[Document(id='afaded0b-de5f-4016-8fd3-5bcb9fc7ea81', metadata={'source': 'speech.txt'}, page_content='Complementing Agent Mode is Project Mariner, Google’s experimental web-browsing AI agent. Mariner can visit dozens of websites in parallel, synthesize information, and deliver concise, actionable insights—whether you’re researching market trends or troubleshooting a technical issue—saving you hours of manual search'),
 Document(id='877f7345-19a1-473b-bd65-4bd2adaf3f3b', metadata={'source': 'speech.txt'}, page_content='In closing, Google’s new agent AI initiative represents more than incremental improvements—it’s a fundamental reimagining of how we interact with technology. By shifting from reactive queries to proactive, goal-driven assistance, Google is empowering individuals and organizations to achieve more with less effort. As these agents become ever more capable and integrated into our digital lives, the possibilities for innovation are limitless. Thank you.'),
 Document(id='ed3ce