In [None]:
# Data ingestion: read a local plain-text file into LangChain documents.
from langchain_community.document_loaders import TextLoader

loader = TextLoader(file_path="speech.txt")
text_documents = loader.load()

# Bare last expression so the notebook renders what was loaded.
text_documents

In [104]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing.
# The previous `os.environ[k] = os.getenv(k)` form was a no-op when the key
# was present and raised an opaque "TypeError: str expected, not NoneType"
# when it was absent, because os.environ only accepts string values.
if "NVIDIA_API_KEY" not in os.environ:
    raise RuntimeError(
        "NVIDIA_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [105]:
# Web-based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

# This cell only loads the page; chunking and indexing happen in later cells.
# parse_only + SoupStrainer limits what BeautifulSoup parses to the elements
# carrying the listed CSS classes (title, header and body of the post).
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header")
        )
    },
)
text_documents = loader.load()

In [106]:
# Display the documents returned by the web loader in the previous cell.
text_documents

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

In [1]:
# PDF reader: load a local PDF into LangChain documents.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="A023166923070_Madhur Prakash Mangal.pdf")
docs = loader.load()

In [2]:
# Preview only the first few loaded pages; echoing every page of the PDF
# floods the notebook output. Uses the same [:5] preview as the split
# `documents` cell further down.
docs[:5]

[Document(metadata={'source': 'Haunting Adeline (Cat and Mouse duet 1) (H. D. Carlton) (Z-Library).pdf', 'page': 0}, page_content=''),
 Document(metadata={'source': 'Haunting Adeline (Cat and Mouse duet 1) (H. D. Carlton) (Z-Library).pdf', 'page': 1}, page_content=''),
 Document(metadata={'source': 'Haunting Adeline (Cat and Mouse duet 1) (H. D. Carlton) (Z-Library).pdf', 'page': 2}, page_content='Haunting Adeline Copyright © 2021 by H. D. Carlton\n\xa0\nAll rights reserved. Printed in the United States of America. No part of\nthis book may be used or reproduced in any manner whatsoever without\nwritten permission except in the case of brief quotations em- bodied in\ncritical articles or reviews.\n\xa0\nThis book is a work of fiction. Names, characters, businesses, organiza-\ntions, places, events and incidents either are the product of the author’s\nimagination or are used fictitiously. Any resemblance to actual persons,\nliving or dead, events, or locales is entirely coincidental.\n\

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF pages into overlapping chunks; the overlap lets
# neighbouring chunks share some context across the cut.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
documents = text_splitter.split_documents(docs)

# Preview the first five chunks only.
documents[:5]

[Document(metadata={'source': 'A023166923070_Madhur Prakash Mangal.pdf', 'page': 0}, page_content='oid:16158:62489569\nSimilarity Report ID: \nPAPER NAME\nTerm Paper by Madhur Prakash Mangal -\n A023166923070.docx\nAUTHOR\nmadhur ntcc\nWORD COUNT\n5858 Words\nCHARACTER COUNT\n35576 Characters\nPAGE COUNT\n28 Pages\nFILE SIZE\n86.5KB\nSUBMISSION DATE\nJul 8, 2024 9:31 AM GMT+5:30\nREPORT DATE\nJul 8, 2024 9:32 AM GMT+5:30\n9% Overall Similarity\nThe combined total of all matches, including overlapping sources, for each database.\n6% Internet database\n3% Publications database\nCrossref database\nCrossref Posted Content database\n7% Submitted Works database\nExcluded from Similarity Report\nBibliographic material\nQuoted material\nCited material\nSmall Matches (Less then 15 words)\nSummary'),
 Document(metadata={'source': 'A023166923070_Madhur Prakash Mangal.pdf', 'page': 1}, page_content='1 \n \nTerm Paper On \nBLOCKCHAIN BASED AUTHENTICATION FOR IOT DEVICES \n \nIn partial fulfilment o

In [4]:
# Vector embedding - Chroma DB
from langchain_community.embeddings import OllamaEmbeddings
# from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain_community.vectorstores import Chroma

# Embed every chunk with OllamaEmbeddings and index the vectors in a
# Chroma vector store.
db = Chroma.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings(),
)

In [6]:
# Similarity search against the Chroma store.
query = "who are the authors of  this paper"
result = db.similarity_search(query=query)

# Show every document returned for the query.
result

[Document(metadata={'page': 21, 'source': 'A023166923070_Madhur Prakash Mangal.pdf'}, page_content="21 \n \n3. Filament's Blockchain for IOT: \nFilament is a company that focuses on providing blockchain solutions specifically for the \nIndustrial Internet of Things (IIOT). Filament's blockchain technology aims to secure IIOT \ndevices and data, facilitating secure and autonomous machine-to-machine interactions. They \nuse Blocklet chips which is integrated into IOT devices to enable blockchain functionalities. \nAll interactions and transactions are logged on the blockchain, providing a tamper-proof and \ntransparent record. This logging is crucial for audit trails, compliance, and troubleshooting in \nindustrial environments. Filament’s technology is designed to be interoperable with existing \nIIOT platforms and infrastructure. This allows seamless integration with legacy systems and \nother blockchain networks. It can be used in supply chain management, asset tracking, \nindustrial 

In [7]:
# Vector embedding - FAISS
from langchain_community.vectorstores import FAISS

# Build a second vector store over the same chunks, this time backed by FAISS.
# OllamaEmbeddings is imported in the Chroma cell.
db1 = FAISS.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings(),
)

In [8]:
# Similarity search against the FAISS store.
# NOTE(review): this query says "book" while the Chroma query says "paper",
# and the loaded PDF is a term paper - confirm the wording is intended before
# comparing the two stores' results.
query = "who are the authors of  this book"
result = db1.similarity_search(query=query)

# Show only the text of the first returned document.
result[0].page_content

'oid:16158:62489569\nSimilarity Report ID: \n9\nkyoto2.org\n<1%\nInternet\n10\nGitam University on 2021-04-29\n<1%\nSubmitted works\n11\nHellenic Open University on 2024-05-27\n<1%\nSubmitted works\n12\npublic.scnchub.com\n<1%\nInternet\n13\nDr. Jason Edwards. "Mastering Cybersecurity", Springer Science and B...\n<1%\nCrossref\n14\nMa Zhaofeng, Meng Jialin, Wang Jihui, Shan Zhiguang. "Blockchain-Ba...\n<1%\nCrossref\n15\nlink.springer.com\n<1%\nInternet\n16\nUniversity of West London on 2021-09-20\n<1%\nSubmitted works\n17\ndokumen.pub\n<1%\nInternet\n18\npt.scribd.com\n<1%\nInternet\nSources overview'