### Implementing RAG

In [1]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with a readable message when a key is absent: assigning the None
# returned by os.getenv() into os.environ raises an opaque TypeError instead.
REQUIRED_ENV_KEYS = ("HF_API_KEY", "GROQ_API_KEY")
missing_env_keys = [key for key in REQUIRED_ENV_KEYS if os.getenv(key) is None]
if missing_env_keys:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_keys)
        + ". Define them in the shell or in a .env file."
    )

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma


In [11]:
# Load the source PDF, split it into overlapping chunks, and build the embedding model.
PDF_PATH = "../Resources/CSS Notes.pdf"  # plain literal: no placeholders, so no f-prefix needed
CHUNK_SIZE = 500     # forwarded to the splitter as chunk_size
CHUNK_OVERLAP = 100  # forwarded to the splitter as chunk_overlap

loader = PyPDFLoader(file_path=PDF_PATH)
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
splitted_docs = splitter.split_documents(docs)

embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Build the Chroma vector store from the split chunks, embedding them with `embeddings`.
# NOTE(review): no persist_directory is passed — confirm whether re-running this
# cell in the same kernel adds the same chunks a second time.
db = Chroma.from_documents(
    documents=splitted_docs,
    embedding=embeddings,
)

In [13]:
# Similarity search: the text to look up in the vector store.
query = "CSS is used for styling"

In [18]:
# Run the similarity search for `query` against the vector store.
res = db.similarity_search(
    query=query,
)

In [20]:
# Display the text of the first returned chunk.
first_hit = res[0]
first_hit.page_content

"CSS\nCascading Style Sheet\nmakeupnot a programming language, but a stying languageBut for styling there should be some content, and that's whywe studied html before cssgo to websites & remove css\nA P N A \nC O L L E G E \nIt is a language that is used to describe the style of a document."

In [21]:
# Wrap the vector store in a retriever and query it with a sample question.
retriever = db.as_retriever()
sample_question = "What is CSS"
retriever.invoke(sample_question)

[Document(id='d4e16b04-b11b-4489-b95b-dac49bb447a2', metadata={'keywords': 'DAFk338rPhk,BAEHDsZUYOI', 'moddate': '2023-06-04T16:00:50+00:00', 'creator': 'Canva', 'title': 'CSS Notes', 'source': '../Resources/CSS Notes.pdf', 'page': 2, 'author': 'Rahul Neha', 'producer': 'Canva', 'creationdate': '2023-06-04T16:00:52+00:00', 'page_label': '3', 'total_pages': 72}, page_content="CSS\nCascading Style Sheet\nmakeupnot a programming language, but a stying languageBut for styling there should be some content, and that's whywe studied html before cssgo to websites & remove css\nA P N A \nC O L L E G E \nIt is a language that is used to describe the style of a document."),
 Document(id='0c08b89c-4f9f-45d2-b9ba-dda598d83373', metadata={'author': 'Rahul Neha', 'source': '../Resources/CSS Notes.pdf', 'title': 'CSS Notes', 'creator': 'Canva', 'total_pages': 72, 'keywords': 'DAFk338rPhk,BAEHDsZUYOI', 'moddate': '2023-06-04T16:00:50+00:00', 'page': 50, 'creationdate': '2023-06-04T16:00:52+00:00', 'pag