In [43]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import FastEmbedEmbeddings
from groq import Groq
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
loader = PyPDFLoader('diabetics.pdf')

In [4]:
data = loader.load()
len(data)

5

In [10]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=100)
text = text_splitter.split_documents(data)
len(text)

163

In [11]:
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:02<00:00,  2.14it/s]


In [12]:
db = FAISS.from_documents(text,embeddings)

In [13]:
retriever = db.as_retriever(search_type="similarity",kwargs={"k":4})


In [17]:
retriever_docs = retriever.get_relevant_documents("What is meant by diabetes")

In [20]:
retriever_docs[3]

Document(id='ff9575e9-ef09-4a13-ae14-e8026ed4e342', metadata={'source': 'diabetics.pdf', 'page': 0}, page_content='Diabetes is a met abolic disorder in which there are high levels of sugar in the blood, a condition')

In [22]:
%pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'd:\PDF Interaction\myenv\Scripts\python.exe -m pip install --upgrade pip' command.


In [26]:
from dotenv import load_dotenv
import os
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')

In [27]:
client = Groq(api_key=os.getenv('GROQ_API_KEY'))

In [30]:
llm = ChatGroq(model_name='llama3-70b-8192')

In [31]:
query = "Who is the PM of India?"

In [35]:
llm.invoke(query)

AIMessage(content='As of my knowledge cutoff in 2023, the Prime Minister of India is Narendra Damodardas Modi. He has been serving as the Prime Minister of India since May 26, 2014.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 43, 'prompt_tokens': 17, 'total_tokens': 60, 'completion_time': 0.124853567, 'prompt_time': 0.004023704, 'queue_time': 0.186886077, 'total_time': 0.128877271}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_2f30b0b571', 'finish_reason': 'stop', 'logprobs': None}, id='run-24ef71dc-eb9d-47e3-9d75-98c37959d986-0', usage_metadata={'input_tokens': 17, 'output_tokens': 43, 'total_tokens': 60})

In [77]:
prompt_template = """You are an helpful assistant.Greet the user before answering..

{context}
{question}
"""

In [78]:
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=False)

In [79]:
qa = ConversationalRetrievalChain.from_llm(llm=llm,
                                          memory=memory,
                                          retriever = retriever)

In [80]:
query = "what is meant by diabetes"

In [81]:
result = qa(query)

In [82]:
answer = result['answer']

In [83]:
print(result['answer'])

According to the provided context, diabetes is a metabolic disorder in which there are high levels of sugar in the blood.
