In [None]:
# Install sentence-transformers (imported further down as `sentence_transformers`).
%pip install sentence-transformers

In [None]:
# Install the Groq client package (imported further down as `groq`).
%pip install groq

In [None]:
# Install the packages this notebook imports that no other install cell covers:
# LangChain and its community loaders, the Pinecone integration and gRPC client,
# the PDF parser needed by PyPDFLoader, and python-dotenv for load_dotenv().
# (A bare `%pip install` with no package name makes pip exit with an error.)
# NOTE(review): versions are unpinned; the `langchain.chains` and
# `langchain.document_loaders` import paths used below may require a pre-1.0
# langchain release — confirm and pin the versions that were actually used.
%pip install langchain langchain-community langchain-pinecone "pinecone[grpc]" pypdf python-dotenv

In [None]:
%pip install langchain_groq

In [None]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from groq import Groq


from langchain_groq import ChatGroq
import os

In [None]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The value returned by ``DirectoryLoader.load()`` for files matching
        the ``*.pdf`` glob, each one read with ``PyPDFLoader``.
    """
    loader_options = {"glob": "*.pdf", "loader_cls": PyPDFLoader}
    return DirectoryLoader(data, **loader_options).load()

In [None]:
# Folder containing the source PDFs. The MEDIBOT_DATA_DIR environment variable
# overrides the original machine-specific default so the notebook can run on
# other machines. It is read from the process environment as-is: load_dotenv()
# has not been called yet at this point in the notebook.
DATA_DIR = os.environ.get(
    "MEDIBOT_DATA_DIR",
    'C:/Users/Arjun/OneDrive/Desktop/DO IT/Medical-Chat-Bot/Data',
)
extractor=load_pdf_file(data=DATA_DIR)

In [None]:
# Display the loaded documents as the cell output.
extractor

In [None]:
def split_data(extractor, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extractor: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Forwarded as the splitter's ``chunk_size``. Defaults to
            500, the value that used to be hard-coded here.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        The result of ``split_documents`` for ``extractor``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extractor)

In [None]:
# Split the loaded documents with the helper defined above and print how many
# chunks came back.
text_chunks=split_data(extractor)
print(len(text_chunks))

In [None]:
def download_embedding(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding object used by this notebook.

    Args:
        model_name: Model identifier forwarded to ``HuggingFaceEmbeddings``.
            Defaults to the model that used to be hard-coded here.
            NOTE(review): the Pinecone index in this notebook is created with
            ``dimension=384``; a replacement model presumably has to produce
            vectors of that size — confirm before changing it.

    Returns:
        The ``HuggingFaceEmbeddings`` instance built for ``model_name``.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [None]:
# Instantiate the embedding object through the helper defined above.
embedding=download_embedding()

In [None]:
# Sanity check: embed a short string and print the length of the returned
# result (useful to compare against the `dimension` given to the index below).
result=embedding.embed_query("hello world")
print("result",len(result))

In [None]:
# Load variables from a .env file (python-dotenv) so the API keys read in the
# next cell are available through os.environ.
load_dotenv()

In [None]:
# Read both service credentials from the process environment; each name is
# bound to None when its variable is not set.
pinecone_api_key = os.getenv("pinecone_api_key")
groq_api_key = os.getenv("groq_api_key")

In [None]:
# Connect to Pinecone and make sure the target index is available.
pc = Pinecone(api_key=pinecone_api_key)
index_name = "medibot"

if index_name in pc.list_indexes().names():
    # The index is already listed: report it and leave it untouched.
    print(f"Index '{index_name}' already exists. Skipping creation.")
else:
    # First run: create the index with the settings this notebook relies on.
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )


In [None]:
# Publish the credentials to the process environment for libraries that read
# them from there (the PineconeVectorStore calls below are not given a key
# explicitly).
_credentials = {
    "pinecone_api_key": pinecone_api_key,
    "groq_api_key": groq_api_key,
}

# Fail with a readable message instead of the TypeError that os.environ raises
# when it is assigned None.
_unset = [name for name, value in _credentials.items() if not value]
if _unset:
    raise RuntimeError(
        "Missing API key(s): " + ", ".join(_unset)
        + ". Define them in the environment or in the .env file read by load_dotenv()."
    )

for _name, _value in _credentials.items():
    # Keep the original lowercase variable for backward compatibility ...
    os.environ[_name] = _value
    # ... and also set the uppercase name (PINECONE_API_KEY / GROQ_API_KEY),
    # which is the spelling the client libraries look up; environment variable
    # names are case-sensitive outside Windows.
    os.environ[_name.upper()] = _value

In [None]:
# Build a PineconeVectorStore from the text chunks, targeting the index named
# above and using the embedding object created earlier.
# NOTE(review): presumably this uploads every chunk each time the cell runs, so
# re-running may add duplicate vectors to the index — confirm before re-executing.
vector=PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embedding,
)

In [None]:
# Rebind `vector` to a store attached to the existing index, replacing the
# object returned by from_documents; no documents are passed in this call.
vector=PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding,
)

In [None]:
# Display the vector store object as the cell output.
vector

In [None]:
# Expose the vector store as a retriever: similarity search with k set to 5.
retrival = vector.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

In [None]:
# Run the retriever alone on a sample question; the result is kept in `Query`.
Query=retrival.invoke("what is the symptoms of ARDS?")

In [None]:
# Display what the retriever returned for the sample question.
Query

In [None]:
# Create the LangChain ChatGroq chat model, passing the Groq API key explicitly
# and selecting the "llama-3.3-70b-versatile" model.
chat_llm = ChatGroq(api_key=groq_api_key, model="llama-3.3-70b-versatile")

In [None]:
# System message for the RAG prompt. Adjacent string literals are concatenated
# verbatim, so every sentence must carry its own trailing space; "{context}" is
# a template placeholder.
system_prompt=(
    "you are a medical expert. You will be given a question and some context. "
    "You will answer the question based on the context provided. "
    "if you don't know the answers, say 'I don't know' and do not make up answers. "
    "use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)
# Build the chat prompt: the system message carries the instructions defined
# in `system_prompt`, and the human message is the "{input}" template slot.
prompt=ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [None]:



# Compose the pipeline: `chain` is built from the chat model and the prompt,
# and `rag_chain` combines the retriever with that chain.
chain = create_stuff_documents_chain(chat_llm, prompt)
rag_chain = create_retrieval_chain(retrival, chain)


In [None]:
# Run the full chain on a sample question and print the 'answer' entry of the
# returned result.
response = rag_chain.invoke({'input': "what is the symptoms of ARDS?"})
print(response['answer'])