# Tests

In [2]:
# Show the kernel's current working directory before it is changed below.
%pwd

'/workspaces/medical_chatbot/research'

In [3]:
import os

# The kernel starts in research/; switch to the repository root so that
# relative paths such as "data/" used later resolve from there.
PROJECT_ROOT = "/workspaces/medical_chatbot"
os.chdir(PROJECT_ROOT)
os.getcwd()

'/workspaces/medical_chatbot'

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def load_pdf_file(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Only files matching the glob
            ``*.pdf`` are selected.

    Returns:
        The result of ``DirectoryLoader.load()`` for the selected files,
        each of which is read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [6]:
# Load the PDFs under data/ (relative to the current working directory).
extracter_data=load_pdf_file(data="data/")

In [7]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split, e.g. the value returned by
            ``load_pdf_file``.
        chunk_size: Upper bound on the size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: Amount of overlap between consecutive chunks,
            forwarded to ``RecursiveCharacterTextSplitter``. Defaults to 20,
            the value that was previously hard-coded.

    Returns:
        The chunks returned by ``split_documents`` for ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk the loaded documents and display how many chunks were produced.
chunks=text_split(extracter_data)
len(chunks)

5860

In [25]:
from langchain.embeddings import HuggingFaceEmbeddings
def download_hugging_face_embeddings():
    """Build the embedding model used for the text chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [31]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so that no
# credential has to be written into the notebook itself.
load_dotenv()

# HF_TOKEN is read here but not referenced by the other cells shown in this
# notebook, so it is allowed to be unset.
HF_TOKEN=os.getenv('HF_TOKEN')
PINECONE_TOKEN=os.getenv('PINECONE_TOKEN')

# Fail fast with an actionable message: os.getenv returns None for a missing
# variable, which would otherwise only surface later as a TypeError when the
# value is assigned into os.environ.
if not PINECONE_TOKEN:
    raise RuntimeError(
        "PINECONE_TOKEN is not set; add it to your .env file or environment."
    )

In [29]:
# NOTE(review): sentence_transformers is not referenced directly in this cell;
# presumably it is imported to confirm the package is installed before the
# embedding model is built — confirm.
import sentence_transformers
embeddings=download_hugging_face_embeddings()

In [32]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
import os

# Pinecone client authenticated with the token loaded from the environment.
pc=Pinecone(api_key=PINECONE_TOKEN)

index_name = "testify"

# create_index fails when an index with this name already exists, which
# breaks re-running the notebook; only create the index when it is absent.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384, # must equal the embedding size of the model used (all-MiniLM-L6-v2 -> 384)
        metric="cosine", # similarity metric used by the index
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [33]:
# Expose the key under the name PINECONE_API_KEY as well.
# NOTE(review): presumably this is the variable the LangChain Pinecone
# vector store used below reads — confirm.
os.environ["PINECONE_API_KEY"]=PINECONE_TOKEN

In [37]:
# NOTE: this import rebinds the name `Pinecone`. The index-creation cell
# bound it to the Pinecone gRPC client; from here on it refers to the
# LangChain vector store class. `pinecone` itself is not referenced in the
# cells shown here.
import pinecone
from langchain.vectorstores import Pinecone

# Build the vector store from the chunks, using `embeddings` as the embedding
# model and the index named by `index_name`.
docsearch = Pinecone.from_documents(
    documents=chunks,  # chunks returned by text_split
    embedding=embeddings,
    index_name=index_name
)

In [38]:
# Attach to the index by name only (no documents are passed here), rebinding
# `docsearch` to a vector store backed by the existing index.
docsearch=Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)

In [39]:
# Similarity-search retriever over the vector store, configured with k=3
# (presumably the number of documents returned per query — confirm).
retriever=docsearch.as_retriever(search_type='similarity',search_kwargs={"k":3})

In [40]:
# Sanity check: query the retriever directly and display the documents it returns.
retrieved_docs=retriever.invoke("What is Fever?")
retrieved_docs

[Document(metadata={'page': 69.0, 'source': 'data/Medical_book.pdf'}, page_content='fever in children. This disease is most often caused by\ntypes 3 and 7. Symptoms, which appear suddenly and\nusually disappear in less than a week, include:\n• inflammation of the lining of the eyelid (conjunctivitis)\n•f e v e r\n• sore throat (pharyngitis)\n• runny nose\n• inflammation of lymph glands in the neck (cervical\nadenitis)\nGALE ENCYCLOPEDIA OF MEDICINE 256\nAdenovirus infections\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 56'),
 Document(metadata={'page': 60.0, 'source': 'data/Medical_book.pdf'}, page_content='(38°–40°C). In addition, a general ill feeling, muscle\naches,headache, chills, and loss of appetite may be felt.\nDiagnosis\nIf lymphangitis is suspected, the person should call\nhis or her doctor immediately or go to an emergency\nroom. Acute lymphangitis could be diagnosed by the\nfamily doctor, infectious disease specialist, or an emer-\ngency room doctor. The painful, red st

In [41]:
from langchain_groq import ChatGroq
# The Groq API key is read from the environment rather than hard-coded.
groq_api_key=os.getenv("GROQ_API_KEY")

# Chat model that generates the answers in the chain built below.
llm=ChatGroq(groq_api_key=groq_api_key, model_name="Llama3-8b-8192")

In [42]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the model. The {context} placeholder here and the
# {input} placeholder in the human message are filled in when the chain runs.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions followed by the user's question.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [43]:
# Combine the LLM and the prompt into a question-answering chain over
# documents, then put the retriever in front of it to form the RAG chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [44]:
# Run the full chain on a sample question and print only the "answer" field
# of the response.
response = rag_chain.invoke({"input": "What is Fever?"})
print(response["answer"])

Fever is one of the symptoms listed in the text as a symptom of adenovirus infections in children, characterized by an elevated body temperature (usually above 38°C to 40°C).
