In [1]:
import os
os.chdir("../")

In [2]:
%pwd

'c:\\Users\\Neelesh\\Desktop\\Amogh\\Medical Chatbot\\Medical_Chatbot'

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

#Extract Data From the PDF File
def load_pdf_file(data):
    loader= DirectoryLoader(data,
                            glob = "*.pdf",
                            loader_cls = PyPDFLoader)

    documents = loader.load()

    return documents

extracted_data = load_pdf_file(data='Data/')

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

#Split the Data into Text Chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

text_chunks=text_split(extracted_data)

In [6]:
text_chunks=text_split(extracted_data)
print("Length of Text Chunks", len(text_chunks))

Length of Text Chunks 5860


In [7]:
from langchain.embeddings import HuggingFaceEmbeddings

#Download the Embeddings from Hugging Face
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

embeddings = download_hugging_face_embeddings()

  embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')


In [8]:
from dotenv import load_dotenv
load_dotenv()

True

In [9]:
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
HF_TOKEN = os.environ.get('HF_TOKEN')

In [11]:
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [12]:
os.environ["HF_TOKEN"] = HF_TOKEN

In [13]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "medical-chatbot"


pc.create_index(
    name=index_name,
    dimension=384, 
    metric="cosine", 
    spec=ServerlessSpec(
        cloud="aws", 
        region="us-east-1"
    ) 
) 

In [14]:
# Embed each chunk and upsert the embeddings into your Pinecone index.
from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore.from_documents(
    documents = text_chunks,
    index_name = index_name,
    embedding = embeddings, 
)

In [15]:
from langchain_huggingface import HuggingFaceEndpoint

HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"

def load_llm(huggingface_repo_id):
    llm = HuggingFaceEndpoint(
        repo_id = huggingface_repo_id,
        temperature = 0.5,
        task = "text-generation",
        model_kwargs = {"token":HF_TOKEN,
                      "max_length":"512"}
    )
    return llm

In [16]:
from langchain_core.prompts import PromptTemplate

CUSTOM_PROMPT_TEMPLATE = """
Use the pieces of information provided in the context to answer user's question.
If you dont know the answer, just say that you dont know, dont try to make up an answer. 
Dont provide anything out of the given context and dont use the word fuck

Context: {context}
Question: {question}

Start the answer directly. No small talk please.
"""

def set_custom_prompt(custom_prompt_template):
    prompt = PromptTemplate(template=custom_prompt_template, input_variables = ["context", "question"])
    return prompt

In [17]:
from langchain.chains import RetrievalQA

# Create QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm = load_llm(HUGGINGFACE_REPO_ID),
    chain_type = "stuff",
    retriever = docsearch.as_retriever(search_type = "similarity", search_kwargs = {"k":3}),
    return_source_documents = True,
    chain_type_kwargs = {'prompt':set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [18]:
# Now invoke with a single query
user_query = input("How can I help you?: ")

In [19]:
response = qa_chain.invoke({'query': user_query})
print("\nAnswer: ", response["result"])


Answer:  
Acne is a common skin disease characterized by pimples on the face, chest, and back. It occurs when the pores of the skin become clogged with oil, dead skin cells, and bacteria.
