In [1]:
import numpy as numpy
import pandas as pd
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv

load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")


In [3]:
def load_pdf(path):
    loader = PyPDFLoader(path)
    documents = loader.load()
    return documents

def split_data(input_docs):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
    final_documents = splitter.split_documents(input_docs)
    return final_documents

def get_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
    return embeddings





In [4]:
input_data = load_pdf("../data/Medical_book.pdf")
final_documents = split_data(input_data)
embeddings = get_huggingface_embeddings()

  from .autonotebook import tqdm as notebook_tqdm


# Create Vectorstore Embeddings using Pinecone

In [None]:
from langchain_pinecone import PineconeVectorStore,Pinecone

docstore = PineconeVectorStore.from_documents(documents=final_documents,embedding=embeddings,index_name="medical-chatbot",
                                              pinecone_api_key=PINECONE_API_KEY)



In [33]:
retriever = docstore.as_retriever(search_type='similarity',search_kwargs={"k":5})

In [34]:
retriever.invoke("acne?")

[Document(id='b3d5b019-a2c9-4e5b-90ad-3297fd30952c', metadata={'page': 38.0, 'source': '../data/Medical_book.pdf'}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 25Acne\nAcne vulgaris affecting a woman’s face. Acne is the general\nname given to a skin disorder in which the sebaceousglands become inflamed. (Photograph by Biophoto Associ-\nates, Photo Researchers, Inc. Reproduced by permission.)GEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25'),
 Document(id='c0d4ebda-f559-40b8-b059-fd39595a8781', metadata={'page': 239.0, 'source': '../data/Medical_book.pdf'}, page_content='ent purposes. For example, lotions, soaps, gels, andcreams containing benzoyl peroxide or tretinoin may beused to clear up mild to moderately severe acne.Isotretinoin (Accutane) is prescribed only for verysevere, disfiguring acne.\nAcne is a skin condition that occurs when pores or'),
 Document(id='25f6e71c-447a-4606-8700-aa689566f9b7', metadata={'page': 40.0, 'source': '../data/Medical_book.pdf'}, page_content='Clif

In [None]:
from langchain_openai import OpenAI

llm = OpenAI(model="gtp4-0",temperature=0.1,max_tokens=500,api_key=OPENAI_API_KEY)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [21]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer the question."
            "If you don't know the answer, just say that you don't know."
            "Use three sentences maximum and keep the answer concise."
            "\n\n"
            "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)

In [22]:
question_answer_chain = create_stuff_documents_chain(llm,prompt)
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.If you don't know the answer, just say that you don't know.Use three sentences maximum and keep the answer concise.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])

In [20]:
system_prompt

"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.If you don't know the answer, just say that you don't know.Use three sentences maximum and keep the answer concise.\n\n{context}"