In [1]:
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment;
# PINECONE_API_KEY and GROQ_API_KEY are read from the environment further down.
load_dotenv()

True

# Extract data from PDF

In [3]:
def load_pdf(data):
    """Load every PDF matched by ``*.pdf`` under the directory ``data``.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields when each matched file is
        read with ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader).load()

In [None]:
# Read all PDFs from the data directory one level above the notebook.
extracted_text = load_pdf(data='../data/')

# Create chunks

In [5]:
def text_split(extracted_text, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller overlapping chunks.

    Args:
        extracted_text: The documents to split (passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``).
        chunk_size: Maximum chunk size given to the splitter. Defaults to 500,
            the value this notebook previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks. Defaults to 20,
            the value this notebook previously hard-coded.

    Returns:
        The list of chunks produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_text)

In [6]:
# Break the loaded PDF documents into chunks for embedding.
text_chunks = text_split(extracted_text=extracted_text)

In [7]:
# Number of chunks produced by the splitter (sanity check before indexing).
len(text_chunks)

7020

# Download Embeddings

In [5]:
def download_hugging_face_embeddings():
    """Build the embedding model used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [6]:
# Instantiate the embedding model once; it is reused for indexing and querying.
embeddings = download_hugging_face_embeddings()

In [5]:
# Display the model configuration (the repr reports the embedding dimension).
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

# Index chunks in Pinecone

In [7]:
# Connect to the Pinecone index by name. The name string goes in `index_name`;
# `index` is meant for an already-constructed Pinecone Index object, so passing
# the string there would leave the store without a usable index handle.
# os.getenv returns None when PINECONE_API_KEY is unset.
vectorstore = PineconeVectorStore(
    index_name='mchatbot',
    pinecone_api_key=os.getenv("PINECONE_API_KEY"),
    embedding=embeddings,
)

In [93]:
# Embed each chunk's text and write it to the 'mchatbot' index.
# from_texts is a classmethod, so it is invoked on the class itself; this cell
# then does not depend on any previously built store instance.
# Only page_content is sent, so per-chunk metadata is not stored.
# NOTE(review): re-running this cell presumably uploads the chunks again —
# confirm whether duplicates are possible before executing it more than once.
docsearch = PineconeVectorStore.from_texts(
    texts=[chunk.page_content for chunk in text_chunks],
    embedding=embeddings,
    index_name='mchatbot',
)

# Load from existing vector database

In [7]:
# Attach to the existing 'mchatbot' index; this rebinds `docsearch`.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name="mchatbot",
)

In [8]:
# Retrieval sanity check: fetch the 3 closest chunks for a sample question.
query = "Medicine for cough"
docs = docsearch.similarity_search(query=query, k=3)

In [9]:
# Show the retrieved Document objects to eyeball their relevance to the query.
docs

[Document(page_content='sants, do not stop the cough. Instead they are used to thinthe mucus in the lungs, making it easier to cough up. Thistype of cough medicine may be helpful to individuals suf-fering from bronchitis. People who are unsure about whatKEY TERMS\nAcute —Disease or condition characterized by the\nrapid onset of severe symptoms.\nBronchi —The larger air tubes of the lung that\nbring air in from the trachea.\nChronic —Disease or condition characterized by\nslow onset over a long period of time.'),
 Document(page_content='Home care includes drinking plenty of fluids, resting, notsmoking, increasing moisture in the air with a cool misthumidifier, and taking acetaminophen (Datril, Tylenol,\nPanadol) for fever and pain . Aspirin should not be given\nto children because of its association with the serious ill-ness, Reye’s syndrome .\nCough suppressants are used only when the cough'),
 Document(page_content='serious if it progresses to pneumonia, therefore, antibi-otics may be

In [10]:
# Prompt text for the answer-generation step. It has two placeholders:
# {input} matches the key passed to the retrieval chain's invoke() below, and
# {context} presumably receives the retrieved documents — verify against the
# chain's defaults. The model is told to admit when it does not know.
prompt_template = """
Use the following pieces of information to answer user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: 
<context>
{context}
</context>
Question: {input}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [11]:
# Indexing os.environ raises KeyError immediately if the Groq key is missing.
groq_api_key = os.environ['GROQ_API_KEY']
# NOTE(review): confirm that 'mixtral-8x7b-32768' is still served by Groq.
llm = ChatGroq(
    model='mixtral-8x7b-32768',
    groq_api_key=groq_api_key,
)

In [12]:
from langchain_core.prompts import ChatPromptTemplate

# Turn the raw template string into a chat prompt object for the chain.
prompt = ChatPromptTemplate.from_template(template=prompt_template)

In [13]:
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain

In [14]:
# Chain that feeds a set of documents plus the question through the prompt to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [15]:
# Wrap the vector store as a retriever. No search arguments are passed, so the
# library defaults apply (not the k=3 used in the manual search above).
retriever = docsearch.as_retriever()

In [16]:
from langchain.chains import create_retrieval_chain

# Full RAG pipeline: retrieve documents for the input, then answer from them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [17]:
# Run the whole pipeline on a sample question; the question goes under "input".
question_payload = {"input": "medicine for cough"}
response = retrieval_chain.invoke(question_payload)

In [18]:
# Inspect the raw chain output: the original input, the retrieved context
# documents, and the generated answer.
response

{'input': 'medicine for cough',
 'context': [Document(page_content='sants, do not stop the cough. Instead they are used to thinthe mucus in the lungs, making it easier to cough up. Thistype of cough medicine may be helpful to individuals suf-fering from bronchitis. People who are unsure about whatKEY TERMS\nAcute —Disease or condition characterized by the\nrapid onset of severe symptoms.\nBronchi —The larger air tubes of the lung that\nbring air in from the trachea.\nChronic —Disease or condition characterized by\nslow onset over a long period of time.'),
  Document(page_content='Home care includes drinking plenty of fluids, resting, notsmoking, increasing moisture in the air with a cool misthumidifier, and taking acetaminophen (Datril, Tylenol,\nPanadol) for fever and pain . Aspirin should not be given\nto children because of its association with the serious ill-ness, Reye’s syndrome .\nCough suppressants are used only when the cough'),
  Document(page_content='serious if it progresse

In [19]:
# Print only the generated answer text, without the surrounding dict.
print(response['answer'])

For a bronchitis-related cough, consider using a medicine that thins the mucus in the lungs instead of stopping the cough. Home care remedies include drinking plenty of fluids, resting, avoiding smoking, using a cool mist humidifier, and taking acetaminophen for fever and pain. Although cough suppressants are typically used only for severe coughs, homeopathic medicine, traditional Chinese medicine, and certain herbal teas (such as mullein, coltsfoot, and anise seed) may provide relief. Additionally, diphenhydramine (an example of a first-generation antihistamine) can help with cough control, especially at night. Consult detailed references for more information.
