In [3]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import os

  from tqdm.autonotebook import tqdm


In [4]:
def pdf_loder(data):
    loader=DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    doc=loader.load()
    return doc

In [5]:
extracted_data=pdf_loder('data/')

In [6]:
# Chunks
def chunks(ext_data):
    text=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    chunk=text.split_documents(ext_data)
    return chunk



In [7]:
text_chunk=chunks(extracted_data)
print(len(text_chunk))

10484


In [14]:
#embedding
def hf_embedding():
    embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedding_model

embedding=hf_embedding()

In [15]:
embedding_test=embedding.embed_query('Hello')
print('Length', len(embedding_test))

Length 384


In [8]:
from dotenv import load_dotenv

load_dotenv()
api_key=os.getenv('api_key')
index_name=os.getenv('index_name')

from pinecone import Pinecone

pc = Pinecone(api_key=api_key)
index = pc.Index(index_name)

In [11]:
load_dotenv()
api_key = os.getenv('api_key')
index_name = os.getenv('index_name')


pc = Pinecone(api_key=api_key, region='us-east-1')
index = pc.Index(index_name, region='us-east-1', host='https://medical-bot-tnswrkd.svc.aped-4627-b74a.pinecone.io')



for i, chunk in enumerate(text_chunk):
    embedding = hf_embedding().embed_query(chunk.page_content)
    index.upsert(vectors=[{'id': f'doc_chunk_{i}',
        'values': embedding,
        'metadata': {'text': chunk.page_content}
    }],namespace='doc1')

In [18]:
query='what is allergies'
query=embedding.embed_query(query)
index.query(
    namespace="doc1",
    vector=query,
    top_k=2,
    include_values=True,
    include_metadata=True)

{'matches': [{'id': 'doc_chunk_3256',
              'metadata': {'text': 'GALE ENCYCLOPEDIA OF MEDICINE 2 '
                                   '2591Physical allergy'},
              'score': 0.704280496,
              'values': [0.00337857287,
                         -0.057141,
                         -0.000467860751,
                         -0.00216279598,
                         0.0150210867,
                         0.0235109683,
                         0.0677552,
                         0.129873246,
                         -0.119194411,
                         0.0408555977,
                         0.033427503,
                         -0.0460601598,
                         0.0333798379,
                         0.0573912784,
                         -0.0537969433,
                         0.0917580724,
                         0.00468751322,
                         -0.0339851454,
                         0.0196098406,
                         -0.0241118707,
             

In [48]:
prompt_temp="""
use the following pieces of information to answer the result, if answer is out of your knowledge just say out of my knowledge
please don't try to make up answer.

Context:{context}
Question: {question}

please return helpful results.
"""

In [49]:
prompt=PromptTemplate(template=prompt_temp, input_variables=['context', 'question'])
chain_type_kwargs={'prompt':prompt}

In [50]:
llm=CTransformers(model='model\llama-2-7b-chat.ggmlv3.q4_0.bin',
              model_type='llama',
              config={'max_new_tokens':512,
                      'temperature':0.8})

In [62]:
from langchain.chains import LLMChain

def query(query):
    query_embedding = embedding.embed_query(query)
    response = index.query(namespace="doc1", vector=query_embedding, top_k=2, include_values=True, include_metadata=True)
    
    # Extract context from query results
    contexts = [match['metadata']['text'] for match in response['matches']]
    context_text = " ".join(contexts)


    prompt = PromptTemplate(template=prompt_temp, input_variables=['context', 'question'])

    chain = LLMChain(llm=llm, prompt=prompt)

    answer = chain.run(context=context_text, question=query)

    print(answer)


In [63]:
query('what is allergies?')


Please provide your answer based on the given information.


In [95]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

def query(query_text):
    try:
        query_embedding = embedding.embed_query(query_text)
        response = index.query(namespace="doc1", vector=query_embedding, top_k=2, include_values=True, include_metadata=True)
        
        # Extract context from query results
        contexts = [match['metadata']['text'] for match in response['matches']]
        context_text = " ".join(contexts)

        prompt = PromptTemplate(template=prompt_temp, input_variables=['context', 'question'])

        chain = LLMChain(llm=llm, prompt=prompt)

        answer = chain.run(context=context_text, question=query_text)

        print(answer)

        return answer
    except Exception as e:
        return f"An error occurred: {e}"

# Example usage
print(query('What is allergies?'))



Answer: Out of my knowledge.

Answer: Out of my knowledge.
