In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

### Defining some helper functions
These will be transferred to utils.py.

In [3]:
## Load all PDF files from a directory
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan for PDFs.

    Returns:
        The documents produced by ``DirectoryLoader.load()``.
    """
    # "*.pdf" is a glob pattern (not a regex) selecting which files are
    # picked up; every matching file is parsed with PyPDFLoader.
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

## Function to split the loaded documents into small, overlapping text chunks
def text_split(loaded_data, chunk_size=200, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    The splitting is done by ``RecursiveCharacterTextSplitter``, i.e. it is
    driven by chunk length and separators, not by semantic similarity.

    Args:
        loaded_data: Documents to split (e.g. the result of ``load_pdf``).
        chunk_size: Maximum size of each chunk, forwarded to the splitter.
            Defaults to 200, the value this notebook has always used.
        chunk_overlap: Amount shared between consecutive chunks, forwarded
            to the splitter. Defaults to 20.

    Returns:
        The chunks returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(loaded_data)

## Build the Hugging Face embedding model used by this notebook
def download_hugging_face_embeddings_model():
    """Create the all-MiniLM-L6-v2 sentence-embedding model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


#### Loading and making chunks of input data

In [None]:
# Load every PDF from the data_source/ directory.
extracted_data = load_pdf("data_source/")

# Split the loaded documents into chunks; `text_chunks` is what gets embedded
# and stored in the vector index further down.
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

In [9]:
# Build the embedding model; the progress bars in the output below show the
# model files being fetched.
embeddings = download_hugging_face_embeddings_model()
# Bare expression so the notebook displays the object's repr.
embeddings

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [10]:
# Sanity check: embed a short string and look at the length of the vector.
# This length is the dimension the vector index must be created with.
query_result = embeddings.embed_query("Hello world")
print("Length : ", len(query_result))


Length 384


In [12]:
# Show the container type of the embedding and its first three components.
print(type(query_result))
print("Embedding looks like this : ", query_result[:3], ".....")

<class 'list'>
Embedding looks like this :  [-0.03447727486491203, 0.031023181974887848, 0.006734966300427914] .....


In [13]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# credentials never have to be written into the notebook.
load_dotenv()

# os.environ.get returns None (instead of raising) when a variable is not set.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV')

In [None]:
import os
import time
from pinecone import Pinecone, ServerlessSpec

# Client for the Pinecone service; the API key is read from the environment.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

index_name = "medical-chatbot"

# The index dimension must equal the length of the vectors that will be stored.
# all-MiniLM-L6-v2 produces 384-dimensional embeddings (see the embed_query
# check above), so 1536 would make every upsert fail.
# NOTE: if an index with this name was already created with another dimension,
# delete it first; an existing index is not modified here.
EMBEDDING_DIMENSION = 384

## If index with this name is not present then create it.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        # ServerlessSpec needs both the cloud provider and the region.
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

    # A freshly created index is not usable immediately; poll until Pinecone
    # reports it as ready before anything is written to it.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)


## Store the vectors in a dedicated namespace inside the index (see the Pinecone documentation on namespaces).
import time
from langchain_pinecone import PineconeVectorStore

namespace = "wondervector5000"

## Embed every chunk with `embeddings` and store the vectors under `namespace` in the index.
## The returned vector store is later turned into the retriever used by the QA chain.
docsearch = PineconeVectorStore.from_documents(
    documents = text_chunks,
    index_name = index_name,
    embedding = embeddings, 
    namespace = namespace 
)

# Short pause after the upload — presumably to give Pinecone a moment to make
# the new vectors searchable; TODO confirm whether this is still needed.
time.sleep(1)

## Prompt text sent to the LLM. {context} and {question} are placeholders that are
## filled in for every query; the instructions tell the model to admit when it does not know.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up any answer.

The Context is : {context}
The Question is : {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
## Wrap the text in a PromptTemplate, declaring the two placeholders it expects.
medical_prompt_template=PromptTemplate(input_variables=["context", "question"], template=prompt_template)


## The LLM is loaded from a local file in the model/ folder (a .bin file downloaded from Hugging Face).
## The CTransformers wrapper is used to load it. Any other LLM (Hugging Face, OpenAI, ...) could be
## used instead; here it is the Meta Llama-2-7B chat model.
## config: at most 512 new tokens are generated per answer, with a sampling temperature of 0.75.
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.75})


# Extra arguments for the chain: use the custom medical prompt defined above.
chain_type_kwargs={"prompt": medical_prompt_template}


## Initializing the RetrievalQA chain. Note the retriever's search_kwargs: 'k' is the number of
## nearest-neighbour chunks returned by the semantic search, i.e. the 'k' chunks from the index
## namespace that are most relevant to the question.
## return_source_documents=True requests the retrieved chunks alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    chain_type_kwargs=chain_type_kwargs,
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True
    )

## `qa` is what gets called whenever the user asks a question. Storing the chunks in the index was a
## one-time effort; from now on the semantic search capability of the vector store retrieves the 'k'
## most relevant chunks, which are passed to the LLM as context.


In [None]:
## How to query a namespace of a particular index directly (without LangChain)
# Read the API key from the environment instead of hard-coding it in the notebook.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pc.Index(index_name)

# Pinecone expects the query vector as a list of floats, so the question has to
# be embedded with the same model that was used to build the index.
sample_question = "What are the symptoms of diabetes?"
question_vector = embeddings.embed_query(sample_question)

# Return the 3 closest vectors (including their values) from the namespace
# that the chunks were stored in.
index.query(
    namespace=namespace,
    vector=question_vector,
    top_k=3,
    include_values=True
)
