In [2]:
# NOTE(review): a bare `print` only echoes the built-in's repr; this looks like
# a leftover scratch cell and can probably be deleted.
print

<function print>

In [1]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate

  from tqdm.autonotebook import tqdm


In [3]:
# Pinecone credentials come from the environment so the secret never lives in
# the notebook source, its saved outputs, or version control.
# Set PINECONE_API_KEY (and optionally PINECONE_API_ENV) before starting the kernel.
import os
import warnings

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")
# Falls back to the environment name this notebook was originally written for.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "gcp-starter")

if not PINECONE_API_KEY:
    # Warn now rather than let the Pinecone client fail later with a less obvious error.
    warnings.warn("PINECONE_API_KEY is not set; Pinecone calls will not authenticate.")

In [4]:
# Extract data from pdf

def load_pdf(data):
    """Load the PDF files found under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for files matching
        ``*.pdf``, each parsed with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [5]:
import os

# Step up to the parent directory so the relative paths used by later cells
# (e.g. "Dataset/") resolve. The guard makes the cell idempotent: a plain
# os.chdir("../") climbs one more level every time the cell is re-run.
# NOTE(review): assumes the target directory is the one containing "Dataset" - confirm.
if not os.path.isdir("Dataset"):
    os.chdir("../")

In [6]:
# Load the PDFs from the relative "Dataset/" directory (resolved against the
# current working directory).
extracted_data = load_pdf("Dataset/")

In [7]:
#extracted_data

In [8]:
# Create text chunks for extracted data

def chunk_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 500,
            the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                              chunk_overlap=chunk_overlap)
    return splitter.split_documents(extracted_data)

In [9]:
# Split the loaded documents into chunks and display how many were produced.
text_chunks = chunk_split(extracted_data)
len(text_chunks)

7020

The extracted data was split into 7020 text chunks. Next, we embed the chunks to convert the text into vectors.

In [10]:
# Download_embedding model

def Download_embedding_model():
    """Create the embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )



In [11]:
# Instantiate the embedding model once; later cells reuse this object.
embedding_model = Download_embedding_model()

In [12]:
# Display the model's repr to inspect its configuration.
embedding_model

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [13]:
# Sanity check: embed a short query and display the length of the returned vector.
result = embedding_model.embed_query("chat bot")
len(result)

384

In [14]:
# Initialise the Pinecone client with the credentials defined earlier.
pinecone.init(api_key=PINECONE_API_KEY,
              environment=PINECONE_API_ENV)
index_name = "medical-chatbot"

# Embed every chunk and store it in the index.
# from_documents forwards each chunk's metadata together with its text. The
# previous from_texts call sent only page_content, so retrieved documents came
# back with metadata={} and could not be traced to their source.
# NOTE(review): this uploads all chunks on every run; if the index is already
# populated, prefer Pinecone.from_existing_index to avoid re-uploading.
docsearch = Pinecone.from_documents(text_chunks,
                                    embedding=embedding_model,
                                    index_name=index_name)


KeyboardInterrupt: 

In [None]:
# If the index already exists, we can load it like this (no chunks are uploaded here).
index_name = "medical-chatbot"
docsearch = Pinecone.from_existing_index(index_name=index_name, embedding=embedding_model)

In [15]:


# Example query used to check retrieval from the vector store.
query= "symptoms of cancer"

# Ask the store for the k=3 stored chunks most similar to the query.
doc = docsearch.similarity_search(query,k=3)

print("Result" , doc)

Result [Document(page_content='• nausea and vomiting or sudden attacks of vomiting not\naccompanied by nausea\n• seizures• dizziness, loss of coordination or balance• personality changes• sudden loss of vision• memory loss• speech problems• sensory changes• mental impairment• weakness or paralysis on one side of the body\nA doctor should be notified whenever a patient expe-\nriences one or more of the symptoms.\nDiagnosis\nAlthough brain tumor symptoms resemble those of', metadata={}), Document(page_content='Most individuals with anal cancer are over the age of 50.\nSymptoms of anal cancer resemble those found in\nother harmless conditions. They include pain ,itching\nand bleeding, straining during a bowel movement,change in bowel habits, change in the diameter of thestool, discharge from the anus, and swollen lymph nodesin the anal or groin area.\nDiagnosis\nAnal cancer is sometimes diagnosed during routine', metadata={}), Document(page_content='many other illnesses, the presence of a

In [16]:
# Prompt text for the question-answering step. It contains two placeholders,
# {context} and {question}, and instructs the model not to invent answers.
template = """
Use the following pieces of information to answer the user's question.
if you dont't know the answer, just say i don't have knowledge about it, don't try to make up an answer

Context : {context}
Question: {question}

Only return the helpful answer below and nothing else.
Answer :
"""

In [17]:
# Wrap the template string in a PromptTemplate that declares its two input variables.
Prompt = PromptTemplate(template=template,
                        input_variables=["context","question"])
# Keyword arguments for the chain: supply the custom prompt under the "prompt" key.
chain_type_kwargs = {"prompt": Prompt}

In [18]:
# Model llama2 Loading 
# Load the local Llama-2 chat model file through CTransformers.
# The path uses "/" instead of "\": in a normal string literal "\l" is an
# invalid escape sequence (newer Python versions warn about it), and a
# backslash is not a path separator outside Windows. A forward slash works on
# both Windows and POSIX.
llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config={'max_new_tokens':512,
                            'temperature':0.8})

In [19]:
# Build the retrieval QA chain: the retriever is asked for k=2 documents per
# query, the chain uses the 'stuff' chain type with the custom prompt, and the
# source documents are returned alongside the answer.
QnA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever = docsearch.as_retriever(search_kwargs={'k':2}),
    return_source_documents = True,
    chain_type_kwargs=chain_type_kwargs
)

In [20]:
# User face
# Interactive question/answer loop.
# Type "exit" or "quit" (or send EOF / interrupt while being prompted) to leave
# the loop cleanly; previously the only way out was to interrupt the kernel.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        break

    command = user_input.strip().lower()
    if command in {"exit", "quit"}:
        break
    if not command:
        # Nothing typed: prompt again instead of sending an empty query to the chain.
        continue

    result = QnA({"query":user_input})
    print("Response:", result['result'])

: 