In [None]:

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter  # To convert them into chunks
from langchain.llms import CTransformers  # For quantized models
import os
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate

In [None]:

from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
import pinecone
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Credentials are read from the environment so that no secret is stored in the
# notebook (or in its version history).
# NOTE(review): the key that used to be hardcoded in this cell must be treated
# as leaked and rotated in the Pinecone console.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")

# Environment name; can be overridden via the environment and otherwise keeps
# the value this notebook has always used.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "gcp-starter")

if not PINECONE_API_KEY:
    # Warn early with an actionable message instead of failing later with an
    # opaque authentication error from the Pinecone cells.
    print("WARNING: PINECONE_API_KEY is not set; export it before running the Pinecone cells.")

In [None]:
# Extracting the data

def load_pdf(data):
    """Load every PDF that sits directly inside a directory.

    Args:
        data: Path of the directory to scan; only entries matching ``*.pdf``
            are handed to ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory
        (the loaded documents).
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()



In [None]:
# Load the source PDFs from the local "Data/" folder.
data_extract = load_pdf("Data/")

In [None]:
# Text Chunks

def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf``).
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value this
            notebook has always used.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 20.

    Returns:
        The chunks produced by ``split_documents`` for ``extracted_data``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(extracted_data)


In [None]:
# Break the loaded documents into chunks that will be embedded and indexed.
text_chunks = text_split(data_extract)

In [None]:
# Embedding model

def download_embedding():
    """Create the sentence-embedding wrapper used for indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Instantiate the embedding model once; later cells reuse this object.
embeddings = download_embedding()

In [None]:
# Sanity check: display the length of the vector produced for a sample query.
# NOTE(review): this should equal the dimension the Pinecone index was created
# with (presumably 384 for all-MiniLM-L6-v2 — confirm).
len(embeddings.embed_query("Hello World"))

In [None]:
# Expose the key through the environment; no key is passed explicitly below,
# so the vector store presumably picks it up from there (confirm).
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

index_name = "medicalchatbot"

# Embed every chunk and upload it to the named Pinecone index.
docsearch = PineconeVectorStore.from_documents(
    text_chunks,
    embeddings,
    index_name=index_name,
)


In [None]:
# The client only needs credentials. A ServerlessSpec describes where a *new*
# index is created and is an argument of index creation, not of the client
# constructor; it also has no `environment` field, so the previous call failed
# before the client was ever built.
# `pinecone.Pinecone` is spelled out because the bare name `Pinecone` is also
# imported from langchain.vectorstores in the import cells.
pinecone_client = pinecone.Pinecone(api_key=PINECONE_API_KEY)


In [None]:
# Reconnect to the index that was populated by `from_documents` above rather
# than uploading again. The previous version of this cell read `docs`, which is
# only defined further down (so it failed on a fresh kernel), and called
# `from_texts` on the name `Pinecone`, which by this point refers to the
# Pinecone client class rather than a LangChain vector store.
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

In [None]:
# Display the vector store object to confirm it was created.
docsearch

In [None]:
# Retrieval smoke test: run one query against the vector store.

# Alternative (presumably reconnects to an already-populated index by name):
# docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

# Ask for the single closest chunk (k=1); `docs` holds the search result.
query = "What are Allergies"
docs = docsearch.similarity_search(query, k=1)

In [None]:
# proper_lines = []
# for doc in docs:
#     page_content = doc.page_content.strip()  # Remove leading and trailing whitespaces
#     proper_lines.append(page_content)

# # Print the proper lines
# for line in proper_lines:
#     print(line)

In [None]:

# Prompt text for the QA chain. `{context}` and `{question}` are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
# Wrap the raw template text and declare the two variables it expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Extra keyword arguments for the chain: use the custom prompt.
chain_type_kwargs = {"prompt": PROMPT}

In [None]:
# Local quantized Llama-2 chat model loaded through CTransformers.
llm = CTransformers(
    model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={"max_new_tokens": 512, "temperature": 0.8},
)

In [None]:
# Retrieval-augmented QA chain: the retriever fetches chunks from the vector
# store and the "stuff" chain type places them in the prompt's {context}.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    # Fixed keyword: the field is `return_source_documents` (plural). The
    # misspelled singular form was forwarded to the chain constructor as an
    # unknown field.
    return_source_documents=True,
    # Reuse the kwargs built alongside PROMPT instead of rebuilding the dict.
    chain_type_kwargs=chain_type_kwargs,
)
    

In [None]:
# Interactive question loop. Submit an empty line, "exit" or "quit" (or
# interrupt the kernel) to stop; previously the loop had no way to end, which
# also made Restart & Run All hang on this cell.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        # No more input available or the user interrupted: leave cleanly.
        break

    command = user_input.strip().lower()
    if command in ("", "exit", "quit"):
        break

    result = qa_chain({"query": user_input})
    print("Response : ", result["result"])