In [3]:
import streamlit as st
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
import pinecone
from pinecone import Pinecone, ServerlessSpec
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import os

In [2]:


# Change working directory
os.chdir('c:/Users/gt260/End-to-end-Medical-Chatbot-using-Llama2')

# Initialize Pinecone
PINECONE_API_KEY = "122f095f-ea22-497c-99a0-2994b23f687d"
index_name = "medical-chatbot"
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

# Functions to load data and create embeddings
@st.cache(allow_output_mutation=True)
def load_pdf(data):
    loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    return documents

@st.cache(allow_output_mutation=True)
def download_hugging_face_embeddings():
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

@st.cache(allow_output_mutation=True)
def initialize_pinecone():
    pc = Pinecone(api_key=PINECONE_API_KEY)
    index = pc.Index(index_name)
    return index

# def text_split(extracted_data):
#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
#     text_chunks = text_splitter.split_documents(extracted_data)
#     return text_chunks

def create_vector_store(text_chunks, embeddings):
    docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embedding=embeddings, index_name=index_name)
    docsearch.add_texts([t.page_content for t in text_chunks])
    return docsearch

def setup_retrieval_qa(docsearch, embeddings):
    prompt_template = """
    Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.

    Context: {context}
    Question: {question}

    Only return the helpful answer below and nothing else.
    Helpful answer:
    """
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain_type_kwargs = {"prompt": PROMPT}

    llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                        model_type="llama",
                        config={'max_new_tokens': 512, 'temperature': 0.8})

    qa = RetrievalQA.from_chain_type(
        llm=llm, 
        chain_type="stuff", 
        retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True, 
        chain_type_kwargs=chain_type_kwargs
    )
    return qa

# Streamlit UI
st.title("Medical Chatbot using LLaMA 2")

if 'qa' not in st.session_state:
    st.session_state['qa'] = None

st.sidebar.title("Configuration")
st.sidebar.write("Upload your PDF files to create the chatbot.")

uploaded_files = st.sidebar.file_uploader("Upload PDFs", accept_multiple_files=True, type=["pdf"])
if uploaded_files:
    for uploaded_file in uploaded_files:
        with open(os.path.join("data", uploaded_file.name), "wb") as f:
            f.write(uploaded_file.getbuffer())
    st.sidebar.success("PDFs uploaded successfully!")

    if st.sidebar.button("Initialize Chatbot"):
        with st.spinner("Loading data and initializing..."):
            extracted_data = load_pdf("data/")
            text_chunks = text_split(extracted_data)
            embeddings = download_hugging_face_embeddings()
            pc = initialize_pinecone()
            docsearch = create_vector_store(text_chunks, embeddings)
            st.session_state['qa'] = setup_retrieval_qa(docsearch, embeddings)
        st.sidebar.success("Chatbot initialized successfully!")

if st.session_state['qa']:
    st.write("Chat with the medical chatbot below:")
    user_input = st.text_input("Your question:", "")
    if user_input:
        result = st.session_state['qa']({"query": user_input})
        st.write("Response: ", result["result"])



2024-07-08 12:36:07.286 
  command:

    streamlit run c:\Users\gt260\anaconda3\envs\mchatbot02\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-07-08 12:36:07.287 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new caching commands, `st.cache_data` or `st.cache_resource`.
More information [in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching logic used by `st.cache_data` and `st.cache_resource`.
This might lead to some problems or unexpected behavior in certain edge cases.

2024-07-08 12:36:07.288 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new caching commands, `st.cache_data` or `st.cache_resource`.
More information [in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching logic used by