In [1]:
!pip install streamlit 

!npm install localtunnel

!pip install langchain langchain-community 

!pip install sentence-transformers 

!pip install chromadb 

!pip install transformers 

!pip install torch torchvision torchaudio 

!pip install huggingface_hub 

!pip install pypdf 

!pip install bitsandbytes 

!pip install faiss-cpu

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m53.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m76.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.39.0
[K[?25hm##################[0m] / reify:yargs: [32;40mhttp[0m [35mfetch[0m GET 200 https://registry.npmjs.o[0m[Kpmjs.o[0m[K
added 22 packages in 2s

3 packages are looking for funding
  run `npm fund` for details
[37;40mnpm[0m [0m[36;40mnotice[0m[35m[0m 
[0m[37;40mnpm[0m [0m

In [6]:
%%writefile app.py
import os
from dotenv import load_dotenv
import streamlit as st
from huggingface_hub import login
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer
import re
import numpy as np
import torch

import os
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("HF_TOKEN")

login(token=secret_value_0)
generation_args = {
    "max_new_tokens": 256,
    "no_repeat_ngram_size": 4,
    "repetition_penalty": 1.2,
    "temperature": 0.0,
    "do_sample": False,
}

@st.cache_resource
def load_pipeline():
    model_id = 'meta-llama/Llama-3.1-8B-Instruct'
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.bfloat16
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        low_cpu_mem_usage=True
    )
    return tokenizer, model

@st.cache_resource
def load_faiss():
    input_dir = "/kaggle/input/"
    pdf_data = []
    
    for root, dirs, files in os.walk(input_dir):
        for file in files:
            if file.endswith(".pdf"):
                file_path = os.path.join(root, file)
                data_load = PyPDFLoader(file_path)
                data = data_load.load()
                pdf_data.append(data)
    
    if not pdf_data:
        st.error("No PDF files found in the specified directory.")
        return None, None, None

    page_contents = [doc for i in range(len(pdf_data)) for doc in pdf_data[i]]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
    documents = text_splitter.split_documents(page_contents)

    embedder = SentenceTransformer('distiluse-base-multilingual-cased-v2')
    pdf_embeddings = embedder.encode([doc.page_content for doc in documents])

    dimension = pdf_embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)
    
    index.add(pdf_embeddings)
    
    return index, embedder, documents

# Load the model and FAISS index
tokenizer, model = load_pipeline()
index, embedder, documents = load_faiss()
st.session_state.index = index
st.session_state.embedder = embedder
st.session_state.documents = documents

st.title("Medical ChatBot")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

def query_faiss(query):
    query_embedding = st.session_state.embedder.encode([query])
    Dis, Ind = st.session_state.index.search(query_embedding, 10)
    retrieved_docs = [st.session_state.documents[i].page_content for i in Ind[0]]
    return retrieved_docs

def format_chat_template(question, retrieved_context):
    chat_template = f"""
Context:
{retrieved_context}

Instructions:
"You are a helpful assistant that is an expert at extracting the most useful information from the given context above. Also bring in extra relevant infromation to the user query from outside the given context if there any."
"if there any refrences or links in the context just ignore it"
"Answer with the same language as the question"
Question:
{question}

Answer:

"""
    return chat_template

def get_response(query):
    retrieved_docs = query_faiss(query)
    if not retrieved_docs:
        return "Sorry, I couldn't retrieve any relevant documents."
    retrieved_context = " ".join(retrieved_docs)
    formatted_input = format_chat_template(query, retrieved_context)
    inputs = tokenizer(formatted_input, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, **generation_args)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    answer_start = response.find("Answer:") + len("Answer: ")

    only_answer = response[answer_start:].strip()
    
    cleaned_answer = re.sub(r'\s+', ' ', only_answer)
    cleaned_answer = re.sub(r'[^\w\s.,;\[\]\(\)]', '', cleaned_answer)
    sentences = re.findall(r'.*?[.!?]', cleaned_answer)
    if sentences:
        cleaned_answer = ' '.join(sentences)
    
    return cleaned_answer

if prompt := st.chat_input("Ask Anything..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.spinner("Thinking..."):
        response = get_response(prompt)
        
    with st.chat_message("assistant"):
        st.markdown(response)

    st.session_state.messages.append({"role": "assistant", "content": response})


Overwriting app.py


In [None]:
!streamlit run /kaggle/working/app.py  & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://35.237.2.123:8501[0m
[0m
your url is: https://petite-squids-drive.loca.lt
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful
Loading checkpoint shards: 100%|██████████████████| 4/4 [00:43<00:00, 10.89s/it]
Batches: 100%|████████████████████████████████| 110/110 [00:07<00:00, 14.75it/s]
The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this func