### **Installation of required libraries**



In [2]:
!pip install streamlit
!pip install langchain
!pip install langchain_community
!pip install langchain_huggingface
!pip install transformers
!pip install torch
!pip install pypdf

Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [3]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0


### **Importing libraries**

In [4]:
import os

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import T5ForConditionalGeneration
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate
import torch

### Creating the vector database (FAISS index) from the PDF documents

In [5]:
def create_db(data_dir=".", db_path="storedata/db_faiss"):
    """Build a FAISS index from the PDF files in ``data_dir`` and save it to disk.

    The PDFs are loaded page by page, split into overlapping chunks of about
    500 characters, embedded with ``sentence-transformers/all-MiniLM-L6-v2``
    and written to ``db_path``.

    Args:
        data_dir: Directory searched for ``*.pdf`` files. ``DirectoryLoader``
            requires a path; the original call omitted it and raised
            ``TypeError``. Defaults to the current working directory.
        db_path: Folder the FAISS index is saved to.

    Raises:
        ValueError: If no PDF content could be loaded from ``data_dir``.
    """
    loader = DirectoryLoader(data_dir, glob="*.pdf", loader_cls=PyPDFLoader)
    data = loader.load()
    if not data:
        # Fail early with a clear message instead of an obscure error from FAISS.
        raise ValueError(f"No PDF documents found in {data_dir!r}")

    textsplitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                                  chunk_overlap=50)
    data_chunks = textsplitter.split_documents(data)
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    database = FAISS.from_documents(data_chunks, embedding_model)
    database.save_local(db_path)


In [6]:
@st.cache_resource
def get_embeddings_model():
    """Create the HuggingFace embedding model used to query the vector store.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    object rather than rebuilding it on every script rerun.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_id)
    return embeddings

@st.cache_resource
def get_database():
    """Load the FAISS index stored in ``storedata/db_faiss``.

    The index folder is resolved next to this script when ``__file__`` is
    defined, and relative to the current working directory otherwise
    (``__file__`` does not exist inside a notebook cell, where the original
    code raised ``NameError``).

    Returns:
        The FAISS vector store loaded with the shared embedding model.

    Raises:
        FileNotFoundError: If the index folder does not exist.
    """
    embedding_model = get_embeddings_model()
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        base_dir = os.getcwd()
    faiss_index_path = os.path.join(base_dir, "storedata", "db_faiss")
    if not os.path.isdir(faiss_index_path):
        raise FileNotFoundError(
            f"FAISS index not found at {faiss_index_path!r}; run create_db() first"
        )
    # SECURITY: allow_dangerous_deserialization=True unpickles index metadata.
    # Only load an index that this project created itself, never an untrusted file.
    database = FAISS.load_local(faiss_index_path, embedding_model, allow_dangerous_deserialization=True)
    return database



### Prompt to feed to the pre-trained model

In [7]:
def set_custom_prompt(custom_prompt_template):
    """Wrap a raw template string in a ``PromptTemplate``.

    The template is declared with the two input variables ``context`` and
    ``question``.
    """
    expected_variables = ["context", "question"]
    return PromptTemplate(
        template=custom_prompt_template,
        input_variables=expected_variables,
    )

### **Model loading**

In [8]:
@st.cache_resource
def llm():
    """Load ``google/flan-t5-small`` and wrap it as a LangChain LLM.

    FLAN-T5 is an encoder-decoder (sequence-to-sequence) model, so it is
    served through the ``text2text-generation`` pipeline task; the
    ``text-generation`` task is meant for decoder-only causal models.

    Returns:
        A ``HuggingFacePipeline`` that samples up to 64 new tokens
        (temperature 0.7, top-p 0.8).
    """
    model_name = "google/flan-t5-small"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    # Pipeline device convention: 0 = first CUDA GPU, -1 = CPU.
    # The pipeline moves the model to this device itself.
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("text2text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    device=device,
                    temperature=0.7,
                    max_new_tokens=64,
                    do_sample=True,
                    top_p=0.8)

    llm_to_use = HuggingFacePipeline(pipeline=pipe)
    return llm_to_use

In [9]:
@st.cache_resource
def get_qa_chain():
    """Assemble the retrieval question-answering chain.

    Combines the FAISS store (top 5 matches per query), the language model
    returned by ``llm()`` and the custom prompt into a "stuff"-type
    ``RetrievalQA`` chain.
    """
    vector_store = get_database()
    qa_template = '''
    Use the information in the context to answer the question. Only give direct response and answer only if you know, else say "I dont know"
    Give a 2 sentence answer only
    context: {context}
    question: {question}
    answer:
    '''
    language_model = llm()
    top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    prompt_kwargs = {"prompt": set_custom_prompt(qa_template)}
    return RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",
        retriever=top_k_retriever,
        chain_type_kwargs=prompt_kwargs,
    )


### Formatting of the answer received

In [45]:

def format_answer(text):
    """Clean up a model answer: drop bullet markers and repeated sentences.

    Sentences are split on ``". "``. The period that closes the last sentence
    is set aside before comparing, so a repeated final sentence
    ("A. A.") is recognised as a duplicate, and is restored afterwards.

    Args:
        text: Raw answer text; ``None`` or an empty string yields ``""``.

    Returns:
        The text with "- " / "* " markers removed and each sentence kept only
        the first time it appears, joined with ``". "``.
    """
    if not text:
        return ""

    # Remove bullet points
    text = text.replace("- ", "").replace("* ", "")

    pieces = text.split('. ')
    # Only the last piece can still carry its closing period (the split
    # consumed the others); remove exactly one so ellipses are left intact.
    last = pieces[-1].rstrip()
    ends_with_period = last.endswith('.')
    if ends_with_period:
        pieces[-1] = last[:-1]

    seen = set()
    unique_sentences = []
    for piece in pieces:
        sentence = piece.strip()
        if sentence and sentence not in seen:
            seen.add(sentence)
            unique_sentences.append(sentence)

    result = '. '.join(unique_sentences)
    if ends_with_period and result:
        result += '.'
    return result


# **Execution**

In [1]:
# Interactive Hugging Face Hub login: prompts for an access token
# (generated at https://huggingface.co/settings/tokens) and stores it locally.
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
The token `testing` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticat

In [46]:

def main():
    """Render the chat UI and answer the user's question.

    Replays the conversation kept in ``st.session_state.messages``, reads a
    new question from the chat input, runs it through the QA chain and shows
    the cleaned-up answer. Any exception raised while answering is displayed
    with ``st.error`` instead of crashing the app.
    """
    st.title('''Hello! This is your MediGuide :)
             Ask any question and I will answer it.''')

    if "messages" not in st.session_state:
        st.session_state.messages = []

    for message in st.session_state.messages:
        st.chat_message(message["role"]).markdown(message["content"])

    prompt = st.chat_input("Your Question:")
    if not prompt:
        return

    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    try:
        qa_chain = get_qa_chain()
        response = qa_chain.invoke({"query": prompt})
        raw_answer = response["result"]

        # If the model echoed the prompt, keep only what follows "answer:".
        # When the marker is missing, find() returns -1: keep the whole text
        # instead of slicing from index 6 and losing the start of the answer.
        marker = "answer:"
        marker_pos = raw_answer.lower().find(marker)
        if marker_pos >= 0:
            raw_answer = raw_answer[marker_pos + len(marker):]
        ans_formatted = format_answer(raw_answer.strip())

        st.chat_message("assistant").markdown(ans_formatted)
        # Store the formatted text so the replayed history matches what was shown.
        st.session_state.messages.append({"role": "assistant", "content": ans_formatted})
    except Exception as e:
        st.error(f"Error: {str(e)}")
# Start the chat UI only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()



In [47]:
import os
import subprocess
from getpass import getpass

# SECURITY: the ngrok authtoken is a credential and must never be stored in the
# notebook. Read it from the NGROK_AUTHTOKEN environment variable, or prompt
# for it without echoing. The token that used to be hard-coded in this cell is
# exposed and should be revoked in the ngrok dashboard.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

# Argument list (no shell) so the token is never interpolated into a command string.
_ngrok_result = subprocess.run(
    ["ngrok", "config", "add-authtoken", ngrok_authtoken],
    check=True,
    capture_output=True,
    text=True,
)
print(_ngrok_result.stdout.strip())

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [48]:
!pip install -q streamlit pyngrok
!pip install streamlit



In [49]:
%%writefile app.py
# Minimal Streamlit page: shows a title and one line of text.
# NOTE(review): the launcher cell in this notebook runs `infra.py`, not this
# `app.py` — confirm which file is meant to be served.
import streamlit as st

st.title("Streamlit in Colab")
st.write("Hello from Google Colab!")

Overwriting app.py


In [50]:
from pyngrok import ngrok
import threading
import time
import os
import socket
import subprocess

# NOTE(review): this serves `infra.py`, while an earlier cell writes `app.py` —
# confirm which script is the intended entry point.
STREAMLIT_SCRIPT = "infra.py"
STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 60

# Stop any ngrok process left over from a previous run of this cell.
ngrok.kill()

def run():
    """Run the Streamlit server; blocks until the server process exits."""
    subprocess.run(
        ["streamlit", "run", STREAMLIT_SCRIPT,
         "--server.port", str(STREAMLIT_PORT),  # pin the port the tunnel points at
         "--server.headless", "true"],          # no browser / e-mail prompt on a remote VM
        check=False,
    )

# Daemon thread: a still-running server must not block interpreter shutdown.
thread = threading.Thread(target=run, daemon=True)
thread.start()

# Wait until the server actually accepts connections instead of sleeping a
# fixed 5 seconds, which is too short on a cold start and wasteful otherwise.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    try:
        with socket.create_connection(("localhost", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            raise RuntimeError(
                f"Streamlit did not start on port {STREAMLIT_PORT} "
                f"within {STARTUP_TIMEOUT_S} seconds"
            )
        time.sleep(0.5)

public_url = ngrok.connect(STREAMLIT_PORT)
print("Streamlit app is live at:", public_url)



Streamlit app is live at: NgrokTunnel: "https://b96b-34-74-102-211.ngrok-free.app" -> "http://localhost:8501"
