In [1]:
import streamlit as st
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import os
load_dotenv()  # load variables from a local .env file, if present, into the environment
# CONFIGURING GENAI KEY
# The key comes from the GOOGLE_API_KEY environment variable; os.getenv returns
# None when the variable is unset, and that value is passed through unchanged.
api_key=os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

In [3]:
import fitz
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        The ``page.get_text()`` result of each page, concatenated in page
        order with no separator added between pages.
    """
    # The context manager closes the document even if extraction fails part-way;
    # previously the handle was never closed.
    with fitz.open(pdf_path) as doc:
        # join() builds the result once instead of re-copying a growing string per page.
        return "".join(page.get_text() for page in doc)

In [4]:
# Location of the source PDF. Set the PHYSIO_PDF_PATH environment variable (for
# example in the .env file loaded earlier) to run this on another machine; the
# fallback is the original author's local path.
PDF_PATH = os.getenv(
    "PHYSIO_PDF_PATH",
    r"C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\Sources\Tidy's Physiotherapy.pdf",
)
extracted_data = extract_text_from_pdf(PDF_PATH)

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Function to split the extracted text into chunks
def split_text_into_chunks(text, chunk_size=500, chunk_overlap=50):
    """Split text into overlapping chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``. Defaults to
            500, the value that was previously hard-coded.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``. Defaults
            to 50, the value that was previously hard-coded.

    Returns:
        Whatever ``split_text`` returns for ``text`` (a list of chunk strings).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

In [6]:
# Split the text extracted from the PDF into chunks; these are what get embedded later.
splitted_chunks=split_text_into_chunks(extracted_data)

In [7]:
# Sanity check: show the first 100 characters of each of the first five chunks.
for i, chunk in enumerate(splitted_chunks[:5]):
    print(f"Chunk {i}: {chunk[:100]}") 

Chunk 0: Tidy's Physiotherapy
Dedication
To all the physiotherapy students, who have taught me so much.
I tha
Chunk 1: Manchester
UK
Honorary Research Fellow
Wrightington Wigan and Leigh NHS Trust
Wigan
Lancashire
UK
B 
Chunk 2: or by any means, electronic, mechanical, photocopying, recording or otherwise, without either the pr
Chunk 3: complete your request on-line via the Elsevier Science homepage (http://www.elsevier.com), by select
Chunk 4: Notice
Medical knowledge is constantly changing. Standard safety precautions must be followed, but a


In [8]:
def generate_gemini_embeddings(chunks):
    """Embed each non-blank text chunk with the ``models/text-embedding-004`` model.

    Args:
        chunks: Iterable of strings to embed.

    Returns:
        A list holding one embedding per non-blank chunk, in input order.
        Empty or whitespace-only chunks are skipped, so the result is shorter
        than ``chunks`` whenever such chunks are present; callers that pair
        embeddings with chunks by position should filter blanks out first.

    Raises:
        ValueError: If the response for a chunk is not a dict containing an
            ``'embedding'`` entry.
    """
    embeddings = []
    for position, chunk in enumerate(chunks):
        if not chunk.strip():
            # Blank chunks are not sent to the API.
            continue

        response = genai.embed_content(
            model="models/text-embedding-004",
            content=chunk,
        )

        # Fail loudly instead of dropping the chunk: a silently missing entry
        # shifts every later embedding against the chunk text it belongs to.
        if not isinstance(response, dict) or 'embedding' not in response:
            raise ValueError(f"Failed to generate embedding for chunk {position}")
        embeddings.append(response['embedding'])
    return embeddings

In [12]:
# Embed every chunk. `embedded_chunks` is read by the cell that upserts into
# Pinecone, so this cell has to be executed before that one.
embedded_chunks=generate_gemini_embeddings(splitted_chunks)

In [9]:
load_dotenv()
# Pinecone API key, read from the PINECONE_API_KEY environment variable
# (os.getenv returns None when it is unset).
papi_key = os.getenv("PINECONE_API_KEY")

In [10]:
from pinecone import Pinecone, ServerlessSpec

# Initialize the Pinecone client with the key read from the environment
pc = Pinecone(api_key=papi_key)

# Handle to the index named "physio"; the upsert and query functions in this
# notebook use this module-level `index` object.
index = pc.Index("physio")

In [11]:
def upsert_embeddings_in_batches(text_chunks, embeddings, batch_size=100,
                                 namespace="ns1", source="your_document_source"):
    """Upsert chunk embeddings into the module-level Pinecone ``index`` in batches.

    Each vector gets the id ``vec<position>``, the embedding as its values, and
    metadata ``{"text": <chunk>, "source": <source>}``.

    Args:
        text_chunks: Sequence of chunk strings; ``text_chunks[i]`` must be the
            text that produced ``embeddings[i]``.
        embeddings: Sequence of embedding vectors, one per chunk.
        batch_size: Maximum number of vectors sent per ``index.upsert`` call.
        namespace: Namespace passed to ``index.upsert``. Defaults to ``"ns1"``,
            the value that was previously hard-coded.
        source: Value stored under the ``"source"`` metadata key. Defaults to
            the previously hard-coded placeholder.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``text_chunks``
            and ``embeddings`` differ in length (pairing them by position
            would attach the wrong text to a vector).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if len(text_chunks) != len(embeddings):
        raise ValueError(
            f"text_chunks ({len(text_chunks)}) and embeddings ({len(embeddings)}) "
            "must have the same length"
        )

    for start in range(0, len(embeddings), batch_size):
        stop = start + batch_size
        batch = [
            {
                "id": f"vec{i}",  # position of the chunk in the full sequence
                "values": embedding,
                "metadata": {"text": text, "source": source},
            }
            for i, (text, embedding) in enumerate(
                zip(text_chunks[start:stop], embeddings[start:stop]), start=start
            )
        ]
        index.upsert(vectors=batch, namespace=namespace)

# Call the function to upsert embeddings in batches.
# Needs `embedded_chunks` from the embedding cell; the NameError recorded in this
# cell's output indicates that cell had not been run in the kernel session.
upsert_embeddings_in_batches(splitted_chunks, embedded_chunks, batch_size=100)

NameError: name 'embedded_chunks' is not defined

In [12]:
def generate_query_embedding(query):
    """Embed a query string with the ``models/text-embedding-004`` model.

    Args:
        query: The text to embed.

    Returns:
        The value stored under ``'embedding'`` in the API response.

    Raises:
        ValueError: If the response has no ``'embedding'`` entry.
    """
    result = genai.embed_content(
        content=query,
        model="models/text-embedding-004",
    )

    # Guard clause: bail out first, then return the happy-path value.
    if 'embedding' not in result:
        raise ValueError(f"Failed to generate embeddings for query: {query}")
    return result['embedding']

In [13]:
def retrieve_relevant_chunks(query_embedding, top_k=10, namespace="ns1"):
    """Query the module-level Pinecone ``index`` and return the matched chunk texts.

    Args:
        query_embedding: Embedding vector of the query.
        top_k: Number of matches to request. Defaults to 10, the value that
            was previously hard-coded.
        namespace: Namespace to search. Defaults to ``"ns1"``, the value that
            was previously hard-coded.

    Returns:
        A list with ``match['metadata']['text']`` for every entry of the
        response's ``'matches'``, in the order the response lists them.
    """
    query_response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,  # the chunk text is read from the metadata below
        namespace=namespace,
    )

    return [match['metadata']['text'] for match in query_response['matches']]

In [14]:
def query_and_retrieve(query):
    """Embed ``query`` and return the chunk texts retrieved for that embedding.

    Args:
        query: The question text.

    Returns:
        The result of ``retrieve_relevant_chunks`` applied to the embedding
        produced by ``generate_query_embedding``.
    """
    # Embed first, then look up the nearest stored chunks for that embedding.
    return retrieve_relevant_chunks(generate_query_embedding(query))

# Example usage: run one query through embedding + retrieval and inspect the result.
query = "What are the symptoms of anxiety?"
retrieved_texts = query_and_retrieve(query)

# Prints the full list of retrieved chunk texts.
print(retrieved_texts)

['355\nSymptoms of Coronary Heart\nDisease\nAn angina attack is characterised by the sensation of\nsevere pain and heaviness or tightness behind the ster-\nnum. Pain radiating to the arms, neck, jaw, back or\nstomach is also common. One of the more common\ncauses of angina is physical activity, particularly after a\nlarge meal. However, sufferers can also get an attack\nduring other forms of stress, or even whilst resting or\nasleep.\nUnlike a transient ischaemic attack (TIA), a myocar-\ndial infarction', 'tightness, dryness or irritation in the upper respiratory\ntract. Attacks tend to be episodic, often occurring sev-\neral times a year. Their duration varies from a few sec-\nonds to many months and the severity may be anything\nfrom mild wheezing to great distress. The most pre-\ndominant features are summarised below.\nWheeze and dyspnoea\nDyspnoea may be intense and chiefly occurs on expira-\ntion, which becomes a conscious exhausting effort with', 'and tendon jerks, probably due 

In [15]:
# Function to use Gemini Pro LLM to generate the final answer
def generate_answer_with_gemini(query, retrieved_chunks, model_name='gemini-pro'):
    """Ask a Gemini generative model to answer ``query`` from the retrieved chunks.

    Args:
        query: The user's question; interpolated into the prompt.
        retrieved_chunks: Iterable of chunk strings; joined with newlines and
            interpolated into the prompt as the context.
        model_name: Name passed to ``genai.GenerativeModel``. Defaults to
            ``'gemini-pro'``, the value that was previously hard-coded.
            NOTE(review): confirm this model name is still served by the API.

    Returns:
        The ``text`` attribute of the ``generate_content`` response.
    """
    # Combine the retrieved chunks into a single context
    context = "\n".join(retrieved_chunks)

    # Crafting the prompt for PhysioBot
    prompt = f"""
    You are a PhysioBOT and you have been asked to provide information from the sources you have read : {context}
    A user asks you: "{query}"
    you must answer in a very friendly and informative way and you must provide the answer in detail and in a way that is easy to understand.
    Based on your knowledge, you provide the following answer:
    """

    model = genai.GenerativeModel(model_name)
    response = model.generate_content([prompt])
    return response.text


In [17]:
# Main function to handle user input and process the query through Physio-BOT
def main():
    """Console entry point: read one question, retrieve context, print the answer.

    Reads the question with ``input``, embeds it, retrieves the matching chunks,
    asks the model for an answer, and prints progress messages, the retrieved
    chunks, and the final answer. A blank question is rejected before any API
    call is made, mirroring the check in the Streamlit version of this flow.

    Note: a later cell in this notebook defines another ``main``; once that
    cell has run, the name refers to that definition instead.
    """
    # Prompt user for input
    query = input("What would you like to ask Physio-BOT? ")

    # Do not send an empty question to the embedding API.
    if not query.strip():
        print("Please enter a question.")
        return

    print("Step 1: Embedding the query...")
    query_embedding = generate_query_embedding(query)
    print("Step 1 complete.")

    print("Step 2: Retrieving relevant chunks from Pinecone...")
    retrieved_chunks = retrieve_relevant_chunks(query_embedding)
    print(retrieved_chunks)  # raw chunks, shown for inspection
    print("Step 2 complete.")

    print("Step 3: Generating an answer using Gemini Pro LLM...")
    answer = generate_answer_with_gemini(query, retrieved_chunks)
    print("Step 3 complete.")

    # Display the final answer to the user
    print("\nPhysio-BOT's Answer:")
    print(answer)

# Run the main function when this code executes as the top-level module
# (the recorded output below shows it running inside the notebook).
if __name__ == "__main__":
    main()

Step 1: Embedding the query...
Step 1 complete.
Step 2: Retrieving relevant chunks from Pinecone...
['Figure 2.34 (a) Resisted lateral rotation at the shoulder. (b) Resisted medial rotation at the shoulder\nmedial rotation at the shoulder.\nFigure 2.33 Quadrant test of the shoulder\n52\nrotation and slight flexion. The physiotherapist grasps\nthe humeral head with one hand, and the medial hand\nis used to stabilise the shoulder girdle. The lateral hand\napplies the anterior translation force in the same way as\nthe anterior draw test of the knee. Laxity of the joint', 'degrees; internal and external rotation beside the trunk\nand at 90 degrees of elevation in the plane of the\nscapula; and resisted muscle testing in positions of\nfunction and/or pain.\nOther shoulder joint abnormalities\nSporting activities that give rise to symptoms, such\nas the late cocking stage of throwing a ball over-\nhead, should also be assessed to determine faulty\nmechanics.\nMuscle length tests\nIt may be u

In [1]:
import streamlit as st

def main():
    """Render the Physio-BOT Streamlit page and answer the submitted question.

    Draws the title, an intro line, a text input and a "Get Answer" button.
    When the button is pressed with a non-blank question, the question is
    embedded, relevant chunks are retrieved, and the generated answer is
    written to the page; a blank question produces a warning instead.
    """
    st.title("Physio-BOT")
    st.write("Ask me anything related to physiotherapy!")
    user_query = st.text_input("Enter your question here:")

    # Nothing else to render until the button is pressed.
    if not st.button("Get Answer"):
        return

    if user_query.strip() == "":
        st.warning("Please enter a question.")
        return

    # Embed -> retrieve -> generate.
    embedding = generate_query_embedding(user_query)
    context_chunks = retrieve_relevant_chunks(embedding)
    reply = generate_answer_with_gemini(user_query, context_chunks)

    st.subheader("Physio-BOT's Answer:")
    st.write(reply)

    # Optional debugging aid - uncomment to list the retrieved chunks:
    # st.subheader("Retrieved Chunks for Debugging:")
    # for idx, chunk in enumerate(context_chunks, start=1):
    #    st.markdown(f"*Chunk {idx}:*")
    #    st.write(chunk)

# Entry-point guard. The log output recorded below this cell shows Streamlit
# asking for the script to be launched with `streamlit run`.
if __name__ == "__main__":
    main()

2025-01-12 15:06:18.318 
  command:

    streamlit run c:\Users\ASUS\Documents\GitHub\Physio-Chatbot\physioenv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-01-12 15:06:18.323 Session state does not function when running a script without `streamlit run`
