In [3]:
import streamlit as st
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [15]:
from dotenv import load_dotenv
import os
# Load environment variables from .env: default lookup first, then the project's explicit .env path.
load_dotenv()
load_dotenv(dotenv_path=r'C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\.env')
# CONFIGURING GENAI KEY
# Read the key from the environment instead of embedding it in the notebook, so the
# credential never lands in version control or in shared copies of this file.
# NOTE(review): the key that was previously hardcoded here has been exposed and should be revoked/rotated.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError("GOOGLE_API_KEY is not set; define it in the environment or in the .env file.")
genai.configure(api_key=api_key)

In [16]:
# Confirm the key was loaded without echoing the secret into the saved notebook output.
print("GOOGLE_API_KEY loaded:", bool(api_key))

[REDACTED: API key removed from saved output]


In [6]:
import fitz
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at ``pdf_path``.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output joined in page
        order (an empty string when the document yields no pages).
    """
    # The context manager closes the document even if a page fails to extract;
    # previously the document was opened and never closed.
    with fitz.open(pdf_path) as doc:
        # join() avoids the quadratic cost of repeated ``+=`` on a growing string.
        return "".join(page.get_text() for page in doc)

In [7]:
# Source textbook to index; its full text feeds the chunking step.
pdf_source_path = r"C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\Sources\Tidy's Physiotherapy.pdf"
extracted_data = extract_text_from_pdf(pdf_source_path)

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Function to split the extracted text into chunks
def split_text_into_chunks(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` into overlapping chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded as the splitter's ``chunk_size``. Defaults to
            500, the value that used to be hard-coded.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap``. Defaults
            to 50, the value that used to be hard-coded.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the list of chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

In [9]:
# Chunk the extracted book text using the splitter defaults.
splitted_chunks = split_text_into_chunks(text=extracted_data)

In [10]:
# Sanity check: show the first 100 characters of each of the first five chunks.
preview_chunks = splitted_chunks[:5]
for i, chunk in enumerate(preview_chunks):
    print(f"Chunk {i}: {chunk[:100]}")

Chunk 0: Tidy's Physiotherapy
Dedication
To all the physiotherapy students, who have taught me so much.
I tha
Chunk 1: Manchester
UK
Honorary Research Fellow
Wrightington Wigan and Leigh NHS Trust
Wigan
Lancashire
UK
B 
Chunk 2: or by any means, electronic, mechanical, photocopying, recording or otherwise, without either the pr
Chunk 3: complete your request on-line via the Elsevier Science homepage (http://www.elsevier.com), by select
Chunk 4: Notice
Medical knowledge is constantly changing. Standard safety precautions must be followed, but a


In [11]:
def generate_gemini_embeddings(chunks, model="models/text-embedding-004"):
    """Embed each non-blank chunk with ``genai.embed_content``.

    Args:
        chunks: Iterable of text chunks (strings).
        model: Embedding model name forwarded to ``genai.embed_content``.
            Defaults to the value that used to be hard-coded.

    Returns:
        A list with one embedding per non-blank chunk, in input order. Blank
        or whitespace-only chunks are skipped, so the result can be shorter
        than ``chunks``; callers that pair results with chunks by index should
        remove blank chunks from their own list first.

    Raises:
        ValueError: If a response is not a dict containing ``'embedding'``.
            Dropping such a chunk silently would shift every later embedding
            relative to its source text, so the failure is surfaced instead
            (the same way ``generate_query_embedding`` reports it).
    """
    embeddings = []
    for chunk in chunks:
        if not chunk.strip():
            # Nothing to embed for an empty chunk.
            continue
        response = genai.embed_content(model=model, content=chunk)
        if not (isinstance(response, dict) and 'embedding' in response):
            raise ValueError(
                f"Failed to generate embedding for chunk starting with: {chunk[:50]!r}"
            )
        embeddings.append(response['embedding'])
    return embeddings

In [12]:
# Embed every chunk (one embed_content call per non-blank chunk).
embedded_chunks = generate_gemini_embeddings(chunks=splitted_chunks)

In [13]:
# Load the .env files and read the Pinecone API key from the environment.
load_dotenv()
load_dotenv(dotenv_path=r'C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\.env')
papi_key = os.getenv("PINECONE_API_KEY")
# Report only whether the key is present; printing the key itself leaks it into the saved notebook.
print("PINECONE_API_KEY loaded:", bool(papi_key))

[REDACTED: API key removed from saved output]


In [21]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone
PINECONE_INDEX_NAME = "physio"
pc = Pinecone(api_key=papi_key)
# Handle to the existing index that the upsert and query cells operate on.
index = pc.Index(PINECONE_INDEX_NAME)

In [23]:
def upsert_embeddings_in_batches(text_chunks, embeddings, batch_size=100,
                                 namespace="ns1", source="your_document_source"):
    """Upsert embeddings into the module-level ``index`` in fixed-size batches.

    Vector ``i`` gets the id ``f"vec{i}"``, ``embeddings[i]`` as its values and
    ``{"text": text_chunks[i], "source": source}`` as its metadata.

    Args:
        text_chunks: Sequence of chunk texts, paired with ``embeddings`` by index.
        embeddings: Sequence of embedding vectors.
        batch_size: Maximum number of vectors sent per ``index.upsert`` call.
        namespace: Namespace passed to ``index.upsert``. Defaults to ``"ns1"``,
            the value that used to be hard-coded.
        source: Value stored under the ``"source"`` metadata key. Defaults to
            the value that used to be hard-coded.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if there are fewer
            text chunks than embeddings. Both are checked before anything is
            sent, so a bad call cannot leave a half-written index.

    Note:
        Extra entries in ``text_chunks`` beyond ``len(embeddings)`` are ignored.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total = len(embeddings)
    if len(text_chunks) < total:
        raise ValueError(
            f"Got {total} embeddings but only {len(text_chunks)} text chunks; "
            "every embedding needs a matching text chunk."
        )

    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        batch = [
            {
                "id": f"vec{i}",  # Unique ID for each vector
                "values": embeddings[i],
                "metadata": {"text": text_chunks[i], "source": source},
            }
            for i in range(start, stop)
        ]
        index.upsert(vectors=batch, namespace=namespace)

# Push every chunk/embedding pair to Pinecone, at most 100 vectors per request.
upsert_embeddings_in_batches(
    text_chunks=splitted_chunks,
    embeddings=embedded_chunks,
    batch_size=100,
)

In [24]:
def generate_query_embedding(query):
    """Embed a user query with ``genai.embed_content``.

    Args:
        query: The query text to embed.

    Returns:
        The value stored under ``'embedding'`` in the response.

    Raises:
        ValueError: If the response has no ``'embedding'`` entry.
    """
    result = genai.embed_content(
        content=query,
        model="models/text-embedding-004",
    )

    # Guard clause: fail loudly when the service returned no embedding.
    if 'embedding' not in result:
        raise ValueError(f"Failed to generate embeddings for query: {query}")
    return result['embedding']

In [31]:
def retrieve_relevant_chunks(query_embedding, top_k=2, namespace="ns1"):
    """Return the stored text of the vectors closest to ``query_embedding``.

    Args:
        query_embedding: Embedding vector of the user query.
        top_k: Number of matches requested from the index. Defaults to 2, the
            value that used to be hard-coded.
        namespace: Namespace to search. Defaults to ``"ns1"``, the namespace
            ``upsert_embeddings_in_batches`` writes to; the query previously
            omitted the namespace and therefore did not search the namespace
            the vectors were written to.

    Returns:
        A list with ``match['metadata']['text']`` for every entry of the
        response's ``'matches'``, in the order the index returned them.
    """
    # Search the module-level Pinecone ``index`` using the query embedding.
    query_response = index.query(
        vector=query_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True,  # metadata carries the chunk text we need back
    )

    return [match['metadata']['text'] for match in query_response['matches']]

In [29]:
# Function to use Gemini Pro LLM to generate the final answer
def generate_answer_with_gemini(query, retrieved_chunks):
    """Ask the ``gemini-pro`` model to answer ``query`` from the retrieved chunks.

    Args:
        query: The user's question, inserted verbatim into the prompt.
        retrieved_chunks: Iterable of text passages; they are joined with
            newlines and inserted into the prompt as the source material.

    Returns:
        The ``text`` attribute of the model's response.
    """
    # Merge the retrieved passages into one newline-separated context block.
    context = "\n".join(retrieved_chunks)

    # Prompt template for PhysioBot (whitespace inside the literal is part of the prompt).
    prompt = f"""
    You are a PhysioBOT and you have been asked to provide information from the sources you have read : {context}
    A user asks you: "{query}"
    you must answer in a very friendly and informative way 
    Based on your knowledge, you provide the following answer:
    """

    llm = genai.GenerativeModel('gemini-pro')
    return llm.generate_content([prompt]).text


In [28]:
# Main function to handle user input and process the query through Physio-BOT
def main():
    """Run one interactive question through the Physio-BOT pipeline.

    Reads a question with ``input``, embeds it, retrieves the matching chunks,
    generates an answer, and prints progress messages followed by the answer.
    Returns nothing.
    """
    user_question = input("What would you like to ask Physio-BOT? ")

    # Stage 1: turn the question into an embedding vector.
    print("Step 1: Embedding the query...")
    question_vector = generate_query_embedding(user_question)
    print("Step 1 complete.")

    # Stage 2: look up the closest stored chunks for that vector.
    print("Step 2: Retrieving relevant chunks from Pinecone...")
    supporting_chunks = retrieve_relevant_chunks(question_vector)
    print("Step 2 complete.")

    # Stage 3: let the LLM compose an answer from the question and the chunks.
    print("Step 3: Generating an answer using Gemini Pro LLM...")
    final_answer = generate_answer_with_gemini(user_question, supporting_chunks)
    print("Step 3 complete.")

    # Show the result to the user.
    print("\nPhysio-BOT's Answer:")
    print(final_answer)

# Run the main function
# The guard is true when this code runs as the top-level module (as it did in this
# notebook, per the output below), so executing the cell prompts for a question right away.
if __name__ == "__main__":
    main()

Step 1: Embedding the query...
Step 1 complete.
Step 2: Retrieving relevant chunks from Pinecone...
Step 2 complete.
Step 3: Generating an answer using Gemini Pro LLM...
Step 3 complete.

Physio-BOT's Answer:
**Possible Reasons for Back Pain**

Back pain is a common condition that can have a variety of causes. Some of the most common causes include:

* **Muscle strain or sprain:** This is the most common cause of back pain. It can occur when you lift something heavy incorrectly, twist your back awkwardly, or overuse your back muscles.
* **Herniated disc:** This occurs when the soft, jelly-like center of an intervertebral disc pushes through the tough outer layer of the disc. This can put pressure on the nerves in your spinal cord, causing pain, numbness, and weakness.
* **Sciatica:** This is a type of back pain that radiates down the sciatic nerve, which runs from your lower back through your buttocks and down the back of your leg. It can be caused by a herniated disc, spinal stenosis,