In [79]:
import streamlit as st
import google.generativeai as genai

In [80]:
from dotenv import load_dotenv
import os
load_dotenv()
# CONFIGURING GENAI KEY
api_key=os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

In [81]:
models = genai.list_models()

for model in models:
    print(model.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thi

In [82]:
import fitz
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

In [83]:
extracted_data = extract_text_from_pdf(r"C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\Sources\Tidy's Physiotherapy.pdf")

In [84]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Function to split the extracted text into chunks
def split_text_into_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_text(text)
    return chunks

In [85]:
splitted_chunks=split_text_into_chunks(extracted_data)

In [86]:
for i, chunk in enumerate(splitted_chunks[:5]):
    print(f"Chunk {i}: {chunk[:100]}") 

Chunk 0: Tidy's Physiotherapy
Dedication
To all the physiotherapy students, who have taught me so much.
I tha
Chunk 1: Manchester
UK
Honorary Research Fellow
Wrightington Wigan and Leigh NHS Trust
Wigan
Lancashire
UK
B 
Chunk 2: or by any means, electronic, mechanical, photocopying, recording or otherwise, without either the pr
Chunk 3: complete your request on-line via the Elsevier Science homepage (http://www.elsevier.com), by select
Chunk 4: Notice
Medical knowledge is constantly changing. Standard safety precautions must be followed, but a


In [96]:
# Function to generate embeddings for the text chunks
def generate_gemini_embeddings(chunks):
    """
    Generates embeddings for given text chunks using an appropriate Gemini embedding model.
    """
    embeddings = []
    for chunk in chunks:
        if chunk.strip():  # Ensure the chunk is not empty
            response = genai.embed_content(
                model="models/gemini-embedding-exp", 
                content=chunk[:500]
            )
            
            # Directly extract the embedding
            if isinstance(response, dict) and 'embedding' in response:
                embeddings.append(response['embedding'])
    
    return embeddings


In [97]:
embedded_chunks=generate_gemini_embeddings(splitted_chunks)

InternalServerError: 500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting

In [45]:
load_dotenv()
papi_key = os.getenv("PINECONE_API_KEY")

In [47]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone
pc = Pinecone(api_key=papi_key)

index = pc.Index("physio")

In [50]:
def upsert_embeddings_in_batches(text_chunks, embeddings, batch_size=100):
    vectors = []
    
    for i, embedding in enumerate(embeddings):
        # Create metadata for each chunk
        metadata = {"text": text_chunks[i], "source": "your_document_source"}
        vector = {
            "id": f"vec{i}",  # Unique ID for each vector
            "values": embedding,  # The embedding values
            "metadata": metadata  # Metadata for the chunk
        }
        vectors.append(vector)
        
        # Batch upsert every `batch_size` chunks
        if (i + 1) % batch_size == 0 or (i + 1) == len(embeddings):
            index.upsert(vectors=vectors, namespace="ns1")
            vectors = []  # Clear the list after each batch

# Call the function to upsert embeddings in batches
upsert_embeddings_in_batches(splitted_chunks, embedded_chunks, batch_size=100)

In [63]:
def generate_query_embedding(query):
    response = genai.embed_content(
        model="models/embedding-gecko-001",  # Gemini Pro embedding model
        content=query
    )
    
    # Extract and return the embedding from the response
    if 'embedding' in response:
        return response['embedding']
    else:
        raise ValueError(f"Failed to generate embeddings for query: {query}")

In [52]:
def retrieve_relevant_chunks(query_embedding):
    # Search Pinecone index using the query embedding
    query_response = index.query(
    vector=query_embedding,
    top_k=10,
    include_metadata=True,
    namespace="ns1"
)
    
    # Extract relevant chunks from the Pinecone response
    retrieved_chunks = [match['metadata']['text'] for match in query_response['matches']]
    return retrieved_chunks

In [53]:
def query_and_retrieve(query):
    # Step 1: Generate the embedding for the query
    query_embedding = generate_query_embedding(query)
    
    # Step 2: Retrieve relevant chunks using the generated embedding
    retrieved_texts = retrieve_relevant_chunks(query_embedding)
    
    # Step 3: Return the retrieved texts
    return retrieved_texts

# Example Usage
query = "What are the symptoms of anxiety?"
retrieved_texts = query_and_retrieve(query)

print(retrieved_texts)

['355\nSymptoms of Coronary Heart\nDisease\nAn angina attack is characterised by the sensation of\nsevere pain and heaviness or tightness behind the ster-\nnum. Pain radiating to the arms, neck, jaw, back or\nstomach is also common. One of the more common\ncauses of angina is physical activity, particularly after a\nlarge meal. However, sufferers can also get an attack\nduring other forms of stress, or even whilst resting or\nasleep.\nUnlike a transient ischaemic attack (TIA), a myocar-\ndial infarction', 'tightness, dryness or irritation in the upper respiratory\ntract. Attacks tend to be episodic, often occurring sev-\neral times a year. Their duration varies from a few sec-\nonds to many months and the severity may be anything\nfrom mild wheezing to great distress. The most pre-\ndominant features are summarised below.\nWheeze and dyspnoea\nDyspnoea may be intense and chiefly occurs on expira-\ntion, which becomes a conscious exhausting effort with', 'and tendon jerks, probably due 

In [None]:
# Function to use Gemini Pro LLM to generate the final answer
def generate_answer_with_gemini(query, retrieved_chunks):
    # Combine the retrieved chunks into a single context
    context = "\n".join(retrieved_chunks)

    # Crafting the prompt for PhysioBot
    prompt = f"""
    You are a PhysioBOT and you have been asked to provide information from the sources you have read : {context}
    A user asks you: "{query}"
    you must answer in a very friendly and informative way and you must provide the answer in detail and in a way that is easy to understand.
    you must give detailed information about the query and provide a detailed answer and also keep in mind that you are not an expert physiotherapist but a chatbot. so end the answer by saying that the user should consult a physiotherapist for more information.
    Based on your knowledge, you provide the following answer:
    """

    model = genai.GenerativeModel('models/gemini-1.5-pro-latest')
    response = model.generate_content([prompt])
    return response.text


In [64]:
# Main function to handle user input and process the query through GODAI
def main():
    # Prompt user for input
    query = input("What would you like to ask PhysioBOT? ")

    # Step 1: Embed the query using Gemini Pro
    query_embedding = generate_query_embedding(query)

    # Step 2: Retrieve relevant chunks from Pinecone based on query embedding
    retrieved_chunks = retrieve_relevant_chunks(query_embedding)

    # Step 3: Generate an answer using Gemini Pro LLM with the retrieved chunks
    answer = generate_answer_with_gemini(query, retrieved_chunks)

    # Display the final answer to the user
    print("\nPhysioBOTS's Answer:")
    print(answer)

# Run the main function
if __name__ == "__main__":
    main()

NotFound: 404 models/embedding-gecko-001 is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods.