In [None]:
import streamlit as st
import google.generativeai as genai

In [None]:
from dotenv import load_dotenv
import os
# Pull variables from a .env file into the process environment.
load_dotenv()

# Read the Gemini key from GOOGLE_API_KEY and hand it to the google.generativeai client.
api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

In [None]:
# Ask the SDK for its model listing and keep the result in `models`.
# NOTE(review): presumably used to check which model identifiers are available
# before hard-coding them in the cells below -- confirm.
models = genai.list_models()

# Print the `name` attribute of every listed model, one per line.
for model in models:
    print(model.name)

In [None]:
import fitz
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        str: The ``get_text()`` output of each page, joined with no separator
        (an empty string when the document has no pages).
    """
    # The context manager closes the document (and its file handle) even when
    # extraction raises part-way through; the original never closed it.
    with fitz.open(pdf_path) as doc:
        # join() builds the result once instead of re-copying it on every `+=`.
        return "".join(page.get_text() for page in doc)

In [None]:
# Source PDF for the knowledge base. Set the PHYSIO_PDF_PATH environment
# variable to run this notebook on another machine; when it is unset the
# original author's local path is used, so existing behaviour is unchanged.
extracted_data = extract_text_from_pdf(
    os.getenv(
        "PHYSIO_PDF_PATH",
        r"C:\Users\ASUS\Documents\GitHub\Physio-Chatbot\Sources\Tidy's Physiotherapy.pdf",
    )
)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Function to split the extracted text into chunks
def split_text_into_chunks(text, chunk_size=300, chunk_overlap=50):
    """Split a string into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The full text to split.
        chunk_size: Value forwarded as the splitter's ``chunk_size``
            (default 300, the value this notebook originally hard-coded).
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``
            (default 50, the value this notebook originally hard-coded).

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` (the notebook treats it as a list of strings).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

In [None]:
# Chunk the full PDF text; later cells embed and index these chunks.
splitted_chunks = split_text_into_chunks(extracted_data)

In [None]:
# Quick look at the splitter output: for each of the first five chunks,
# print its position and its first 100 characters.
for i, chunk in enumerate(splitted_chunks[:5]):
    print(f"Chunk {i}: {chunk[:100]}") 

In [None]:
def generate_query_embedding(query):
    """Embed one query with the ``models/text-embedding-004`` model.

    Args:
        query: The content to embed; passed as ``content`` to
            ``genai.embed_content``.

    Returns:
        The value stored under the ``'embedding'`` key of the response.

    Raises:
        ValueError: If the response is not a dict or has no ``'embedding'`` key.
    """
    result = genai.embed_content(
        content=query,
        model="models/text-embedding-004",
    )

    # Guard clause: reject anything that is not a dict carrying an embedding.
    if not isinstance(result, dict) or 'embedding' not in result:
        raise ValueError(f"Failed to generate embeddings for query: {query}")

    return result['embedding']



In [None]:
# Function to generate embeddings for the text chunks
def generate_gemini_embeddings(chunks):
    """Embed each non-blank chunk with the ``models/text-embedding-004`` model.

    Args:
        chunks: Iterable of strings. Only the first 500 characters of each
            chunk are sent to ``genai.embed_content``.

    Returns:
        list: One embedding per non-blank chunk, in input order. Chunks that are
        empty or whitespace-only are skipped, so the result can be shorter than
        ``chunks`` when such chunks are present.

    Raises:
        ValueError: If a response is not a dict containing an ``'embedding'``
            key. The original silently dropped such chunks, which shifted every
            later embedding out of step with its source text; failing loudly
            matches what ``generate_query_embedding`` does.
    """
    embeddings = []
    for position, chunk in enumerate(chunks):
        if not chunk.strip():  # Ensure the chunk is not empty
            continue

        response = genai.embed_content(
            model="models/text-embedding-004",
            content=chunk[:500]
        )

        if not (isinstance(response, dict) and 'embedding' in response):
            raise ValueError(
                f"Failed to generate embedding for chunk {position}: {chunk[:80]!r}"
            )
        embeddings.append(response['embedding'])

    return embeddings

In [None]:
# Embed every chunk (one embed_content call per chunk inside the helper).
embedded_chunks = generate_gemini_embeddings(splitted_chunks)

In [None]:
# Re-read the .env file, then fetch the Pinecone key from PINECONE_API_KEY.
load_dotenv()
papi_key = os.environ.get("PINECONE_API_KEY")

In [None]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone
# Build the client from `papi_key`, which an earlier cell reads from PINECONE_API_KEY.
pc = Pinecone(api_key=papi_key)

# Handle to the "physio" index; the upsert and query functions below use this
# global directly.
# NOTE(review): no create_index call is visible in this notebook, so the index
# presumably already exists in the Pinecone project -- confirm.
index = pc.Index("physio")

In [None]:
def upsert_embeddings_in_batches(text_chunks, embeddings, batch_size=100,
                                 namespace="ns1", source="your_document_source"):
    """Upsert (text, embedding) pairs into the global Pinecone ``index`` in batches.

    Vector ``i`` gets the id ``f"vec{i}"``, ``embeddings[i]`` as its values and
    ``{"text": text_chunks[i], "source": source}`` as its metadata.

    Args:
        text_chunks: Sequence of chunk texts, aligned one-to-one with
            ``embeddings``.
        embeddings: Sequence of embedding vectors.
        batch_size: Maximum number of vectors per ``index.upsert`` call.
        namespace: Namespace passed to ``index.upsert`` (default ``"ns1"``,
            the value this function originally hard-coded).
        source: Value stored under the ``"source"`` metadata key (default is
            the original placeholder string).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``text_chunks``
            and ``embeddings`` differ in length. Pairing is positional, so a
            length mismatch would attach the wrong text to a vector (or fail
            with ``IndexError`` after some batches were already written);
            both checks run before anything is sent.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if len(text_chunks) != len(embeddings):
        raise ValueError(
            f"text_chunks ({len(text_chunks)}) and embeddings ({len(embeddings)}) "
            "must have the same length"
        )

    for start in range(0, len(embeddings), batch_size):
        stop = start + batch_size
        # Number from `start` so ids stay global positions, not batch offsets.
        vectors = [
            {
                "id": f"vec{i}",  # Unique ID for each vector
                "values": embedding,  # The embedding values
                "metadata": {"text": text, "source": source},  # Metadata for the chunk
            }
            for i, (text, embedding) in enumerate(
                zip(text_chunks[start:stop], embeddings[start:stop]), start=start
            )
        ]
        index.upsert(vectors=vectors, namespace=namespace)

# Send every chunk and its embedding to Pinecone, at most 100 vectors per request.
upsert_embeddings_in_batches(
    splitted_chunks,
    embedded_chunks,
    batch_size=100,
)

In [None]:
def generate_query_embedding(query):
    """Embed one query with the ``models/text-embedding-004`` model.

    Note: this cell re-defines the function of the same name from an earlier
    cell of this notebook; running it simply rebinds the name.

    Args:
        query: The content to embed; passed as ``content`` to
            ``genai.embed_content``.

    Returns:
        The value stored under the ``'embedding'`` key of the response.

    Raises:
        ValueError: If the response is not a dict or has no ``'embedding'`` key.
    """
    result = genai.embed_content(
        content=query,
        model="models/text-embedding-004",
    )

    # Guard clause: reject anything that is not a dict carrying an embedding.
    if not isinstance(result, dict) or 'embedding' not in result:
        raise ValueError(f"Failed to generate embeddings for query: {query}")

    return result['embedding']


In [None]:
def retrieve_relevant_chunks(query_embedding, top_k=10, namespace="ns1"):
    """Query the global Pinecone ``index`` and return the matched chunk texts.

    Args:
        query_embedding: Vector passed as ``vector`` to ``index.query``.
        top_k: Number of matches to request (default 10, the value this
            function originally hard-coded).
        namespace: Namespace to search (default ``"ns1"``, the value this
            function originally hard-coded).

    Returns:
        list[str]: ``match['metadata']['text']`` for every match, in the order
        the response lists them. When the response has no ``"matches"`` entry
        or the entry is empty, a one-element list holding a "no relevant
        information" message is returned instead.
    """
    query_response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
        namespace=namespace
    )

    if "matches" not in query_response or not query_response["matches"]:
        return ["No relevant information found. Try rephrasing your query."]

    return [match['metadata']['text'] for match in query_response['matches']]


In [None]:
def query_and_retrieve(query):
    """Embed ``query`` and return the chunks retrieved for that embedding.

    Args:
        query: The user's question.

    Returns:
        The result of ``retrieve_relevant_chunks`` applied to the embedding
        produced by ``generate_query_embedding(query)``.
    """
    # Embed first, then look up the nearest chunks for that vector.
    return retrieve_relevant_chunks(generate_query_embedding(query))

# Example Usage
# Runs one fixed question through embedding + retrieval (no answer generation)
# and keeps the result in the notebook-global `retrieved_texts`.
query = "What are the symptoms of anxiety?"
retrieved_texts = query_and_retrieve(query)

# Show the raw list of retrieved chunk texts.
print(retrieved_texts)

In [None]:
# Function to use Gemini Pro LLM to generate the final answer
def generate_answer_with_gemini(query, retrieved_chunks,
                                model_name='models/gemini-1.5-pro-latest'):
    """Ask a Gemini model to answer ``query`` using the retrieved chunks as context.

    Args:
        query: The user's question; embedded verbatim in the prompt.
        retrieved_chunks: Iterable of strings; joined with newlines to form the
            context section of the prompt.
        model_name: Model identifier passed to ``genai.GenerativeModel``.
            Defaults to the identifier this function originally hard-coded, so
            existing callers are unaffected; pass another one to switch models
            without editing the function.

    Returns:
        The ``text`` attribute of the response returned by ``generate_content``.
    """
    # Combine the retrieved chunks into a single context
    context = "\n".join(retrieved_chunks)

    # Crafting the prompt for PhysioBot
    prompt = f"""
    You are a PhysioBOT and you have been asked to provide information from the sources you have read : {context}
    A user asks you: "{query}"
    you must answer in a very friendly and informative way and you must provide the answer in detail and in a way that is easy to understand.
    you must give detailed information about the query and provide a detailed answer and also keep in mind that you are not an expert physiotherapist but a chatbot. so end the answer by saying that the user should consult a physiotherapist for more information.
    Based on your knowledge, you provide the following answer:
    """

    model = genai.GenerativeModel(model_name)
    response = model.generate_content([prompt])
    return response.text


In [None]:
# Main function to handle user input and process the query through PhysioBOT
def main():
    """Prompt for one question, retrieve context for it and print the answer.

    Reads a line with ``input``, fetches the relevant chunks through
    ``query_and_retrieve`` (embedding + Pinecone lookup), asks
    ``generate_answer_with_gemini`` for an answer and prints it. Returns
    ``None``.
    """
    # Prompt user for input
    query = input("What would you like to ask PhysioBOT? ")

    # A blank question carries nothing to embed or answer; stop before any
    # API call is made instead of sending empty content downstream.
    if not query.strip():
        print("Please enter a question for PhysioBOT.")
        return

    # Steps 1-2: embed the query and retrieve the matching chunks. Reusing
    # query_and_retrieve keeps this path identical to the example cell's.
    retrieved_chunks = query_and_retrieve(query)

    # Step 3: Generate an answer using the Gemini LLM with the retrieved chunks
    answer = generate_answer_with_gemini(query, retrieved_chunks)

    # Display the final answer to the user
    print("\nPhysioBOTS's Answer:")
    print(answer)

# Run the main function
# The guard is true when this code runs as the top-level module; executing the
# cell therefore starts the interactive prompt and blocks until a question is typed.
if __name__ == "__main__":
    main()