In [10]:
import google.generativeai as genai

## Key Configuration

In [2]:
from dotenv import load_dotenv
import os

In [4]:
# Load variables from a local .env file (if present) into the process
# environment so the API key does not have to be hardcoded in the notebook.
load_dotenv()
# os.getenv returns None when GOOGLE_API_KEY is not set; nothing in this cell checks for that.
google_key=os.getenv('GOOGLE_API_KEY')
# Register the key with the google.generativeai client that later cells call through `genai`.
genai.configure(api_key=google_key)


## Reading and Storing the Document

In [15]:
import fitz

In [16]:
# Function to merge multiple PDFs into one
def merge_pdfs_from_folder(folder_path, output_pdf_path):
    """
    Merges all PDFs in the specified folder into a single PDF.

    Files are merged in sorted filename order so that repeated runs produce
    the same page order (os.listdir makes no ordering guarantee). The
    extension check is case-insensitive, and a file that is the output path
    itself is skipped so a re-run never merges a previous result into the new one.

    Args:
        folder_path (str): Path to the folder containing PDFs.
        output_pdf_path (str): Path to save the merged PDF.

    Returns:
        str: Path to the merged PDF.

    Raises:
        ValueError: If the folder contains no PDF files to merge.
    """
    output_abs = os.path.abspath(output_pdf_path)

    # Collect the inputs first so an empty folder fails with a clear message
    # before any document is created.
    pdf_paths = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(".pdf"):  # Only consider PDF files
            continue
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue
        if os.path.abspath(file_path) == output_abs:
            continue  # Never merge the output file into itself
        pdf_paths.append(file_path)

    if not pdf_paths:
        raise ValueError(f"No PDF files found in folder: {folder_path}")

    # Create a new empty PDF document
    output_pdf = fitz.open()
    try:
        for file_path in pdf_paths:
            input_pdf = fitz.open(file_path)  # Open the PDF
            try:
                output_pdf.insert_pdf(input_pdf)  # Insert pages into the output PDF
            finally:
                input_pdf.close()  # Close the input even if the insert fails

        # Save the merged PDF
        output_pdf.save(output_pdf_path)
    finally:
        output_pdf.close()

    return output_pdf_path

In [31]:
# Merge every PDF under Empath-AI/Data/Mental_Health-PDFS into Empath-AI/Data/Final.pdf.
# Both paths are relative to the kernel's working directory; final_pdf holds the returned output path.
final_pdf=merge_pdfs_from_folder("Empath-AI/Data/Mental_Health-PDFS","Empath-AI/Data/Final.pdf")

In [32]:
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """
    Extracts the text of every page of a PDF as one string.

    Args:
        pdf_path (str): Path to the PDF file to read.

    Returns:
        str: The text of all pages, concatenated in page order with no
        separator added between pages.
    """
    doc = fitz.open(pdf_path)
    try:
        # join builds the result in one pass instead of repeated string +=
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()  # Release the file handle even if a page fails to parse

In [34]:
# Read the merged PDF back and keep its full text in memory as a single string.
extracted_text=extract_text_from_pdf("Empath-AI/Data/Final.pdf")

In [39]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_text_into_chunks(text, chunk_size=500, chunk_overlap=50):
    """
    Splits text into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text (str): The text to split.
        chunk_size (int): Value passed to the splitter as chunk_size.
            Defaults to 500, the value this notebook originally hard-coded.
        chunk_overlap (int): Value passed to the splitter as chunk_overlap.
            Defaults to 50, the value this notebook originally hard-coded.

    Returns:
        list[str]: The chunks returned by the splitter's split_text.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)



In [40]:
# Split the extracted text into chunks; these are what get embedded and stored as vector metadata later.
extracted_chunks=split_text_into_chunks(extracted_text)

In [42]:
# Preview the first five chunks, truncated to 200 characters each, to eyeball the split.
for i, chunk in enumerate(extracted_chunks[:5]):
    print(f"Chunk {i}: {chunk[:200]}") 

Chunk 0: MENTAL 
HEALTH CARE
 in Settings Where Mental Health 
Resources Are Limited
An Easy-Reference 
GUIDEBOOK 
for Healthcare Providers
 in Developed and Developing Countries
PA M E L A  S M I T H ,  M D
 
Chunk 1: An Easy-Reference Guidebook for Healthcare Providers 
in Developed and Developing Countries
 Copyright © 2014 Pamela Smith.
All rights reserved. No part of this book may be used or reproduced by any m
Chunk 2: Archway Publishing books may be ordered through booksellers or by contacting:
Archway Publishing
1663 Liberty Drive
Bloomington, IN 47403
www.archwaypublishing.com
1-(888)-242-5904
Because of the dyna
Chunk 3: views of the publisher, and the publisher hereby disclaims any responsibility for them.
The  eld guide is not a substitute for comprehensive psychiatry, psychology, or 
other related mental health te
Chunk 4: describe generally accepted practices. Application of this information in a particular 
situation remains the responsibility of the practitioner or hea

In [43]:
def generate_gemini_embeddings(chunks, model="models/text-embedding-004"):
    """
    Embeds each non-blank chunk with genai.embed_content.

    Whitespace-only chunks are skipped, so the result is index-aligned with
    `chunks` only when `chunks` contains no blank entries. A response without
    an embedding raises instead of being dropped silently: a silent drop
    would shift every later embedding against its source chunk, and callers
    that pair chunks and embeddings by position would then store the wrong
    text with each vector.

    Args:
        chunks (list[str]): Text chunks to embed.
        model (str): Embedding model name passed to genai.embed_content.

    Returns:
        list: One embedding per non-blank chunk, in input order.

    Raises:
        ValueError: If the API response for a chunk has no 'embedding' entry.
    """
    embeddings = []
    for position, chunk in enumerate(chunks):
        if not chunk.strip():  # Ensure the chunk is not empty
            continue

        response = genai.embed_content(model=model, content=chunk)

        # The embedding values are expected directly under the 'embedding' key
        if not (isinstance(response, dict) and 'embedding' in response):
            raise ValueError(f"Failed to generate embedding for chunk {position}")
        embeddings.append(response['embedding'])
    return embeddings

In [44]:
# Embed every chunk; this makes one embedding API call per chunk, so it is the slow, billable step of the notebook.
embedded_data=generate_gemini_embeddings(extracted_chunks)

In [46]:
# Spot-check the length of one embedding vector (index 7 is an arbitrary sample); the recorded output is 768.
len(embedded_data[7])

768

In [3]:
# Load .env again (presumably so this cell also works when run on its own) and read the Pinecone key.
load_dotenv()
# os.getenv returns None when PINECONE_API_KEY is not set; nothing in this cell checks for that.
pinecone_key=os.getenv("PINECONE_API_KEY")

In [4]:
from pinecone import Pinecone
# Create the Pinecone client from the loaded key.
pc=Pinecone(api_key=pinecone_key)
# Handle to the "empath-ai" index; the upsert and query functions below read this module-level `index`.
# NOTE(review): assumes the index already exists with a dimension matching the embeddings — confirm.
index=pc.Index("empath-ai")

  from tqdm.autonotebook import tqdm


In [5]:
def upsert_embeddings_in_batches(text_chunks, embeddings, batch_size=100,
                                 namespace="ns1", source="your_document_source"):
    """
    Upserts embeddings with their source text into the module-level `index`.

    Chunks and embeddings are paired by position, so the two sequences must
    have the same length; a mismatch would store the wrong text next to a
    vector, which is why it is rejected up front.

    Args:
        text_chunks (list[str]): Source text for each embedding, same order.
        embeddings (list): Embedding vectors to store.
        batch_size (int): Maximum number of vectors per upsert call.
        namespace (str): Namespace passed to index.upsert.
        source (str): Value stored under the "source" metadata key.

    Returns:
        None

    Raises:
        ValueError: If batch_size is less than 1, or if text_chunks and
            embeddings differ in length.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if len(text_chunks) != len(embeddings):
        raise ValueError(
            f"text_chunks ({len(text_chunks)}) and embeddings ({len(embeddings)}) "
            "must have the same length to stay aligned"
        )

    total = len(embeddings)
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        batch = [
            {
                "id": f"vec{i}",  # Unique ID for each vector
                "values": embeddings[i],  # The embedding values
                "metadata": {"text": text_chunks[i], "source": source},
            }
            for i in range(start, stop)
        ]
        index.upsert(vectors=batch, namespace=namespace)



In [50]:
# Call the function to upsert embeddings in batches
# Chunks and embeddings are paired by position, so both lists must be in the same order.
upsert_embeddings_in_batches(extracted_chunks, embedded_data, batch_size=100)

## RAG Model

In [6]:
def generate_query_embedding(query):
    """
    Embeds a single query string with the same model used for the document chunks.

    Args:
        query (str): The text to embed.

    Returns:
        The value stored under 'embedding' in the embed_content response.

    Raises:
        ValueError: If the response has no 'embedding' entry.
    """
    result = genai.embed_content(
        content=query,
        model="models/text-embedding-004",
    )

    # Guard clause: bail out first when the API returned nothing usable
    if 'embedding' not in result:
        raise ValueError(f"Failed to generate embeddings for query: {query}")
    return result['embedding']

In [12]:
def retrieve_relevant_chunks(query_embedding, top_k=2, namespace="ns1"):
    """
    Queries the module-level Pinecone `index` and returns the stored chunk text.

    Args:
        query_embedding (list): Embedding vector of the user query.
        top_k (int): Number of matches to request. Defaults to 2, the value
            this notebook originally hard-coded.
        namespace (str): Namespace to search. Defaults to "ns1", the
            namespace the upsert step writes to.

    Returns:
        list[str]: The 'text' metadata of each match, in the order the
        index returned them.
    """
    # Search Pinecone index using the query embedding
    query_response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,  # The chunk text lives in the metadata
        namespace=namespace,
    )

    # Extract relevant chunks from the Pinecone response
    return [match['metadata']['text'] for match in query_response['matches']]

In [None]:
# def query_and_retrieve(query):
#     # Step 1: Generate the embedding for the query
#     query_embedding = generate_query_embedding(query)
    
#     # Step 2: Retrieve relevant chunks using the generated embedding
#     retrieved_texts = retrieve_relevant_chunks(query_embedding)
    
#     # Step 3: Return the retrieved texts
#     return retrieved_texts

# # Example Usage
# query = "What are the symptoms of anxiety?"
# retrieved_texts = query_and_retrieve(query)

In [15]:
# Function to use Gemini Pro LLM to generate the final answer
def generate_answer_with_gemini(query, retrieved_chunks, model_name='gemini-pro'):
    """
    Builds the EMPATH-AI prompt from the query and retrieved chunks and asks Gemini for a reply.

    Args:
        query (str): The user's input; inserted verbatim into the prompt.
        retrieved_chunks (list[str]): Text passages retrieved for the query;
            joined with newlines to form the context section of the prompt.
        model_name (str): Name passed to genai.GenerativeModel. Defaults to
            'gemini-pro', the model this notebook originally hard-coded.

    Returns:
        str: The `text` of the model's response.
    """
    # Combine the retrieved chunks into a single context
    context = "\n".join(retrieved_chunks)
    
    # Crafting the prompt for EMPATH-AI
    prompt = f"""
    You are EMPATH-AI, created by Soham and Priya, designed to offer personalized emotional support based on a detailed assessment 
    of the user's emotional and mental health state. 
    Your tone should feel like that of a caring and close friend—warm, understanding, and encouraging. Be specific and personal when providing advice, referencing practical steps the user can take. 

    If suicidal thoughts are detected, prioritize providing immediate comfort and specific, actionable steps. This might include grounding techniques like deep breathing or asking the user to reach out to someone they trust. 

    When referring to coping strategies, include specific suggestions. For example:
    - "Studies show that deep breathing exercises for 5 minutes a day can significantly reduce anxiety. How about giving it a try? Here’s a simple breathing technique you can follow: breathe in for 4 seconds, hold for 4, and breathe out for 4."
    - "Experts recommend creating small goals, like getting up at the same time each day or setting a bedtime, to help improve mood and reduce anxiety. You could try this by setting a 10 PM bedtime as a start."

    When relevant, refer to **specific research or studies**. For example:
    - "According to a study published in the Journal of Clinical Psychology, practicing gratitude for just 5 minutes a day can help improve mood. Maybe try writing down three things you’re grateful for each morning."
    - "Research shows that regular exercise, even just 20 minutes of walking, has been linked to reducing feelings of anxiety and depression. How about we set a small goal for today—maybe a quick walk around the block?"

    Always provide easy-to-implement actions, and make sure your suggestions feel supportive and conversational. Your goal is to provide practical steps they can easily follow, while making them feel understood and supported.

    

    The following is the input that includes the detailed emotional assessment and explanations provided by the first model:
    "{query}"

    Now, based on the retrieved information from some of the psychological books provided below and the user's current emotional state, offer a friendly, specific, and supportive response. Include research-backed suggestions where appropriate and provide actionable steps the user can take to feel better:
    RETRIEVED INFORMATION: {context}
    """


    model = genai.GenerativeModel(model_name)
    response = model.generate_content([prompt])
    # NOTE(review): the .text accessor can raise when the response carries no
    # text (e.g. a blocked candidate) — confirm against the SDK docs and
    # decide whether callers need a fallback for sensitive queries.
    return response.text


In [18]:
# Main function to handle user input and process the query through EMPATH-AI
def main():
    """
    Runs one interactive question/answer round of the RAG pipeline.

    Reads a query from the user, embeds it, retrieves the closest stored
    chunks, prints them, then prints the generated answer. Blank input is
    rejected before any API call is made.

    Returns:
        None
    """
    # Prompt user for input; surrounding whitespace carries no meaning
    query = input("Ask Empath-AI").strip()

    # Nothing to embed or answer for blank input, so skip the API calls
    if not query:
        print("Please enter a message so Empath-AI can help.")
        return

    # Step 1: Embed the query using the Gemini embedding model
    query_embedding = generate_query_embedding(query)

    # Step 2: Retrieve relevant chunks from Pinecone based on query embedding
    retrieved_chunks = retrieve_relevant_chunks(query_embedding)
    print("Retrived Chunks:",retrieved_chunks)

    # Step 3: Generate an answer using Gemini Pro LLM with the retrieved chunks
    answer = generate_answer_with_gemini(query, retrieved_chunks)

    # Display the final answer to the user
    print("\nEMPATH-AI's Answer:")
    print(answer)

# Run the main function
# The recorded output below shows this guard is true when the cell runs in the notebook,
# so executing the cell calls main() immediately and waits on input() for a query.
if __name__ == "__main__":
    main()

Retrived Chunks: ['stomach. I wasn’t sick — I didn’t have a fever or something wrong. I don’t\nknow if it’s related, but my boyfriend has this girl, this girlfriend that he\ngoes down to see, and this particular weekend I didn’t want him to go\ndown — I hadn’t seen him for a long time because of the vacation. What’s\nreally disgusting is that next weekend she’s coming up here! That about\nkicked me over the edge. Could it be that the pains in my stomach come\nfrom that, that I’m more afraid of losing him than I think?”', 'a very small step; the “You don’t show it” and “You show it to that baby” are\ncombined.]\nClient: You show it to that baby and not to me! You show it to that baby and not to\nme!!! [increasing anger] IT’S ALL GONE!!\nTherapist: Pound the pillow! [a direction] Say, “ You took your love away and gave it\nto that baby! Say that!; [This prompt amplified the feeling “It’s all gone” and\n“You show it to that baby.”]\nClient: You took your love away and gave it to that baby