In [None]:
# Install required packages
%pip install -U langchain-community PyPDF2 pinecone-client sentence-transformers

In [None]:
%pip install openai

In [None]:
!openai migrate

In [None]:
!pip install google-generativeai

In [12]:
import os
import time

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone with your API key.
# The key is read from the PINECONE_API_KEY environment variable so that it is
# never stored in the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): a key was previously committed in this cell - revoke and rotate it.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Define the index name
index_name = "formulae-index"

# Check if the index exists; if not, create it
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # Ensure this matches your embedding model output dimension
        metric="cosine",  # Metric for similarity search
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # Adjust region based on your setup
    )
    # Index creation is asynchronous: block until the new index reports ready,
    # otherwise the first upsert/query against it can fail.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Connect to the index
index = pc.Index(index_name)

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path (or file-like object) handed to ``PdfReader``.

    Returns:
        The per-page text joined in page order with no separator. A page that
        yields no text (``None`` or an empty string) contributes nothing.
    """
    reader = PdfReader(pdf_path)
    # `or ""` keeps a page without extractable text from breaking the join;
    # joining once avoids rebuilding the string on every page.
    return "".join(page.extract_text() or "" for page in reader.pages)

# Load and process the PDF
pdf_path = "medicalformulae.pdf"  # Replace with your PDF file path
pdf_text = extract_text_from_pdf(pdf_path)

# Split the text into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=10)
docs = text_splitter.create_documents([pdf_text])

# Embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")

# Embed all chunks in one batched call instead of one model call per chunk.
chunk_texts = [doc.page_content for doc in docs]
chunk_embeddings = embeddings.embed_documents(chunk_texts)

# One record per chunk: a stable id, the vector, and the chunk text as metadata
# so that queries can return the original passage.
vectors = [
    {
        "id": f"doc-{i}",
        "values": embedding,
        "metadata": {"content": text},
    }
    for i, (text, embedding) in enumerate(zip(chunk_texts, chunk_embeddings))
]

if vectors:
    # Send the records in batches so a large PDF does not exceed the size
    # limit of a single upsert request.
    index.upsert(vectors=vectors, batch_size=100)
    print("Training complete! PDF data has been added to Pinecone.")
else:
    # Nothing to index (e.g. a scanned PDF with no text layer); skip the upsert.
    print(f"No text could be extracted from {pdf_path}; nothing was added to Pinecone.")


Training complete! PDF data has been added to Pinecone.


In [15]:
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone

# Initialize Pinecone
# The key is read from the PINECONE_API_KEY environment variable so that it is
# never stored in the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): a key was previously committed in this cell - revoke and rotate it.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Connect to the existing index
index_name = "formulae-index"  # Ensure this matches the trained index
index = pc.Index(index_name)

# Embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")

# Inference loop: read a question, embed it, and print the closest stored chunks.
print("Ask your questions (type 'exit' to quit):")
while True:
    # Take user input
    user_query = input("Your question: ").strip()

    # Exit condition
    if user_query.lower() == "exit":
        print("Exiting. Goodbye!")
        break

    # A blank line carries no question; re-prompt instead of embedding and
    # querying with an empty string.
    if not user_query:
        continue

    # Generate embedding for the query
    query_embedding = embeddings.embed_query(user_query)

    # Query the Pinecone index for the 4 nearest chunks, including their stored text
    response = index.query(vector=query_embedding, top_k=4, include_metadata=True)

    # Check if matches are found
    if not response.matches:
        print("No relevant information found.")
        continue

    print("Relevant Information:")
    for hit in response.matches:
        print(hit['metadata']['content'])
        print("-" * 50)


Ask your questions (type 'exit' to quit):
Relevant Information:
\] 
Where:  
\begin{itemize}  
    \item \( \text{DBP} \) = Diastolic Blood Pressure  
    \item \( \text{SBP} \) = Systolic Blood Pressure  
\end{itemize}  
 
\subsection*{3. Cardiac Output (CO)}
--------------------------------------------------
\[ 
\text{BMI} = \frac{ \text{Weight (kg)}}{ \text{Height (m)}^2}  
\] 
 
\subsection*{2. Mean Arterial Pressure (MAP)}  
\[ 
\text{MAP} = \text{DBP} + \frac{ \text{SBP} - \text{DBP}}{3}  
\]
--------------------------------------------------
\[ 
\text{CO} = \text{SV} \times \text{HR}  
\] 
Where:  
\begin{itemize}  
    \item \( \text{SV} \) = Stroke Volume  
    \item \( \text{HR} \) = Heart Rate  
\end{itemize}
--------------------------------------------------
\item \( \text{Patm} \) = Atmospheric Pressure  
    \item \( \text{PH}_2 \text{O} \) = Water Vapor Pressure  
    \item \( \text{PaCO}_2 \) = Partial Pr essure of Arterial CO \(_2\)
------------------------------------

    api_key="<REDACTED>"  # Replace with your Pinecone API key


In [18]:
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone
import google.generativeai as genai

# Initialize Pinecone
# Both service keys are read from environment variables so that they are never
# stored in the notebook; a missing variable fails fast with a KeyError.
# NOTE(review): keys were previously committed in this cell - revoke and rotate them.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Connect to the existing index
index_name = "formulae-index"  # Ensure this matches the trained index
index = pc.Index(index_name)

# Embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")

# Configure Google Gemini API
API_KEY = os.environ["GOOGLE_API_KEY"]
genai.configure(api_key=API_KEY)

# Initialize Gemini Model
gemini_model = genai.GenerativeModel("gemini-1.5-flash")

# Function to query Google Gemini
def query_gemini(query, relevant_info):
    """Ask Gemini to answer ``query`` using ``relevant_info`` as context.

    Starts a new chat on the module-level ``gemini_model``, sends one prompt
    that embeds both arguments, and returns the text of the reply.

    Args:
        query: The user's question; interpolated into the prompt.
        relevant_info: Context text; interpolated into the prompt before the query.

    Returns:
        The reply text, or ``None`` if any exception was raised along the way
        (the error is printed, not re-raised).
    """
    answer = None
    try:
        session = gemini_model.start_chat()
        # The prompt literal is kept exactly as-is, indentation included,
        # because it is the text sent to the model.
        prompt = f"""
        You are an AI system answering queries based on a data store. 
        Here is the data you should use:
        {relevant_info}

        Here is the user's query:
        {query}

        Now you are supposed to take this data into context and write SQL queries if asked to do so, if not asked, reply without sql queries
        """
        # Reading `.text` stays inside the try so a failure there is reported too.
        answer = session.send_message(prompt).text
    except Exception as exc:
        print(f"Error querying Gemini: {exc}")
    return answer

# Inference loop: read a question, retrieve the closest stored chunks, and let
# Gemini answer from them.
print("Ask your questions (type 'exit' to quit):")
while True:
    # Take user input
    user_query = input("Your question: ").strip()

    # Exit condition
    if user_query.lower() == "exit":
        print("Exiting. Goodbye!")
        break

    # A blank line carries no question; re-prompt instead of spending an
    # embedding, a Pinecone query and a Gemini call on an empty string.
    if not user_query:
        continue

    # Generate embedding for the query
    query_embedding = embeddings.embed_query(user_query)

    # Query the Pinecone index for the 4 nearest chunks, including their stored text
    response = index.query(vector=query_embedding, top_k=4, include_metadata=True)

    # Check if matches are found
    if not response.matches:
        print("No relevant information found.")
        continue

    # Combine relevant information into one context string for the prompt
    relevant_info = "\n".join(hit['metadata']['content'] for hit in response.matches)
    # The passages themselves are not echoed; report how many were retrieved
    # rather than printing a header with nothing under it.
    print(f"Retrieved {len(response.matches)} relevant passages.")
    print("-" * 50)

    # Query Gemini with the relevant info
    gemini_response = query_gemini(user_query, relevant_info)
    if gemini_response:
        print("Gemini Response:")
        print(gemini_response)
        print("-" * 50)
    else:
        # query_gemini returns None on error (after printing it) and may
        # return an empty string; say so instead of silently re-prompting.
        print("Gemini did not return an answer.")
        print("-" * 50)


Ask your questions (type 'exit' to quit):


Relevant Information:
--------------------------------------------------
Gemini Response:
The Cockcroft-Gault formula is used to estimate creatinine clearance (CrCl).  For males, the formula is:

CrCl = [(140 - Age) × Weight (kg)] / [Serum Creatinine (mg/dL) × 72]

--------------------------------------------------
Relevant Information:
--------------------------------------------------
Gemini Response:
```sql
-- Assuming a table named 'patient_data' with the following columns:
-- Age (INT), Weight_kg (FLOAT), Serum_Creatinine_mg_dL (FLOAT), Sex (VARCHAR), Race (VARCHAR)

SELECT
    Age,
    Weight_kg,
    Serum_Creatinine_mg_dL,
    CASE
        WHEN Sex = 'Male' THEN (140 - Age) * Weight_kg / (Serum_Creatinine_mg_dL * 72)
        ELSE (140 - Age) * Weight_kg / (Serum_Creatinine_mg_dL * 72) * 0.85  -- Assuming 85% of male value for females.  A more accurate calculation would require a separate formula for females as shown in the provided text.
    END as Cockcroft_Gault_CrCl
FROM
    