In [8]:
! pip install boto3 
! pip install pinecone-client 
! pip install openai 
! pip install numpy 
! pip install langchain 
! pip install python-dotenv 
! pip install langchain-openai 
! pip install langchain-core




In [11]:
import os
import boto3
from pinecone import Pinecone
import openai
import re
import numpy as np
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from dotenv import load_dotenv

load_dotenv(verbose=True,override=True)



True

In [12]:
def extract_relevant_symptoms(text):
    print("Extracting relevant symptoms from the text...")
    relevant_symptoms = []
    symptom_pattern = r"Possible symptoms to consider:\s*([^.]+)"
    match = re.search(symptom_pattern, text, re.IGNORECASE)
    if match:
        symptoms = [symptom.strip() for symptom in match.group(1).split(",")]
        relevant_symptoms = symptoms
        print(f"Extracted {len(relevant_symptoms)} relevant symptoms.")
        return relevant_symptoms
    else:
        # Try a different pattern if the first one doesn't match
        symptom_pattern = r"Symptoms:\s*([^.]+)"
        match = re.search(symptom_pattern, text, re.IGNORECASE)
        if match:
            symptoms = [symptom.strip() for symptom in match.group(1).split(",")]
            relevant_symptoms = symptoms
            print(f"Extracted {len(relevant_symptoms)} relevant symptoms.")
            return relevant_symptoms
        else:
            # Try a different pattern for the provided text
            symptom_pattern = r"Possible symptoms to consider:\n([^.]+)"
            match = re.search(symptom_pattern, text, re.IGNORECASE)
            if match:
                symptoms = [symptom.strip() for symptom in match.group(1).split("\n")]
                relevant_symptoms = symptoms
                print(f"Extracted {len(relevant_symptoms)} relevant symptoms.")
                return relevant_symptoms
            else:
                print("No relevant symptoms found in the text.")
                return []

In [13]:
def generate_diagnosis(descriptive_texts):
     openai.api_key = os.getenv("OPENAI_API_KEY")
     combined_text = " ".join(descriptive_texts)
     response = openai.chat.completions.create(
          model="gpt-3.5-turbo-0125",
          messages=[
               {"role": "system", "content": "You are a helpful medical assistant."},
               {"role": "user", "content": f"Based on the symptoms '{combined_text}', what are the top 5 possible diseases, a small description, their symptoms, home remedies if any, medicines if any and ways to cure them?"}
          ],
          max_tokens=1024,
          n=1,
          stop=None,
          temperature=0.9,
     )
     return response.choices[0].message.content # type: ignore


In [16]:
#RAG
def app():
    print("Medication")

    # Load the summary from S3
    print("Loading summary from S3...")
    s3 = boto3.client('s3',
                      aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
                      aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"))
    bucket_name = os.getenv("S3_BUCKET_NAME")
    file_name = "Emily Garcia.txt"
    try:
        response = s3.get_object(Bucket=bucket_name, Key=file_name)
        summary = response['Body'].read().decode('utf-8')
        print("Summary loaded successfully.")
    except:
        #st.error("Error loading summary from S3")
        print("Error loading summary from S3.")
        return

    print("Connecting to Pinecone index...")
    pc = Pinecone(api_key=os.getenv('pinecone_api_key'))
    index_name = "perclias"
    index = pc.Index(name=index_name)
    print("Connected to Pinecone index.")

    # Extract symptoms from the summary
    print("Extracting symptoms from the summary...")
    symptoms = extract_relevant_symptoms(summary)
    print(f"Extracted {len(symptoms)} relevant symptoms: {symptoms}")
    processed_symptoms = [symptom.lower().strip() for symptom in symptoms]
    print(f"\nProcessed symptoms: {processed_symptoms}")

    # Vectorize the symptoms using LangChain
    print("Vectorizing symptoms using LangChain...")
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    symptom_vectors = embeddings.embed_documents(processed_symptoms)
    #print(symptom_vectors)

    # Club all the symptom vectors together
    print("Clubbing symptom vectors together...")
    combined_symptom_vector = np.mean(symptom_vectors, axis=0).tolist()

    #print(combined_symptom_vector)
    #print(combined_symptom_vector_1)

    # Query the Pinecone index for medication and remedies
    if symptoms:
        #print("Recommended Medications, Remedies, and Diseases:")
        try:
            query_result = index.query(
                vector=combined_symptom_vector,
                top_k=7,
                include_metadata=True
            )
            print(query_result)

            if query_result["matches"]:
                print("\nRecommended IDs based on Pinecone results:")
                for match in query_result["matches"]:
                    id = match['id']
                    score = match['score']
                    print(f"- ID: {id} (Score: {score})")
            else:
                print("No matches found in Pinecone.")
            
            descriptive_texts = [match['id'] for match in query_result['matches']]
            print(f"\nRetrieved descriptions from Pinecone: {descriptive_texts}")

            if descriptive_texts:
                print("Generating diagnosis report...")
                diagnosis_report = generate_diagnosis(descriptive_texts)
                print(f"\nDiagnosis Report: {diagnosis_report}")
            else:
                print("\nNo relevant data found in Pinecone.")

            print("Query completed.")

        except Exception as e:
            print(f"Error processing symptoms: {e}")
        
    else:
        print("Possible Diseases Based on OpenAI Analysis:")
        print("No symptoms found in the summary, querying OpenAI for possible diseases...")
        openai.api_key = os.getenv("OPENAI_API_KEY")
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful medical assistant."},
               {"role": "user", "content": f"Based on the provided summary, what are the top 5 possible diseases, a small description, their symptoms, home remedies if any, medicines if any and ways to cure them?"}
            ],
            max_tokens=1024,
            n=1,
            stop=None,
            temperature=0.7,
        )
        print("OpenAI query completed.")
        disease_info = response.choices[0].message.content # type: ignore
        print(disease_info)

if __name__ == "__main__":
    app()


Medication
Loading summary from S3...
Summary loaded successfully.
Connecting to Pinecone index...
Connected to Pinecone index.
Extracting symptoms from the summary...
Extracting relevant symptoms from the text...
Extracted 1 relevant symptoms.
Extracted 1 relevant symptoms: ['- Severe abdominal pain\n- Diarrhea\n- Elevated calprotectin levels\n- Inflammation']

Processed symptoms: ['- severe abdominal pain\n- diarrhea\n- elevated calprotectin levels\n- inflammation']
Vectorizing symptoms using LangChain...
Clubbing symptom vectors together...
{'matches': [{'id': 'albumin levelHigh erythrocyte sedimentation rateElevated '
                    'C-reactive protein (CRP)Fecal fatLow blood count '
                    '(hemoglobin and hematocrit)Abnormal liver blood testsHigh '
                    'white blood cell count Elevated fecal calprotectin level '
                    'in stoolTreatment',
              'score': 0.613925755,
              'values': []},
             {'id': 'Symptoms i