In [1]:
import gradio as gr
import os
import time
from crewai import Agent, Task, Crew, Process
from crewai_tools import ScrapeWebsiteTool, SerperDevTool
from crewai_tools import tool
from datetime import datetime

#from docx import Document
from io import BytesIO
import base64

import numpy as np
import pandas as pd
from IPython.display import Markdown, display

from dotenv import load_dotenv
#load_dotenv("./.env")
load_dotenv("/home/jovyan/Albert/.env")

# Credentials are read from the process environment, which load_dotenv() above
# populates from the .env file. Nothing secret belongs in the notebook itself:
# SERPER_API_KEY must be supplied through .env like the other variables, and any
# key that was ever committed inside a notebook should be treated as leaked and rotated.
REQUIRED_ENV_VARS = (
    "OPENAI_API_KEY",
    "SLIP_AUTH_URL",
    "SLIP_USERNAME",
    "SLIP_PASSWORD",
    "SLIP_CLIENT_ID",
    "SLIP_APP_NAME",
    "SERPER_API_KEY",
)

# Fail fast with a readable message. Re-assigning os.environ[name] = os.environ.get(name)
# is a no-op when the variable exists and raises an opaque
# "TypeError: str expected, not NoneType" when it does not.
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variables: " + ", ".join(missing_env_vars)
    )

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from llm_utils import get_llm
# Single LLM handle (temperature 0) passed to every agent defined below.
llm = get_llm("gpt-4o-0806", temperature=0)

In [3]:
# Location of the prepared member-history extract.
LLM_DATA_CSV = '/home/jovyan/Roja/ds-ihan/v2/LLM_Agents/data/data_prep/LLM_data.csv'
df = pd.read_csv(LLM_DATA_CSV)

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,id,dos,codeindex,age,gender,type,value,code,cd_desc,abnormal_cd
0,1,2019-08-08,80380,73,Female,lab,,5821-4,WBC #/area UrnS HPF,A
1,1,2019-06-04,73878,73,Female,lab,3.699999,1751-7,Albumin SerPl-mCnc,UNK
2,1,2019-08-27,84709,73,Female,gpi,,34000003,Amlodipine,
3,1,2019-06-04,82967,73,Female,lab,282.000158,777-3,Platelet # Bld Auto,UNK
4,1,2019-08-08,75247,73,Female,lab,,2349-9,Glucose Ur Ql,UNK


In [5]:
# (rows, columns) of the loaded frame.
df.shape

(223, 10)

In [6]:
#df=df[~df['type'].isin(['EMR_Note'])]

In [7]:
# Coerce 'value' to floats (entries that cannot be parsed become NaN), then keep two decimals.
df['value'] = pd.to_numeric(df['value'], errors='coerce').round(2)

In [8]:
# Column dtypes and non-null counts after the 'value' coercion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 223 entries, 0 to 222
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           223 non-null    int64  
 1   dos          223 non-null    object 
 2   codeindex    223 non-null    int64  
 3   age          223 non-null    int64  
 4   gender       223 non-null    object 
 5   type         223 non-null    object 
 6   value        128 non-null    float64
 7   code         223 non-null    object 
 8   cd_desc      223 non-null    object 
 9   abnormal_cd  144 non-null    object 
dtypes: float64(1), int64(3), object(6)
memory usage: 17.6+ KB


In [9]:
# Drop any leading/trailing whitespace from the column labels.
df.columns = df.columns.map(str.strip)

In [10]:
# Instantiate the search and scrape tools imported from crewai_tools.
search_tool, scrape_tool = SerperDevTool(), ScrapeWebsiteTool()

In [11]:
# Agent that reports a member's ID, age, and gender; it uses the shared LLM
# and is not allowed to delegate.
basic_info_agent = Agent(
    llm=llm,
    verbose=True,
    allow_delegation=False,
    role="Basic Member Information Provider",
    goal="Display the member ID, age, and gender for the specified member",
    backstory="You are a data retrieval specialist responsible for quickly and accurately providing basic member information.",
)

In [12]:

# Settings every section analyst has in common: verbose logging, no delegation,
# and the shared LLM handle.
_ANALYST_SETTINGS = {"verbose": True, "allow_delegation": False, "llm": llm}

# Diagnosis section.
diagnosis_agent = Agent(
    role="Diagnosis Analyst",
    goal="Provide a comprehensive analysis of the patient's diagnostic history",
    backstory="You are an expert in medical diagnostics with years of experience in analyzing patient histories.",
    **_ANALYST_SETTINGS,
)

# Procedure section.
procedure_agent = Agent(
    role="Procedure Analyst",
    goal="Analyze and summarize all medical procedures the patient has undergone",
    backstory="You are a medical procedure specialist with deep knowledge of various medical interventions.",
    **_ANALYST_SETTINGS,
)

# Medication section.
medication_agent = Agent(
    role="Medication Analyst",
    goal="Provide a detailed overview of the patient's medication history",
    backstory="You are a clinical pharmacologist with expertise in analyzing medication regimens and their effects.",
    **_ANALYST_SETTINGS,
)

# Lab test section.
lab_test_agent = Agent(
    role="Lab Test Analyst",
    goal="Analyze and interpret all laboratory tests the patient has undergone",
    backstory="You are an expert in analyzing laboratory test codes and results. Your summaries are renowned for their depth, clarity, and insightful interpretation of lab test patterns and their clinical significance.",
    **_ANALYST_SETTINGS,
)

In [13]:
# Task description template for basic member information retrieval.
# The {member_id}, {age}, and {gender} placeholders are filled from the inputs
# passed to Crew.kickoff().
# The gender note accepts both text labels and numeric codes: the dataset stores
# "Female"/"Male" strings, which a strict 0/1 rule would map to "Unknown".
BASIC_INFO_PROMPT = """
Retrieve and display the following information for the member:
1. Member ID: {member_id}
2. Age: {age}
3. Gender: {gender}


Example :
Member Information

Member ID: 10
Age: 35
Gender: Male

Format the output like the top in markdown format.

Note: If the gender value is already a label such as "Female" or "Male", display it as given. If it is a numeric code, display "Female" for 0 and "Male" for 1. Display "Unknown" for any other value.
"""

In [14]:
# Task description template for the diagnosis crew.
# The {diagnosis_history} placeholder is filled from the inputs passed to
# Crew.kickoff() (process_member_data supplies the 'diag' rows via to_string()).
# .strip() removes the leading/trailing newlines of the triple-quoted literal.
DIAG_SUMMARY_PROMPT = """
Task Description
Your task is to summarize the following 'Patient Diagnosis Code History' and nothing else. The summary should be thorough, capturing ALL diagnoses present in the data.

Input Data
Patient Diagnosis Code History:
{diagnosis_history}

Output Format
Generate the summary in the following markdown format:

Diagnosis Summary:

Diagnosis Category 1:
- Diagnosis 2 (code2) in [dos1, dos2, dos3]
- Diagnosis 1 (code1) in [dos1, dos2, dos3, dos4, dos5]

Diagnosis Category 2:
- Diagnosis 6 (code6) in [dos1, dos2, dos3]
- Diagnosis 4 (code4) in [dos1]

Diagnosis Category 3: 
- Diagnosis 3 (code3) in [dos1, dos2, dos3, dos4, dos5]
- Diagnosis 5 (code5) in [dos1, dos2, dos3, dos4, dos5, dos6]

Rules and Guidelines

1.Only include diagnoses and dates explicitly present in the provided input data.
2.Do not infer or add any information not directly stated in the input data.
3.Each diagnosis should appear only once, with all associated dates listed chronologically.
4.Prioritize more serious and then recent disease categories to the top.
5.Within each category, prioritize more serious and recent diseases to the top.
6.If a disease does not fit into a designated category, list it as a stand-alone item.
7.Ensure ALL diagnoses from the input data are included in the summary without missing any..

Special Case
If there are no diagnoses in the patient's history, output only:
"There are no diagnoses observed in the patient's history."

Diagnosis Summary:
""".strip()

In [15]:
# Task description template for the procedure crew.
# The {procedure_history} placeholder is filled from the inputs passed to
# Crew.kickoff() / Crew.train() (the 'proc' rows rendered via to_string()).
# .strip() removes the leading/trailing blank lines of the triple-quoted literal.
PROCEDURE_PROMPT = """

Task Description
Your task is to summarize the following 'Patient Procedure Code History' and nothing else. The summary should capture ALL procedures in the data.

Input Data
Patient Procedure Code History:
{procedure_history}

Output Format
Generate the summary in the following markdown format:

Procedure Summary:

Procedure Category 1:
- Proc 2 (code2) in [dos1, dos2, dos3]
- Proc 1 (code1) in [dos1, dos2, dos3, dos4, dos5]

Procedure Category 2:
- Proc 6 (code6) in [dos1, dos2, dos3, dos4, dos5]
- Proc 4 (code4) in [dos1, dos2, dos3]

Procedure Category 3: 
- Proc 3 (code3) in [dos1, dos2, dos3]
- Proc 5 (code5) in [dos1, dos2]

Rules and Guidelines

1.Only include procedures and dates explicitly present in the provided input data.
2.Do not infer or add any information not directly stated in the input data.
3.Each procedure should appear only once, with all associated dates listed chronologically.
4.Prioritize more serious and then recent procedure categories to the top.
5.Within each category, prioritize more serious and recent procedures to the top.
6.If a procedure does not fit into a designated category, list it as a stand-alone item.
7.Ensure ALL procedures from the input data are included in the summary without missing any.

Special Case
If there are no procedures in the patient's history, output only:
"There were no procedures performed in the patient's history."

Procedure Summary:
""".strip()


In [16]:
# Task description template for the medication crew.
# The {medication_history} placeholder is filled from the inputs passed to
# Crew.kickoff() (process_member_data supplies the 'gpi' rows via to_string()).
# .strip() removes the leading/trailing newlines of the triple-quoted literal.
MEDICATION_PROMPT = """
Task Description
Your task is to summarize the following 'Patient RX Code History' and nothing else. The summary should account for ALL medications in the data.

Input Data
Patient RX Code History:
{medication_history}

Output Format
Generate the summary in the following markdown format:

Prescription Medication Summary:

Medication Category 1:
- Rx 2 (code2) in [dos1, dos2, dos3]
- Rx 1 (code1) in [dos1, dos2, dos3, dos4]

Medication Category 2:
- Rx 6 (code6) in [dos1, dos2, dos3, dos4, dos5]
- Rx 4 (code4) in [dos1, dos2, dos3]

Medication Category 3: 
- Rx 3 (code3) in [dos1, dos2, dos3, dos4, dos5]
- Rx 5 (code5) in [dos1, dos2, dos3, dos4, dos5, dos6]

Rules and Guidelines

1.Categorize medications based on type of medication
2.Only include medications and dates explicitly present in the provided input data.
3.Do not infer or add any information not directly stated in the input data.
4.Each medication should appear only once, with all associated dates listed chronologically.
5.Prioritize more serious and then recent medication categories to the top.
6.Within each category, prioritize more serious and recent medications to the top.
7.If a medication does not fit into a designated category, list it as a stand-alone item.
8.Ensure ALL medications from the input data are included in the summary without missing any.

Special Case
If there are no medications in the patient's history, output only:
"There were no medications prescribed in the patient's history."


Prescription Medication Summary :
""".strip()


In [17]:
# Task description template for the lab test crew.
# The {lab_test_history} placeholder is filled from the inputs passed to
# Crew.kickoff() (process_member_data supplies the 'lab' rows, including the
# 'value' and 'abnormal_cd' columns, via to_string()).
# .strip() removes the leading/trailing newlines of the triple-quoted literal.
LAB_TEST_PROMPT ="""
Task Description
Your task is to summarize the following 'Patient Lab Code History' and nothing else. The summary should capture ALL lab tests in the data. For each test, you must display the exact abnormal code value from the input data.

Input Data
Patient Lab Code History:
{lab_test_history}

Output Format
Generate the summary in the following markdown format:
Lab Summary:

Lab Category 1:
- Complete Lab Test Name (code) : [date1:value1 abnormal_code_value, date2:value2 abnormal_code_value]

Lab Category 2:
- Complete Lab Test Name (code) : [date1:value1 abnormal_code_value, date2:value2 abnormal_code_value]

Rules and Guidelines
- Categorize lab tests into appropriate groups based on test type
- Include ONLY lab tests explicitly present in the input data
- Do not infer or add any information not directly stated
- If a value is missing, display nothing for value
- Use the exact abnormal_code value from input data (do not transform/map to other strings)
- Display all dates and results for each lab test in chronological order within square brackets
- Separate multiple date-value pairs with commas
- Each lab test should appear only once, consolidating all dates and results
- Prioritize more serious and then recent lab categories to the top
- Within each category, prioritize more serious and recent lab tests to the top
- List tests without a category as stand-alone items
- ALL lab tests from input data must be included
- No lab test should be omitted
- No duplicate entries allowed

Special Case
If there are no lab tests in the patient's history, output only:
"There were no lab tests conducted in the patient's history."

Lab Summary:
""".strip()

In [18]:
# One Task per agent: each pairs an agent with its prompt template and a short
# statement of the output expected from it.

# Basic member information.
basic_info_task = Task(
    agent=basic_info_agent,
    description=BASIC_INFO_PROMPT,
    expected_output="A concise display of the member's ID, age, and gender.",
)

# Diagnosis history.
diagnosis_task = Task(
    agent=diagnosis_agent,
    description=DIAG_SUMMARY_PROMPT,
    expected_output="A comprehensive analysis of the patient's diagnostic history, or a statement indicating no diagnoses if applicable",
)

# Procedure history.
procedure_task = Task(
    agent=procedure_agent,
    description=PROCEDURE_PROMPT,
    expected_output="A detailed summary of all medical procedures the patient has undergone, or a statement indicating no procedures if applicable",
)

# Medication history.
medication_task = Task(
    agent=medication_agent,
    description=MEDICATION_PROMPT,
    expected_output="A comprehensive summary of the patient's medication history, or a statement indicating no medications if applicable",
)

# Lab test history.
lab_test_task = Task(
    agent=lab_test_agent,
    description=LAB_TEST_PROMPT,
    expected_output="A detailed analysis of all lab tests with results the patient has undergone, or a statement indicating no lab tests if applicable",
)


In [19]:
def process_member_data(member_id):
    """Run the five summary crews for one member and return their results.

    Filters the module-level ``df`` to the rows whose ``id`` equals
    ``member_id``, splits them by ``type`` ('diag', 'proc', 'gpi', 'lab'), and
    kicks off one single-agent crew per section, passing the matching rows
    rendered with ``DataFrame.to_string()`` as the prompt input.

    Args:
        member_id: Value compared against the ``id`` column of ``df``.

    Returns:
        A 5-tuple ``(basic_info_result, diag_result, proc_result, med_result,
        lab_result)``. When the member has no rows, all five entries are
        ``None`` and no crew is run.
    """
    df_member = df[df['id'] == member_id]

    if df_member.empty:
        # Same arity as the success path so callers can always unpack five values.
        return None, None, None, None, None

    # Positional access: the filtered frame keeps the original index labels, so
    # label 0 exists only for a member that happens to own the first row of df.
    age = df_member['age'].iloc[0]
    gender = df_member['gender'].iloc[0]

    # Columns sent to the diagnosis / procedure / medication prompts.
    code_columns = ['id', 'dos', 'code', 'cd_desc', 'type']
    # Lab rows additionally carry the measured value and the abnormal flag.
    lab_columns = ['id', 'dos', 'code', 'value', 'abnormal_cd', 'cd_desc', 'type']

    df_diag = df_member.loc[df_member['type'] == 'diag', code_columns]
    df_proc = df_member.loc[df_member['type'] == 'proc', code_columns]
    df_rx = df_member.loc[df_member['type'] == 'gpi', code_columns]
    df_lab = df_member.loc[df_member['type'] == 'lab', lab_columns]

    basic_info_crew = Crew(agents=[basic_info_agent], tasks=[basic_info_task], verbose=True)
    basic_info_result = basic_info_crew.kickoff({"member_id": member_id, "age": age, "gender": gender})

    diagnosis_crew = Crew(agents=[diagnosis_agent], tasks=[diagnosis_task], verbose=True)
    diag_result = diagnosis_crew.kickoff({"diagnosis_history": df_diag.to_string()})

    procedure_crew = Crew(agents=[procedure_agent], tasks=[procedure_task], verbose=True)
    proc_result = procedure_crew.kickoff({"procedure_history": df_proc.to_string()})

    medication_crew = Crew(agents=[medication_agent], tasks=[medication_task], verbose=True)
    med_result = medication_crew.kickoff({"medication_history": df_rx.to_string()})

    lab_test_crew = Crew(agents=[lab_test_agent], tasks=[lab_test_task], verbose=True)
    lab_result = lab_test_crew.kickoff({"lab_test_history": df_lab.to_string()})

    return basic_info_result, diag_result, proc_result, med_result, lab_result

In [22]:
# Ad-hoc slice for the training run below: the procedure rows of member 1,
# restricted to the columns used by the procedure prompt.
df_member = df.loc[df['id'] == 1]
df_proc = df_member.loc[
    df_member['type'] == 'proc',
    ['id', 'dos', 'code', 'cd_desc', 'type'],
]

In [23]:
 # Single-agent crew that runs only the procedure summary task.
 procedure_crew = Crew(
     verbose=True,
     agents=[procedure_agent],
     tasks=[procedure_task],
 )



In [32]:
# Training configuration for the procedure crew.
n_iterations = 2
filename = "proc_trained.pkl"  # defined here but not passed to train() below
inputs = dict(
    procedure_history=df_proc.to_string(),
    topic="Procedure summary",
)

# Run the training loop; any failure is reported as a message, not raised.
try:
    procedure_crew.train(n_iterations=n_iterations, inputs=inputs)
except Exception as e:
    print(f"Training failed: {str(e)}")

[1m[95m [2024-11-25 12:44:02][DEBUG]: == Working Agent: Procedure Analyst[00m
[1m[95m [2024-11-25 12:44:02][INFO]: == Starting Task: Task Description
Your task is to summarize the following 'Patient Procedure Code History' and nothing else. The summary should capture ALL procedures in the data.

Input Data
Patient Procedure Code History:
     id         dos   code                              cd_desc  type
19    1  2019-05-29  G8427   ELIG CLIN DOC M UPDTD REC PT  MEDS  proc
20    1  2019-05-29  1159F                MED LIST DOCD IN RCRD  proc
34    1  2019-07-23  G8427   ELIG CLIN DOC M UPDTD REC PT  MEDS  proc
38    1  2019-07-23  G8397  DILATED MACULAR/FUNDUS EXAM PERFORM  proc
49    1  2019-05-29  1160F            RVW MEDS BY RX/DR IN RCRD  proc
56    1  2019-05-29  1170F                 FXNL STATUS ASSESSED  proc
73    1  2019-07-23  G9906  PT ID TOBAC USER RECV TOB CESS INTV  proc
81    1  2019-07-23  1111F         DSCHRG MED/CURRENT MED MERGE  proc
84    1  2019-07-23  9213

This is the agent's final answer: Procedure Summary:

Ophthalmic Procedures:
- DILATED MACULAR/FUNDUS EXAM PERFORM (G8397) in [2019-07-23]
- OPHTHALMIC BIOMETRY (92136) in [2019-07-23]
- DILAT RTA XM EVC RTNOPTHY (2022F) in [2019-07-23]
- XCAPSL CTRC RMVL W/O ECP (66984) in [2019-12-04]

Medication Management:
- ELIG CLIN DOC M UPDTD REC PT MEDS (G8427) in [2019-05-29, 2019-07-23, 2019-06-06, 2019-07-02]
- MED LIST DOCD IN RCRD (1159F) in [2019-05-29]
- RVW MEDS BY RX/DR IN RCRD (1160F) in [2019-05-29, 2019-07-23]
- DSCHRG MED/CURRENT MED MERGE (1111F) in [2019-07-23]

Tobacco Use Interventions:
- PT ID TOBAC USER RECV TOB CESS INTV (G9906) in [2019-07-23]
- PT SCR TOB USE & ID AS TOB USER (G9902) in [2019-07-23]

Office Visits:
- OFFICE O/P EST LOW 20-29 MIN (99213) in [2019-06-06, 2019-07-02]
- OFFICE O/P NEW LOW 30-44 MIN (99203) in [2019-07-23]

Functional and Pain Assessments:
- FXNL STATUS ASSESSED (1170F) in [2019-05-29]
- AMNT PAIN NOTED NONE PRSNT (1126F) in [2019-05-29]

Adva

[32;1m[1;3mThought: I now can give a great answer  
Final Answer: 

Procedure Summary:

Ophthalmic Procedures:
1. DILATED MACULAR/FUNDUS EXAM PERFORM (G8397) in [2019-07-23]
2. OPHTHALMIC BIOMETRY (92136) in [2019-07-23]
3. DILAT RTA XM EVC RTNOPTHY (2022F) in [2019-07-23]
4. XCAPSL CTRC RMVL W/O ECP (66984) in [2019-12-04]

Medication Management:
1. ELIG CLIN DOC M UPDTD REC PT MEDS (G8427) in [2019-05-29, 2019-07-23, 2019-06-06, 2019-07-02]
2. MED LIST DOCD IN RCRD (1159F) in [2019-05-29]
3. RVW MEDS BY RX/DR IN RCRD (1160F) in [2019-05-29, 2019-07-23]
4. DSCHRG MED/CURRENT MED MERGE (1111F) in [2019-07-23]

Tobacco Use Interventions:
1. PT ID TOBAC USER RECV TOB CESS INTV (G9906) in [2019-07-23]
2. PT SCR TOB USE & ID AS TOB USER (G9902) in [2019-07-23]

Office Visits:
1. OFFICE O/P EST LOW 20-29 MIN (99213) in [2019-06-06, 2019-07-02]
2. OFFICE O/P NEW LOW 30-44 MIN (99203) in [2019-07-23]

Functional and Pain Assessments:
1. FXNL STATUS ASSESSED (1170F) in [2019-05-29]
2. AMNT P

This is the agent's final answer: Procedure Summary:

Procedure Category 1:  
1. XCAPSL CTRC RMVL W/O ECP (66984) in [2019-12-04]  
2. DILAT RTA XM EVC RTNOPTHY (2022F) in [2019-07-23]  
3. DILATED MACULAR/FUNDUS EXAM PERFORM (G8397) in [2019-07-23]  
4. MACUL RESULT PHY/QHP MNG DM (5010F) in [2019-07-23]  
5. OPHTHALMIC BIOMETRY (92136) in [2019-07-23]  

Procedure Category 2:  
1. OFFICE O/P NEW LOW 30-44 MIN (99203) in [2019-07-23]  
2. OFFICE O/P EST LOW 20-29 MIN (99213) in [2019-06-06, 2019-07-02]  

Procedure Category 3:  
1. INIT PM E/M NEW PAT 65+ YRS (99387) in [2019-05-29]  

Procedure Category 4:  
1. ELIG CLIN DOC M UPDTD REC PT MEDS (G8427) in [2019-05-29, 2019-06-06, 2019-07-02, 2019-07-23]  
2. MED LIST DOCD IN RCRD (1159F) in [2019-05-29]  
3. RVW MEDS BY RX/DR IN RCRD (1160F) in [2019-05-29, 2019-07-23]  
4. DSCHRG MED/CURRENT MED MERGE (1111F) in [2019-07-23]  

Procedure Category 5:  
1. PT SCR TOB USE & ID AS TOB USER (G9902) in [2019-07-23]  
2. PT ID TOBAC USER R

[32;1m[1;3mThought: I now can give a great answer  
Final Answer: 

Procedure Summary:

Procedure Category 1:  
1. XCAPSL CTRC RMVL W/O ECP (66984) in [2019-12-04]  
2. DILAT RTA XM EVC RTNOPTHY (2022F) in [2019-07-23]  
3. DILATED MACULAR/FUNDUS EXAM PERFORM (G8397) in [2019-07-23]  
4. MACUL RESULT PHY/QHP MNG DM (5010F) in [2019-07-23]  
5. OPHTHALMIC BIOMETRY (92136) in [2019-07-23]  

Procedure Category 2:  
1. OFFICE O/P NEW LOW 30-44 MIN (99203) in [2019-07-23]  
2. OFFICE O/P EST LOW 20-29 MIN (99213) in [2019-06-06, 2019-07-02]  

Procedure Category 3:  
1. INIT PM E/M NEW PAT 65+ YRS (99387) in [2019-05-29]  

Procedure Category 4:  
1. ELIG CLIN DOC M UPDTD REC PT MEDS (G8427) in [2019-05-29, 2019-06-06, 2019-07-02, 2019-07-23]  
2. MED LIST DOCD IN RCRD (1159F) in [2019-05-29]  
3. RVW MEDS BY RX/DR IN RCRD (1160F) in [2019-05-29, 2019-07-23]  
4. DSCHRG MED/CURRENT MED MERGE (1111F) in [2019-07-23]  

Procedure Category 5:  
1. PT SCR TOB USE & ID AS TOB USER (G9902) in 

Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.
See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.


In [34]:
# Second run of the procedure-crew training with the same configuration.
n_iterations = 2
filename = "proc_trained.pkl"  # defined here but not passed to train() below
inputs = dict(
    procedure_history=df_proc.to_string(),
    topic="Procedure summary",
)

# Run the training loop; any failure is reported as a message, not raised.
try:
    procedure_crew.train(n_iterations=n_iterations, inputs=inputs)
except Exception as e:
    print(f"Training failed: {str(e)}")

[1m[95m [2024-11-25 14:30:35][DEBUG]: == Working Agent: Procedure Analyst[00m
[1m[95m [2024-11-25 14:30:35][INFO]: == Starting Task: Task Description
Your task is to summarize the following 'Patient Procedure Code History' and nothing else. The summary should capture ALL procedures in the data.

Input Data
Patient Procedure Code History:
     id         dos   code                              cd_desc  type
19    1  2019-05-29  G8427   ELIG CLIN DOC M UPDTD REC PT  MEDS  proc
20    1  2019-05-29  1159F                MED LIST DOCD IN RCRD  proc
34    1  2019-07-23  G8427   ELIG CLIN DOC M UPDTD REC PT  MEDS  proc
38    1  2019-07-23  G8397  DILATED MACULAR/FUNDUS EXAM PERFORM  proc
49    1  2019-05-29  1160F            RVW MEDS BY RX/DR IN RCRD  proc
56    1  2019-05-29  1170F                 FXNL STATUS ASSESSED  proc
73    1  2019-07-23  G9906  PT ID TOBAC USER RECV TOB CESS INTV  proc
81    1  2019-07-23  1111F         DSCHRG MED/CURRENT MED MERGE  proc
84    1  2019-07-23  9213

KeyboardInterrupt: Interrupted by user