In [1]:
import streamlit as st
import os
import time
from crewai import Agent, Task, Crew, Process
from crewai_tools import ScrapeWebsiteTool, SerperDevTool
from crewai_tools import tool

from docx import Document
from io import BytesIO
import base64

import pandas as pd
from IPython.display import Markdown, display

from dotenv import load_dotenv
#load_dotenv("./.env")
load_dotenv("/home/jovyan/.env")

os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")
os.environ["SLIP_AUTH_URL"] = os.environ.get("SLIP_AUTH_URL")
os.environ["SLIP_USERNAME"] =os.environ.get("SLIP_USERNAME")
os.environ["SLIP_PASSWORD"] =  os.environ.get("SLIP_PASSWORD")
os.environ["SLIP_CLIENT_ID"]= os.environ.get("SLIP_CLIENT_ID")
os.environ["SLIP_APP_NAME"]=os.environ.get("SLIP_APP_NAME")
os.environ["SERPER_API_KEY"] = "25d43901c968b0f8830387b7ed72231673428b9c"

In [2]:
# #longchain
# from langchain_openai import ChatOpenAI
# from langchain.agents.agent_toolkits import JsonToolkit
# #from langchain.chat_models import ChatOpenAI
# from langchain.tools.json.tool import JsonSpec
# from langchain.tools import BaseTool, StructuredTool, tool
# from langchain.agents import AgentType, initialize_agent, create_json_agent, AgentExecutor, create_openai_tools_agent
# from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
# #from langchain_experimental.agents import create_csv_agent
# from langchain_experimental.agents.agent_toolkits import create_csv_agent, create_pandas_dataframe_agent

In [3]:
from llm_utils import get_llm

#llm = get_llm('gpt-35-turbo-0613')
#llm = get_llm('gpt-35-turbo-16k-0613')
#llm = get_llm('gpt-4-8k-0613')
#llm = get_llm('gpt-4-32k-0613')
#llm = get_llm('gpt-4-turbo-1106-preview')
#llm = get_llm('gpt-4-turbo-0613')
#llm = get_llm('gpt-35-turbo-0125')
#llm = get_llm('gpt-4-turbo-0125-preview')
#llm = get_llm('gpt-4-turbo-0409')
#llm = get_llm('gpt-4-vision-preview')
llm = get_llm('gpt-4o-0513')
#response = llm.invoke('why sky is blue?')
#response.content

In [4]:
df=pd.read_csv('./diag_5mcids.csv')
df

Unnamed: 0,mcid,dos,codeIndex,cd_value,ICD_FULL_DESC,type
0,0,2017-02-28,13415,M25551,Pain in right hip,diag
1,0,2017-06-27,13415,M25551,Pain in right hip,diag
2,0,2017-08-22,13415,M25551,Pain in right hip,diag
3,0,2018-02-22,13415,M25551,Pain in right hip,diag
4,0,2018-04-18,13415,M25551,Pain in right hip,diag
...,...,...,...,...,...,...
816,4,2018-11-01,14174,M546,Pain in thoracic spine,diag
817,4,2019-01-11,14174,M546,Pain in thoracic spine,diag
818,4,2019-05-07,14174,M546,Pain in thoracic spine,diag
819,4,2018-11-29,9753,J309,"Allergic rhinitis, unspecified",diag


In [5]:
df[df['mcid']==4]

Unnamed: 0,mcid,dos,codeIndex,cd_value,ICD_FULL_DESC,type
136,4,2017-02-21,14172,M545,Low back pain,diag
137,4,2019-05-07,14172,M545,Low back pain,diag
143,4,2017-02-21,14160,M5416,"Radiculopathy, lumbar region",diag
171,4,2019-07-16,42211,Z79899,Other long term (current) drug therapy,diag
206,4,2017-12-12,2758,D692,Other nonthrombocytopenic purpura,diag
...,...,...,...,...,...,...
816,4,2018-11-01,14174,M546,Pain in thoracic spine,diag
817,4,2019-01-11,14174,M546,Pain in thoracic spine,diag
818,4,2019-05-07,14174,M546,Pain in thoracic spine,diag
819,4,2018-11-29,9753,J309,"Allergic rhinitis, unspecified",diag


In [6]:
df_json = df.to_json()
import json

# Convert the JSON string back to a dictionary of dictionaries
df_list = json.loads(df_json)

In [7]:
search_tool = SerperDevTool()
scrape_tool = ScrapeWebsiteTool()

@tool
def patientRecordSelect_tool(df_list, mcid):
    """Select the patient's medical records for patient with given mcid.
    'dos': date of service.
    'cd_value': ICD10 diagnosis codes.
    'ICD_FULL_DESC': description of ICD10 code.
    """
    df = pd.DataFrame(df_list)
    output = df[df['mcid'] == mcid][['dos', 'cd_value', 'ICD_FULL_DESC']]
    return output

patientRecordSelect_agent = Agent(
    role="Get patient's medical records",
    goal="For a given patient identified by mcid, get this patient's medical recordes "
         "from a given data using tool patientRecordSelect_tool.",
    backstory="You use given mcid to find the corresponding medical records.",
    tools=[patientRecordSelect_tool],
    allow_delegation=False,
    llm=llm  #Make sure this variable is defined
)

patientRecordSelect_task = Task(
    description="Get the medical records for the given patient {mcid} from file {df_list}. These medical records will be used in later steps",
    expected_output="a table with columns: date of service, ICD10 diagnosis codes, code description",
    agent=patientRecordSelect_agent
)

summary_agent = Agent(
    role="Summarizer",
    goal="Summarize patient's medical records. Find out what diseases this patient had or having, and possible diseases may happen soon. ",
    tools = [scrape_tool, search_tool],
    verbose=True,
    llm = llm,
    backstory=(
        "As a medical doctor, you have a new patient who brings to you his/her past medical records."
        "Through analyzing this patient's medical records, you find what diseases this patient had in the past, "
        "what chronic desease this patient are having, "
        "and what desease this patient may have in the near future."
    )
)

summary_task = Task(
    description=(
        "1. Analyze the patient's medical records .\n"
        "2. Provide a summary of past, present, and possible future diseases based on the provided information."
        "3. Limit the length to 200 words."
    ),
    expected_output="A summary of patient's medical conditions.",
    output_file="summary.txt",
    agent=summary_agent,
    context=[patientRecordSelect_task]
)

commonDisease_agent = Agent(
    role="Detector of common diseases",
    goal="Find out if the given patient has diabetes and chronic kidney disease.",
    tools = [scrape_tool, search_tool],
    verbose=True,
    llm = llm,
    backstory=(
        "Diabetes, chronic kidney disease are very common, especially among older people. "
        "We are dealing with medicare population. Analyzing a patient's medical records to see if this patient has these diseases. "
        "If this patient has any of the mentioned diseases, please suggest treatment plan, one for each detected disease."
    )
)

commonDisease_task = Task(
    description=(
        "1. Analyze the patient's medical records .\n"
        "2. Answer yes or no for diabetes and chronic kidney disease."
        "3. If the answer is yes, suggest treatment plan and dietary restrictions. When searching the internet, make sure check out Mayo Clinic website."
        "4. Limit the length to 200 words."
        
    ),
    expected_output="Yes or no answer to common chronic diseases. Suggestions if the answer is yes.",
    output_file="commonDisease.txt",
    agent=commonDisease_agent,
    context=[patientRecordSelect_task, summary_task]
)

# Create Crew
crew = Crew(
    agents=[patientRecordSelect_agent, summary_agent, commonDisease_agent],
    tasks=[patientRecordSelect_task, summary_task, commonDisease_task],
    verbose=1
)

In [None]:
inputs = {
    "df_list": df_list,
    "mcid": 4
}

t_start=time.time()
result = crew.kickoff(inputs=inputs)
print("Time used (minutes):", (time.time() - t_start)/60)
print(result)

[1m[95m [2024-07-30 16:42:13][DEBUG]: == Working Agent: Get patient's medical records[00m
[91m 

I encountered an error while trying to use the tool. This was the error: patientRecordSelect_tool() missing 1 required positional argument: 'df_list'.
 Tool patientRecordSelect_tool accepts these inputs: patientRecordSelect_tool() - Select the patient's medical records for patient with given mcid.     'dos': date of service.     'cd_value': ICD10 diagnosis codes.     'ICD_FULL_DESC': description of ICD10 code.     
[00m


In [None]:
display(Markdown("./summary.md"))

In [None]:
display(Markdown("./commonDisease.md.md"))

In [None]:
t_start=time.time()
result = crew.kickoff(inputs=inputs)
print("Time used (minutes):", (time.time() - t_start)/60)


In [None]:
display(Markdown("./summary.md"))

In [None]:
display(Markdown("./commonDisease.md"))