In [1]:
from typing import Annotated, Literal, TypedDict, Any
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
import pandas as pd
import pickle
from tqdm import tqdm
import os
import json
from langchain_core.documents import Document
from pydantic import Field, BaseModel
import pickle
from langchain_openai import ChatOpenAI
class MedicalAskContent(BaseModel):
    
    disease: list[str] = Field(
        default_factory=list,
        description="医生提问中的疾病对象"
    )
    symptom: list[str] = Field(
        default_factory=list,
        description="医生提问中的症状对象"
    )
    medcine: list[str] = Field(
        default_factory=list,
        description="医生提问中的药物对象"
    )
    surgery: list[str] = Field(
        default_factory=list,
        description="医生提问中的手术对象"
    )
    body_part: list[str] = Field(
        default_factory=list,
        description="医生提问中的身体部位对象"
    )
    medical_check: list[str] = Field(
        default_factory=list,
        description="医生提问中的检查项目对象"
    )
    concept: list[str] = Field(
        default_factory=list,
        description="医生提问中的问诊医学概念对象"
    )
class RetrievalAction(BaseModel):
    """The action of retrievaling from the knowledge base."""
    reasoning: str = Field(
        description="The reasong why you are doing the following query."
    )
    queries: list[str] = Field(description="The list of queries to execute. Each should be a question.")

class RetrievalItem(TypedDict):
    
    query: str
    refined_result: str

class AskAction(BaseModel):
    """The action of asking the patient for extra information."""
    reasoning: str = Field(
        description="The reason why you are asking the following question."
    )
    disease: list[str] = Field(
        default_factory=list,
        description="你提问的疾病对象"
    )
    symptom: list[str] = Field(
        default_factory=list,
        description="你提问的症状对象"
    )
    medcine: list[str] = Field(
        default_factory=list,
        description="你提问的药物对象"
    )
    surgery: list[str] = Field(
        default_factory=list,
        description="你提问的手术对象"
    )
    body_part: list[str] = Field(
        default_factory=list,
        description="你提问的身体部位对象"
    )
    medical_check: list[str] = Field(
        default_factory=list,
        description="你提问的检查项目对象"
    )
    concept: list[str] = Field(
        default_factory=list,
        description="你提问的问诊医学概念对象"
    )
    text: str = Field(
        description="The text sent to the patient."
    )

class TellAction(BaseModel):
    """The action of telling the patient something."""
    reasoning: str = Field(
        description="The reason why you are telling the user the following text."
    )
    text: str = Field(
        description="The text sent to the patient."
    )

class Conversation(TypedDict):
    
    patient: str
    doctor: str

class RagMeta(TypedDict):

    vector_store: list[Document]
    baidu_search: tuple[
        Annotated[str, "question"],
        Annotated[str, "answer"],
        Annotated[str, "failed"],
        Annotated[str, "src"]
    ]

class RetrievalRequest(BaseModel):

    reasoning: str = Field(
        description="The process of how you arrived on the following queries you are going to make based on the input. Reasoning should be concise and contain only important points."
    )
    queries: list[str] = Field(
        description="The queries you want to make. Each should be a question."
    )

class RefinedResult(BaseModel):

    refined_result: str = Field(
        description="The refined result of the given message. Should be in no more than 2 to 3 sentences. Should contain no more content than what's related to the query."
    )

class ReasoningCheck(BaseModel):

    reasoning: str = Field(
        description="The reason why you assert that you can deduce the answer of the professional doctor from the given information, or why you cannot deduce it. Reasoning should be concise and contain only important points."
    )
    deducible: bool = Field(description="Whether you can deduce the answer of the professional doctor from the given information.")

class Response(BaseModel):

    reasoning: str = Field(
        description="The reason why you ask the user for more information or respond to them. Reasoning should be concise and contain only important points."
    )
    is_asking: bool = Field(
        description="Wehther your sentence asks the user for more information or tell them something. If this is true, this response will be marked as ask, or else it will be marked as tell."
    )
    entities: list[str] = Field(
        description="The medical entities related to this conversation. Only the important ones should be included."
    )
    text: str = Field(
        description="The text sent to the patient."
    )

class RetrievalItem(TypedDict):
    
    query: str
    refined_result: str

class StructuredOutputWithRaw(TypedDict):
    
    raw: AIMessage
    parsed: Any
    parsing_error: Any


In [2]:
lded = pickle.load(open(
    "./lora_sft_zh_med_ask_records.pkl", "rb"
))

In [3]:
import Levenshtein

In [4]:
rr = 0

In [5]:
def flatten_medical_ask_content(i: MedicalAskContent) -> str:
    return f"{''.join(i.disease)}{''.join(i.symptom)} {''.join(i.medcine)}{''.join(i.surgery)}{''.join(i.body_part)}{''.join(i.medical_check)}{''.join(i.concept)}"

In [None]:
cnt = 0

In [24]:
for each in lded:
    try:
        model_out = each
        model_entities = MedicalAskContent(
            disease=model_out[2].disease,
            symptom=model_out[2].symptom,
            medcine=model_out[2].medcine,
            surgery=model_out[2].surgery,
            body_part=model_out[2].body_part,
            medical_check=model_out[2].medical_check,
            concept=model_out[2].concept,
        )
        r = Levenshtein.ratio(
            flatten_medical_ask_content(each[0]['all_meta']['doc_objs']),
            flatten_medical_ask_content(model_entities),
        )
        if r > 0.3:
            print(model_entities)
            print(each[2].text)
            print(each[0]['all_meta']['doc_objs'])
            print(each[0]['all_meta']['doc_ask_sentence'])
    except:
        continue

disease=[] symptom=['疼痛'] medcine=[] surgery=[] body_part=[] medical_check=[] concept=['饮食习惯', '食物', '疼痛性质']
请问您在吃东西时是否注意到特定类型的食物会引起更多的疼痛？此外，您的饮食习惯最近是否有改变？
disease=[] symptom=['疼痛'] medcine=[] surgery=[] body_part=[] medical_check=[] concept=[]
可以分清左右和上下疼痛么？
disease=[] symptom=['腹痛', '头晕', '乏力'] medcine=[] surgery=[] body_part=[] medical_check=[] concept=['持续时间', '伴随症状']
为了更好地理解您的状况，请您告诉我您是否有其他伴随症状，比如腹痛、头晕、乏力等？此外，您最近的月经周期是否正常？这些信息将有助于我们判断您的情况。
disease=[] symptom=['腰酸', '乏力'] medcine=[] surgery=[] body_part=[] medical_check=[] concept=[]
腰酸乏力明显吗？
disease=[] symptom=[] medcine=[] surgery=[] body_part=[] medical_check=[] concept=[]
您能详细描述一下您不清楚的具体内容吗？这样我可以更好地理解您的情况并提供帮助。
disease=[] symptom=[] medcine=[] surgery=[] body_part=[] medical_check=['检查'] concept=[]
做过检查吗？
disease=[] symptom=['头痛', '恶心', '呕吐', '视力模糊'] medcine=[] surgery=[] body_part=[] medical_check=[] concept=['压力', '情绪波动', '睡眠质量', '其他不适症状']
为了更好地理解您的情况，您能否描述一下头痛的频率？是否伴随有恶心、呕吐或视力模糊等症状？另外，您最近是否有经历过较大的压力或情绪波动？以及您的睡眠质量如何？这些信息将有助于我们

In [7]:
rr / len(lded)

6.008159934720522