In [None]:
# Forward slashes are used on purpose: in a normal (non-raw) string "\d", "\w" and "\o"
# are invalid escape sequences, and backslash-separated paths do not resolve on
# Linux/macOS. Forward slashes are accepted on Windows as well.
file_path = "../data/wzq-medical-cases.xlsx"  # The path of original medical cases
output_file = "../data/output.xlsx"  # Define the path of the output Excel file

In [None]:
import pandas as pd
# Load the case workbook, pull out the column holding the raw case texts,
# and preview the first ten entries.
df = pd.read_excel(file_path)
texts = df.loc[:, 'Original medical cases']
print(texts.head(10))

In [None]:
# Few-shot system prompts, one per entity type that can be extracted
class ExtractEntityPrompt:
    """Provider of the Chinese few-shot system prompts used for entity extraction.

    Each ``extract_<entity>_few_shot_prompt`` method returns an instruction
    followed by three worked Input/Output examples for one entity type.

    The example outputs are written as valid Python list literals (ASCII quotes
    and ASCII commas) in all three prompts. Earlier versions mixed typographic
    quotes and a full-width comma into the examples, so the examples themselves
    were not parseable as lists and were inconsistent across entity types.
    """

    def extract_symptom_few_shot_prompt(self):
        """Return the few-shot prompt for extracting symptoms."""
        return '''你的任务是提取出文本中的症状，用列表的格式列出，若文本中无症状请输出Output:“NaN”。以下是一些例子：
            Input: 腰疽愈后，肾伤未复，风邪易乘虚而袭，身热形寒，头脑胀痛眩晕，项背肩胛酸胀，脉浮而濡，舌干而燥。
            Output: ['身热形寒', '头脑胀痛眩晕', '项背肩胛酸胀', '脉浮而濡', '舌干而燥']
            Input:再以宣湿除陈，以醒胃气，调理尚须从缓也。
            Output: NaN
            Input: 燥屎已下，郁热下行，颧红、唇绛、舌赤均已退淡，芒刺亦软，惟午后稍觉烦躁，入夜犹欠清爽。
            Output: ['惟午后稍觉烦躁', '入夜犹欠清爽']'''

    def extract_pathogenesis_few_shot_prompt(self):
        """Return the few-shot prompt for extracting pathogenesis (causes of disease)."""
        return '''你的任务是提取出文本中的病因，用列表的格式列出，若文本中无病因请输出Output:“NaN”。以下是一些例子：
            Input:寒温失调，感受时行伤风，头眩胀痛，咳嗽发热，鼻塞多涕，欲作呕恶，两手脉俱反关，弦劲而滑。仍以轻宣，化风豁痰。
            Output:['感受时行伤风']
            Input:哕逆既平，热亦见退，大便已行，舌苔黄糙化薄，肌肤瘙痒，脉弦滑而濡。
            Output:NaN
            Input:劳顿感风，引动伏湿，肺胃二气相迫，发热壮盛，哕逆呃忒，连声紧促，夜眠不安，脉弦数。
            Output:['劳顿感风，引动伏湿', '肺胃二气相迫']'''

    def extract_treatment_principle_few_shot_prompt(self):
        """Return the few-shot prompt for extracting treatment principles."""
        return '''你的任务是提取出文本中的治则，用列表的格式列出，若文本中无治则治法请输出Output:“NaN”。以下是一些例子：
            Input:寒温失调，感受时行伤风，头眩胀痛，咳嗽发热，鼻塞多涕，欲作呕恶，两手脉俱反关，弦劲而滑。仍以轻宣，化风豁痰。
            Output:['轻宣', '化风豁痰']
            Input:外因已却，形色清爽，惟脑后右边筋脉牵强拘急，应及腰脊，此则腰疽之后，肾伤未复。
            Output:NaN
            Input:风邪与痰滯相搏，寒热、咳嗽、头痛、呓语、若寐，亟以宣豁，防痰结生变。
            Output:['宣豁']'''


# Shared instance; prompt methods are looked up on it by name at extraction time.
extractor = ExtractEntityPrompt()

In [None]:
import os

from zhipuai import ZhipuAI

# Prefer the ZHIPUAI_API_KEY environment variable so a real key never has to be saved
# inside the notebook; the placeholder below is only a fallback to edit locally.
client = ZhipuAI(api_key=os.environ.get("ZHIPUAI_API_KEY", "zhipu_api_key"))
# Entity type to extract; must match an `extract_<entity_type>_few_shot_prompt` method
# of ExtractEntityPrompt: 'symptom', 'pathogenesis' or 'treatment_principle'.
entity_type = 'symptom'
# Name of the prompt method that is looked up on `extractor` for every request.
few_shot_prompt = f'extract_{entity_type}_few_shot_prompt'

def extract(text):
    """Ask the chat model to extract the configured entity type from one case text.

    The system message is the few-shot prompt obtained by calling the method named
    by the module-level ``few_shot_prompt`` on ``extractor``. The user message
    wraps ``text`` in the same Input/Output layout that the examples use, and
    names the entity selected by the module-level ``entity_type``.

    Args:
        text: The medical case text to analyse (a ``str``).

    Returns:
        The message object of the first choice in the chat-completion response;
        the reply text is available on its ``content`` attribute.
    """
    # Chinese entity names used in the user instruction. The instruction used to say
    # "症状" (symptom) unconditionally, contradicting the system prompt whenever a
    # different entity_type was selected.
    entity_labels = {
        'symptom': '症状',
        'pathogenesis': '病因',
        'treatment_principle': '治则',
    }
    entity_label = entity_labels.get(entity_type, '症状')

    # Layout (including the indentation before Input/Output) is kept exactly as the
    # original user message so the 'symptom' request is unchanged.
    user_prompt = (
        f"用给出的格式提取以下文本中的{entity_label}。\n"
        "            Input: " + text + "\n"
        "            Output:"
    )

    response = client.chat.completions.create(
        model="glm-3-turbo",  # Fill in the name of the model to be called
        messages=[
            {"role": "system", "content": getattr(extractor, few_shot_prompt)()},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message

In [None]:
# Run the extraction for every case text and save (case, extracted entities) pairs
# to Sheet1 of the output workbook: header in the first row, one case per row after it.
columns = ['Original medical cases', entity_type]  # Adjust the columns in the output Excel here
records = []  # One dict per processed case; written to the sheet in a single pass

# The writer is opened before the loop so an unusable output path fails immediately,
# before any API calls are made.
with pd.ExcelWriter(output_file, engine='openpyxl', mode='w') as writer:
    try:
        for n, text in enumerate(texts, start=1):
            # A case that is not a non-empty string (e.g. a blank spreadsheet cell)
            # cannot be concatenated into the prompt, so it is recorded as having
            # no entities instead of being sent to the model.
            if isinstance(text, str) and text.strip():
                entities = extract(text).content
            else:
                entities = None
            # An empty reply is stored as the text 'NaN', the same marker the prompts
            # ask the model to emit when nothing is found (previously a one-element
            # list holding a float NaN was placed in the cell).
            fill_text = entities if entities else 'NaN'
            records.append({columns[0]: text, columns[1]: fill_text})
            print(f"Article {n} Medical case entity - {entity_type} extraction completed")
    finally:
        # Write whatever has been collected, even if a request failed part-way through,
        # so completed extractions are not lost.
        pd.DataFrame(records, columns=columns).to_excel(writer, sheet_name='Sheet1', index=False)

print(f"Data saved to {output_file}")