In [16]:
from sentence_transformers import SentenceTransformer, util
import json

In [17]:
import pandas as pd
import numpy as np
import os

TRAINING_NUM = 16

# Sanity check: show the working directory and confirm that the relative
# path to the physiological dataset resolves from here.
print(os.getcwd())
print(os.path.exists("../utils/healthCare.parquet"))

# Build the template pools. For each corpus: load it, drop every row that
# contains a missing value, keep only the text column, then draw a
# reproducible (random_state=42) sample of TRAINING_NUM // 2 rows so that
# both categories contribute the same number of templates.
df_physiological = (
    pd.read_parquet("../utils/healthCare.parquet")
    .dropna()[["input"]]
    .sample(TRAINING_NUM // 2, random_state=42)
)
df_psychological = (
    pd.read_json("../utils/mentalHealth.json", lines=True)
    .dropna()[["Context"]]
    .sample(TRAINING_NUM // 2, random_state=42)
)
df_physiological.head(5)

d:\learning\year4_2\dia\cw\Dia_Yixin\model_intent
True


Unnamed: 0,input
47493,"I wake in the night, usually about 2-3 hours a..."
65740,"Honorable Sir,I am Ripon Dev from Bangladesh.M..."
69490,Ive had a cold which started on Christmas eve ...
39656,I had cervical laminectomy surgery for spinal ...
45796,"Hello, At the end of lacrosse practice about a..."


In [18]:
# Preview the first five sampled psychological ("Context") templates.
df_psychological.head(n=5)

Unnamed: 0,Context
495,I've hit my head on walls and floors ever sinc...
1592,Over a year ago I had a female friend. She tur...
2314,"My long-distance girlfriend is in a sorority, ..."
1475,Cheating is something unacceptable for me but ...
2772,When my daughter is stressed about a silly thi...


In [19]:

def get_embedding(physiological_templates, psychological_templates):
	"""Embed the template sentences and persist them as a JSON file.

	Each template becomes a record with three keys: "text" (the sentence),
	"category" ("physiological" or "psychological") and "embedding" (the
	sentence vector as a plain list of floats). Physiological records come
	first, followed by psychological ones. The records are written to
	"template_embeddings<TRAINING_NUM>.json" in the current working
	directory, overwriting any existing file of that name.

	Args:
		physiological_templates: iterable of sentences labelled "physiological".
		psychological_templates: iterable of sentences labelled "psychological".

	Returns:
		None. The result is the file written to disk.
	"""
	# Load the sentence-embedding model.
	model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

	# Merge both template sets into one list of labelled records.
	templates = [{"text": text, "category": "physiological"} for text in physiological_templates] + \
				[{"text": text, "category": "psychological"} for text in psychological_templates]

	# Encode every sentence in a single batched call instead of one model
	# invocation per template; skip the call entirely when there is nothing
	# to encode.
	if templates:
		embeddings = model.encode([template["text"] for template in templates])
		for template, embedding in zip(templates, embeddings):
			template['embedding'] = embedding.tolist()

	# Save as JSON; ensure_ascii=False keeps non-ASCII text readable in the file.
	output_path = "template_embeddings" + str(TRAINING_NUM) + ".json"
	with open(output_path, "w", encoding="utf-8") as file:
		json.dump(templates, file, ensure_ascii=False, indent=4)

	# Report the file that was actually written (its name includes TRAINING_NUM).
	print(f"save embedding template into '{output_path}'")


In [20]:
# Embed the sampled sentences of both categories and write them to disk.
get_embedding(
    physiological_templates=df_physiological["input"].tolist(),
    psychological_templates=df_psychological["Context"].tolist(),
)

save embedding template into 'template_embeddings.json'


In [22]:
def match_intent(input_text, model=None):
	"""Pick the agent whose stored templates are most similar to the input.

	Reads the records saved in "template_embeddings<TRAINING_NUM>.json",
	embeds `input_text`, and compares it against every stored template
	embedding using cosine similarity. The category of the highest-scoring
	template decides the agent; on ties the earliest template in the file wins.

	Args:
		input_text: the user utterance to classify.
		model: optional, already-loaded SentenceTransformer to reuse across
			calls. When omitted, 'paraphrase-MiniLM-L6-v2' is loaded on
			every call, which is the slow path.

	Returns:
		"<category>_agent" for the best-matching template, or
		"physiological_agent" when the template file holds no records or no
		template scores strictly above -1.
	"""
	default_agent = "physiological_agent"

	# Load the pre-computed template embeddings.
	template_path = "template_embeddings" + str(TRAINING_NUM) + ".json"
	with open(template_path, "r", encoding="utf-8") as file:
		templates = json.load(file)

	# Nothing to compare against: fall back to the default agent.
	if not templates:
		return default_agent

	# Load the sentence-embedding model only if the caller did not supply one.
	if model is None:
		model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

	# Embed the user input.
	user_embedding = model.encode(input_text)

	# Score the input against all templates in one call; cos_sim returns a
	# (1, number_of_templates) matrix, so row 0 holds every score.
	template_embeddings = [template['embedding'] for template in templates]
	scores = util.cos_sim(user_embedding, template_embeddings)[0]
	best_index = int(scores.argmax())

	# A template only wins if it scores strictly above -1; otherwise keep
	# the default category.
	if not (scores[best_index].item() > -1):
		return default_agent

	return f"{templates[best_index]['category']}_agent"

# Smoke test: route one sample utterance and print the agent it is assigned to.
user_input = "i feel like i have persistent knee pain that has been going on for two days."
agent = match_intent(user_input)
print(f"分配给: {agent}")


分配给: physiological_agent
