In [1]:
#Load Patient Post
import pandas as pd

data_path = "output.csv"

df = pd.read_csv(data_path)
posts = df['translated_post'].tolist()


In [2]:
#Load DSM-5 Criteria from json file

import json
from typing import List, Dict, Iterable, Tuple, Any

dsm_path = "DSM-5/DSM_Criteria_Array_Fixed.json"

with open(dsm_path, 'r', encoding='utf-8') as f:
    dsm_data = json.load(f)

criteria_texts: List[str] = []
criteria_details: List[Dict[str, str]] = []

for item in dsm_data:
    diagnosis = item.get("diagnosis", "")
    criteria_list = item.get("criteria", [])
    
    # Each criterion is a dict with "id" and "text" keys
    for criterion_dict in criteria_list:
        criterion_text = criterion_dict.get("text", "").strip()
        criterion_id = criterion_dict.get("id", "")
        
        if criterion_text:  # Only add non-empty criteria
            criteria_texts.append(criterion_text)
            criteria_details.append({
                "diagnosis": diagnosis,
                "criterion_id": criterion_id,
                "text": criterion_text
            })

print(f"Loaded {len(criteria_texts)} DSM-5 criteria")
print(f"Example criterion: {criteria_texts[0][:100]}...")

# Clean criteria_texts before using it
print("=== CLEANING DATA ===")
original_count = len(criteria_texts)

# Filter out non-string and empty items
clean_criteria_texts = []
clean_criteria_details = []

for i, text in enumerate(criteria_texts):
    # Ensure it's a string and not empty
    if isinstance(text, str) and text.strip():
        clean_criteria_texts.append(text.strip())
        if i < len(criteria_details):
            clean_criteria_details.append(criteria_details[i])
    else:
        print(f"Removing invalid item at index {i}: {type(text)} -> {repr(text)}")

# Update the lists
criteria_texts = clean_criteria_texts
criteria_details = clean_criteria_details

print(f"Cleaned data: {original_count} -> {len(criteria_texts)} items")

# Verify the cleaning worked
if criteria_texts:
    print(f"All items are strings: {all(isinstance(x, str) for x in criteria_texts)}")
    print(f"No empty strings: {all(len(x.strip()) > 0 for x in criteria_texts)}")
    print(f"Sample: {criteria_texts[0][:50]}...")
else:
    print("❌ ERROR: No valid criteria found!")


Loaded 105 DSM-5 criteria
Example criterion: Severe recurrent temper outbursts manifested verbally (e.g., verbal rages) and/or be-
haviorally (e....
=== CLEANING DATA ===
Cleaned data: 105 -> 105 items
All items are strings: True
No empty strings: True
Sample: Severe recurrent temper outbursts manifested verba...


In [3]:
# DSM-5 Retriever
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer


class CriteriaRetriever:
    def __init__(self, criteria: List[str]):
        self.vectorizer = TfidfVectorizer(stop_words="english")
        self.matrix = self.vectorizer.fit_transform(criteria)
        # Precompute norms for cosine similarity
        self.doc_norms = np.linalg.norm(self.matrix, axis=1) + 1e-10

    def retrieve(self, query: str, top_k: int = 5):
        q_vec = self.vectorizer.transform([query.lower().strip()])
        dot_products = self.matrix @ q_vec.T  # Simple matrix multiplication
        similarities = dot_products / (self.doc_norms * q_vec.toarray().flatten() + 1e-10)
        top_indices = np.argsort(similarities.toarray().ravel())[-top_k:][::-1]
        return [(i, similarities[i, 0]) for i in top_indices if similarities[i, 0] > 0]

In [4]:
# Load TAIDE  (Not finished)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class LocalLLM:
    def __init__(self, model_name: str = "taide/Llama-3.1-TAIDE-LX-8B-Chat"):
        self.model_name = model_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.dtype = torch.float16 if self.device == "cuda" else torch.bfloat16
        self.max_length = 2048

    def generate(self, prompt: str, max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.9) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        input_length = inputs.input_ids.shape[1]
        if input_length + max_new_tokens > self.max_length:
            raise ValueError(f"Input length {input_length} with max_new_tokens {max_new_tokens} exceeds model's max_length {self.max_length}.")
        
        with torch.autocast(device_type=self.device, dtype=self.dtype):
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build Prompt

def build_baseline_prompt(question: str) -> str:
    return f"""你是一個具有DSM-5診斷標準知識的助理，能夠根據病患的文字描述給出你看到能的症狀，透過推理過程驗證你的答案是正確的。
Patient Description: {question}
Reasoning Process:
Answer:"""
def build_rag_prompt(question: str, retrieved_criteria: List[Tuple[str, float]]) -> str:
    criteria_text = "\n".join([f"- {text} (Score: {score:.4f})" for text, score in retrieved_criteria])
    return f"""你是一個具有DSM-5診斷標準知識的助理，能夠根據病患的文字描述以及提供的診斷標準給出你看到的症狀，透過推理過程驗證你的答案是正確的。
Patient Description: {question}
Criteria:
{criteria_text}
Reasoning Process:
Answer:"""

In [6]:
# Run Baseline & RAG
def run_baseline(llm: LocalLLM, question: str) -> str:
    prompt = build_baseline_prompt(question)
    return llm.generate(prompt)
def run_rag(llm: LocalLLM, retriever: CriteriaRetriever, question: str, top_k: int = 50) -> str:
    retrieved_indices = retriever.retrieve(question, top_k=top_k)
    retrieved_criteria = [(criteria_texts[i], score) for i, score in retrieved_indices]
    prompt = build_rag_prompt(question, retrieved_criteria)
    return llm.generate(prompt)

In [7]:
# Test output
print(posts[0])

assistant
原文：

原翻譯：

我該如何避免再次崩潰？ 我今年過得很差， 曾經有過想自殺的念頭， 我的愛人離開了， 我在大學裡不及格， 身體上也被打， 也被性侵， 今天我爺爺過世了。 我覺得自己快撐不下去， 只能勉強地做些基本的事來求生存， 同時也要當別人的支柱。 我的情緒韌性已經被擊垮了。 我感覺「還好」但我以前就有過這種感覺， 結果我只是在騙自己， 反而使情況更糟。 有什麼建議可以幫我更好地管理和處理我的情緒， 或是更了解自己？

修正後的翻譯：

你該如何避免再次崩潰？ 我今年過得很差， 曾經有過想自殺的念頭， 我的愛人離開了， 我在大學裡不及格， 身體上也被打， 也被性侵， 今天我爺爺過世了。 我覺得自己快撐不下去， 只能勉強地做些基本的事來求生存， 同時也要當別人的支柱。 我的情緒韌性已經被擊垮了。 我感覺「還好」但我以前就有過這種感覺， 結果我只是在騙自己， 反而使情況更糟。 有什麼建議可以幫我更好地管理和處理我的情緒， 或是更了解自己？

(原文保持不變，直接翻譯而出)


In [8]:
model = LocalLLM()
retriever = CriteriaRetriever(criteria_texts)

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.18s/it]


ValueError: scipy.sparse does not support dtype object. The only supported types are: bool, int8, uint8, int16, uint16, int32, uint32, int64, uint64, longlong, ulonglong, float32, float64, longdouble, complex64, complex128, clongdouble.

In [None]:
#Test baseline prompt
baseline_prompt = build_baseline_prompt(posts[0])
rag_prompt = build_rag_prompt(model, retriever, posts[0])
print(baseline_prompt)
print(rag_prompt)


In [None]:
# Save Results

In [None]:
# Compute Evaluation Metrics