In [8]:
!pip install langgraph langchain langchainhub langchain-core langserve
!pip install openllm-client requests

Collecting langchain
  Downloading langchain-0.3.24-py3-none-any.whl.metadata (7.8 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting langserve
  Downloading langserve-0.3.1-py3-none-any.whl.metadata (40 kB)
Collecting langchain-core
  Downloading langchain_core-0.3.57-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.8 (from langchain)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.0.20250328-py3-none-any.whl.metadata (2.3 kB)
Downloading langchain-0.3.24-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchainhub-0.1.21-py3-none-any.whl (5.2 kB)
Downloading langchain_core-0.3.57-py3-none-any.whl (437 kB)
Downloading langserve-0.3.1-py3-none-any.whl (1

In [11]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from langgraph.graph import StateGraph, END
from langchain.llms.ollama import Ollama
from langchain_core.runnables import RunnableLambda
from sklearn.metrics import classification_report

# --- Load data ---------------------------------------------------------------
# Every column that is not one of the identifier/text fields below is summed,
# and the four columns with the largest sums become the prediction targets.
df = pd.read_csv("SDOH_MIMICIII_physio_release.csv")
label_counts = df.drop(
    ["provider_type", "patient_id", "note_id", "sentence_index", "text"],
    axis=1,
).sum()
top4_labels = list(label_counts.sort_values(ascending=False).iloc[:4].index)

# Missing sentences become empty strings; missing label cells become 0.
df["text"] = df["text"].fillna("")
df[top4_labels] = df[top4_labels].fillna(0).astype(int)

# --- Prepare 15% test set ----------------------------------------------------
# First hold out 30% of the rows, then keep half of that hold-out (0.3 * 0.5).
_, temp_texts, _, temp_labels = train_test_split(
    df["text"],
    df[top4_labels],
    test_size=0.3,
    random_state=42,
)
_, test_texts, _, test_labels = train_test_split(
    temp_texts,
    temp_labels,
    test_size=0.5,
    random_state=42,
)

# Few-shot examples per agent
# Maps a label name to (example sentence, expected answer) pairs. The pairs are
# rendered into the prompt ahead of the sentence that is being classified.
FEW_SHOT_EXAMPLES = {
    "RELATIONSHIP_married": [
        ("The patient is married and lives with his wife.", 1),
        ("He mentioned having no current spouse.", 0)
    ],
    "SUPPORT_plus": [
        ("She receives strong emotional support from her family.", 1),
        ("No one is around to support him.", 0)
    ],
    "EMPLOYMENT_employed": [
        ("He is employed as a construction worker.", 1),
        ("She is currently unemployed and seeking jobs.", 0)
    ],
    "SUPPORT_minus": [
        ("Patient reported being socially isolated with no support.", 1),
        ("She is surrounded by caring friends and family.", 0)
    ]
}

# Prompt builder
def build_prompt(label, sentence):
    """Build the classification prompt for one (label, sentence) pair.

    Args:
        label: Name of the label to detect. It is interpolated into the
            instruction and used to look up examples in FEW_SHOT_EXAMPLES.
        sentence: The sentence to classify.

    Returns:
        The prompt string: a fixed instruction header, one
        "Sentence: ... / Answer: ..." pair per few-shot example, and finally
        the target sentence followed by an open "Answer:" for the model to
        complete.
    """
    header = (
        "You are a clinical annotation assistant.\n"
        f"Determine whether this sentence shows evidence of: {label}\n"
        "Reply with 1 if present, 0 if not.\n\n"
    )
    # The labels passed in are picked from the data (top4_labels), so a label
    # may have no curated examples. Indexing the dict directly raised KeyError
    # in that case; fall back to a zero-shot prompt instead.
    examples = FEW_SHOT_EXAMPLES.get(label, ())
    shots = "".join(
        f"Sentence: {example_text}\nAnswer: {value}\n\n"
        for example_text, value in examples
    )
    return f"{header}{shots}Sentence: {sentence}\nAnswer:"

# LangGraph agent setup using Ollama llama3.3
# One client instance is created here and shared by every per-label classifier
# (each classifier calls llm.invoke). temperature=0 is presumably chosen to make
# the replies as repeatable as possible - confirm against the Ollama wrapper docs.
# NOTE(review): the recorded cell output echoes this exact line above the
# progress bar, which looks like the tail of a warning raised by this call
# (likely a deprecation notice) - confirm and migrate if so.
llm = Ollama(model="llama3.3", temperature=0)

def make_agent(label):
    """Create the classifier runnable for a single label.

    Args:
        label: Label name; passed to build_prompt for every sentence.

    Returns:
        A RunnableLambda whose ``invoke(sentence)`` sends the prompt for
        ``label`` to the shared ``llm`` and returns 1 or 0. The reply counts as
        1/0 only when its first non-whitespace character is "1"/"0"; any other
        reply, including an empty one, is mapped to 0.
    """
    def classify(sentence: str):
        prompt = build_prompt(label, sentence)
        answer = llm.invoke(prompt).strip()
        # An empty or whitespace-only completion used to raise IndexError on
        # ``[0]``. Slice instead, and compare against a tuple: with a plain
        # string, ``"" in "01"`` would be True and int("") would then fail.
        first_char = answer[:1]
        return int(first_char) if first_char in ("0", "1") else 0
    return RunnableLambda(classify)

# Create agents
agents = {label: make_agent(label) for label in top4_labels}

# Number of test sentences to classify. Every sentence costs one LLM call per
# label, so the run is capped; set to None to classify the whole test set.
N_EVAL_SAMPLES = 50
eval_texts = test_texts.iloc[:N_EVAL_SAMPLES].tolist()

# Classify test data
results = []
for text in tqdm(eval_texts, desc="LLama3.3 LangGraph Classification"):
    row = {"text": text}
    for label in top4_labels:
        row[f"{label}_pred"] = agents[label].invoke(text)
    results.append(row)

# Final result
df_preds = pd.DataFrame(results).reset_index(drop=True)
# Attach ground truth for exactly the rows that were classified. test_texts and
# test_labels come from the same split, so the first len(df_preds) label rows
# correspond to the classified sentences; the index is reset so the column
# assignment below lines up with df_preds' 0..n-1 index.
true_labels = test_labels[top4_labels].iloc[:len(df_preds)].reset_index(drop=True)
assert len(true_labels) == len(df_preds), "prediction/label row counts differ"
df_preds[[f"{label}_true" for label in top4_labels]] = true_labels

# Export
output_path = "sdoh_llama3_multiagent_predictions.csv"
df_preds.to_csv(output_path, index=False)
print("✅ Exported predictions to:", output_path)


  llm = Ollama(model="llama3.3", temperature=0)
LLama3.3 LangGraph Classification: 100%|████████████████████████████████████████████████| 50/50 [02:56<00:00,  3.52s/it]

✅ Exported predictions to: sdoh_llama3_multiagent_predictions.csv





In [13]:
from sklearn.metrics import classification_report

# Build the (n_samples, n_labels) indicator matrices for the report.
# Both are read from df_preds, which already stores the "<label>_true" columns
# next to the "<label>_pred" columns, so they always cover the same rows and no
# hard-coded row count has to be kept in sync with the classification cell.
y_true = df_preds[[f"{label}_true" for label in top4_labels]].values
y_pred = df_preds[[f"{label}_pred" for label in top4_labels]].values

# Generate classification report
# zero_division=0 reports 0 instead of warning for labels with no positive
# predictions or no positive ground truth in this subset. With a small subset
# the per-label support can be tiny, so read the scores as a smoke test only.
report = classification_report(y_true, y_pred, target_names=top4_labels, zero_division=0)
print("Test Set Metrics:\n", report)


Test Set Metrics:
                       precision    recall  f1-score   support

RELATIONSHIP_married       1.00      1.00      1.00         1
        SUPPORT_plus       0.00      0.00      0.00         2
 EMPLOYMENT_employed       0.00      0.00      0.00         0
       SUPPORT_minus       0.00      0.00      0.00         0

           micro avg       0.12      0.33      0.18         3
           macro avg       0.25      0.25      0.25         3
        weighted avg       0.33      0.33      0.33         3
         samples avg       0.01      0.02      0.01         3

