# Few-shot Classification of SDoH

## 0. Setup

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to module source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import os
from pathlib import Path
import sys
from IPython.display import display, HTML

# Make the project root importable so the `src.*` modules used below resolve.
# The root is taken to be the parent of the current working directory, i.e.
# the notebook is expected to run from a direct subdirectory of the project.
project_root = Path.cwd().parent

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [3]:
# Use shared cache.
# These variables must be set BEFORE transformers / huggingface_hub are
# imported: the libraries resolve their cache location and offline flag when
# they are first imported, so assigning them afterwards has no effect on the
# already-imported modules.
HF_HOME_DIR = '/data/resource/huggingface'
os.environ['HF_HOME'] = HF_HOME_DIR
os.environ['TRANSFORMERS_OFFLINE'] = '1'  # Force offline mode

import torch
import transformers

# What models are available: the hub cache lives in the "hub" subfolder of
# HF_HOME; `available_models` is filled by the listing cell in section 1.1.
cache_dir = os.path.join(HF_HOME_DIR, "hub")
available_models = []

# Suppress warnings from transformers
transformers.logging.set_verbosity_error()



In [4]:
# Load the cleaned referrals CSV into a DataFrame. The relative path is
# resolved against the notebook's working directory.
brc_referrals_cleaned = pd.read_csv("../data/processed/brc-cleaned/referrals_cleaned.csv")

## 1. Few-shot classification of SDoH

### 1.1 Loading the models

In [4]:
# Rebuild the list from scratch on every run so that re-executing this cell
# never appends duplicates to a list created in an earlier cell.
# A comprehension is used on purpose: its loop variable stays local, so this
# cell cannot overwrite the notebook-level `model` / `model_name` variables
# that hold the loaded LLM and its name later on.
if os.path.isdir(cache_dir):
    available_models = sorted(
        # Cache folders are named "models--<org>--<name>" (or "models--<name>").
        # Strip the prefix and turn only the FIRST "--" into "/", since a repo
        # id has at most one slash and the name itself may contain "--".
        entry[len("models--"):].replace("--", "/", 1)
        for entry in os.listdir(cache_dir)
        if entry.startswith("models--")
    )
else:
    # Missing cache directory: report an empty list instead of failing.
    available_models = []

print("Available cached models:")
for cached_model_id in available_models:
    print(f"  {cached_model_id}")

Available cached models:
  CohereForAI/aya-23-35B
  CohereForAI/aya-23-8B
  CohereForAI/aya-vision-8b
  HuggingFaceTB/SmolLM-135M-Instruct
  LLaMAX/LLaMAX3-8B-Alpaca
  Qwen/Qwen1.5-4B
  Qwen/Qwen2-7B
  Qwen/Qwen2.5-1.5B
  Qwen/Qwen2.5-3B
  Qwen/Qwen2.5-72B-Instruct
  Qwen/Qwen2.5-7B
  Qwen/Qwen2.5-7B-Instruct
  Qwen/Qwen2.5-7B-instruct
  Qwen/Qwen2.5-VL-7B-Instruct
  Qwen/Qwen3-0.6B
  Qwen/Qwen3-8B
  Unbabel/wmt20-comet-qe-da
  Unbabel/wmt22-comet-da
  bert-base-uncased
  bert-large-uncased
  cardiffnlp/twitter-roberta-base-sentiment
  cardiffnlp/twitter-roberta-base-sentiment-latest
  clairebarale/refugee_cases_ner
  cross-encoder/nli-deberta-v3-large
  cross-encoder/stsb-roberta-base
  cross-encoder/stsb-roberta-large
  deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  deepseek-ai/DeepSeek-R1-Distill-Llama-8B
  deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
  deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
  deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
  facebook/nllb-200-3.3B
  facebook/nllb-200-distilled-

In [6]:
# Record the library versions and whether a CUDA device is visible, so the
# run can be reproduced/debugged later.
print(
    f"Transformers version: {transformers.__version__}",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

Transformers version: 4.52.3
PyTorch version: 2.6.0
CUDA available: True


In [7]:
from src.classification.model_helpers import load_instruction_model

# Candidate instruction-tuned checkpoints (set `model_name` to one of them):
#   Qwen/Qwen2.5-7B-Instruct
#   meta-llama/Llama-3.1-8B-Instruct
#   microsoft/Phi-4-mini-instruct
#   mistralai/Mistral-7B-Instruct-v0.3
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Reset both handles before loading. Presumably this drops references to a
# previously loaded model so it can be freed first — TODO confirm intent.
tokenizer = model = None
tokenizer, model = load_instruction_model(model_name)

Loading meta-llama/Llama-3.1-8B-Instruct...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✓ meta-llama/Llama-3.1-8B-Instruct loaded successfully!


### 1.2 Extraction from one note

In [8]:
# Select the referral note text for case CAS-467812. If several rows share the
# case reference, the first one is used.
case_mask = brc_referrals_cleaned['Case Reference'] == 'CAS-467812'
matching_rows = brc_referrals_cleaned[case_mask]
sample_note = matching_rows.iloc[0]['Referral Notes (depersonalised)']

In [9]:
from src.classification.prompt_creation_helpers import create_automated_prompt

# Build the "five_shot_basic" prompt around a placeholder sentence, purely to
# show what the model will receive.
prompt_example_basic = create_automated_prompt(
    "This is a sentence",
    tokenizer=tokenizer,
    prompt_type="five_shot_basic",
)

# Framed header followed by the rendered prompt text
print(
    "=" * 50,
    "Example Prompt (Five Shot Basic):",
    "=" * 50,
    prompt_example_basic,
    sep="\n",
)

Example Prompt (Five Shot Basic):
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are analyzing a referral note sentence to identify Social Determinants of Health, and classifying them as Adverse or Protective.

Given a sentence, output all SDoH factors that can be inferred from that sentence from the following list: 
Loneliness, Housing, Finances, FoodAccess, Digital, Employment, EnglishProficiency.

Each SDoH must be classified as either "Adverse" or "Protective". 
If the sentence does NOT mention any of the above categories, output <LIST>NoSDoH</LIST>.

Your response must be a comma-separated list of SDoH-Polarity pairs embedded in <LIST> and </LIST> tags.

**STRICT RULES**:
- DO NOT generate any other text, explanations, or new SDoH labels.
- A sentence CAN be labeled with one or more SDoH factors.
- The only accepted format is <LIST>...</LIST>.

EXAMPLES:
Input: "She is unemployed and struggles to pay 

In [10]:
from src.classification.SDoH_classification_helpers import SDoHExtractor

# Settings for the extractor: reuse the model/tokenizer loaded above with the
# five-shot prompt. debug=True presumably makes the extractor keep the prompt
# and raw response per sentence (read by a later cell) — confirm in
# SDoHExtractor.
extractor_options = {
    "model": model,
    "tokenizer": tokenizer,
    "prompt_type": "five_shot_basic",
    "debug": True,
}
extractor = SDoHExtractor(**extractor_options)

# Run extraction on the sample note, then tabulate the result under the
# note id "sample".
results = extractor.extract_from_note(sample_note)
results_df = extractor.results_to_dataframe(results, note_id="sample")

print("\nExtracted SDoH Factors:")
display(results_df)


Extracted SDoH Factors:


Unnamed: 0,note_id,sentence_number,sentence,has_sdoh,sdoh_factors,num_sdoh_factors
0,sample,1,Lives with husband for whom patient is carer,True,"Housing-Protective, Employment-Adverse",2
1,sample,2,Living on ready meals at present,True,FoodAccess-Adverse,1
2,sample,3,[PERSON] concerned that they may not be eating...,True,FoodAccess-Adverse,1
3,sample,4,Carers in [REDACTED] times daily for patient t...,True,"Housing-Protective, Employment-Adverse",2
4,sample,5,"Depending on side - effects of radiotherapy , ...",True,FoodAccess-Adverse,1
5,sample,6,Patient feeling slightly overwhelmed by everyt...,True,Loneliness-Adverse,1
6,sample,7,FPOC and Carers Support Shropshire numbers giv...,True,"Loneliness-Adverse, Finances-NoSDoH, Housing-N...",6
7,sample,8,Very supportive daughter who lives in [PERSON],True,"Loneliness-Protective, Housing-Adverse",2
8,sample,9,The patient is due to start radiotherapy on [R...,False,NoSDoH,0
9,sample,10,Due to start radiotherapy on [REDACTED] at SAT...,False,NoSDoH,0


In [9]:
# Preview the first rows of the results table built in the previous cell
results_df.head()

Unnamed: 0,note_id,sentence_number,sentence,has_sdoh,sdoh_factors,num_sdoh_factors
0,sample,1,Lives with husband for whom patient is carer,True,"Housing-Protective, Employment-Adverse",2
1,sample,2,Living on ready meals at present,True,FoodAccess-Adverse,1
2,sample,3,[PERSON] concerned that they may not be eating...,True,FoodAccess-Adverse,1
3,sample,4,Carers in [REDACTED] times daily for patient t...,True,"Housing-Protective, Employment-Adverse",2
4,sample,5,"Depending on side - effects of radiotherapy , ...",True,FoodAccess-Adverse,1


In [None]:
# Debugging aid: show the prompt and the raw model response stored for the
# sentence at index 1 of the extraction results.
for heading, debug_key in (("Prompt: \n", 'prompt'), ("Raw response: \n", 'raw_response')):
    print(heading)
    print(results['sentences'][1]['debug'][debug_key])

### 1.3 Evaluating few-shot extraction on test set

In [10]:
from src.classification.model_helpers import load_instruction_model
from src.classification.SDoH_classification_helpers import SDoHExtractor

# Evaluation configuration: checkpoint and prompt template to evaluate
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
PROMPT_TYPE = "five_shot_basic"

# NOTE(review): this loads the checkpoint again even though section 1.1
# already loaded the same model name — confirm the reload is intended.
tokenizer, model = load_instruction_model(MODEL_NAME)

# Stop early if either the tokenizer or the model is missing
if not (tokenizer is not None and model is not None):
    raise ValueError(f"Failed to load model: {MODEL_NAME}")

# Extractor for the evaluation run; debug output is switched off here
extractor = SDoHExtractor(model=model, tokenizer=tokenizer, prompt_type=PROMPT_TYPE, debug=False)

Loading meta-llama/Llama-3.1-8B-Instruct...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

✓ meta-llama/Llama-3.1-8B-Instruct loaded successfully!


In [22]:
import ast

from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm

# === Step 1: Load test set ===
test_df = pd.read_csv("../data/processed/train-test/test_set.csv")

# The "label_pair" column holds the text form of a Python literal (the list of
# gold labels for the sentence). Parse it with ast.literal_eval instead of
# eval: literal_eval only accepts literals, so content in the CSV can never be
# executed as code.
test_df["label_pair"] = test_df["label_pair"].apply(ast.literal_eval)

In [23]:
# === Step 2: Run model inference using extractor ===
y_true = []     # gold label lists, one per sentence (sorted)
y_pred = []     # predicted label lists, one per sentence (sorted)
sentences = []  # the sentences themselves, kept for the inspection CSV

# Walk the two required columns directly rather than building a Series per
# row with iterrows().
for sentence, gold_labels in tqdm(
    zip(test_df["Sentence"], test_df["label_pair"]), total=len(test_df)
):
    result = extractor.extract_from_sentence(sentence)

    y_true.append(sorted(gold_labels))
    y_pred.append(sorted(result["sdoh_factors"]))  # list of predicted labels
    sentences.append(sentence)

# === Step 3: Binarize for multilabel metrics ===
# The label space is defined by the gold labels only (fit on y_true).
mlb = MultiLabelBinarizer()
y_true_bin = mlb.fit_transform(y_true)

# MultiLabelBinarizer.transform ignores labels it was not fitted on, so any
# predicted label that never occurs in the gold set would silently vanish
# from the metrics. Report such labels explicitly, then drop them ourselves;
# the resulting matrix is the same one transform(y_pred) would produce.
gold_label_space = set(mlb.classes_)
unseen_pred_labels = sorted(
    {label for labels in y_pred for label in labels} - gold_label_space
)
if unseen_pred_labels:
    print(
        f"Note: {len(unseen_pred_labels)} predicted label(s) do not occur in the "
        f"gold labels and are excluded from the metrics: {unseen_pred_labels}"
    )
y_pred_bin = mlb.transform(
    [[label for label in labels if label in gold_label_space] for labels in y_pred]
)  # must not refit — only transform

# === Step 4: Print F1 scores ===
# zero_division=0 yields the same numbers as the default setting but without
# an UndefinedMetricWarning for classes that have no predicted samples.
print("Few-Shot Classification Report:\n")
print(
    classification_report(
        y_true_bin, y_pred_bin, target_names=mlb.classes_, zero_division=0
    )
)

# === Step 5: Save CSV for manual inspection ===
eval_results_df = pd.DataFrame({
    "Sentence": sentences,
    "Gold Labels": [", ".join(lbls) for lbls in y_true],
    "Predicted Labels": [", ".join(lbls) for lbls in y_pred],
    "Exact Match": [set(t) == set(p) for t, p in zip(y_true, y_pred)]
})

# Create the output folder if needed so the inference run above is not wasted
# on a missing directory.
eval_results_path = "../results/eval/few_shot_eval_30_06.csv"
os.makedirs(os.path.dirname(eval_results_path), exist_ok=True)
eval_results_df.to_csv(eval_results_path, index=False)
print(f"\nSaved evaluation results to: {eval_results_path}")

100%|██████████| 243/243 [02:08<00:00,  1.89it/s]

Few-Shot Classification Report:

                            precision    recall  f1-score   support

           Digital-Adverse       0.40      0.67      0.50         6
        Digital-Protective       0.00      0.00      0.00         1
        Employment-Adverse       0.17      1.00      0.29         3
     Employment-Protective       0.00      0.00      0.00         1
EnglishProficiency-Adverse       0.40      1.00      0.57         2
          Finances-Adverse       0.38      0.82      0.52        17
       Finances-Protective       0.00      0.00      0.00         1
        FoodAccess-Adverse       0.94      0.85      0.89        20
     FoodAccess-Protective       0.50      1.00      0.67         1
           Housing-Adverse       0.44      0.71      0.55        28
        Housing-Protective       0.12      1.00      0.22         1
        Loneliness-Adverse       0.57      0.69      0.63        39
     Loneliness-Protective       0.50      0.57      0.53         7
              


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Generate the per-class report as a nested dict instead of formatted text.
# zero_division=0 keeps the values the default setting produces while
# suppressing the UndefinedMetricWarning for classes without predictions.
report_dict = classification_report(
    y_true_bin,
    y_pred_bin,
    target_names=mlb.classes_,
    output_dict=True,
    zero_division=0,
)

# Convert to DataFrame: one row per class / average, one column per metric
report_df = pd.DataFrame(report_dict).transpose()

# Save to CSV, creating the output folder first if it does not exist yet
report_path = "../results/eval/few_shot_eval_report_30_06.csv"
os.makedirs(os.path.dirname(report_path), exist_ok=True)
report_df.to_csv(report_path)
print(f"\nSaved classification report to: {report_path}")


Saved classification report to: ../results/eval/few_shot_eval_report_30_06.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
