In [2]:
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TextClassificationPipeline
import pandas as pd

# Per-category integer consumed as `num_labels` by the loading loop below
# (presumably the number of output classes of each classifier -- confirm).
sdoh_to_labels = {
    # social determinants of health
    "sdoh_community_present": 2,
    "sdoh_community_absent": 2,
    "sdoh_education": 2,
    "sdoh_economics": 3,
    "sdoh_environment": 3,
    # behavioural categories
    "behavior_alcohol": 5,
    "behavior_tobacco": 5,
    "behavior_drug": 5
}

# One slot per category, initialised to None and filled by the loop below.
sdoh_to_models = dict.fromkeys(sdoh_to_labels)

# Paths and base checkpoint used by this notebook.
data_path = "../data/SOCIALHISTORIES.csv"
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name, do_lower_case=True)
saved_model_dir = "../saved_models/standard"

# Draw a single random row and keep its text; no seed is set, so the row
# changes from run to run.
df = pd.read_csv(data_path)
sample = df.sample(1)
text = sample["TEXT"].values[0]

# Load the saved classifier for every category from its own sub-directory
# of `saved_model_dir` (the sub-directory is named after the category key).
for sdoh, num_labels in sdoh_to_labels.items():
    model = AutoModelForSequenceClassification.from_pretrained(
        os.path.join(saved_model_dir, sdoh)
    )
    sdoh_to_models[sdoh] = model

In [5]:
# Predicted label per category; every key of `sdoh_to_models` starts at None
# and is overwritten by the loop below.
results = dict.fromkeys(sdoh_to_models)

# Ground-truth label per category, read from the sampled row; starts at 0.
true_labels = dict.fromkeys(sdoh_to_models, 0)

for sdoh, model in sdoh_to_models.items():
    # `sample` is a one-row DataFrame, so `sample[sdoh]` is a one-element
    # Series. Calling int() directly on a Series is deprecated in pandas and
    # raises a FutureWarning; pull the scalar out explicitly instead.
    true_labels[sdoh] = int(sample[sdoh].iloc[0])

    # device=0 places the pipeline on the first CUDA device, so this cell
    # needs a GPU to run.
    pipeline = TextClassificationPipeline(model=model, tokenizer=tokenizer, task="text-classification", device=0)
    result = pipeline(text)

    # Keep only the part of the predicted label after the last underscore.
    # It is stored as a string (e.g. '1'), whereas `true_labels` holds ints.
    results[sdoh] = result[0]['label'].split("_")[-1]

print("True labels: ", true_labels)
print("Results: ", results)

True labels:  {'sdoh_community_present': 1, 'sdoh_community_absent': 0, 'sdoh_education': 0, 'sdoh_economics': 1, 'sdoh_environment': 1, 'behavior_alcohol': 4, 'behavior_tobacco': 3, 'behavior_drug': 0}
Results:  {'sdoh_community_present': '1', 'sdoh_community_absent': '0', 'sdoh_education': '0', 'sdoh_economics': '1', 'sdoh_environment': '1', 'behavior_alcohol': '4', 'behavior_tobacco': '3', 'behavior_drug': '0'}


  true_labels[sdoh] = int(sample[sdoh])
