In [None]:
!pip install transformers -q

In [None]:
!pip install torch -q

In [None]:
from transformers import pipeline
import re
import json
from collections import defaultdict

In [None]:
# Transcript text
text = """
  Physician: Good morning, Ms. Jones. How are you feeling today?
  Patient: Good morning, doctor. I’m doing better, but I still have some discomfort now and then.
  Physician: I understand you were in a car accident last September. Can you walk me through what happened?
  Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
  Physician: That sounds like a strong impact. Were you wearing your seatbelt?
  Patient: Yes, I always do.
  Physician: What did you feel immediately after the accident?
  Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
  Physician: Did you seek medical attention at that time?
  Patient: Yes, I went to Moss Bank Accident and Emergency. They checked me over and said it was a whiplash injury, but they didn’t do any X-rays. They just gave me some advice and sent me home.
  Physician: How did things progress after that?
  Patient: The first four weeks were rough. My neck and back pain were really bad—I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort.
  Physician: That makes sense. Are you still experiencing pain now?
  Patient: It’s not constant, but I do get occasional backaches. It’s nothing like before, though.
  Physician: That’s good to hear. Have you noticed any other effects, like anxiety while driving or difficulty concentrating?
  Patient: No, nothing like that. I don’t feel nervous driving, and I haven’t had any emotional issues from the accident.
  Physician: And how has this impacted your daily life? Work, hobbies, anything like that?
  Patient: I had to take a week off work, but after that, I was back to my usual routine. It hasn’t really stopped me from doing anything.
  Physician: That’s encouraging. Let’s go ahead and do a physical examination to check your mobility and any lingering pain.
  [Physical Examination Conducted]
  Physician: Everything looks good. Your neck and back have a full range of movement, and there’s no tenderness or signs of lasting damage. Your muscles and spine seem to be in good condition.
  Patient: That’s a relief!
  Physician: Yes, your recovery so far has been quite positive. Given your progress, I’d expect you to make a full recovery within six months of the accident. There are no signs of long-term damage or degeneration.
  Patient: That’s great to hear. So, I don’t need to worry about this affecting me in the future?
  Physician: That’s right. I don’t foresee any long-term impact on your work or daily life. If anything changes or you experience worsening symptoms, you can always come back for a follow-up. But at this point, you’re on track for a full recovery.
  Patient: Thank you, doctor. I appreciate it.
  Physician: You’re very welcome, Ms. Jones. Take care, and don’t hesitate to reach out if you need anything.
"""

## Load Transformer(`d4data/biomedical-ner-all`) Model

In [None]:
# Load biomedical NER model
ner = pipeline("ner", model="d4data/biomedical-ner-all", aggregation_strategy="simple")
# NER object
entities = ner(text)

Device set to use cuda:0


## Extract Entities

In [None]:
# Group entities
grouped = defaultdict(list)
for ent in entities:
    word = ent["word"].strip()
    if word and "##" not in word:
        key = ent["entity_group"]
        # manage missing key value
        if key not in grouped:
            grouped[key] = list()
        grouped[key].append(word)

In [None]:
grouped

defaultdict(list,
            {'Sign_symptom': ['discomfort',
              'pain',
              'pain',
              'stiff',
              'pain',
              'anxiety',
              'nervous',
              'emotional issues'],
             'Activity': ['car accident'],
             'Time': ['12 : 30 in'],
             'Nonbiological_location': ['hulme'],
             'Biological_structure': ['neck', 'back', 'neck', 'back'],
             'Duration': ['weeks'],
             'Medication': ['painkillers'],
             'Lab_value': ['improving'],
             'Detailed_description': ['ten sessions'],
             'Therapeutic_procedure': ['physiotherapy']})

## Extract Patient Name

In [None]:
# Extract Patient Name
match = re.search(r"(Mr\.?|Ms\.?|Mrs\.?)\s+([A-Z][a-z]+)", text, re.I) # regular expression to extract name
if match:
    patient_name = match.group(0)
else:
    patient_name = "Not Specified"
print(f"Patient Name is {patient_name}")

Patient Name is Ms. Jones


## Extract Symptoms

In [None]:
# Extract synptoms
symptom_keywords = [
    "pain", "ache", "stiffness", "weakness",
    "tenderness", "swelling", "anxiety", "trouble sleeping"
]

symptoms = grouped.get("Sign_symptom", [])
for word in symptom_keywords:
    if word.lower() in text.lower() and word.lower() not in symptoms:
        symptoms.append(word.lower())


In [None]:
# Clean duplicates and unwanted words
clean_symptoms = []
for s in symptoms:
    if s.lower() not in [x.lower() for x in clean_symptoms]:
        clean_symptoms.append(s)

symptoms = clean_symptoms

In [None]:
symptoms

['discomfort',
 'pain',
 'stiff',
 'anxiety',
 'nervous',
 'emotional issues',
 'ache',
 'stiffness',
 'tenderness',
 'trouble sleeping']

## Extract Diagnosis

In [None]:
# Extract Diagnosis using zero-shot
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
candidate_labels = ["Whiplash injury", "Neck strain", "Back injury", "Normal"]
prediction = zero_shot(text, candidate_labels)
diagnosis = prediction['labels'][0]


Device set to use cuda:0


In [None]:
print(f"The diagnosis is {diagnosis}")

The diagnosis is Whiplash injury


In [None]:
# Extract Treatment
treatments = []
if "Therapeutic_procedure" in grouped:
    treatments.extend(grouped["Therapeutic_procedure"])
if "Medication" in grouped:
    treatments.extend(grouped["Medication"])

extra_treatments = re.findall(r"\b(physiotherapy|painkillers?|ultrasound|X-ray|advice|analgesic)\b", text, re.I)
for t in extra_treatments:
    if t not in treatments:
        treatments.append(t)

# Remove duplicates
clean_treatments = []
for t in treatments:
    if t.lower() not in [x.lower() for x in clean_treatments]:
        clean_treatments.append(t)

treatments = clean_treatments

In [None]:
treatments

['physiotherapy', 'painkillers', 'advice']

In [None]:
# Extract Current Status
status_match = re.search(r"(occasional\s+\w+ache|not constant|improving|full range of movement|no tenderness)", text, re.I)
if status_match:
    current_status = status_match.group(0).capitalize()
else:
    current_status = "Improving"

In [None]:
current_status

'Improving'

In [None]:
# Extract Prognosis
prog_match = re.search(r"(full recovery|positive|on track|no long-term|expected within\s+\w+\s+months)", text, re.I)
if prog_match:
    prognosis = "The patient is " + prog_match.group(0).lower() + " and expected to make a full recovery."
else:
    prognosis = "The patient's recovery appears stable with no signs of long-term complications."

In [None]:
prognosis

'The patient is positive and expected to make a full recovery.'

In [None]:
# --- Summary ---
summary = {
    "Patient_Name": patient_name,
    "Symptoms": symptoms,
    "Diagnosis": diagnosis,
    "Treatment": treatments,
    "Current_Status": current_status,
    "Prognosis": prognosis
}

# Print nicely
print(json.dumps(summary, indent=2))

{
  "Patient_Name": "Ms. Jones",
  "Symptoms": [
    "discomfort",
    "pain",
    "stiff",
    "anxiety",
    "nervous",
    "emotional issues",
    "ache",
    "stiffness",
    "tenderness",
    "trouble sleeping"
  ],
  "Diagnosis": "Whiplash injury",
  "Treatment": [
    "physiotherapy",
    "painkillers",
    "advice"
  ],
  "Current_Status": "Improving",
  "Prognosis": "The patient is positive and expected to make a full recovery."
}


## Sentiment Analysis

In [None]:
# Load sentiment pipeline
sentiment_pipe = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

Device set to use cuda:0


In [None]:
# Extract patient dialogues
patient_lines = [line.strip() for line in text.split('\n') if line.strip().startswith('Patient:')]
patient_text = ' '.join(patient_lines).replace('Patient: ', '')

In [None]:
# Sentiment Classification: Map to Anxious/Neutral/Reassured
sent_result = sentiment_pipe(patient_text)[0]
sentiment_label = sent_result['label']
if sentiment_label == 'NEGATIVE' and sent_result['score'] > 0.6:
    sentiment = "Anxious"
elif sentiment_label == 'POSITIVE':
    sentiment = "Reassured"
else:
    sentiment = "Neutral"

In [None]:

# Intent Detection: Simple rule-based for now (could use zero-shot classification)
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
intents = ["Seeking reassurance", "Reporting symptoms", "Expressing concern"]
intent_result = zero_shot(patient_text, candidate_labels=intents)
intent = intent_result['labels'][0]

Device set to use cuda:0


In [None]:
sentiment_output = {
    "Sentiment": sentiment,
    "Intent": intent
}
print(json.dumps(sentiment_output, indent=2))

{
  "Sentiment": "Anxious",
  "Intent": "Seeking reassurance"
}


## SOAP Implementation

In [None]:
# Subjective: Patient-reported info (history, symptoms)
subjective = {
    "Chief_Complaint": ', '.join(symptoms[:2]) if symptoms else "Pain",
    "History_of_Present_Illness": ' '.join([line for line in patient_lines if 'accident' in line or 'pain' in line])
}

In [None]:
# Objective: Exam findings (from physician lines)
physician_lines = [line.strip() for line in text.split('\n') if line.strip().startswith('Physician:')]
objective = {
    "Physical_Exam": ' '.join([line for line in physician_lines if 'examination' in line or 'range' in line or 'tenderness' in line]).replace('Physician: ', ''),
    "Observations": "Patient appears in normal health."
}

In [None]:
# Assessment: Diagnosis and status
assessment = {
    "Diagnosis": diagnosis,
    "Severity": "Mild, improving" if "improving" in text.lower() else "Unknown"
}

In [None]:
# Plan: Treatment and follow-up
plan = {
    "Treatment": ', '.join(treatments),
    "Follow-Up": "Return if symptoms worsen." if "follow-up" in text.lower() else "None specified"
}

In [None]:
soap = {
    "Subjective": subjective,
    "Objective": objective,
    "Assessment": assessment,
    "Plan": plan
}
print(json.dumps(soap, indent=2))

{
  "Subjective": {
    "Chief_Complaint": "discomfort, pain",
    "History_of_Present_Illness": "Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away. Patient: The first four weeks were rough. My neck and back pain were really bad\u2014I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort. Patient: No, nothing like that. I don\u2019t feel nervous driving, and I haven\u2019t had any emotional issues from the accident."
  },
  "Objective": {
    "Physical_Exam": "That\u2019s encouraging. Let\u2019s go ahead and do a physical examination to check your mobility and any lingering pain. Everything looks good. Your neck and back have a full range of movement, and there\u2019s no tenderness or signs of lasting damage. Your muscles and spine seem to be i