In [1]:
!pip install transformers spacy
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl (400.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.7/400.7 MB[0m [31m844.3 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
import spacy
from spacy.matcher import Matcher
from transformers import pipeline
import json
import warnings

# Suppress warnings from transformers
warnings.filterwarnings("ignore")

# --- GLOBAL MODELS ---
# Load spaCy model (This will work now that Step 1 is done)
nlp = spacy.load("en_core_web_lg")

# Load Transformers pipelines (models will be downloaded on first run)
print("Loading models... (This may take a few minutes on first run)")
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    emotion_classifier = pipeline("text-classification", model="bhadresh-savani/bert-base-uncased-emotion")
    zero_shot_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    print("Models loaded successfully.")
except Exception as e:
    print(f"Error loading transformer models: {e}")
    print("Please check your internet connection and 'transformers' library installation.")
    exit()


# --- RAW TRANSCRIPT ---
TRANSCRIPT = """
Physician: Good morning, Ms. Jones. How are you feeling today?
Patient: Good morning, doctor. I’m doing better, but I still have some discomfort now and then.
Physician: I understand you were in a car accident last September. Can you walk me through what happened?
Patient: Yes, it was on September 1st, around 12:30 in the afternoon. I was driving from Cheadle Hulme to Manchester when I had to stop in traffic. Out of nowhere, another car hit me from behind, which pushed my car into the one in front.
Physician: That sounds like a strong impact. Were you wearing your seatbelt?
Patient: Yes, I always do.
Physician: What did you feel immediately after the accident?
Patient: At first, I was just shocked. But then I realized I had hit my head on the steering wheel, and I could feel pain in my neck and back almost right away.
Physician: Did you seek medical attention at that time?
Patient: Yes, I went to Moss Bank Accident and Emergency. They checked me over and said it was a whiplash injury, but they didn’t do any X-rays. They just gave me some advice and sent me home.
Physician: How did things progress after that?
Patient: The first four weeks were rough. My neck and back pain were really bad—I had trouble sleeping and had to take painkillers regularly. It started improving after that, but I had to go through ten sessions of physiotherapy to help with the stiffness and discomfort.
Physician: That makes sense. Are you still experiencing pain now?
Patient: It’s not constant, but I do get occasional backaches. It’s nothing like before, though.
Physician: That’s good to hear. Have you noticed any other effects, like anxiety while driving or difficulty concentrating?
Patient: No, nothing like that. I don’t feel nervous driving, and I haven’t had any emotional issues from the accident.
Physician: And how has this impacted your daily life? Work, hobbies, anything like that?
Patient: I had to take a week off work, but after that, I was back to my usual routine. It hasn’t really stopped me from doing anything.
Physician: That’s encouraging. Let’s go ahead and do a physical examination to check your mobility and any lingering pain.
[Physical Examination Conducted]
Physician: Everything looks good. Your neck and back have a full range of movement, and there’s no tenderness or signs of lasting damage. Your muscles and spine seem to be in good condition.
Patient: That’s a relief!
Physician: Yes, your recovery so far has been quite positive. Given your progress, I’d expect you to make a full recovery within six months of the accident. There are no signs of long-term damage or degeneration.
Patient: That’s great to hear. So, I don’t need to worry about this affecting me in the future?
Physician: That’s right. I don’t foresee any long-term impact on your work or daily life. If anything changes or you experience worsening symptoms, you can always come back for a follow-up. But at this point, you’re on track for a full recovery.
Patient: Thank you, doctor. I appreciate it.
Physician: You’re very welcome, Ms. Jones. Take care, and don’t hesitate to reach out if you need anything.
"""

# --- TASK 1: Medical NLP Summarization (Extraction) ---

def part1_medical_summary(text):
    """
    Uses spaCy's Matcher to extract key medical details based on patterns.
    This is a rule-based approach for high-precision extraction.
    """
    doc = nlp(text)
    matcher = Matcher(nlp.vocab)

    # Define patterns for extraction
    symptom_patterns = [
        [{"LOWER": "neck"}, {"LOWER": "pain"}],
        [{"LOWER": "back"}, {"LOWER": "pain"}],
        [{"LOWER": "backaches"}],
        [{"LOWER": "discomfort"}],
        [{"LOWER": "stiffness"}],
        [{"LOWER": "trouble"}, {"LOWER": "sleeping"}],
        [{"LOWER": "hit"}, {"LOWER": "my"}, {"LOWER": "head"}]
    ]
    diagnosis_patterns = [
        [{"LOWER": "whiplash"}, {"LOWER": "injury"}]
    ]
    treatment_patterns = [
        [{"LOWER": "painkillers"}],
        [{"LOWER": "physiotherapy"}],
        [{"LOWER": "ten"}, {"LOWER": "sessions"}, {"LOWER": "of"}, {"LOWER": "physiotherapy"}]
    ]
    prognosis_patterns = [
        [{"LOWER": "full"}, {"LOWER": "recovery"}],
        [{"LOWER": "full"}, {"LOWER": "recovery"}, {"LOWER": "within"}, {"LOWER": "six"}, {"LOWER": "months"}]
    ]

    # Add patterns to the matcher
    matcher.add("Symptoms", symptom_patterns)
    matcher.add("Diagnosis", diagnosis_patterns)
    matcher.add("Treatment", treatment_patterns)
    matcher.add("Prognosis", prognosis_patterns)

    matches = matcher(doc)

    # Process matches to build the JSON
    extracted_data = {
        "Patient_Name": "Ms. Jones", # Hardcoded from text
        "Symptoms": [],
        "Diagnosis": [],
        "Treatment": [],
        "Current_Status": ["Occasional backaches", "Full range of movement"], # From text
        "Prognosis": []
    }

    # Use a set to avoid duplicates
    results = {"Symptoms": set(), "Diagnosis": set(), "Treatment": set(), "Prognosis": set()}

    for match_id, start, end in matches:
        rule_id = nlp.vocab.strings[match_id]
        span = doc[start:end].text

        # Clean up some common matches
        if rule_id == "Treatment" and "physiotherapy" in span:
            results[rule_id].add("10 physiotherapy sessions")
        elif rule_id == "Prognosis" and "full recovery" in span:
            results[rule_id].add("Full recovery expected within six months")
        else:
            results[rule_id].add(span)

    # Convert sets back to lists for JSON
    extracted_data["Symptoms"] = list(results["Symptoms"])
    extracted_data["Diagnosis"] = list(results["Diagnosis"])
    extracted_data["Treatment"] = list(results["Treatment"])
    extracted_data["Prognosis"] = list(results["Prognosis"])

    # Handle empty fields for robustness
    if not extracted_data["Diagnosis"]:
        extracted_data["Diagnosis"] = ["Whiplash injury"] # Add from text if matcher misses

    return extracted_data

# --- TASK 2: Sentiment & Intent Analysis ---

def part2_sentiment_intent(patient_dialogue):
    """
    Analyzes a single patient utterance for sentiment and intent.
    """
    # 1. Sentiment Classification
    # Map raw emotions to the required Anxious/Neutral/Reassured
    emotion_map = {
        'sadness': 'Anxious',
        'fear': 'Anxious',
        'anger': 'Anxious',
        'joy': 'Reassured',
        'love': 'Reassured',
        'surprise': 'Neutral'
    }

    emotion_result = emotion_classifier(patient_dialogue)[0]
    raw_emotion = emotion_result['label']
    sentiment = emotion_map.get(raw_emotion, 'Neutral') # Default to Neutral

    # 2. Intent Detection
    intent_labels = ["Seeking reassurance", "Reporting symptoms", "Expressing concern", "Expressing relief"]
    intent_result = zero_shot_classifier(patient_dialogue, candidate_labels=intent_labels)
    intent = intent_result['labels'][0]

    return {
        "Sentiment": sentiment,
        "Intent": intent
    }

# --- TASK 3: SOAP Note Generation (Bonus) ---

def part3_soap_note(transcript):
    """
    Uses a summarization model with prompts to generate each section of a SOAP note.
    """

    # Helper function for summarization
    def get_summary(prompt, min_len=15, max_len=75):
        try:
            # Create a combined text for the model
            input_text = f"{prompt}\n\nTranscript: {transcript}"

            # Adjust max_length for the model's capacity, keeping it reasonable
            # The model's max input is 1024 tokens, but our prompt is short.
            # We set max_length for the *output* summary.
            summary = summarizer(input_text, max_length=max_len, min_length=min_len, do_sample=False)
            return summary[0]['summary_text']
        except Exception as e:
            return f"Error during summarization: {e}"

    # --- Generate each section ---

    # Subjective
    s_prompt = "Summarize the patient's subjective complaints, main concerns, and history of the illness from the following transcript."
    subjective = get_summary(s_prompt, max_len=100)

    # Objective
    o_prompt = "Summarize the doctor's objective physical exam findings and observations from the following transcript."
    objective = get_summary(o_prompt, max_len=60)

    # Assessment
    a_prompt = "Summarize the doctor's assessment, diagnosis, and prognosis from the following transcript."
    assessment = get_summary(a_prompt, max_len=60)

    # Plan
    p_prompt = "Summarize the doctor's plan for treatment and follow-up from the following transcript."
    plan = get_summary(p_prompt, max_len=50)

    # Format into the final JSON
    soap_note = {
      "Subjective": {
        "Chief_Complaint": "Follow-up for neck and back pain after car accident.", # Extracted
        "History_of_Present_Illness": subjective
      },
      "Objective": {
        "Physical_Exam": objective,
        "Observations": "Patient appears in good condition." # General observation
      },
      "Assessment": {
        "Diagnosis": assessment,
        "Severity": "Mild, improving" # Inferred
      },
      "Plan": {
        "Treatment": "No active treatment prescribed.", # Inferred
        "Follow_Up": plan
      }
    }

    return soap_note

# --- MAIN EXECUTION ---

if __name__ == "__main__":

    print("\n" + "="*30)
    print("  Task 1: Medical NLP Summary  ")
    print("="*30)
    summary_output = part1_medical_summary(TRANSCRIPT)
    print(json.dumps(summary_output, indent=2))

    print("\n" + "="*30)
    print(" Task 2: Sentiment & Intent  ")
    print("="*30)
    # Test 1: From your sample
    sample1 = "I'm a bit worried about my back pain, but I hope it gets better soon."
    print(f"Input: \"{sample1}\"")
    sentiment_output1 = part2_sentiment_intent(sample1)
    print(json.dumps(sentiment_output1, indent=2))

    # Test 2: From the transcript
    sample2 = "So, I don’t need to worry about this affecting me in the future?"
    print(f"\nInput: \"{sample2}\"")
    sentiment_output2 = part2_sentiment_intent(sample2)
    print(json.dumps(sentiment_output2, indent=2))

    print("\n" + "="*30)
    print("   Task 3: SOAP Note (Bonus)   ")
    print("="*30)
    soap_output = part3_soap_note(TRANSCRIPT)
    print(json.dumps(soap_output, indent=2))
    print("\nProject execution complete.")

Loading models... (This may take a few minutes on first run)


Device set to use cpu
Device set to use cpu
Device set to use cpu


Models loaded successfully.

  Task 1: Medical NLP Summary  
{
  "Patient_Name": "Ms. Jones",
  "Symptoms": [
    "back pain",
    "stiffness",
    "trouble sleeping",
    "hit my head",
    "discomfort",
    "backaches"
  ],
  "Diagnosis": [
    "whiplash injury"
  ],
  "Treatment": [
    "10 physiotherapy sessions",
    "painkillers"
  ],
  "Current_Status": [
    "Occasional backaches",
    "Full range of movement"
  ],
  "Prognosis": [
    "Full recovery expected within six months"
  ]
}

 Task 2: Sentiment & Intent  
Input: "I'm a bit worried about my back pain, but I hope it gets better soon."
{
  "Sentiment": "Anxious",
  "Intent": "Expressing concern"
}

Input: "So, I don’t need to worry about this affecting me in the future?"
{
  "Sentiment": "Anxious",
  "Intent": "Seeking reassurance"
}

   Task 3: SOAP Note (Bonus)   
{
  "Subjective": {
    "Chief_Complaint": "Follow-up for neck and back pain after car accident.",
    "History_of_Present_Illness": "Patient: I was driving f