In [1]:
!python -m pip install --upgrade pip




In [2]:
!pip install spacy transformers scikit-learn keybert --timeout 120 --no-cache-dir




In [3]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --timeout 120


Looking in indexes: https://download.pytorch.org/whl/cpu


In [4]:
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     -- ------------------------------------- 0.8/12.8 MB 5.6 MB/s eta 0:00:03
     ---- ----------------------------------- 1.3/12.8 MB 4.8 MB/s eta 0:00:03
     ---- ----------------------------------- 1.6/12.8 MB 2.5 MB/s eta 0:00:05
     ---- ----------------------------------- 1.6/12.8 MB 2.5 MB/s eta 0:00:05
     ---- ----------------------------------- 1.6/12.8 MB 2.5 MB/s eta 0:00:05
     ----- ---------------------------------- 1.8/12.8 MB 1.4 MB/s eta 0:00:08
     ------ --------------------------------- 2.1/12.8 MB 1.5 MB/s eta 0:00:08
     ------ --------------------------------- 2.1/12.8 MB 1.5 MB/s eta 0:00:08
     ------ --------------------------------- 2.

In [5]:
!pip install -U huggingface_hub
!pip install -U keybert


Collecting huggingface_hub
  Using cached huggingface_hub-1.2.3-py3-none-any.whl.metadata (13 kB)
Using cached huggingface_hub-1.2.3-py3-none-any.whl (520 kB)
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.36.0
    Uninstalling huggingface-hub-0.36.0:
      Successfully uninstalled huggingface-hub-0.36.0
Successfully installed huggingface_hub-1.2.3


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 2.12.0 requires huggingface-hub<1.0.0,>=0.11.0, but you have huggingface-hub 1.2.3 which is incompatible.
transformers 4.29.0 requires huggingface-hub<1.0,>=0.11.0, but you have huggingface-hub 1.2.3 which is incompatible.


Collecting huggingface-hub>=0.4.0 (from sentence-transformers>=0.3.8->keybert)
  Using cached huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Using cached huggingface_hub-0.36.0-py3-none-any.whl (566 kB)
Installing collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface_hub 1.2.3
    Uninstalling huggingface_hub-1.2.3:
      Successfully uninstalled huggingface_hub-1.2.3
Successfully installed huggingface-hub-0.36.0


In [6]:
import spacy
from collections import Counter

nlp = spacy.load("en_core_web_sm")

def extract_keywords_spacy(text, top_n=5):
    doc = nlp(text.lower())
    keywords = [
        token.lemma_
        for token in doc
        if token.pos_ in ["NOUN", "PROPN"]
        and not token.is_stop
        and token.is_alpha
    ]
    return Counter(keywords).most_common(top_n)


In [7]:
extract_keywords_spacy(
    "Medical NLP pipelines extract symptoms and diagnoses from clinical notes"
)


[('nlp', 1), ('pipeline', 1), ('symptom', 1), ('diagnosis', 1), ('note', 1)]

In [8]:
import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_sm")

MEDICAL_TERMS = {
    "SYMPTOM": [
        "neck pain", "back pain", "head impact",
        "stiffness", "discomfort"
    ],
    "DIAGNOSIS": [
        "whiplash injury"
    ],
    "TREATMENT": [
        "physiotherapy", "painkillers"
    ],
    "PROGNOSIS": [
        "full recovery", "no long-term damage"
    ]
}

def extract_medical_entities(text):
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

    for label, terms in MEDICAL_TERMS.items():
        patterns = [nlp(term) for term in terms]
        matcher.add(label, patterns)

    doc = nlp(text)
    entities = {k: [] for k in MEDICAL_TERMS}

    for match_id, start, end in matcher(doc):
        label = nlp.vocab.strings[match_id]
        entities[label].append(doc[start:end].text)

    return entities


In [9]:
def generate_medical_summary(entities):
    summary = {
        "Patient_Name": "Janet Jones",  # From metadata / EHR
        "Symptoms": list(set(entities.get("SYMPTOM", []))),
        "Diagnosis": entities.get("DIAGNOSIS", ["Not mentioned"])[0],
        "Treatment": list(set(entities.get("TREATMENT", []))),
        "Current_Status": "Occasional back pain",
        "Prognosis": entities.get("PROGNOSIS", ["Not specified"])[0]
    }

    return summary


In [10]:
from transformers import pipeline

sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="pt"   # üî• THIS FIXES EVERYTHING
)

sentiment_pipeline("I love football analytics and AI!")


Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


[{'label': 'POSITIVE', 'score': 0.9988646507263184}]

In [11]:
def detect_intent(text):
    text = text.lower()

    if any(word in text for word in ["worried", "concerned", "afraid"]):
        return "Seeking reassurance"

    if any(word in text for word in ["pain", "hurt", "ache"]):
        return "Reporting symptoms"

    if any(word in text for word in ["better", "recover", "okay"]):
        return "Confirming recovery"

    return "General inquiry"


In [12]:
%%writefile medical_ner.py
import spacy

nlp = spacy.load("en_core_web_sm")

def extract_medical_entities(text):
    """
    Extract medical-related entities from text.
    Returns a dictionary grouped by entity label.
    """
    doc = nlp(text)

    entities = {}
    for ent in doc.ents:
        entities.setdefault(ent.label_, []).append(ent.text)

    return entities


Overwriting medical_ner.py


In [13]:
import medical_ner
print(dir(medical_ner))


['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'extract_medical_entities', 'nlp', 'spacy']


In [14]:
%%writefile sentiment_intent.py
from transformers import pipeline

# Force PyTorch backend
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="pt"
)

def classify_sentiment(text):
    """
    Classifies sentiment of the input text.
    Returns label and confidence score.
    """
    result = sentiment_pipeline(text)[0]
    return {
        "label": result["label"],
        "score": round(result["score"], 3)
    }


Overwriting sentiment_intent.py


In [15]:
%%writefile intent_detection.py

def detect_intent(text):
    """
    Detects high-level intent from patient dialogue.
    Rule-based (simple & reliable for medical use).
    """

    text_lower = text.lower()

    if any(word in text_lower for word in ["pain", "hurt", "ache", "injury"]):
        return "Report_Symptom"

    if any(word in text_lower for word in ["worried", "anxious", "scared", "concerned"]):
        return "Express_Concern"

    if any(word in text_lower for word in ["treatment", "therapy", "physiotherapy", "medication"]):
        return "Discuss_Treatment"

    if any(word in text_lower for word in ["recover", "better", "improve", "heal"]):
        return "Prognosis_Inquiry"

    return "General_Information"


Overwriting intent_detection.py


In [16]:
from medical_ner import extract_medical_entities
from medical_summary import generate_medical_summary
from sentiment_intent import classify_sentiment
from intent_detection import detect_intent

TRANSCRIPT = """
I had a car accident. My neck and back hurt a lot for four weeks.
I had ten physiotherapy sessions, and now I only have occasional back pain.
"""

PATIENT_DIALOGUE = "I'm a bit worried about my back pain, but I hope it gets better soon."

def run_pipeline():
    entities = extract_medical_entities(TRANSCRIPT)
    summary = generate_medical_summary(entities)

    sentiment = classify_sentiment(PATIENT_DIALOGUE)
    intent = detect_intent(PATIENT_DIALOGUE)

    output = {
        **summary,
        "Sentiment": sentiment,
        "Intent": intent
    }

    return output


if __name__ == "__main__":
    import json
    print(json.dumps(run_pipeline(), indent=2))





{
  "Patient_Name": "Janet Jones",
  "Symptoms": [],
  "Diagnosis": "Not mentioned",
  "Treatment": [],
  "Current_Status": "Occasional back pain",
  "Prognosis": "Not specified",
  "Sentiment": {
    "label": "POSITIVE",
    "score": 0.973
  },
  "Intent": "Report_Symptom"
}


In [17]:
%%writefile soap_note_generator.py

def generate_soap_note(entities, summary, sentiment, intent):
    """
    Generate a SOAP note from extracted NLP outputs.
    """

    soap_note = {
        "Subjective": {
            "Patient_Reported_Symptoms": summary.get("Symptoms", []),
            "Patient_Concerns": sentiment["label"],
            "Intent": intent
        },
        "Objective": {
            "Clinical_Findings": entities
        },
        "Assessment": {
            "Diagnosis": summary.get("Diagnosis", "Not mentioned"),
            "Current_Status": summary.get("Current_Status", "Not specified"),
            "Prognosis": summary.get("Prognosis", "Not specified")
        },
        "Plan": {
            "Treatment": summary.get("Treatment", []),
            "Follow_Up": "As advised by physician"
        }
    }

    return soap_note


Overwriting soap_note_generator.py


In [18]:
from soap_note_generator import generate_soap_note


In [21]:
from medical_ner import extract_medical_entities
from medical_summary import generate_medical_summary
from sentiment_intent import classify_sentiment
from intent_detection import detect_intent
from soap_note_generator import generate_soap_note

TRANSCRIPT = """
I had a car accident. My neck and back hurt a lot for four weeks.
I had ten physiotherapy sessions, and now I only have occasional back pain.
"""

PATIENT_DIALOGUE = "I'm a bit worried about my back pain, but I hope it gets better soon."

# 1Ô∏è‚É£ Extract entities
entities = extract_medical_entities(TRANSCRIPT)

# 2Ô∏è‚É£ Generate medical summary
summary = generate_medical_summary(entities)

# 3Ô∏è‚É£ Sentiment analysis
sentiment = classify_sentiment(PATIENT_DIALOGUE)

# 4Ô∏è‚É£ Intent detection
intent = detect_intent(PATIENT_DIALOGUE)

# 5Ô∏è‚É£ Generate SOAP note (CORRECT call)
soap_note = generate_soap_note(entities, summary, sentiment, intent)

import json
print(json.dumps(soap_note, indent=2))


{
  "Subjective": {
    "Patient_Reported_Symptoms": [],
    "Patient_Concerns": "POSITIVE",
    "Intent": "Report_Symptom"
  },
  "Objective": {
    "Clinical_Findings": {
      "DATE": [
        "four weeks"
      ],
      "CARDINAL": [
        "ten"
      ]
    }
  },
  "Assessment": {
    "Diagnosis": "Not mentioned",
    "Current_Status": "Occasional back pain",
    "Prognosis": "Not specified"
  },
  "Plan": {
    "Treatment": [],
    "Follow_Up": "As advised by physician"
  }
}
