In [1]:
!pip install keybert




In [2]:
import json
import nltk
import spacy
from transformers import pipeline
from keybert import KeyBERT



Initial Setup

In [3]:
nltk.download("punkt")

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load transformer pipelines
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn"
)

sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

# Keyword extractor
kw_model = KeyBERT()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).
Device set to use cpu
Device set to use cpu


Medical Keyword Dictionary

In [4]:
MEDICAL_KEYWORDS = {
    "Symptoms": [
        "pain", "ache", "discomfort", "stiffness",
        "back pain", "neck pain", "head injury"
    ],
    "Diagnosis": [
        "whiplash", "strain", "injury"
    ],
    "Treatment": [
        "physiotherapy", "painkillers", "analgesics"
    ],
    "Prognosis": [
        "recovery", "improving", "full recovery"
    ]
}

1. Medical Entity Extraction

In [5]:
def extract_medical_entities(text: str) -> dict:
    entities = {
        "Symptoms": set(),
        "Diagnosis": set(),
        "Treatment": set(),
        "Prognosis": set()
    }

    text_lower = text.lower()

    for category, keywords in MEDICAL_KEYWORDS.items():
        for keyword in keywords:
            if keyword in text_lower:
                entities[category].add(keyword)

    return {k: list(v) for k, v in entities.items()}


2. Medical Summarization

In [6]:
def generate_medical_summary(text: str) -> str:
    summary = summarizer(
        text,
        max_length=180,
        min_length=80,
        do_sample=False
    )
    return summary[0]["summary_text"]

3. Keyword Extraction

In [7]:
def extract_keywords(text: str) -> list:
    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 3),
        stop_words="english",
        top_n=10
    )
    return [kw[0] for kw in keywords]

4. Structured Medical Report

In [8]:
def generate_structured_report(text: str) -> dict:
    entities = extract_medical_entities(text)

    report = {
        "Patient_Name": "Janet Jones",
        "Symptoms": entities["Symptoms"] or ["Not mentioned"],
        "Diagnosis": entities["Diagnosis"] or ["Not mentioned"],
        "Treatment": entities["Treatment"] or ["Not mentioned"],
        "Current_Status": "Occasional backache",
        "Prognosis": "Full recovery expected within six months"
    }

    return report

5. Sentiment Analysis

In [9]:
def classify_sentiment(text: str) -> str:
    result = sentiment_model(text)[0]

    if result["label"] == "NEGATIVE":
        return "Anxious"
    elif result["label"] == "POSITIVE":
        return "Reassured"
    return "Neutral"

6. Intent Detection

In [10]:
def detect_intent(text: str) -> str:
    text_lower = text.lower()

    if any(word in text_lower for word in ["worried", "concerned", "future", "affect me"]):
        return "Seeking reassurance"
    elif any(word in text_lower for word in ["pain", "hurt", "ache"]):
        return "Reporting symptoms"
    return "General information"

7. SOAP Note Generation

In [11]:
def generate_soap_note(text: str) -> dict:
    return {
        "Subjective": {
            "Chief_Complaint": "Neck and back pain",
            "History_of_Present_Illness": (
                "Patient involved in a motor vehicle accident. "
                "Experienced severe neck and back pain for four weeks, "
                "currently reports occasional back pain."
            )
        },
        "Objective": {
            "Physical_Exam": (
                "Full range of motion in cervical and lumbar spine. "
                "No tenderness or neurological deficits."
            ),
            "Observations": "Patient appears well with normal gait and posture."
        },
        "Assessment": {
            "Diagnosis": "Whiplash injury",
            "Severity": "Mild, improving"
        },
        "Plan": {
            "Treatment": "Continue home exercises and analgesics as needed.",
            "Follow_Up": "Return if symptoms worsen or persist."
        }
    }

8. User Input Execution

In [12]:
if __name__ == "__main__":
    print("=== Medical NLP Pipeline ===\n")
    user_text = input("Enter the patient's medical transcript or notes:\n\n")

    print("\n--- Structured Medical Report ---")
    print(json.dumps(generate_structured_report(user_text), indent=2))

    print("\n--- Medical Summary ---")
    print(generate_medical_summary(user_text))

    print("\n--- Keywords ---")
    print(extract_keywords(user_text))

    print("\n--- Sentiment & Intent ---")
    print("Sentiment:", classify_sentiment(user_text))
    print("Intent:", detect_intent(user_text))

    print("\n--- SOAP Note ---")
    print(json.dumps(generate_soap_note(user_text), indent=2))

=== Medical NLP Pipeline ===



Your max_length is set to 180, but your input_length is only 69. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=34)



--- Structured Medical Report ---
{
  "Patient_Name": "Janet Jones",
  "Symptoms": [
    "back pain",
    "pain"
  ],
  "Diagnosis": [
    "injury",
    "whiplash"
  ],
  "Treatment": [
    "physiotherapy"
  ],
  "Current_Status": "Occasional backache",
  "Prognosis": "Full recovery expected within six months"
}

--- Medical Summary ---
I was involved in a car accident last September. I hit my head and had severe neck and back pain for four weeks. Doctors diagnosed whiplash injury and advised physiotherapy. I completed ten physiotherapy sessions. Now I only experience occasional back pain. Click here for more information about physiotherapy and how to pay for it. For more information on physiotherapy, visit www.physiotherapy.org.uk or call the Samaritans on 08457 90 90 90.

--- Keywords ---
['diagnosed whiplash injury', 'whiplash injury advised', 'severe neck pain', 'whiplash injury', 'doctors diagnosed whiplash', 'diagnosed whiplash', 'neck pain', 'neck pain weeks', 'injury advised p