In [2]:
# Example case notes
# Each entry is one free-text sentence describing a single patient. The list is
# passed unchanged to extract_medical_entities() further down in the notebook.
case_notes = [
    "45 year old woman diagnosed with CAD and prescribed aspirin.",
    "32 year old man reports chest pain and was treated with metoprolol.",
    "60 year old male patient with history of hypertension and diabetes, started on amlodipine.",
    "Patient is a 28 year old female diagnosed with COVID-19 and given acetaminophen."
]


In [3]:
from transformers import pipeline

# Build the token-classification (NER) pipeline for the medical model.
# With the "simple" aggregation strategy each result is a grouped entity span
# that carries an 'entity_group' label and the matched 'word' text.
pipe = pipeline(
    task="token-classification",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy="simple",
)

# Function to extract entities from case notes
def extract_medical_entities(case_notes, ner_pipeline=None):
    """Run NER over case notes and group the entity text by entity type.

    Args:
        case_notes: Iterable of case-note strings. A single string is treated
            as one note instead of being iterated character by character.
        ner_pipeline: Optional callable that takes one note and returns an
            iterable of dicts with 'entity_group' and 'word' keys. When
            omitted, the notebook-level ``pipe`` is used.

    Returns:
        A list with one dict per note. Each dict maps an entity type to the
        list of entity words found for that type, in the order the pipeline
        returned them.
    """
    if ner_pipeline is None:
        # Resolved at call time so the default always follows the current `pipe`.
        ner_pipeline = pipe

    if isinstance(case_notes, str):
        # Guard against a bare string: iterating it would feed single
        # characters to the model.
        case_notes = [case_notes]

    structured_data = []

    for note in case_notes:
        extracted_entities = {}

        # Collecting entities by their type; append in place rather than
        # rebuilding the list for every entity.
        for entity in ner_pipeline(note):
            extracted_entities.setdefault(entity['entity_group'], []).append(entity['word'])

        structured_data.append(extracted_entities)

    return structured_data

# Run the extractor over every example note
extracted_data = extract_medical_entities(case_notes)

# Print one line per note, numbering the notes from 1
for note_number, data in enumerate(extracted_data, start=1):
    print(f"Case Note {note_number}: {data}")


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Case Note 1: {'AGE': ['45 year old'], 'SEX': ['woman'], 'DISEASE_DISORDER': ['CAD'], 'MEDICATION': ['aspirin']}
Case Note 2: {'AGE': ['32 year old'], 'SEX': ['man'], 'BIOLOGICAL_STRUCTURE': ['chest'], 'SIGN_SYMPTOM': ['pain'], 'MEDICATION': ['metoprolol']}
Case Note 3: {'AGE': ['60 year old'], 'SEX': ['male'], 'DISEASE_DISORDER': ['hypertension', 'diabetes'], 'MEDICATION': ['amlodipine']}
Case Note 4: {'AGE': ['28 year old'], 'SEX': ['female'], 'DISEASE_DISORDER': ['COVID-19'], 'MEDICATION': ['acetaminophen']}


In [4]:
# Function to create a structured summary
def summarize_extracted_entities(extracted_data):
    """Print a short, human-readable summary for each extracted case note.

    Args:
        extracted_data: Iterable of dicts mapping an entity type to a list of
            entity strings (one dict per case note).

    The entity types looked up are the labels that appear in this notebook's
    extraction output (AGE, DISEASE_DISORDER, SIGN_SYMPTOM, MEDICATION). The
    older key names PATIENT_AGE, DISEASE and SYMPTOM are still accepted so
    dicts built with those keys keep working. A field is printed only when it
    has at least one value.
    """
    # (line prefix, entity-type keys that feed the line)
    summary_fields = (
        (" - Patient Age: ", ('AGE', 'PATIENT_AGE')),
        (" - Diagnosed Disease(s): ", ('DISEASE_DISORDER', 'DISEASE')),
        (" - Symptoms: ", ('SIGN_SYMPTOM', 'SYMPTOM')),
        (" - Prescribed Medication(s): ", ('MEDICATION',)),
    )

    for i, data in enumerate(extracted_data):
        print(f"\nSummary of Case Note {i + 1}:")
        for prefix, keys in summary_fields:
            # Merge values from the current label and its legacy alias.
            values = [word for key in keys for word in data.get(key, [])]
            if values:
                print(f"{prefix}{', '.join(values)}")


In [5]:
# Single case note to analyse (edit this text to try a different note)
case_note = "45 year old woman diagnosed with CAD and prescribed aspirin."

# The extractor takes a collection of notes, so the single note is wrapped in a list
extracted_data = extract_medical_entities(case_notes=[case_note])

In [6]:
# Show the entities extracted from the single case note in the previous cell
extracted_data

[{'AGE': ['45 year old'],
  'SEX': ['woman'],
  'DISEASE_DISORDER': ['CAD'],
  'MEDICATION': ['aspirin']}]

In [1]:
## Advanced Medical Entity Extraction and Reporting System

In [2]:
import spacy

# spaCy English pipeline; preprocess_text() uses it for lemmas and
# stop-word / punctuation flags
nlp = spacy.load(name="en_core_web_sm")

# Function to clean and preprocess text
def preprocess_text(text, abbreviations=None):
    """Lemmatise a note, drop stop words and punctuation, expand abbreviations.

    Args:
        text: Raw case-note text.
        abbreviations: Optional mapping of abbreviation -> expanded form.
            Defaults to CAD and HTN. Pass an empty mapping to skip expansion.

    Returns:
        The remaining token lemmas joined by single spaces, with every
        stand-alone abbreviation replaced by its expansion.
    """
    import re  # local import keeps this cell self-contained

    if abbreviations is None:
        abbreviations = {"CAD": "Coronary Artery Disease", "HTN": "Hypertension"}

    doc = nlp(text)
    cleaned_text = " ".join(
        token.lemma_ for token in doc if not (token.is_stop or token.is_punct)
    )

    if not abbreviations:
        return cleaned_text

    # Replace medical abbreviations as whole tokens only: a plain str.replace
    # would also rewrite the abbreviation inside longer words (e.g. "CADASIL").
    # One regex pass also stops an expansion from being expanded again.
    alternatives = "|".join(
        re.escape(abbr) for abbr in sorted(abbreviations, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
    return pattern.sub(lambda match: abbreviations[match.group(0)], cleaned_text)


In [4]:
from transformers import pipeline

# Build the medical NER pipeline; "simple" aggregation yields grouped entity
# spans, each with an 'entity_group' label and its 'word' text
ner_pipe = pipeline(
    task="token-classification",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy="simple",
)

# Extract medical entities from the case notes
def extract_entities(case_note):
    """Run the NER pipeline on one case note and group entity text by type.

    Returns a dict mapping each entity type reported by the pipeline to the
    list of entity words of that type, in the order they were returned.
    """
    grouped = {}
    for hit in ner_pipe(case_note):
        # setdefault creates the per-type list the first time a type is seen
        grouped.setdefault(hit['entity_group'], []).append(hit['word'])
    return grouped

# Quick check of the extractor on a single sample note
case_note = "45 year old woman diagnosed with CAD and prescribed aspirin."
extracted = extract_entities(case_note=case_note)
print(extracted)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'AGE': ['45 year old'], 'SEX': ['woman'], 'DISEASE_DISORDER': ['CAD'], 'MEDICATION': ['aspirin']}


In [5]:
## With Data Analytics, Visualisation and Batch Processing

In [6]:
import spacy
from transformers import pipeline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# spaCy English pipeline used by preprocess_text() for lemmas and
# stop-word / punctuation flags
nlp = spacy.load(name="en_core_web_sm")

# Medical NER pipeline; "simple" aggregation yields grouped entity spans, each
# with an 'entity_group' label and its 'word' text
ner_pipe = pipeline(
    task="token-classification",
    model="Clinical-AI-Apollo/Medical-NER",
    aggregation_strategy="simple",
)

# Preprocess text
def preprocess_text(text, abbreviations=None):
    """Lemmatise a note, drop stop words and punctuation, expand abbreviations.

    Args:
        text: Raw case-note text.
        abbreviations: Optional mapping of abbreviation -> expanded form.
            Defaults to CAD and HTN. Pass an empty mapping to skip expansion.

    Returns:
        The remaining token lemmas joined by single spaces, with every
        stand-alone abbreviation replaced by its expansion.
    """
    import re  # local import keeps this cell self-contained

    if abbreviations is None:
        abbreviations = {"CAD": "Coronary Artery Disease", "HTN": "Hypertension"}

    doc = nlp(text)
    cleaned_text = " ".join(
        token.lemma_ for token in doc if not (token.is_stop or token.is_punct)
    )

    if not abbreviations:
        return cleaned_text

    # Expand abbreviations as whole tokens only: a plain str.replace would also
    # rewrite the abbreviation inside longer words (e.g. "CADASIL"). One regex
    # pass also stops an expansion from being expanded again.
    alternatives = "|".join(
        re.escape(abbr) for abbr in sorted(abbreviations, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
    return pattern.sub(lambda match: abbreviations[match.group(0)], cleaned_text)

# Batch process multiple case notes
def batch_process_case_notes(case_notes):
    """Preprocess every note and run the NER pipeline on the cleaned text.

    Returns a list holding, for each note in order, the raw result returned by
    ``ner_pipe`` for that note's preprocessed text.
    """
    return [ner_pipe(preprocess_text(note)) for note in case_notes]

# Extract and summarize entities
def extract_entities(case_notes, ner_pipeline=None):
    """Run NER on each case note and collect selected entity types.

    Args:
        case_notes: Iterable of case-note strings. A single string is treated
            as one note instead of being iterated character by character.
        ner_pipeline: Optional callable that takes one note and returns an
            iterable of dicts with 'entity_group' and 'word' keys. When
            omitted, the notebook-level ``ner_pipe`` is used.

    Returns:
        A list with one dict per note. Every dict has the keys 'AGE',
        'DISEASE', 'MEDICATION', 'SYMPTOM' and 'SEX', each mapping to a
        (possibly empty) list of entity words. Other entity types are ignored.
    """
    # The model output shown in this notebook uses DISEASE_DISORDER and
    # SIGN_SYMPTOM, so those labels are mapped onto the DISEASE / SYMPTOM
    # output keys. Filtering on the output key names alone left both empty.
    label_to_key = {
        'AGE': 'AGE',
        'SEX': 'SEX',
        'MEDICATION': 'MEDICATION',
        'DISEASE_DISORDER': 'DISEASE',
        'DISEASE': 'DISEASE',
        'SIGN_SYMPTOM': 'SYMPTOM',
        'SYMPTOM': 'SYMPTOM',
    }

    if ner_pipeline is None:
        # Resolved at call time so the default always follows the current `ner_pipe`.
        ner_pipeline = ner_pipe

    if isinstance(case_notes, str):
        # Guard against a bare string being iterated per character.
        case_notes = [case_notes]

    results = []
    for note in case_notes:
        # Fixed key set so every row has the same fields.
        entity_dict = {'AGE': [], 'DISEASE': [], 'MEDICATION': [], 'SYMPTOM': [], 'SEX': []}
        for entity in ner_pipeline(note):
            key = label_to_key.get(entity['entity_group'])
            if key is not None:
                entity_dict[key].append(entity['word'])
        results.append(entity_dict)
    return results

# Example case notes (you can add more here)
case_notes = [
    "45 year old woman diagnosed with CAD and prescribed aspirin.",
    "60 year old male with HTN, taking metoprolol and lisinopril.",
    "35 year old male presented with fever and cough, diagnosed with pneumonia.",
]

# Run entity extraction on the raw notes (one dict of entity lists per note)
extracted_data = extract_entities(case_notes)

# One DataFrame row per note, one column per entity type
df = pd.DataFrame(data=extracted_data)

# Render the table as the cell output
df


ModuleNotFoundError: No module named 'seaborn'

In [None]:
!conda install seaborn