In [None]:
# Install required packages (uncomment the two commands below to run them)
# !pip install transformers spacy gliner
# !python -m spacy download en_core_web_sm

In [1]:
from gliner import GLiNER
from transformers import pipeline
from typing import Dict, List, Any
import spacy

class MedicalNLPPipeline:
    """Intent classification and entity extraction for short clinical texts.

    Three models are loaded in ``__init__``: a GLiNER model for entity
    extraction, a Hugging Face zero-shot classifier for intent detection and
    a spaCy pipeline whose ``Doc`` is handed to the temporal extractor.
    """

    # GLiNER label -> bucket in the dict returned by ``_structure_entities``.
    # Labels missing from this table land in the "other" bucket.
    _CATEGORY_BY_LABEL = {
        "patient": "patient_info",
        "doctor": "patient_info",
        "medication": "medical_info",
        "dosage": "medical_info",
        "frequency": "medical_info",
        "condition": "medical_info",
        "symptom": "medical_info",
        "procedure": "medical_info",
        "test": "medical_info",
        "date": "temporal_info",
        "time": "temporal_info",
        "duration": "temporal_info",
        "facility": "location_info",
        "department": "location_info",
        "vital_sign": "medical_info",
        "lab_result": "medical_info",
    }

    # GLiNER label (singular) -> key (plural) in the dict returned by
    # ``_extract_temporal_info``. The labels and the result keys differ, so
    # the translation has to be explicit.
    _TEMPORAL_KEY_BY_LABEL = {
        "date": "dates",
        "time": "times",
        "duration": "durations",
        "frequency": "frequencies",
    }

    # Substrings whose presence in the lower-cased text is reported under
    # "patterns" by ``_extract_temporal_info``.
    _FREQUENCY_PATTERNS = (
        "daily", "twice", "weekly", "monthly",
        "every", "times a day", "hours",
    )

    def __init__(self):
        """Load the three models and define the entity / intent label sets."""
        # Imported locally under an alias so construction keeps working even
        # when the module-level name ``pipeline`` has been rebound (the
        # notebook later assigns an instance of this class to that name).
        from transformers import pipeline as hf_pipeline

        # Initialize GLiNER model
        self.model = GLiNER.from_pretrained("urchade/gliner_base")

        # Initialize zero-shot classifier
        self.zero_shot = hf_pipeline("zero-shot-classification",
                                     model="facebook/bart-large-mnli")

        # Load spaCy for additional linguistic features
        self.nlp = spacy.load("en_core_web_sm")

        # Define medical entity types
        self.medical_entities = [
            "patient",
            "doctor",
            "medication",
            "dosage",
            "frequency",
            "condition",
            "symptom",
            "procedure",
            "test",
            "date",
            "time",
            "duration",
            "facility",
            "department",
            "vital_sign",
            "lab_result"
        ]

        # Define intent labels
        self.intent_labels = [
            "add_patient",
            "assign_medication",
            "schedule_followup",
            "update_record",
            "query_info",
            "check_vitals",
            "order_test",
            "review_results"
        ]

    def process_text(self, text: str) -> Dict[str, Any]:
        """Run one text through intent classification and entity extraction.

        Returns a dict with four keys:
        ``intent`` (see ``_classify_intent``), ``entities`` (entities grouped
        by category, see ``_structure_entities``), ``temporal_info`` (see
        ``_extract_temporal_info``) and ``raw_entities`` (the unmodified
        GLiNER output for ``self.medical_entities``).
        """
        # Get intent using zero-shot classification
        intent_result = self._classify_intent(text)

        # Get entities using GLiNER
        entities = self.model.predict_entities(
            text,
            self.medical_entities
        )

        # Process with spaCy for additional linguistic features
        doc = self.nlp(text)

        # Extract temporal information
        temporal_info = self._extract_temporal_info(doc)

        # Structure the results
        structured_entities = self._structure_entities(entities)

        return {
            "intent": intent_result,
            "entities": structured_entities,
            "temporal_info": temporal_info,
            "raw_entities": entities
        }

    def _classify_intent(self, text: str) -> Dict[str, Any]:
        """Classify the intent of ``text`` against ``self.intent_labels``.

        Returns ``primary_intent`` / ``confidence`` taken from the first
        entry of the classifier output, plus ``all_intents`` listing every
        label with its score in the order the classifier returned them.
        """
        # Each label is substituted into this template to form the hypothesis.
        hypothesis_template = "This is a request to {}."

        # multi_label=True: labels are scored independently of one another.
        result = self.zero_shot(
            text,
            self.intent_labels,
            hypothesis_template=hypothesis_template,
            multi_label=True
        )

        # Structure the results
        return {
            "primary_intent": result["labels"][0],
            "confidence": result["scores"][0],
            "all_intents": [
                {"intent": label, "score": score}
                for label, score in zip(result["labels"], result["scores"])
            ]
        }

    def _structure_entities(self, entities: List[Dict]) -> Dict[str, List[Dict]]:
        """Group extracted entities by category.

        Each entity dict must provide ``"text"`` and ``"label"``. The result
        always contains the five buckets ``patient_info``, ``medical_info``,
        ``temporal_info``, ``location_info`` and ``other``; every entry is a
        dict with ``text``, ``type`` and ``span``.

        ``span`` is the entity's own ``"span"`` value when present; otherwise
        it is ``(start, end)`` built from the ``"start"`` / ``"end"`` offsets
        (the keys GLiNER is expected to emit), or ``None`` if neither form is
        available.
        """
        structured = {
            "patient_info": [],
            "medical_info": [],
            "temporal_info": [],
            "location_info": [],
            "other": []
        }

        for entity in entities:
            category = self._CATEGORY_BY_LABEL.get(entity["label"], "other")

            span = entity.get("span")
            if span is None and "start" in entity and "end" in entity:
                span = (entity["start"], entity["end"])

            structured[category].append({
                "text": entity["text"],
                "type": entity["label"],
                "span": span
            })

        return structured

    def _extract_temporal_info(self, doc) -> Dict[str, Any]:
        """Extract temporal expressions from ``doc.text``.

        ``doc`` only needs a ``text`` attribute (a spaCy ``Doc`` in normal
        use). Returns a dict with the keys ``dates``, ``times``,
        ``durations`` and ``frequencies`` (entity texts found by GLiNER) and
        ``patterns`` (the frequency keywords that occur as substrings of the
        lower-cased text, in the order they are listed on the class).
        """
        temporal_info = {
            "dates": [],
            "times": [],
            "durations": [],
            "frequencies": [],
            "patterns": []
        }

        # Use GLiNER for temporal entities
        temporal_entities = self.model.predict_entities(
            doc.text,
            list(self._TEMPORAL_KEY_BY_LABEL)
        )

        for entity in temporal_entities:
            # Translate the singular GLiNER label into the plural result key;
            # comparing the raw label against the result keys never matches.
            key = self._TEMPORAL_KEY_BY_LABEL.get(entity["label"])
            if key is not None:
                temporal_info[key].append(entity["text"])

        # Extract medication frequency patterns (plain substring match)
        text_lower = doc.text.lower()
        temporal_info["patterns"] = [
            pattern for pattern in self._FREQUENCY_PATTERNS
            if pattern in text_lower
        ]

        return temporal_info

    def process_conversation(self, conversation: List[str]) -> List[Dict[str, Any]]:
        """Process a list of utterances in order, carrying context forward.

        Each utterance is run through ``process_text``; the result gains a
        ``"context"`` key holding a shallow copy of the context accumulated
        up to and including that utterance.
        """
        results = []
        context = {}

        for utterance in conversation:
            # Process current utterance
            current_result = self.process_text(utterance)

            # Update context
            self._update_context(context, current_result)

            # Snapshot the context so later utterances do not alter the keys
            # recorded for this one.
            current_result["context"] = context.copy()

            results.append(current_result)

        return results

    def _update_context(self, context: Dict[str, Any], current_result: Dict[str, Any]) -> None:
        """Update ``context`` in place from one ``process_text`` result.

        Keys are only overwritten when the current result has something new:
        ``current_patient`` (first ``patient_info`` entity),
        ``current_medical_info`` (all ``medical_info`` entities) and
        ``last_mentioned_date`` (first extracted date).
        """
        entities = current_result["entities"]

        # Update patient context
        if entities["patient_info"]:
            context["current_patient"] = entities["patient_info"][0]

        # Update medical context
        if entities["medical_info"]:
            context["current_medical_info"] = entities["medical_info"]

        # Update temporal context
        if current_result["temporal_info"]["dates"]:
            context["last_mentioned_date"] = current_result["temporal_info"]["dates"][0]

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Initialize the pipeline
# NOTE: this assignment rebinds the notebook-level name `pipeline`, which the
# import cell bound to transformers' `pipeline` factory; from here on the name
# refers to the MedicalNLPPipeline instance used by the cells below.
pipeline = MedicalNLPPipeline()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 4 files: 100%|██████████| 4/4 [03:59<00:00, 59.98s/it]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [None]:
# Demo: run a single clinical note through the pipeline and print each
# section of the result (intent, grouped entities, temporal information).
test_text = (
    "Dr. Smith prescribed 500mg of amoxicillin three times daily for patient John Doe's bacterial infection. "
    "Follow-up appointment scheduled for next Tuesday at 2 PM."
)
result = pipeline.process_text(test_text)

print("\nProcessed Text Result:")
for heading, field in (
    ("Intent:", "intent"),
    ("\nEntities:", "entities"),
    ("\nTemporal Info:", "temporal_info"),
):
    print(heading, result[field])

In [None]:
# Demo: feed a three-utterance clinical exchange through the pipeline and
# print, per utterance, the intent, the grouped entities and the context
# accumulated so far.
conversation = [
    "Patient John Doe is complaining of severe headache for the past 3 days.",
    "Vital signs show BP 120/80, temperature 38.5°C.",
    "Prescribe ibuprofen 400mg every 6 hours for pain relief.",
]
conversation_results = pipeline.process_conversation(conversation)

print("\nConversation Processing Results:")
for i, result in enumerate(conversation_results):
    print(f"\nUtterance {i+1}:")
    for heading, field in (
        ("Intent:", "intent"),
        ("Entities:", "entities"),
        ("Context:", "context"),
    ):
        print(heading, result[field])