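# Dev: Extract

Development notebook for prototyping section extraction (title, history, technique) from radiology report text with `radreportparser`.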

In [1]:
import sys
import os
import re
from pathlib import Path
import pandas as pd
from typing import List, Dict, Tuple, Optional

from IPython.display import display_markdown, Image

sys.path.insert(1, str(Path.cwd().parent)) 

from report.ex_snip import snip_hx_plain
import radreportparser as rrp
from radreportparser import (RadReportExtractor, 
                             SectionConfig,
                             extract_section)

In [2]:
# Extractor built with no constructor arguments; its `section_configs` mapping
# is displayed and overridden in the "Parse History" cells further down.
extractor = RadReportExtractor()

In [3]:
# Sample report snippets (plain strings) used as inputs to the extractor calls
# in this notebook, keyed by formatting variant.
snip_hx = {
    # "basic": each section header ("History:", "Technique:", "Comparison:")
    # starts on its own line.
    "basic": """
MRI OF THE BRAIN AND ORBITS 

History: MALT lymphoma at the right orbit S/P chemotherapy was sent to follow-up. 

Technique: 
Sagittal SE T1W 
3D FSE FLAIR FS +Gd with MPR 

Comparison: Limited comparison to the MRI brain on 4-6-2022 
""", 
    # "no_newline": the whole report sits on a single line, with sections
    # separated only by runs of spaces; uses "Indication:" instead of "History:".
    "no_newline": """
MDCT OF THE NECK   Indication: A 58-year-old man, known case of nasopharyngeal cancer (T4N2M1) with lung metastasis   Technique: Post contrast enhanced axial scan of the neck using 1.0 mm slice thickness with 3.0 mm axial, coronal and sagittal reformation   Comparison: The prior CT of the neck taken on October 4, 2015   Findings:  	The current study reveals slight shrinkage but no significant change in extension of the preexisting ill-defined hypodense lesion with partial mild enhancement in some portions, epicenter at the left-sided nasopharynx. Extension of the lesion is described as follow;  ... Superior: No interval change of extension into the left foramen Ovale and left inferior orbital fissure. No significant change of few enhancing foci in the left inferior temporal lobe.  ... Anterior: Involvement of the left infratemporal fossa, left PPF, left retroantral space, and left masticator space. Erosion and sclerotic change of the left pterygoid bone and posterior wall of the left maxillary sinus, unchanged.
""",
    # "markdown": title and section headers are wrapped in bold markers (**...**)
    # and the headers are upper-case.
    "markdown": """
**EMERGENCY MDCT OF THE BRAIN**

**HISTORY:** A 25-year-old female presents with dizziness and brief loss of consciousness following a minor head impact 3 hours ago. Physical examination reveals no focal neurological deficits. The CT scan was requested to rule out intracranial hemorrhage.

**TECHNIQUES:** Axial helical scan of the brain was performed using 2.5-mm (brain) and 1.25-mm (bone) slice thickness with coronal and sagittal reconstructions.  

**COMPARISON:** None.
"""
}

## `KeyWord` Enum

In [4]:
from enum import Enum

class KeyWord(Enum):
    """Keyword groups for report sections.

    Each member's value is a list of strings that can introduce the section.
    """

    # Plain header, bold-markdown header (asterisks backslash-escaped), and
    # the alternative "indication" wording.
    HISTORY = [
        "history:",
        "\\*\\*history:\\*\\*",
        "indication",
    ]

In [5]:
# Show the list stored as the HISTORY member's value.
KeyWord.HISTORY.value

['history:', '\\*\\*history:\\*\\*', 'indication']

## Parse History

In [6]:
# Replace the extractor's "history" entry in place with a custom config.
# NOTE(review): the only start key is "Hx", which does not occur in any of the
# `snip_hx` samples (they use "History:", "Indication:" and "**HISTORY:**").
# The recorded `extract_history` outputs further down are '' -- presumably for
# this reason; confirm whether the override is intentional.
# NOTE(review): this mutates `extractor` state, so every later cell depends on
# this cell having been run first.
extractor.section_configs["history"] = SectionConfig(
    name="history", start_keys=["Hx"], 
    next_section_keys=["technique", "comparison"]
)

# Display the whole mapping to inspect the overridden entry next to the others.
extractor.section_configs

{'title': SectionConfig(name='title', start_keys=None, next_section_keys=['\\W*history\\W*', '\\W*indication(s?)\\W*', '\\W*clinical\\s+history\\W*', '\\W*clinical\\s+indication(s?)\\W*', '\\W*technique(s?)\\W*', '\\W*comparison(s?)\\W*', '\\W*Finding(s?)\\W*', '\\W*Impression(s?)\\W*']),
 'history': SectionConfig(name='history', start_keys=['Hx'], next_section_keys=['technique', 'comparison']),
 'technique': SectionConfig(name='technique', start_keys=['\\W*technique(s?)\\W*'], next_section_keys=['\\W*comparison(s?)\\W*', '\\W*Finding(s?)\\W*', '\\W*Impression(s?)\\W*']),
 'comparison': SectionConfig(name='comparison', start_keys=['\\W*comparison(s?)\\W*'], next_section_keys=['\\W*technique(s?)\\W*', '\\W*Finding(s?)\\W*', '\\W*Impression(s?)\\W*']),
 'findings': SectionConfig(name='findings', start_keys=['\\W*Finding(s?)\\W*'], next_section_keys=['\\W*Impression(s?)\\W*', '\\W*Report Severity\\W*', '\\W*Finalized Datetime\\W*', '\\W*Preliminary Datetime\\W*']),
 'impression': SectionC

In [7]:
# History section of the "basic" sample; the recorded output is ''.
# include_key=True presumably keeps the section header in the result -- TODO confirm.
extractor.extract_history(snip_hx["basic"], include_key=True)

''

In [8]:

# NOTE(review): a cell displays only its last expression, so the result for the
# "no_newline" sample is computed and then discarded; only the "markdown"
# result is shown. Split into two cells (or display both) to see each result.
extractor.extract_history(snip_hx["no_newline"], include_key=True)


extractor.extract_history(snip_hx["markdown"], include_key=True)

''

In [9]:
# Technique section of the "markdown" sample with include_key=False; the
# recorded output holds the body text only, without the "**TECHNIQUES:**"
# header or the trailing period.
extractor.extract_technique(snip_hx["markdown"], include_key=False)

'Axial helical scan of the brain was performed using 2.5-mm (brain) and 1.25-mm (bone) slice thickness with coronal and sagittal reconstructions'

In [14]:
# Title of the "basic" sample; the recorded output is the first line of the
# report without its trailing space.
# NOTE(review): this cell's execution count (14) is out of sequence with its
# neighbours; re-run with Restart & Run All before relying on the output.
extractor.extract_title(snip_hx["basic"], include_key=True)

'MRI OF THE BRAIN AND ORBITS'

## Config

In [11]:
from dataclasses import dataclass

@dataclass
class SectionConfig:
    """Configuration for a radiology report section.

    Parameters
    ----------
    name : str
        Name of the section (e.g., "history", "findings")
    start_keys : list[str] | None
        Keys that mark the start of this section. ``None`` is accepted for a
        section that has no start key of its own (the extractor's "title"
        config is displayed with ``start_keys=None``).
    next_section_keys : list[str] | None, default None
        Keys that mark the start of the next sections

    Notes
    -----
    Defining this class rebinds the name ``SectionConfig`` that the first cell
    imports from ``radreportparser``; cells run after this one use this local
    prototype instead of the library class.
    """
    name: str
    # Spelled with Optional[...] (not `List[str] | None`) so the annotation
    # evaluates on Python versions before 3.10 and matches the field below.
    start_keys: Optional[List[str]]
    next_section_keys: Optional[List[str]] = None

In [12]:
# Build a history config with every field passed by keyword; the cell displays
# the dataclass repr.
SectionConfig(
    name="history",
    start_keys=["history"],
    next_section_keys=["technique", "comparison", "findings", "impression"],
)

SectionConfig(name='history', start_keys=['history'], next_section_keys=['technique', 'comparison', 'findings', 'impression'])

In [13]:
# Mapping from section name to its SectionConfig.
section_configs = {
    "history": SectionConfig(
        name="history",
        start_keys=["history"],
        next_section_keys=["technique", "comparison", "findings", "impression"],
    ),
}

# Look up the "history" entry and show its start keys.
section_configs.get("history").start_keys

['history']