In [10]:
import re
import pandas as pd

In [6]:
decision_making_patterns = {
    "PhysicianDecision": r"Physician making decision: ?(.+)"
}
rtpa_patterns = {
    "BodyWeight": r"Body Weight Estimated Actual\n  Kg ?(.+)",
    "IVrTPATotalDose": r"IV  rTPA  Total  Dose  ?(.+)",
    "IVrTPABolusDose": r"IV  rTPA  Bolus Dose  ?(.+)",
    "IVrTPAInfusionDose": r"IV  rTPA  Infusion Dose  ?(.+)",
    "BolusGivenTime": r"Bolus given ED (.+?)\s",
    "BolusGivenBP": r"Bolus given ED .+?\s(.+)",
    "InfusionStartedTime": r"Infusion started ED (.+?)\s",
    "InfusionStartedBP": r"Infusion started ED .+?\s(.+)",
    "CompletionLocation": r"Completion (.+?)\s",
    "CompletionTime": r"Completion .+?\s(.+)",
    "TransferredToWardBed": r"Transferred to \(Ward/Bed\) ?(.+)",
    "AdverseReaction": r"Adverse Reaction\? ?(.+)",
    "ForEndovascularTherapy": r"For Endovascular Therapy\? ?(.+)",
    "AmountOfIVTPAInfused": r"Amount of IV TPA infused: ?(.+)"
}
nihss_patterns = {
    "DateTime": r"Date / Time:  ?(.+)",
    "BloodPressure": r"Blood Pressure: ?(.+)",
    "LevelOfConsciousness": r"1a\. Level of Consciousness: ?(\d)",
    "LOCQuestions": r"1b\. LOC Questions: ?(\d)",
    "LOCCommands": r"1c\. LOC Commands: ?(\d)",
    "BestGaze": r"2\. Best Gaze: ?(\d)",
    "Visual": r"3\. Visual: ?(\d)",
    "FacialPalsy": r"4\. Facial Palsy: ?(\d)",
    "MotorArmLeft": r"5\. Motor Arm - Left: ?(\d)",
    "MotorArmRight": r"5\. Motor Arm - Right: ?(\d)",
    "MotorLegLeft": r"6\. Motor Leg - Left: ?(\d)",
    "MotorLegRight": r"6\. Motor Leg - Right: ?(\d)",
    "LimbAtaxia": r"7\. Limb ataxia: ?(\d)",
    "Sensory": r"8\. Sensory: ?(\d)",
    "BestLanguage": r"9\. Best language: ?(\d)",
    "Dysarthria": r"10 Dysarthria: ?(.+)",
    "ExtinctionAndInattention": r"11\. Extinction and Inattention: ?(\d)",
    "TotalScore": r"Total Score: ?(.+)"
}
nursing_input_patterns = {
        "ReviewingNeurologist": r"Reviewing Neurologist: ?(.*)",
        "NeurologistActivationDate": r"Neurologist Activation: Date:  ?(.+)",
        "NeurologistActivationTime": r"Time:  ?(.+hrs)",
        "NurseActivationTime": r"Nurse Activation Time:  ?(.+hrs)",
        "NurseReviewTime": r"Nurse Review Time:   ?(.+hrs)",
        "StrokeOnsetTime": r"Stroke Onset Time: (.+)",
        "PatientActivatedFrom": r"Patient Activated from: (.+)",
        "GCS_E": r"GCS: E: (\d)",
        "GCS_V": r"V: (\d)",
        "GCS_M": r"M: (\d)",
        "PowerOverRight_RUL": r"Power over Right: RUL: (\d)",
        "PowerOverRight_RLL": r"RLL: (\d)",
        "PowerOverLeft_LUL": r"Power over Left: LUL: (\d)",
        "PowerOverLeft_LLL": r"LLL: (\d)",
        "Pupils": r"Pupils: (.+)",
        "PremorbidMRS": r"Premorbid  MRS: (\d)",
        "ReasonForNoRTPA": r"Reason for NO rTPA: (.+)",
        "ReasonForNoEVT": r"Reason for NO EVT: (.+)",
        "TransferredTo": r"Transferred to: (.+)",
        "EDNurse": r"ED Nurse: ?(.*)"
    }

post_stroke_patterns = {
    "FollowUpDate": r"Date (\d{1,2}/\d{1,2}/\d{2})",
    "DurationPostStroke": r"Duration Post Stroke (\d+ months)",
    "NatureOfEmployment": r"Nature of Employment (.*)",
    "ReturnedToWork": r"Returned to work\? (Yes|No)",
    "CommunitySupportServices": r"Community Support Services (.*)",
    "Readmission": r"Readmission (.*)",
    "BaselineMRS": r"Baseline mRS (\d)",
    "DischargeMRS": r"Discharge mRS (\d)",
    "CurrentMRS": r"Current mRS (\d)",
    "CurrentClinicalFrailtyScale": r"Current Clinical Frailty Scale if applicable (\d)",
    "AssessedBy": r"Assessed by (.*)"
}

In [18]:
post_stroke_value = r"""
Post Stroke Follow Up\.br\Date 9/4/24\.br\Duration Post Stroke 3 months\.br\\.br\Nature of Employment Cleaner\.br\Returned to work? Yes\.br\\.br\Community Support Services Nil\.br\\.br\Readmission Nil\.br\\.br\Baseline mRS 0\.br\Discharge mRS 1\.br\\.br\Current mRS O\.br\Current Clinical Frailty Scale if applicable 4\.br\Assessed by Based on Med Onco & Dr Ng Shi Yang reivew with thanks\.br\\.br\\.br\\.br
"""


value = r'''
HASTEN Thrombolysis and HASTEN Endovascular\.br\\.br\Reviewing Neurologist: Dr.Benjamin Tan\.br\Neurologist Activation: Date: 02/08/23\br\Time: 19:43 hrs\.br\Nurse Activation Time: 19:44 hrs\br\Nurse Review Time: 19:51 hrs\.br\Stroke Onset Time: 1700 hrs\br\Patient Activated from: NUH\br\GCS: E: 2\br\V: 1\br\M: 3\br\Power over Right: RUL: 3\br\RLL: 2\br\Power over Left: LUL: 1\br\LLL: 1\br\Pupils: reactive\br\Premorbid MRS: 0\.br\ED Nurse: NA\.br\\\.br\\.br\NIHSS Assessment\.br\\.br\Date/Time: 02/08/23@1951hrs\br\rTPA Onset to Needle Duration:\br\Blood Pressure: () 177/101 \br\1a. Level of Consciousness: 1\.br\1b. LOC Questions: 2\.br\1c. LOC Commands: 2\.br\2. Best Gaze: 2\.br\3. Visual: 21.br\4. Facial Palsy: 2\.br\5. Motor Arm- Left: 3\br\5. Motor Arm - Right: 1\.br\6. Motor Leg - Left: 3\.br\6. Motor Leg - Right: 3\.br\7. Limb ataxia: 0\.br\8. Sensory: 0\.br\9. Best language: 3\.br\10 Dysarthria: 2\br\11. Extinction and Inattention: 0\br\Total Score: 26\.br\\.br\\br\Body Weight Estimated\.br\ 50Kg\br\V TPA Total Dose 30 mg (0.6mg/kg)\br\IV ITPA Bolus Dose 4.5mg\.br\IV rTPA Infusion Dose 25.5mg\br\\br\V TPA Location Time BP Reading\.br\Bolus given Angio suit 20:37 hrs 170/101mmhg\.br\\.br\Infusion started Angio suit 21:00 NA\br\Completion Angio suit 22:00 NA\br\Transferred to (Ward/Bed) WD 26/15 23:00 133/36mmhg\.br\Adverse Reaction? No\.br\For Endovascular Therapy? Yes\.br\Amount of IV TPA infused: NA\.br\Amount of IV TPA to top up: NA Handed over to SN/SSN: NA\br\\.br\\.br\\.br\\.br\EVT- Endovascular Therapy WORKSHEET\.br\Hospital: NUH\.br\Neurology Consultant: Dr Benjamin Tan\br\Interventionist: Dr Cunli\.br\Anaesthetist: Dr.Nilanthi\br\Stroke Onset: Date: 02/08/23\.br\Time: 1700hrs\.br\EMD Arrival: Date: 02/08/01 hr Time: 1007hrel hOT Date: 02/09/2001 hr Time: 1024 hrs\.br\Angiosuite Arrival: Date: 02/08/23\.br\Time: 2032 hrs\.br\Transferred to: Ward: 26/16\br\Date: 02/08/23\.br\Time: 2300 hrs\.br\\br\EVT-Endovascular Therapy PROCEDURE DETAILS\.br\Groin Puncture Time: 2106 hrs\.br\Recanalization Time: 2225 hrs\.br\IA rTPA used? If so, state dosage: Yes 3mg\.br\Anaesthesia: General Anaesthesia\.br\Device Used: Solitire/Trevo NXT/Pneumbra suction\.br\Baseline TICI Score (Dr to advise): 0\.br\Post EVT TICI Score (Dr to advise): 2B\.br\Any Complications? (Dr to advise): No\.br\\\br\\.br\Blood Pressure Monitoring Chart\.br\Time Vital Signs (BP/HR)\.br\If Applicable Medications Dosage Remarks\.br\(If any)\.br\2034hrs 170/101 GTN Patch 10mg Applied in angio suit\.br\2045hrs 208/117 IV Labetalol 15mg Given in angio suit\.br\\.br\Taken over case from SSN Khiu in ED.\.br\\br\Initially planned for EVT as patient's family reported that patient had been\.br\compliant with Apixaban, however, upon clarification with patient's daughter,\.br\patient had ran out of apixaban for the last 2 days, and was not taking it.\.br\On the way to angio suit Dr. Benjamin decided to give IV rTPA Dose calculated\.br\together with SSN Khiu in angio suit.\.br\After given bolus dose noted pt's BP high 208/117.IV Labetalol 15 mg served\.br\Decision made by Dr.Benjamin to intubate pt and start infusion. Infusion started\.br\during EVT so unable to get BP\br\\\.br\Case handed over to SN Hong Ming.\.br\\.br\Monitoring of vitals:\br\15 mts x 2hrs followed by 30 mts x 6 hrs then hourly\.br\\.br\Post Removal Femoral Sheath Care:\.br\1) Monitor puncture site for hematoma/bleeding hourly for 8 hours. Manual\.br\compression if necessary.\.br\2) Monitor pulse, BP every 15min for 1hour, every 30min for next 3hrs and 1\.br\hourly for subsequent 4 hours\.br\3) Rest in bed x 6-8 hours\.br\\.br\\.br\
'''

In [37]:
data = {
    'Observation_Value': [
        post_stroke_value,
        value,
        'Example text 3',
        # Add more example texts here
    ]
}

# Create the Output_Table DataFrame
Output_Table = pd.DataFrame(data)

In [38]:
post_stroke_dict = extract_patterns(post_stroke_value, post_stroke_patterns)
decison_making_dict = extract_patterns(value, decision_making_patterns)
rtpa_dict = extract_patterns(value, rtpa_patterns)
nihss_dict = extract_patterns(value, nihss_patterns)
nursing_input_dict = extract_patterns(value, nursing_input_patterns)

def extract_patterns(text, patterns):
    extracted = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text, re.MULTILINE)
        if match:
            extracted[key] = match.group(1)
    return extracted

# combined_dict = {**post_stroke_dict, **decison_making_dict, **rtpa_dict, **nihss_dict, **nursing_input_dict}
# df = pd.DataFrame([combined_dict])

# print(df)

# combined_patterns = {**nursing_input_patterns, **nihss_patterns, **rtpa_patterns, **decision_making_patterns}
# for pattern in combined_patterns.values(): #General Note
#     general_note_text = re.sub(pattern, '', value, flags=re.MULTILINE)


extracted_df = pd.DataFrame(data=Output_Table)

def apply_extraction(value):
    post_stroke_dict = extract_patterns(value, post_stroke_patterns)
    decision_making_dict = extract_patterns(value, decision_making_patterns)
    rtpa_dict = extract_patterns(value, rtpa_patterns)
    nihss_dict = extract_patterns(value, nihss_patterns)
    nursing_input_dict = extract_patterns(value, nursing_input_patterns)

    combined_patterns = {**nursing_input_patterns, **nihss_patterns, **rtpa_patterns, **decision_making_patterns}
    for pattern in combined_patterns.values(): #General Note
        general_note_text = re.sub(pattern, '', value, flags=re.MULTILINE)

    nursing_input_dict["GeneralNotes"] = general_note_text.strip()
    combined_dict = {**post_stroke_dict, **decision_making_dict, **rtpa_dict, **nihss_dict, **nursing_input_dict}

    return combined_dict

# extracted_df = df["Observation_Value"].apply(lambda x: pd.Series(apply_extraction(x)))
# extracted_df = pd.DataFrame([apply_extraction(value)])
# extracted_df
# df_combined = df.join(extracted_df)
extracted_results = extracted_df['Observation_Value'].apply(lambda x: pd.Series(apply_extraction(x)))
extracted_df = pd.concat([extracted_df, extracted_results], axis=1)


Output_Table = extracted_df


In [39]:

extracted_df

Unnamed: 0,Observation_Value,FollowUpDate,DurationPostStroke,NatureOfEmployment,ReturnedToWork,CommunitySupportServices,Readmission,BaselineMRS,DischargeMRS,CurrentClinicalFrailtyScale,...,GCS_E,GCS_V,GCS_M,PowerOverRight_RUL,PowerOverRight_RLL,PowerOverLeft_LUL,PowerOverLeft_LLL,Pupils,TransferredTo,EDNurse
0,\nPost Stroke Follow Up\.br\Date 9/4/24\.br\Du...,9/4/24,3 months,Cleaner\.br\Returned to work? Yes\.br\\.br\Com...,Yes,Nil\.br\\.br\Readmission Nil\.br\\.br\Baseline...,Nil\.br\\.br\Baseline mRS 0\.br\Discharge mRS ...,0.0,1.0,4.0,...,,,,,,,,,,
1,\nHASTEN Thrombolysis and HASTEN Endovascular\...,,,,,,,,,,...,2.0,1.0,3.0,3.0,2.0,1.0,1.0,reactive\br\Premorbid MRS: 0\.br\ED Nurse: NA\...,Ward: 26/16\br\Date: 02/08/23\.br\Time: 2300 h...,NA\.br\\\.br\\.br\NIHSS Assessment\.br\\.br\Da...
2,Example text 3,,,,,,,,,,...,,,,,,,,,,


In [None]:
    decision_making_patterns = {
        "PhysicianDecision": r"Physician making decision: ?(.+)"
    }
    rtpa_patterns = {
        "BodyWeight": r"Body Weight Estimated Actual\n  Kg ?(.+)",
        "IVrTPATotalDose": r"IV  rTPA  Total  Dose  ?(.+)",
        "IVrTPABolusDose": r"IV  rTPA  Bolus Dose  ?(.+)",
        "IVrTPAInfusionDose": r"IV  rTPA  Infusion Dose  ?(.+)",
        "BolusGivenTime": r"Bolus given ED (.+?)\s",
        "BolusGivenBP": r"Bolus given ED .+?\s(.+)",
        "InfusionStartedTime": r"Infusion started ED (.+?)\s",
        "InfusionStartedBP": r"Infusion started ED .+?\s(.+)",
        "CompletionLocation": r"Completion (.+?)\s",
        "CompletionTime": r"Completion .+?\s(.+)",
        "TransferredToWardBed": r"Transferred to \(Ward/Bed\) ?(.+)",
        "AdverseReaction": r"Adverse Reaction\? ?(.+)",
        "ForEndovascularTherapy": r"For Endovascular Therapy\? ?(.+)",
        "AmountOfIVTPAInfused": r"Amount of IV TPA infused: ?(.+)"
    }
    nihss_patterns = {
        "DateTime": r"Date / Time:  ?(.+)",
        "BloodPressure": r"Blood Pressure: ?(.+)",
        "LevelOfConsciousness": r"1a\. Level of Consciousness: ?(\d)",
        "LOCQuestions": r"1b\. LOC Questions: ?(\d)",
        "LOCCommands": r"1c\. LOC Commands: ?(\d)",
        "BestGaze": r"2\. Best Gaze: ?(\d)",
        "Visual": r"3\. Visual: ?(\d)",
        "FacialPalsy": r"4\. Facial Palsy: ?(\d)",
        "MotorArmLeft": r"5\. Motor Arm - Left: ?(\d)",
        "MotorArmRight": r"5\. Motor Arm - Right: ?(\d)",
        "MotorLegLeft": r"6\. Motor Leg - Left: ?(\d)",
        "MotorLegRight": r"6\. Motor Leg - Right: ?(\d)",
        "LimbAtaxia": r"7\. Limb ataxia: ?(\d)",
        "Sensory": r"8\. Sensory: ?(\d)",
        "BestLanguage": r"9\. Best language: ?(\d)",
        "Dysarthria": r"10 Dysarthria: ?(.+)",
        "ExtinctionAndInattention": r"11\. Extinction and Inattention: ?(\d)",
        "TotalScore": r"Total Score: ?(.+)"
    }
    nursing_input_patterns = {
            "ReviewingNeurologist": r"Reviewing Neurologist: ?(.*)",
            "NeurologistActivationDate": r"Neurologist Activation: Date:  ?(.+)",
            "NeurologistActivationTime": r"Time:  ?(.+hrs)",
            "NurseActivationTime": r"Nurse Activation Time:  ?(.+hrs)",
            "NurseReviewTime": r"Nurse Review Time:   ?(.+hrs)",
            "StrokeOnsetTime": r"Stroke Onset Time: (.+)",
            "PatientActivatedFrom": r"Patient Activated from: (.+)",
            "GCS_E": r"GCS: E: (\d)",
            "GCS_V": r"V: (\d)",
            "GCS_M": r"M: (\d)",
            "PowerOverRight_RUL": r"Power over Right: RUL: (\d)",
            "PowerOverRight_RLL": r"RLL: (\d)",
            "PowerOverLeft_LUL": r"Power over Left: LUL: (\d)",
            "PowerOverLeft_LLL": r"LLL: (\d)",
            "Pupils": r"Pupils: (.+)",
            "PremorbidMRS": r"Premorbid  MRS: (\d)",
            "ReasonForNoRTPA": r"Reason for NO rTPA: (.+)",
            "ReasonForNoEVT": r"Reason for NO EVT: (.+)",
            "TransferredTo": r"Transferred to: (.+)",
            "EDNurse": r"ED Nurse: ?(.*)"
        }

    post_stroke_patterns = {
        "FollowUpDate": r"Date (\d{1,2}/\d{1,2}/\d{2})",
        "DurationPostStroke": r"Duration Post Stroke (\d+ months)",
        "NatureOfEmployment": r"Nature of Employment (.*)",
        "ReturnedToWork": r"Returned to work\? (Yes|No)",
        "CommunitySupportServices": r"Community Support Services (.*)",
        "Readmission": r"Readmission (.*)",
        "BaselineMRS": r"Baseline mRS (\d)",
        "DischargeMRS": r"Discharge mRS (\d)",
        "CurrentMRS": r"Current mRS (\d)",
        "CurrentClinicalFrailtyScale": r"Current Clinical Frailty Scale if applicable (\d)",
        "AssessedBy": r"Assessed by (.*)"
    }

    def extract_patterns(text, patterns):
        extracted = {}
        for key, pattern in patterns.items():
            match = re.search(pattern, text, re.MULTILINE)
            if match:
                extracted[key] = match.group(1)
        return extracted


    def apply_extraction(value):
        post_stroke_dict = extract_patterns(value, post_stroke_patterns)
        decision_making_dict = extract_patterns(value, decision_making_patterns)
        rtpa_dict = extract_patterns(value, rtpa_patterns)
        nihss_dict = extract_patterns(value, nihss_patterns)
        nursing_input_dict = extract_patterns(value, nursing_input_patterns)

        combined_patterns = {**nursing_input_patterns, **nihss_patterns, **rtpa_patterns, **decision_making_patterns}
        for pattern in combined_patterns.values(): #General Note
            general_note_text = re.sub(pattern, '', value, flags=re.MULTILINE)

        nursing_input_dict["GeneralNotes"] = general_note_text.strip()
        combined_dict = {**post_stroke_dict, **decision_making_dict, **rtpa_dict, **nihss_dict, **nursing_input_dict}

        return combined_dict

    extracted_df = pd.DataFrame(data=Output_Table)

    extracted_results = extracted_df['Observation_Value'].apply(lambda x: pd.Series(apply_extraction(x)))
    extracted_df = pd.concat([extracted_df, extracted_results], axis=1)


    Output_Table = extracted_df
