In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

In [56]:
file_northern_light = "../../prepare/Northern Light Health WPV report by month August 2024 through January 2025.xlsx"
file_wpv_tool = "../../prepare/Workplace Violence Data Collection Tool January to October 2024.xlsx"

# ---------- Northern Light Health ----------
map_northern_light = {
    "Date of Injury": "event_time",
    "Physical Injury Incurred? Level of Care": "physical_injury_level",
    "Type of Person Affected": "perpetrator_type",
    "Occupational category of Person Impacted": "victim_profession",
    "Type of Violence": "violence_type",
    "Primary Assault Description ": "primary_assault_desc",
    "Assault Description": "assault_desc",
    "Immediate Actions (Reported)": "response_action",
    "Contributing Factors (Reported)": "contributing_factors",
    "Nature of Injury": "nature_of_injury",  
    "Location of Injury on Body": "injury_location", 
    "Emotional and/or Psychological Impact": "emotional_impact",
    "Care/Service Area": "service_area",
    "Department": "department",
    "OSHA Recordable?": "osha_recordable",
    "Law Enforcement Contacted": "law_enforcement_contacted",
    "Charges Pressed": "charges_pressed",
    "Severity of Assault": "severity"
}

# ---------- Workplace Violence Data Collection Tool ----------
map_wpv_tool = {
    "Start time": "event_time",
    "Occupational Category of Person Impacted:": "victim_profession",
    "Name of Department / Location Where Incident Occurred": "department",
    "Role of the Agressor": "perpetrator_type",
    "Type of Violence": "violence_type",
    "Description of the Assault (Select All that Apply)": "assault_desc",
    "Severity of Assault": "severity",
    "Emotional and/or Psychological Impact": "emotional_impact",
    "Response Action(s) Taken (Select all that Apply)": "response_action",
}

final_cols = [
    "event_time",
    "victim_profession",
    "department",
    "perpetrator_type",
    "violence_type",
    "severity",
    "emotional_impact",
    "assault_desc",
    "response_action",
    "contributing_factors",       
    "physical_injury_level",      
    "osha_recordable",            
    "charges_pressed",            
    "law_enforcement_contacted",  
    "nature_of_injury",          
    "injury_location",            
]

df_merged = pd.DataFrame()

# Clean Northern Light Health WPV
df_nlh = pd.DataFrame()
try:
    xl_nl = pd.ExcelFile(file_northern_light)
    sheets_nl = xl_nl.sheet_names
    print(">>> Northern Light Health WPV Sheets:", sheets_nl)
except Exception as e:
    print("read Northern Light Health WPV fail:", e)
    sheets_nl = []

for sheet in sheets_nl:
    try:
        df_temp = pd.read_excel(file_northern_light, sheet_name=sheet)
    except Exception as e:
        print(f" - read Sheet [{sheet}] fail: {e}")
        continue

    df_temp = df_temp.loc[:, ~df_temp.columns.duplicated()].copy()

    df_temp.rename(columns=map_northern_light, inplace=True)

    keep_cols = [c for c in final_cols if c in df_temp.columns]
    df_temp = df_temp[keep_cols].copy()

    if "event_time" in df_temp.columns:
        df_temp["event_time"] = pd.to_datetime(df_temp["event_time"], errors="coerce")
        df_temp = df_temp.dropna(subset=["event_time"])

    df_temp["source_file"] = "NorthernLight"
    df_temp["source_sheet"] = sheet

    df_nlh = pd.concat([df_nlh, df_temp], ignore_index=True)

# clean Workplace Violence Data Collection Tool
df_wpv = pd.DataFrame()
try:
    xl_wpv = pd.ExcelFile(file_wpv_tool)
    sheets_wpv = xl_wpv.sheet_names
    print(">>> WPV Data Collection Tool Sheets:", sheets_wpv)
except Exception as e:
    print("read WPV Collection Tool fail:", e)
    sheets_wpv = []

for sheet in sheets_wpv:
    try:
        df_temp = pd.read_excel(file_wpv_tool, sheet_name=sheet)
    except Exception as e:
        print(f" - read Sheet [{sheet}] fail: {e}")
        continue

    df_temp = df_temp.loc[:, ~df_temp.columns.duplicated()].copy()

    df_temp.rename(columns=map_wpv_tool, inplace=True)

    keep_cols = [c for c in final_cols if c in df_temp.columns]
    df_temp = df_temp[keep_cols].copy()

    if "event_time" in df_temp.columns:
        df_temp["event_time"] = pd.to_datetime(df_temp["event_time"], errors="coerce")
        df_temp = df_temp.dropna(subset=["event_time"])

    df_temp["source_file"] = "WPV_Tool"
    df_temp["source_sheet"] = sheet
    
    df_wpv = pd.concat([df_wpv, df_temp], ignore_index=True)


>>> Northern Light Health WPV Sheets: ['January 2025 NLH', 'December 2024 NLH', 'November 2024 NLH ', 'October 2024 NLH ', 'September 2024 NLH ', 'August 2024 NLH ']
>>> WPV Data Collection Tool Sheets: ['Sheet1']


In [57]:
mapping_dict = {
   "Unknown": "None - No Contact or Unwanted Contact w/o Injury",
   "No": "Mild - Mild Soreness/Abrasions/Scratches/Small Bruises",
   "Yes": "Moderate - Major Soreness/Cuts/Large Bruises", 
}
df_nlh["severity"] = df_nlh["physical_injury_level"].map(mapping_dict)

In [58]:
df_nlh

Unnamed: 0,event_time,victim_profession,department,perpetrator_type,violence_type,emotional_impact,assault_desc,response_action,contributing_factors,physical_injury_level,osha_recordable,charges_pressed,law_enforcement_contacted,nature_of_injury,injury_location,source_file,source_sheet,severity
0,2025-01-12,Other,Cadillac 2 North,Employee,Abuse/Assault (Physical),,A patient was seen touching a staff member on ...,Reported to department leader,Patient - Lack of Compliance/Adherence,No,No,No,No,Inappropriate touching,buttocks,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
1,2025-01-14,PSYCHIATRIC TECHNICIAN I,Cadillac 2 South,Employee,Abuse/Assault (Verbal),,Patient in 201 had a significant early this mo...,Patient Behavior Safety Flag/Care Plan initiated,Action by Patient/Resident; Altered Mental Sta...,Unknown,,,,,,NorthernLight,January 2025 NLH,None - No Contact or Unwanted Contact w/o Injury
2,2025-01-14,INPATIENT COORDINATOR,Grounds & Common Areas (Outside),Employee,Abuse/Assault (Verbal),,It was reported that on the afternoon outdoor ...,De-escalation techniques,Action by Patient/Resident; Communication Failure,No,,,,,,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
3,2025-01-04,PSYCHIATRIC TECHNICIAN I,Katahdin 1 South,Employee,Abuse/Assault (Physical),,this patient was escalated due to a game of mo...,De-escalation techniques; EAP - Offered/Employ...,Action by Patient/Resident; Task - Inexperienc...,Unknown,,,,,,NorthernLight,January 2025 NLH,None - No Contact or Unwanted Contact w/o Injury
4,2025-01-15,PSYCHIATRIC TECHNICIAN I,Katahdin 2 North,Employee,Abuse/Assault (Physical),,Patient continued to show poor boundaries with...,De-escalation techniques; Reported to departme...,Patient - Lack of Compliance/Adherence; Patien...,No,,,,,,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,2024-08-30,Staff Nurse,Emergency Department,Employee,Abuse/Assault (Verbal),,ED Nurse was hit in chest.,De-escalation techniques; EAP - Offered/Employ...,Action by Patient/Resident,No,,NaT,,,,NorthernLight,August 2024 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
418,2024-08-31,INPATIENT COORDINATOR,Katahdin 2 South,Employee,Abuse/Assault (Physical),,During restraint patient attempted to bite wri...,Other,Action by Patient/Resident,No,,NaT,,,,NorthernLight,August 2024 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
419,2024-08-31,PSYCHIATRIC TECHNICIAN,Emergency Department,Employee,Abuse/Assault (Physical),,ED Psych tech was kicked x2 at the half wall w...,De-escalation techniques; EAP - Offered/Employ...,Action by Patient/Resident,No,,NaT,,,,NorthernLight,August 2024 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
420,2024-08-31,Staff Nurse,Emergency Department,Employee,Abuse/Assault (Physical),,ED RN was supervising the patient in the bathr...,De-escalation techniques; EAP - Offered/Employ...,Action by Patient/Resident,No,,NaT,,,,NorthernLight,August 2024 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...


In [59]:
df_nlh.columns

Index(['event_time', 'victim_profession', 'department', 'perpetrator_type',
       'violence_type', 'emotional_impact', 'assault_desc', 'response_action',
       'contributing_factors', 'physical_injury_level', 'osha_recordable',
       'charges_pressed', 'law_enforcement_contacted', 'nature_of_injury',
       'injury_location', 'source_file', 'source_sheet', 'severity'],
      dtype='object')

In [60]:
df_wpv["department"] = df_wpv["department"].replace(
    to_replace=r"(?i)^ed\b.*",
    value="Emergency Department",
    regex=True
)

df_wpv["department"] = df_wpv["department"].replace(
    to_replace=r"(?i)^acu\b.*",
    value="ACU",
    regex=True
)

df_wpv["department"] = df_wpv["department"].replace(
    to_replace=r"(?i)^er\b.*",
    value="ER",
    regex=True
)

In [61]:
df_wpv

Unnamed: 0,event_time,victim_profession,department,perpetrator_type,violence_type,severity,emotional_impact,assault_desc,response_action,source_file,source_sheet
0,2024-10-07 11:37:00,Security,Emergency Department,Patient,Physical,None - No Contact or Unwanted Contact w/o Injury,None - No Emotional or Psychological Impact,Kicking/punching,Security Called,WPV_Tool,Sheet1
1,2024-10-07 11:50:52,Nurse (RN/LPN),ICU,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,None - No Emotional or Psychological Impact,Harassment,Security Called,WPV_Tool,Sheet1
2,2024-10-07 11:54:04,Security,ICU,Visitor,Verbal,None - No Contact or Unwanted Contact w/o Injury,None - No Emotional or Psychological Impact,Harassment,De-Escalation Techniques Used,WPV_Tool,Sheet1
3,2024-10-07 11:55:50,Nurse (RN/LPN),Emergency Department,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,None - No Emotional or Psychological Impact,Harassment,De-Escalation Techniques Used,WPV_Tool,Sheet1
4,2024-10-07 11:58:28,Nurse (RN/LPN),ACU,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,None - No Emotional or Psychological Impact,Harassment,Security Called,WPV_Tool,Sheet1
...,...,...,...,...,...,...,...,...,...,...,...
143,2024-10-11 16:43:40,Nurse (RN/LPN),ACU,Patient,Physical,Mild - Mild Soreness/Abrasions/Scratches/Small...,None - No Emotional or Psychological Impact,Hitting,Security Called,WPV_Tool,Sheet1
144,2024-10-11 16:45:31,Nurse (RN/LPN),ER,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,Mild - Upset/Angry/Scared/Humiliated,Harassment,Security Called,WPV_Tool,Sheet1
145,2024-10-11 16:50:21,Nurse (RN/LPN),ER,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,Mild - Upset/Angry/Scared/Humiliated,Harassment,Security Called,WPV_Tool,Sheet1
146,2024-10-17 08:03:20,Security,ER,Patient,Verbal,None - No Contact or Unwanted Contact w/o Injury,Mild - Upset/Angry/Scared/Humiliated,Harassment,Security Called,WPV_Tool,Sheet1


In [62]:
df_wpv.columns

Index(['event_time', 'victim_profession', 'department', 'perpetrator_type',
       'violence_type', 'severity', 'emotional_impact', 'assault_desc',
       'response_action', 'source_file', 'source_sheet'],
      dtype='object')

In [63]:
df_combined = pd.concat([df_nlh, df_wpv], ignore_index=True, sort=False)

In [64]:
print("\n>>> df shape:", df_combined.shape)
print(">>> df cols:", df_combined.columns.tolist())

# missing values
missing_count = df_combined.isnull().sum().sort_values(ascending=False)
print("\n>>> null values:")
print(missing_count.head(15))

print("\n>> head:")
display(df_combined.head())

output_path = "./merged_wpv_cleaned.csv"
df_combined.to_csv(output_path, index=False)
print(f"\n file output: {output_path}")



>>> df shape: (570, 18)
>>> df cols: ['event_time', 'victim_profession', 'department', 'perpetrator_type', 'violence_type', 'emotional_impact', 'assault_desc', 'response_action', 'contributing_factors', 'physical_injury_level', 'osha_recordable', 'charges_pressed', 'law_enforcement_contacted', 'nature_of_injury', 'injury_location', 'source_file', 'source_sheet', 'severity']

>>> null values:
charges_pressed              553
law_enforcement_contacted    485
injury_location              404
osha_recordable              214
nature_of_injury             150
physical_injury_level        148
contributing_factors         148
perpetrator_type              22
severity                       6
victim_profession              0
response_action                0
assault_desc                   0
emotional_impact               0
violence_type                  0
department                     0
dtype: int64

>> head:


Unnamed: 0,event_time,victim_profession,department,perpetrator_type,violence_type,emotional_impact,assault_desc,response_action,contributing_factors,physical_injury_level,osha_recordable,charges_pressed,law_enforcement_contacted,nature_of_injury,injury_location,source_file,source_sheet,severity
0,2025-01-12,Other,Cadillac 2 North,Employee,Abuse/Assault (Physical),,A patient was seen touching a staff member on ...,Reported to department leader,Patient - Lack of Compliance/Adherence,No,No,No,No,Inappropriate touching,buttocks,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
1,2025-01-14,PSYCHIATRIC TECHNICIAN I,Cadillac 2 South,Employee,Abuse/Assault (Verbal),,Patient in 201 had a significant early this mo...,Patient Behavior Safety Flag/Care Plan initiated,Action by Patient/Resident; Altered Mental Sta...,Unknown,,,,,,NorthernLight,January 2025 NLH,None - No Contact or Unwanted Contact w/o Injury
2,2025-01-14,INPATIENT COORDINATOR,Grounds & Common Areas (Outside),Employee,Abuse/Assault (Verbal),,It was reported that on the afternoon outdoor ...,De-escalation techniques,Action by Patient/Resident; Communication Failure,No,,,,,,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...
3,2025-01-04,PSYCHIATRIC TECHNICIAN I,Katahdin 1 South,Employee,Abuse/Assault (Physical),,this patient was escalated due to a game of mo...,De-escalation techniques; EAP - Offered/Employ...,Action by Patient/Resident; Task - Inexperienc...,Unknown,,,,,,NorthernLight,January 2025 NLH,None - No Contact or Unwanted Contact w/o Injury
4,2025-01-15,PSYCHIATRIC TECHNICIAN I,Katahdin 2 North,Employee,Abuse/Assault (Physical),,Patient continued to show poor boundaries with...,De-escalation techniques; Reported to departme...,Patient - Lack of Compliance/Adherence; Patien...,No,,,,,,NorthernLight,January 2025 NLH,Mild - Mild Soreness/Abrasions/Scratches/Small...



 file output: ./merged_wpv_cleaned.csv
