In [1]:

# Toggle IPython's pretty printing; the cell output reports the resulting state
%pprint
import sys
# Add the sibling ../py folder to the module search path
# (presumably where frvrs_utils and notebook_utils live -- confirm)
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from frvrs_utils import FRVRSUtilities
from notebook_utils import NotebookUtilities
from pandas import DataFrame
import numpy as np
import os
import os.path as osp
import pandas as pd
import re

# Both utility classes receive the same data and saves folder paths
folder_kwargs = dict(
    data_folder_path=osp.abspath('../data'),
    saves_folder_path=osp.abspath('../saves'),
)
nu = NotebookUtilities(**folder_kwargs)
fu = FRVRSUtilities(**folder_kwargs)

# Fetch the object stored under the name 'frvrs_logs_df' through the notebook utilities
frvrs_logs_df = nu.load_object('frvrs_logs_df')


# OSU First VResponder Simulation Dataset: ITM preparation

<ul>
<li>The FRVRS is a high-fidelity, fully immersive, automated, programmable virtual reality (VR) simulation designed (using Unity for MetaQuest 2 headsets) to train frontline responders to treat and triage victims of mass casualty incidents.</li>
<li>The VR platform tracks and records (logs) their performance as they navigate the disaster scene.</li>
<li>Output from the system provides feedback to participants on their performance, and, in our case, a statistically analyzable dataset of patient engagement events.</li>
<li>TA3 has kept these logs of participant trainees for our analysis.</li>
<li>11 virtual patients were designed from a universal avatar and can be customized to have a variety of life-threatening (e.g. acute arterial bleed, penetrating injury, pneumothorax, amputations) and non-life-threatening (e.g. lacerations, sprains, hysteria, confusion) injuries.</li>
<li>Participants were assessed in a subway bombing, mass casualty incident on their:<ol><li>skill in using the Sort-Assess-Life saving Interventions-Transport (SALT) triage protocol,</li><li>effectiveness of their communication with patients, and their</li><li>skill in applying appropriate life-saving treatments.</li></ol></li>
<li>OSU has amended its IRB protocol to allow for the sharing of de-identified data with other researchers.</li>
<li>The data are scrubbed for any identifying factors and shared with other members of TA3.</li>
<li>Metrics including time to control life-threatening hemorrhage and triage efficacy were analyzed using median and interquartile ranges (IQR).</li>
</ul>


## Simulation log event definitions

<p>The First VResponder tracks and records (logs) the frontline responder trainee's performance as they navigate the disaster scene. Output from the system provides feedback to participants on their performance, and, in our case, a statistically analyzable dataset of patient engagement events. The Subject Matter Experts (SMEs) have kept these logs of participant trainees for our analysis.</p>


Each entry in the First VResponder logs is prepended with four or five global columns, represented in the MCI-VR Metrics Types schema as Type, elapsedTime, Timestamp, and SessionId.
![FRVRS Log v1.0](../saves/png/frvrs_log_v_1_0.png)


Later versions also contain the fifth, Version, column:
![FRVRS Log v1.3](../saves/png/frvrs_log_v_1_3.png)


In our dataset, six columns are added to each row, represented as <strong>action_type</strong>, <strong>action_tick</strong>, <strong>event_time</strong>, <strong>session_uuid</strong> (the simulation session's unique identifier), <strong>file_name</strong> (added for provenance), and <strong>logger_version</strong>.

In [3]:

# Show the names of the first six columns of the logs data frame
print(list(frvrs_logs_df.columns[:6]))

['action_type', 'action_tick', 'event_time', 'session_uuid', 'file_name', 'logger_version']



Within each simulation session, multiple decision makers (DMs) can go through the simulation taking turns one by one. Events in the simulation are logged as:
### Session start and end events
All instances of <em>SESSION_END</em> and <em>SESSION_START</em> in the <strong>action_type</strong> column were analyzed to break up session UUIDs into an integer-based <strong>scene_id</strong> column.

In [21]:

# Flag the rows whose action type (as a string) starts with the SESSION_ prefix
mask_series = frvrs_logs_df.action_type.map(str).str.startswith('SESSION_')

# Collect the distinct action types among the flagged rows and list them alphabetically
session_set = set(frvrs_logs_df.loc[mask_series, 'action_type'].unique())
sorted(session_set)

['SESSION_END', 'SESSION_START']

In [22]:

# Display the signature and source of the method that adds the scene_id column
fu.set_scene_indexes??

[0;31mSignature:[0m [0mfu[0m[0;34m.[0m[0mset_scene_indexes[0m[0;34m([0m[0mdf[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
    [0;32mdef[0m [0mset_scene_indexes[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mdf[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m"""[0m
[0;34m        Section off player actions by session start and end. We are finding log entries above the first SESSION_START and below the last SESSION_END.[0m
[0;34m        [0m
[0;34m        Parameters:[0m
[0;34m            df: A Pandas DataFrame containing the player action data with its index reset.[0m
[0;34m        [0m
[0;34m        Returns:[0m
[0;34m            A Pandas DataFrame with the `scene_id` column added.[0m
[0;34m        """[0m[0;34m[0m
[0;34m[0m    [0;34m[0m
[0;34m[0m        [0;31m# Set the whole file to zero first[0m[0;34m[0m
[0;34m[0m        [0mdf[0m [0;34m=[0m [0mdf[0m[0;34m.[0m[0msort_values[0m[0;34m([0m[0;34m'action_t


### Patient related events (e.g. patient_engaged, patient_demoted, etc.)

In [13]:

# Mark the rows that have no patient_sort value
mask_series = frvrs_logs_df['patient_sort'].isnull()

# Collect the distinct action types of the remaining rows and list them alphabetically
patient_sort_set = set(frvrs_logs_df.loc[~mask_series, 'action_type'].unique())
sorted(patient_sort_set)

['PATIENT_DEMOTED', 'PATIENT_ENGAGED', 'PATIENT_RECORD']


<b>PatientDemoted</b>
<ul>
	<li>healthLevel</li>
	<li>healthTimeRemaining</li>
	<li>id</li>
	<li>position</li>
	<li>rotation</li>
	<li>salt</li>
	<li>sort</li>
	<li>pulse</li>
	<li>breath</li>
	<li>hearing</li>
	<li>mood</li>
	<li>pose</li>
</ul>

In [31]:

# Columns carrying the PatientDemoted payload share the 'patient_demoted_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('patient_demoted_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['patient_demoted_health_level', 'patient_demoted_health_time_remaining', 'patient_demoted_id', 'patient_demoted_position', 'patient_demoted_rotation', 'patient_demoted_salt', 'patient_demoted_sort', 'patient_demoted_pulse', 'patient_demoted_breath', 'patient_demoted_hearing', 'patient_demoted_mood', 'patient_demoted_pose']


Unnamed: 0,710778,828580,741180,736789,726969,765844,702087,734323
patient_demoted_health_level,100,100,100,100,100,100,100,100
patient_demoted_health_time_remaining,Infinity,Infinity,Infinity,Infinity,Infinity,Infinity,Infinity,Infinity
patient_demoted_id,Lily_2 Root,Military Mike Jungle Combat_3_2 Root,Gary_1 Root,Bob_0 Root,Mike_7 Root,Gloria_6 Root,Mike_2 Root,Mike_1 Root
patient_demoted_position,"(2.8, 0.0, 4.3)","(-1.1, 0.0, -22.0)","(1.7, 0.0, 7.4)","(-1.5, 0.0, 5.6)","(1.0, 0.0, 3.7)","(-1.0, 0.0, 2.9)","(2.6, 0.0, 2.4)","(2.7, 0.0, 0.1)"
patient_demoted_rotation,"(0.0, 1.0, 0.0, 0.0)","(0.0, -0.4, 0.0, 0.9)","(0.0, 1.0, 0.0, 0.0)","(0.0, 0.7, 0.0, 0.7)","(0.0, 0.1, 0.0, 1.0)","(0.0, 0.4, 0.0, 0.9)","(0.0, 0.7, 0.0, -0.7)","(0.0, 0.7, 0.0, -0.7)"
patient_demoted_salt,IMMEDIATE,IMMEDIATE,EXPECTANT,DEAD,DELAYED,DELAYED,IMMEDIATE,DELAYED
patient_demoted_sort,waver,waver,still,still,waver,waver,walker,waver
patient_demoted_pulse,fast,fast,faint,none,fast,fast,fast,normal
patient_demoted_breath,fast,fast,restricted,none,normal,normal,normal,normal
patient_demoted_hearing,normal,normal,none,none,normal,normal,normal,normal



<b>PatientEngaged</b>
<ul>
	<li>healthLevel</li>
	<li>healthTimeRemaining</li>
	<li>id</li>
	<li>position</li>
	<li>rotation</li>
	<li>salt</li>
	<li>sort</li>
	<li>pulse</li>
	<li>breath</li>
	<li>hearing</li>
	<li>mood</li>
	<li>pose</li>
</ul>

In [32]:

# Columns carrying the PatientEngaged payload share the 'patient_engaged_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('patient_engaged_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['patient_engaged_health_level', 'patient_engaged_health_time_remaining', 'patient_engaged_id', 'patient_engaged_position', 'patient_engaged_rotation', 'patient_engaged_salt', 'patient_engaged_sort', 'patient_engaged_pulse', 'patient_engaged_breath', 'patient_engaged_hearing', 'patient_engaged_mood', 'patient_engaged_pose']


Unnamed: 0,61780,696597,206622,701843,123379,632774,627404,660080
patient_engaged_health_level,100,100,100,100,100,63.87083,63.87083,100
patient_engaged_health_time_remaining,Infinity,Infinity,0,Infinity,Infinity,Infinity,Infinity,0
patient_engaged_id,Mike_5 Root,Gloria_8 Root,Gary_9 Root,Mike_5 Root,Lily_1 Root,Mike_7 Root,Mike_5 Root,Gary_9 Root
patient_engaged_position,"(2.7, 0.0, 2.3)","(4.8, 0.0, -3.4)","(3.0, 0.0, 9.0)","(2.7, 0.0, 2.3)","(1.2, 0.0, 3.9)","(1.4, 0.0, 4.0)","(2.6, 0.0, 2.0)","(3.5, 0.0, 9.4)"
patient_engaged_rotation,"(0.0, 1.0, 0.0, 0.0)","(0.0, 0.7, 0.0, -0.8)","(0.0, -0.9, 0.0, 0.3)","(0.0, 1.0, 0.0, 0.0)","(0.0, 0.1, 0.0, 1.0)","(0.0, 0.1, 0.0, 1.0)","(0.0, 1.0, 0.0, 0.0)","(0.0, 1.0, 0.0, -0.1)"
patient_engaged_salt,IMMEDIATE,MINIMAL,MINIMAL,IMMEDIATE,DELAYED,DELAYED,IMMEDIATE,MINIMAL
patient_engaged_sort,waver,walker,walker,waver,waver,waver,waver,walker
patient_engaged_pulse,fast,normal,normal,fast,normal,fast,fast,normal
patient_engaged_breath,normal,normal,normal,normal,normal,normal,normal,normal
patient_engaged_hearing,normal,normal,limited,normal,normal,normal,normal,limited



<b>PatientRecord</b>
<ul>
	<li>healthLevel</li>
	<li>healthTimeRemaining</li>
	<li>id</li>
	<li>position</li>
	<li>rotation</li>
	<li>salt</li>
	<li>sort</li>
	<li>pulse</li>
	<li>breath</li>
	<li>hearing</li>
	<li>mood</li>
	<li>pose</li>
</ul>

In [33]:

# Columns carrying the PatientRecord payload share the 'patient_record_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('patient_record_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['patient_record_health_level', 'patient_record_health_time_remaining', 'patient_record_id', 'patient_record_position', 'patient_record_rotation', 'patient_record_salt', 'patient_record_sort', 'patient_record_pulse', 'patient_record_breath', 'patient_record_hearing', 'patient_record_mood', 'patient_record_pose']


Unnamed: 0,128254,229884,764924,105407,89185,793571,719753,582424
patient_record_health_level,100,63.87083,100,100,100,100,100,100
patient_record_health_time_remaining,Infinity,Infinity,Infinity,Infinity,Infinity,128,Infinity,Infinity
patient_record_id,Gloria_8 Root,Mike_5 Root,Mike_5 Root,Gloria_6 Root,Lily_2 Root,Mike_3 Root,Mike_0 Root,Lily_2 Root
patient_record_position,"(2.6, 0.0, 8.0)","(2.6, 0.0, 2.0)","(2.7, 0.0, 2.3)","(-1.2, 0.0, 3.1)","(3.0, 0.0, 4.5)","(3.2, 0.0, 4.3)","(2.6, 0.0, -1.4)","(3.0, 0.0, 4.0)"
patient_record_rotation,"(0.0, 1.0, 0.0, 0.0)","(0.0, 1.0, 0.0, 0.0)","(0.0, 1.0, 0.0, 0.0)","(0.0, 0.4, 0.0, 0.9)","(0.0, 1.0, 0.0, 0.0)","(0.0, 0.7, 0.0, -0.7)","(0.0, 0.7, 0.0, -0.7)","(0.0, 1.0, 0.0, 0.0)"
patient_record_salt,MINIMAL,IMMEDIATE,IMMEDIATE,DELAYED,IMMEDIATE,IMMEDIATE,MINIMAL,IMMEDIATE
patient_record_sort,still,waver,waver,waver,waver,still,walker,waver
patient_record_pulse,normal,fast,fast,fast,fast,faint,normal,fast
patient_record_breath,normal,normal,normal,normal,fast,collapsedRight,normal,fast
patient_record_hearing,limited,normal,normal,normal,normal,normal,normal,normal



### Injury related events (e.g. injury_record, injury_treated, etc.)

In [15]:

# Mark the rows that have no injury_id value
mask_series = frvrs_logs_df['injury_id'].isnull()

# Collect the distinct action types of the remaining rows and list them alphabetically
injury_set = set(frvrs_logs_df.loc[~mask_series, 'action_type'].unique())
sorted(injury_set)

['INJURY_RECORD', 'INJURY_TREATED']


<b>InjuryRecord</b>
<ul>
	<li>Id</li>
	<li>patientId</li>
	<li>requiredProcedure</li>
	<li>severity</li>
	<li>bodyRegion</li>
	<li>injuryTreated</li>
	<li>injuryTreatedWithWrongTreatment</li>
	<li>injuryLocator</li>
</ul>

In [34]:

# Columns carrying the InjuryRecord payload share the 'injury_record_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('injury_record_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['injury_record_id', 'injury_record_patient_id', 'injury_record_required_procedure', 'injury_record_severity', 'injury_record_body_region', 'injury_record_injury_treated', 'injury_record_injury_treated_with_wrong_treatment', 'injury_record_injury_injury_locator']


Unnamed: 0,820787,15478,672878,641576,40789,202996,674645,730380
injury_record_id,L Neck Puncture,R Forearm Laceration,Face Shrapnel,R Shin Amputation,L Thigh Laceration,L Thigh Puncture,Face Shrapnel,R Shin Amputation
injury_record_patient_id,Marine Burned with Neck Puncture Root,Mike_0 Root,Lily_0 Root,Bob_0 Root,Mike_1 Root,Mike_7 Root,Bob_0 Root,Bob_0 Root
injury_record_required_procedure,woundpack,gauzePressure,airway,tourniquet,tourniquet,tourniquet,airway,tourniquet
injury_record_severity,high,low,high,high,medium,medium,high,high
injury_record_body_region,neck,rightArm,head,rightLeg,leftLeg,leftLeg,head,rightLeg
injury_record_injury_treated,False,False,False,False,False,False,False,False
injury_record_injury_treated_with_wrong_treatment,False,False,False,False,False,False,False,False
injury_record_injury_injury_locator,"(-0.2, 1.8, -21.0)","(2.7, 1.4, -0.9)","(-0.3, 1.6, 5.6)","(-1.0, 0.4, 5.9)","(2.9, 0.7, -0.1)","(1.3, 0.6, 3.5)","(-1.6, 1.7, 5.6)","(-1.7, 0.4, 5.6)"



<b>InjuryTreated</b>
<ul>
	<li>Id</li>
	<li>patientId</li>
	<li>requiredProcedure</li>
	<li>severity</li>
	<li>bodyRegion</li>
	<li>injuryTreated</li>
	<li>injuryTreatedWithWrongTreatment</li>
	<li>injuryLocator</li>
</ul>

In [35]:

# Columns carrying the InjuryTreated payload share the 'injury_treated_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('injury_treated_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['injury_treated_id', 'injury_treated_patient_id', 'injury_treated_required_procedure', 'injury_treated_severity', 'injury_treated_body_region', 'injury_treated_injury_treated', 'injury_treated_injury_treated_with_wrong_treatment', 'injury_treated_injury_injury_locator']


Unnamed: 0,97577,797483,305931,86238,647439,168030,645813,803251
injury_treated_id,R Calf Laceration,R Shin Amputation,L Side Puncture,L Forearm Laceration,R Chest Collapse,R Shin Amputation,L Side Puncture,L Thigh Puncture
injury_treated_patient_id,Mike_7 Root,Lily_2 Root,Lily_4 Root,Gloria_8 Root,Gary_3 Root,Lily_2 Root,Lily_4 Root,Mike_7 Root
injury_treated_required_procedure,gauzePressure,tourniquet,woundpack,gauzePressure,decompress,tourniquet,woundpack,tourniquet
injury_treated_severity,low,high,medium,low,high,high,medium,medium
injury_treated_body_region,rightLeg,rightLeg,abdomen,leftArm,chest,rightLeg,abdomen,leftLeg
injury_treated_injury_treated,False,True,True,True,True,True,True,True
injury_treated_injury_treated_with_wrong_treatment,True,False,False,False,False,False,False,False
injury_treated_injury_injury_locator,"(1.3, 0.2, 3.8)","(3.3, 0.1, 4.2)","(1.1, 0.2, 6.3)","(4.3, 0.9, -1.3)","(0.0, 0.0, 0.0)","(3.3, 0.1, 4.2)","(1.1, 0.2, 6.3)","(0.7, 0.1, 3.5)"



### Item/Action related events (e.g. bag_accessed, tag_applied, tool_discarded, etc.)

In [46]:

# Mark the rows that have no location_id value
mask_series = frvrs_logs_df['location_id'].isnull()

# Collect the distinct action types of the remaining rows
location_set = set(frvrs_logs_df.loc[~mask_series, 'action_type'].unique())

# List the ones not already found in the patient_sort and injury sets
sorted(location_set.difference(patient_sort_set, injury_set))

['BAG_ACCESS', 'BAG_CLOSED', 'PLAYER_GAZE', 'PLAYER_LOCATION', 'S_A_L_T_WALKED', 'S_A_L_T_WALK_IF_CAN', 'S_A_L_T_WAVED', 'S_A_L_T_WAVE_IF_CAN', 'TAG_DISCARDED', 'TELEPORT', 'TOOL_DISCARDED']


<b>BagAccess</b>
<ul>
	<li>Location</li>
</ul>

In [36]:

# Columns carrying the BagAccess payload share the 'bag_access_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('bag_access_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['bag_access_location']


Unnamed: 0,682718,207155,721959,826156,642149,582669,779798,726019
bag_access_location,"(1.1, 0.7, 5.4)","(3.5, 0.7, 5.2)","(0.6, 0.7, 0.5)","(3.7, 0.5, -4.0)","(0.7, 0.5, 1.4)","(-1.5, 0.5, 4.9)","(1.1, 0.5, 3.8)","(0.8, 0.7, 7.7)"



<b>BagClosed</b>
<ul>
	<li>Location</li>
</ul>

In [37]:

# Columns carrying the BagClosed payload share the 'bag_closed_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('bag_closed_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['bag_closed_location']


Unnamed: 0,226444,181786,827570,718435,671014,706498,582018,182179
bag_closed_location,"(-1.1, 0.7, 4.0)","(0.9, 0.6, 7.5)","(0.2, 0.8, -21.1)","(3.1, 0.7, -4.0)","(2.7, 0.4, 0.7)","(3.5, 0.6, 1.8)","(3.7, 0.6, 3.0)","(3.1, 0.8, 3.6)"



<b>PlayerGaze</b>
<ul>
	<li>PatientID</li>
	<li>Location (x,y,z)</li>
	<li>Distance to Patient</li>
	<li>Direction of Gaze (vector3)</li>
</ul>

In [38]:

# Columns carrying the PlayerGaze payload share the 'player_gaze_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('player_gaze_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['player_gaze_location', 'player_gaze_patient_id', 'player_gaze_distance_to_patient', 'player_gaze_direction_of_gaze']


Unnamed: 0,828540,821666,817545,825879,821636,822052,817656,820385
player_gaze_location,"(-2.2, 0.3, -22.4)","(2.9, 0.0, 2.5)","(2.9, 0.0, 2.5)","(1.0, 0.0, 19.0)","(2.9, 0.0, 2.5)","(2.9, 0.0, 2.5)","(2.9, 0.0, 2.5)","(2.9, 0.0, 2.5)"
player_gaze_patient_id,Intelligence Officer Burned_Gary_1 Root,Tutorial Military Marine Root,Tutorial Military Marine Root,Lily_11 Root,Tutorial Military Marine Root,Tutorial Military Marine Root,Tutorial Military Marine Root,Tutorial Military Marine Root
player_gaze_distance_to_patient,1.907849,1.807727,1.672139,21.50226,1.965846,1.444221,1.66494,1.552028
player_gaze_direction_of_gaze,"(-0.3, 1.4, 1.2)","(-0.6, 1.5, -0.8)","(-0.7, 1.5, -0.1)","(3.1, 0.9, -21.3)","(-0.7, 1.5, -1.0)","(0.3, 1.4, -0.3)","(-0.8, 1.5, -0.1)","(-0.6, 1.4, -0.2)"



<b>PlayerLocation</b>
<ul>
	<li>Location (x,y,z)</li>
	<li>Left Hand Location (x,y,z); deactivated in v1.3</li>
	<li>Right Hand Location (x,y,z); deactivated in v1.3</li>
</ul>

In [39]:

# Columns carrying the PlayerLocation payload share the 'player_location_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('player_location_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['player_location_location', 'player_location_left_hand_location', 'player_location_right_hand_location']


Unnamed: 0,820632,814974,818117,820330,817617,825237,810830,817504
player_location_location,"(2.4, 1.5, 2.0)","(0.7, 1.9, 4.3)","(1.9, 1.5, 1.5)","(1.2, 1.7, -3.1)","(1.0, 1.7, -1.3)","(1.7, 0.9, 8.1)","(1.6, 1.7, -0.8)","(2.0, 1.5, 1.9)"
player_location_left_hand_location,"(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)"
player_location_right_hand_location,"(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)"



<b>SALTWalked</b>
<ul>
	<li>sortLocation</li>
	<li>sortCommandText</li>
	<li>patientId</li>
</ul>

In [41]:

# Columns carrying the SALTWalked payload share the 's_a_l_t_walked_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('s_a_l_t_walked_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['s_a_l_t_walked_sort_location', 's_a_l_t_walked_sort_command_text', 's_a_l_t_walked_patient_id']


Unnamed: 0,359136,623907,322765,577907,622121,482989,298909,367044
s_a_l_t_walked_sort_location,"(0.6, 0.0, 34.2)","(5.7, 0.0, -5.6)","(1.6, 0.0, 34.2)","(-0.7, 0.0, 12.9)","(3.4, 0.0, -5.5)","(5.6, 0.0, -34.3)","(5.3, 0.0, -28.6)","(5.7, 0.0, 33.1)"
s_a_l_t_walked_sort_command_text,walked,walked,walked,walked,walked,walked,walked,walked
s_a_l_t_walked_patient_id,Helga_10 Root,Gloria_8 Root,Helga_10 Root,Gloria_8 Root,Gloria_8 Root,Helga_10 Root,Bob_9 Root,Helga_10 Root



<b>SALTWalkIfCan</b>
<ul>
	<li>sortLocation</li>
	<li>sortCommandText</li>
	<li>patientId</li>
</ul>

In [42]:

# Columns carrying the SALTWalkIfCan payload share the 's_a_l_t_walk_if_can_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('s_a_l_t_walk_if_can_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['s_a_l_t_walk_if_can_sort_location', 's_a_l_t_walk_if_can_sort_command_text', 's_a_l_t_walk_if_can_patient_id']


Unnamed: 0,484922,245372,344335,373245,344054,294408,623951,625725
s_a_l_t_walk_if_can_sort_location,"(4.6, 0.0, -34.3)","(5.4, 0.0, -5.8)","(2.8, 0.0, 18.9)","(2.6, 0.0, 8.0)","(2.4, 0.0, 19.7)","(5.7, 0.0, -5.0)","(5.0, 0.0, -5.7)","(3.0, 0.0, -5.4)"
s_a_l_t_walk_if_can_sort_command_text,walkIfCan,walkIfCan,walkIfCan,walkIfCan,walkIfCan,walkIfCan,walkIfCan,walkIfCan
s_a_l_t_walk_if_can_patient_id,Helga_10 Root,Gloria_8 Root,Helga_10 Root,Bob_9 Root,Helga_10 Root,Gloria_8 Root,Gloria_8 Root,Gloria_8 Root



<b>SALTWaved</b>
<ul>
	<li>sortLocation</li>
	<li>sortCommandText</li>
	<li>patientId</li>
</ul>

In [43]:

# Columns carrying the S_A_L_T_WAVED payload share the 's_a_l_t_waved_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('s_a_l_t_waved_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['s_a_l_t_waved_sort_location', 's_a_l_t_waved_sort_command_text', 's_a_l_t_waved_patient_id']


Unnamed: 0,192360,46429,782762,728759,40261,165975,755585,752365
s_a_l_t_waved_sort_location,"(3.0, 0.0, 4.5)","(2.9, 0.0, -3.8)","(1.3, 0.0, 3.4)","(2.7, 0.0, 2.3)","(3.1, 0.0, -4.9)","(-1.2, 0.0, 3.1)","(-1.2, 0.0, 3.1)","(3.9, 0.0, 8.0)"
s_a_l_t_waved_sort_command_text,waved,waved,waved,waved,waved,waved,waved,waved
s_a_l_t_waved_patient_id,Lily_2 Root,Helga_10 Root,Mike_7 Root,Mike_5 Root,Gloria_8 Root,Gloria_6 Root,Gloria_6 Root,Helga_10 Root



<b>SALTWaveIfCan</b>
<ul>
	<li>sortLocation</li>
	<li>sortCommandText</li>
	<li>patientId</li>
</ul>

In [44]:

# Columns carrying the SALTWaveIfCan payload share the 's_a_l_t_wave_if_can_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('s_a_l_t_wave_if_can_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['s_a_l_t_wave_if_can_sort_location', 's_a_l_t_wave_if_can_sort_command_text', 's_a_l_t_wave_if_can_patient_id']


Unnamed: 0,775145,203044,307379,98525,586564,785623,173260,657641
s_a_l_t_wave_if_can_sort_location,"(1.7, 0.0, -3.3)","(1.2, 0.0, 6.2)","(0.0, 0.0, 2.0)","(2.7, 0.0, 2.3)","(4.5, 0.0, -2.5)","(4.4, 0.0, -1.2)","(2.7, 0.0, 2.3)","(2.0, 0.0, 7.0)"
s_a_l_t_wave_if_can_sort_command_text,waveIfCan,waveIfCan,waveIfCan,waveIfCan,waveIfCan,waveIfCan,waveIfCan,waveIfCan
s_a_l_t_wave_if_can_patient_id,Helga_10 Root,Lily_4 Root,Gary_3 Root,Mike_5 Root,Helga_10 Root,Gloria_8 Root,Mike_5 Root,Gary_1 Root



<b>TagDiscarded</b>
<ul>
	<li>Type</li>
	<li>Location</li>
</ul>

In [45]:

# Columns carrying the TagDiscarded payload share the 'tag_discarded_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('tag_discarded_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['tag_discarded_type', 'tag_discarded_location']


Unnamed: 0,97668,224586,166943,31521,766604,181957,147046,725293
tag_discarded_type,gray,black,red,yellow,red,green,black,red
tag_discarded_location,"(-1.4, 0.8, 3.9)","(4.3, 1.3, -2.6)","(2.2, 1.1, -2.4)","(3.2, 0.3, 4.6)","(3.7, 0.2, 4.8)","(0.9, 0.2, 5.5)","(4.3, 1.3, -1.5)","(3.1, 1.1, 5.0)"



<b>Teleport</b>
<ul>
	<li>Location</li>
</ul>

In [47]:

# Columns carrying the Teleport payload share the 'teleport_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('teleport_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['teleport_location']


Unnamed: 0,198946,196281,718093,643428,828294,727946,201238,41820
teleport_location,"(-0.3, 0.0, 0.5)","(2.8, 0.0, -1.4)","(0.5, 0.0, 7.2)","(0.6, 0.0, 8.4)","(0.7, 0.0, -7.5)","(-1.9, 0.0, 6.7)","(2.0, 0.0, 8.6)","(2.3, 0.0, -3.3)"



<b>ToolDiscarded</b>
<ul>
	<li>Type</li>
	<li>Count</li>
	<li>Location</li>
</ul>

In [48]:

# Columns carrying the ToolDiscarded payload share the 'tool_discarded_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('tool_discarded_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['tool_discarded_type', 'tool_discarded_count', 'tool_discarded_location']


Unnamed: 0,214425,31484,34096,683174,664907,136059,643892,149474
tool_discarded_type,Gauze,Gauze,Needle,Gauze,Gauze,Hemostatic Gauze,Gauze,Needle
tool_discarded_count,998,1000,998,1002,999,998,1002,997
tool_discarded_location,"(3.9, 0.9, -4.7)","(1.4, 0.1, 3.8)","(3.5, 0.3, 3.0)","(3.3, 1.4, -2.8)","(1.3, 0.1, 3.8)","(1.8, 0.4, -3.3)","(2.9, 0.4, 1.9)","(3.5, 0.2, 2.9)"


In [16]:

# Mark the rows that have no patient_id value
mask_series = frvrs_logs_df['patient_id'].isnull()

# Collect the distinct action types of the remaining rows
patient_set = set(frvrs_logs_df.loc[~mask_series, 'action_type'].unique())

# List the ones not already found in the injury and location sets
sorted(patient_set.difference(injury_set, location_set))

['PULSE_TAKEN', 'TAG_APPLIED', 'TOOL_APPLIED']


<b>PulseTaken</b>
<ul>
    <li>pulseName</li>
    <li>patientId</li>
</ul>

In [49]:

# Columns carrying the PulseTaken payload share the 'pulse_taken_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('pulse_taken_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['pulse_taken_pulse_name', 'pulse_taken_patient_id']


Unnamed: 0,214706,238322,794101,159162,808879,793886,752197,666124
pulse_taken_pulse_name,pulse_fast,pulse_none,pulse_faint,pulse_fast,pulse_fast,pulse_normal,pulse_fast,pulse_faint
pulse_taken_patient_id,Mike_5 Root,Gary_1 Root,Gary_3 Root,Lily_2 Root,Mike_5 Root,Mike_0 Root,Mike_2 Root,Mike_3 Root



<b>TagApplied</b>
<ul>
    <li>patientId</li>
    <li>type</li>
</ul>

In [50]:

# Columns carrying the TagApplied payload share the 'tag_applied_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('tag_applied_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 8 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(8, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['tag_applied_patient_id', 'tag_applied_type']


Unnamed: 0,176370,631705,763827,184872,802918,682790,768041,168408
tag_applied_patient_id,Gary_9 Root,Gary_1 Root,Lily_4 Root,Mike_5 Root,Mike_3 Root,Gloria_8 Root,Mike_5 Root,Mike_7 Root
tag_applied_type,yellow,gray,red,red,red,yellow,red,yellow



<b>ToolApplied</b>
<ul>
    <li>patientId</li>
    <li>type</li>
    <li>attachmentPoint</li>
    <li>toolLocation</li>
    <li>data</li>
    <li>sender</li>
    <li>attachMessage</li>
</ul>

In [52]:

# Columns carrying the ToolApplied payload share the 'tool_applied_' prefix
columns_list = [cn for cn in frvrs_logs_df.columns if str(cn).startswith('tool_applied_')]
print(columns_list)

# Keep the rows where at least one of those columns is populated; unlike OR-ing
# onto a scalar False, this still yields a valid mask when no column matches
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')

# Show up to 6 populated rows (seeded so a re-run reproduces the table), drop the
# columns that are empty in the sample, and transpose so each column reads as a row
populated_df = frvrs_logs_df.loc[mask_series, columns_list]
populated_df.sample(min(6, populated_df.shape[0]), random_state=0).dropna(axis='columns', how='all').T

['tool_applied_type', 'tool_applied_attachment_point', 'tool_applied_tool_location', 'tool_applied_data', 'tool_applied_sender', 'tool_applied_attach_message', 'tool_applied_patient_id']


Unnamed: 0,730562,658026,807135,16122,187519,691339
tool_applied_type,Tourniquet,Gauze_Dressing,Gauze_Pack,Gauze_Dressing,Tourniquet,Gauze_Dressing
tool_applied_attachment_point,RightUpLeg (UnityEngine.GameObject),skinCollider_BodyCollideLOD (UnityEngine.GameO...,skinCollider_Body_LOD (UnityEngine.GameObject),skinCollider_BodyCollideLOD (UnityEngine.GameO...,RightUpLeg (UnityEngine.GameObject),skinCollider_BodyCollideLOD (UnityEngine.GameO...
tool_applied_tool_location,tor_ring (1) (UnityEngine.GameObject),,,,tor_ring (1) (UnityEngine.GameObject),
tool_applied_data,tourniquet(Clone) (UnityEngine.GameObject),,,,tourniquet(Clone) (UnityEngine.GameObject),
tool_applied_sender,AppliedTourniquet,AppliedDressingGauze,AppliedPackingGauze,AppliedDressingGauze,AppliedTourniquet,AppliedDressingGauze
tool_applied_patient_id,Lily_2 Root,Mike_5 Root,Gloria_8 Root,Mike_0 Root,Lily_2 Root,Mike_5 Root



<h2>Data Collection and Preparation</h2>


<p>The SMEs masked the PII from approximately 129 sessions of participant trainees in the form of CSV files by screening all the <em>VOICE_COMMAND</em> and <em>VOICE_CAPTURE</em> lines and replacing any names with either “Max” or “Jane”. These CSVs were then gathered into one data frame.</p>


<p>A <strong>file_name</strong> column was added to record the source, and the third column, eventually known as the <strong>event_time</strong> column, was converted into a datetime column. The first four columns were given names: <strong>action_type</strong>, <strong>action_tick</strong>, <strong>event_time</strong>, and <strong>session_uuid</strong> in accordance with the global MCIVR metrics types. Next, all instances of <em>SESSION_END</em> and <em>SESSION_START</em> in the <strong>action_type</strong> column were analyzed to group the session UUID into an integer-based <strong>scene_id</strong> column. This allowed the further analysis of action types to replace the numbered columns with columns named according to the non-global MCIVR metrics types.</p>


<p>The <strong>injury_record_injury_treated_with_wrong_treatment</strong>, <strong>injury_record_injury_treated</strong>, <strong>injury_treated_injury_treated_with_wrong_treatment</strong>, and <strong>injury_treated_injury_treated</strong> columns were converted into Booleans.</p>


<p>A <strong>patient_id</strong> column was added to modalize the values of the <strong>patient_demoted_id</strong>, <strong>patient_record_id</strong>, <strong>injury_record_patient_id</strong>, <strong>s_a_l_t_walk_if_can_patient_id</strong>, <strong>s_a_l_t_walked_patient_id</strong>, <strong>s_a_l_t_wave_if_can_patient_id</strong>, <strong>s_a_l_t_waved_patient_id</strong>, <strong>patient_engaged_id</strong>, <strong>pulse_taken_patient_id</strong>, <strong>injury_treated_patient_id</strong>, <strong>tool_applied_patient_id</strong>, and <strong>tag_applied_patient_id</strong> columns.</p>


<p>A <strong>location_id</strong> column was also added to modalize the various location columns (<strong>teleport_location</strong>, <strong>patient_demoted_position</strong>, <strong>patient_record_position</strong>, <strong>injury_record_injury_injury_locator</strong>, <strong>s_a_l_t_walk_if_can_sort_location</strong>, <strong>s_a_l_t_walked_sort_location</strong>, <strong>s_a_l_t_wave_if_can_sort_location</strong>, <strong>s_a_l_t_waved_sort_location</strong>, <strong>patient_engaged_position</strong>, <strong>bag_access_location</strong>, <strong>injury_treated_injury_injury_locator</strong>, <strong>bag_closed_location</strong>, <strong>tag_discarded_location</strong>, and <strong>tool_discarded_location</strong>).</p>


<p>An <strong>injury_id</strong> column was also added to modalize the various injury columns (<strong>injury_record_id</strong>, and <strong>injury_treated_id</strong>).</p>


<p>A <strong>patient_sort</strong> column was also added to modalize the various SORT columns (<strong>patient_demoted_sort</strong>, <strong>patient_record_sort</strong>, and <strong>patient_engaged_sort</strong>).</p>


Any runs longer than 16 minutes are probably instances of someone taking off the headset and setting it on the ground (or some other technicality aborting the scene), so an <strong>is_scene_aborted</strong> column was added.


In order to cull a sample with a clear count of responders, we also added the <strong>scene_type</strong> and <strong>is_a_one_triage_file</strong> columns.


<p>Also, it was assumed that the manual PII masking might not be thorough enough, so the <strong>voice_command_command_description</strong> and <strong>voice_capture_message</strong> columns were remasked using the named-entity recognizer of spaCy's en_core_web_sm model.</p>


Finally, a voice capture sentiment score was added in the <strong>voice_capture_sentiment_score</strong> column.


<h2>Data Analysis Methods</h2>
<p>Numerous dataset building, exploratory, maintenance, metrics development, and visualization functions have been developed from the process of<ul><li>merely examining edge cases and aggregates by sorting and grouping and looking at the head, tail, or value counts,</li><li>extensive interviews with the SMEs, and</li><li>Affinity Analysis and Association Rule Learning.</li></ul></p>
<p>The main software used for the analysis is Pandas. Pandas is an open-source library in Python for working with relational or labeled data. Pandas allows us to analyze the data and make conclusions based on statistical theories. We use Pandas to clean any messy datasets and make them readable and relevant. It provides various data structures (Series, DataFrame, TimeDelta, etc.) and operations (sort_values, tail, dropna, groupby, etc.) for manipulating numerical and time series data.</p>
<p>Another important library used for the analysis is Mlxtend. Mlxtend is a Python library that provides a variety of tools for market basket analysis, machine learning, and visualization. Mlxtend gives us our apriori and association_rules functions, along with a TransactionEncoder for one-hot encoding.</p>


## Sample code to mask PII

In [16]:

# Mask voice capture PII. OSU screened all of the **VOICE_COMMAND** and **VOICE_CAPTURE** lines and
# replaced any names with either Max or Jane, regardless of whether the name was that of the responder.
# But, just to make sure, run a named-entity recognizer over both free-text columns
# and replace everything it labels as a PERSON with the literal token [PERSON].
columns_list = ['voice_command_command_description', 'voice_capture_message']

# A [PERSON] token anywhere in these columns means an earlier run has already masked them
if not frvrs_logs_df[columns_list].applymap(lambda x: '[PERSON]' in str(x), na_action='ignore').sum().sum():
    import spacy
    try: nlp = spacy.load('en_core_web_sm')
    except OSError as e:
        
        # The model could not be loaded: download it with this kernel's own interpreter and retry.
        # An argument list (instead of a shell string) keeps interpreter paths with spaces intact.
        import subprocess
        print(str(e).strip())
        command_list = [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm', '--quiet']
        print(' '.join(command_list))
        subprocess.run(command_list, check=True)
        nlp = spacy.load('en_core_web_sm')
    
    # Only rows with text in at least one of the two columns need to be scanned
    mask_series = frvrs_logs_df.voice_command_command_description.isnull() & frvrs_logs_df.voice_capture_message.isnull()
    df = frvrs_logs_df[~mask_series]
    def mask_pii(srs):
        """
        Replace every PERSON entity found in the voice columns of one row with [PERSON].
        
        Parameters:
            srs: a row of the logs data frame containing the columns in columns_list.
        
        Returns:
            A copy of the row with the masked text; the row passed in is left untouched.
        """
        srs = srs.copy()
        for idx in columns_list:
            new_text = srs[idx]
            if str(new_text) != 'nan':
                doc = nlp(new_text)
                for entity in doc.ents:
                    if entity.label_ == 'PERSON':
                        
                        # Escape the entity so punctuation in a name is matched literally, not as regex syntax
                        new_text = re.sub(r'\b' + re.escape(entity.text) + r'\b', '[PERSON]', new_text)
                srs[idx] = new_text
    
        return srs
    
    # Write the masked text back into the full data frame, leaving empty cells alone
    for row_index, row_series in df.apply(mask_pii, axis='columns')[columns_list].iterrows():
        for column_name, column_value in row_series.items():
            if str(column_value) != 'nan': frvrs_logs_df.loc[row_index, column_name] = column_value
    
    # Store the results and show the new data frame shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 113)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 113)



## Sample pre-processing code to combine raw data logs and prepare data for analysis

In [3]:

# Get all logs into one data frame: prefer the cached pickle, then the cached CSV,
# and only rebuild from the raw log files when neither cache could be loaded
pickle_loaded = False
if nu.pickle_exists('frvrs_logs_df'):
    try:
        frvrs_logs_df = nu.load_object('frvrs_logs_df')
        pickle_loaded = True
    
    # A pickle that cannot be loaded is reported and the next source is tried
    except AttributeError as e: print(str(e).strip())
if (not pickle_loaded) and nu.csv_exists('frvrs_logs_df', folder_path=nu.saves_folder):
    frvrs_logs_df = nu.load_csv(csv_name='frvrs_logs_df', folder_path=nu.saves_folder)
    pickle_loaded = True
if not pickle_loaded:
    
    # Add the CSVs to the data frame
    frvrs_logs_df = fu.concatonate_logs()
    
    # Remove numerically-named columns (this drops every column whose name contains a digit)
    columns_list = [x for x in frvrs_logs_df.columns if not re.search(r'\d+', str(x))]
    frvrs_logs_df = frvrs_logs_df[columns_list]
    
    # Convert 'TRUE' and 'FALSE' to boolean values; values that are already
    # booleans are kept as they are instead of being mapped to NaN
    boolean_dict = {'TRUE': True, 'FALSE': False, 'True': True, 'False': False, True: True, False: False}
    for cn in [
        'injury_record_injury_treated_with_wrong_treatment', 'injury_record_injury_treated',
        'injury_treated_injury_treated_with_wrong_treatment', 'injury_treated_injury_treated'
    ]: frvrs_logs_df[cn] = frvrs_logs_df[cn].map(boolean_dict)
    
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
print(frvrs_logs_df.shape) # (829116, 106)

# Preview a few rows that have a value in at least one of the "applied" columns
columns_list = [cn for cn in frvrs_logs_df.columns if 'appl' in str(cn)]

# any() over the selected columns also works when no column matches (the mask is then all False)
mask_series = frvrs_logs_df[columns_list].notnull().any(axis='columns')
df = frvrs_logs_df[mask_series][columns_list]
display(df.sample(min(4, df.shape[0])).dropna(axis='columns', how='all').T)

(829116, 114)


Unnamed: 0,639076,112344,61512,154353
tool_applied_type,,Gauze_Dressing,Tourniquet,
tool_applied_attachment_point,,skinCollider_BodyCollideLOD (UnityEngine.GameO...,LeftUpLeg (UnityEngine.GameObject),
tool_applied_tool_location,,,tor_ring (1) (UnityEngine.GameObject),
tool_applied_data,,,tourniquet(Clone) (UnityEngine.GameObject),
tool_applied_sender,,AppliedDressingGauze,AppliedTourniquet,
tag_applied_patient_id,Gary_1 Root,,,Gary_5 Root
tag_applied_type,gray,,,yellow
tool_applied_patient_id,,Mike_5 Root,Mike_1 Root,



### Check for duplicate file ingestion

In [4]:

# Count the distinct source files behind every session UUID: a session
# that shows up in more than one file means the same run was ingested twice
files_per_session_srs = frvrs_logs_df.groupby('session_uuid')['file_name'].transform(pd.Series.nunique)
mask_series = files_per_session_srs > 1
assert not mask_series.any(), "You have duplicate files"

# Disabled remediation, kept for reference: it deletes the offending
# log files that are not already under "Double runs removed/"
# columns_list = ['session_uuid', 'file_name']
# for (session_uuid, file_name), df in frvrs_logs_df[mask_series][columns_list].drop_duplicates().sort_values(columns_list).groupby(columns_list):
#     if not file_name.startswith('Double runs removed/'):
#         file_path = osp.join(fu.data_logs_folder, *file_name.split('/'))
#         os.remove(file_path)


### Add new features according to your increasing domain knowledge

In [5]:

# Add a single patient_id column that modalizes the event-specific
# patient ID columns, unless an earlier run has already added it
new_column_name = 'patient_id'
if new_column_name not in frvrs_logs_df.columns:
    
    # The event-specific columns that each carry a patient ID
    columns_list = [
        'patient_demoted_id',
        'patient_record_id',
        'injury_record_patient_id',
        's_a_l_t_walk_if_can_patient_id',
        's_a_l_t_walked_patient_id',
        's_a_l_t_wave_if_can_patient_id',
        's_a_l_t_waved_patient_id',
        'patient_engaged_id',
        'pulse_taken_patient_id',
        'injury_treated_patient_id',
        'tool_applied_patient_id',
        'tag_applied_patient_id',
        'player_gaze_patient_id',
    ]
    frvrs_logs_df = nu.modalize_columns(frvrs_logs_df, columns_list, new_column_name)
    
    # Persist the widened data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 107)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 107)


In [6]:

# Add a single location_id column that modalizes the event-specific
# location columns, unless an earlier run has already added it
new_column_name = 'location_id'
if new_column_name not in frvrs_logs_df.columns:
    
    # The event-specific columns that each carry a location
    columns_list = [
        'teleport_location',
        'patient_demoted_position',
        'patient_record_position',
        'injury_record_injury_injury_locator',
        's_a_l_t_walk_if_can_sort_location',
        's_a_l_t_walked_sort_location',
        's_a_l_t_wave_if_can_sort_location',
        's_a_l_t_waved_sort_location',
        'patient_engaged_position',
        'bag_access_location',
        'injury_treated_injury_injury_locator',
        'bag_closed_location',
        'tag_discarded_location',
        'tool_discarded_location',
        'player_location_location',
        'player_gaze_location',
    ]
    frvrs_logs_df = nu.modalize_columns(frvrs_logs_df, columns_list, new_column_name)
    
    # Persist the widened data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 108)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 108)


In [7]:

# Add a single injury_id column that modalizes the two injury ID
# columns, unless an earlier run has already added it
new_column_name = 'injury_id'
if new_column_name not in frvrs_logs_df.columns:
    frvrs_logs_df = nu.modalize_columns(
        frvrs_logs_df,
        ['injury_record_id', 'injury_treated_id'],
        new_column_name
    )
    
    # Persist the widened data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 109)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 109)


In [8]:

# Add a single patient_sort column that modalizes the three SORT
# columns, unless an earlier run has already added it
new_column_name = 'patient_sort'
if new_column_name not in frvrs_logs_df.columns:
    frvrs_logs_df = nu.modalize_columns(
        frvrs_logs_df,
        ['patient_demoted_sort', 'patient_record_sort', 'patient_engaged_sort'],
        new_column_name
    )
    
    # Persist the widened data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 110)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 110)


In [9]:

# Any run longer than 16 minutes is probably an instance
# of someone taking off the headset and setting it on the ground.
# 1 second = 1,000 milliseconds; 1 minute = 60 seconds
new_column_name = 'is_scene_aborted'

# This cell always recomputes the flag, so any previously stored column is discarded first
if (new_column_name in frvrs_logs_df.columns): frvrs_logs_df = frvrs_logs_df.drop(columns=new_column_name)

# Evaluate every scene once, while the data frame does not yet contain the flag column.
# NOTE(review): fu.get_is_scene_aborted is not visible from here; if it reads an existing
# is_scene_aborted column, a False placeholder would simply be echoed back - confirm.
scene_gb = frvrs_logs_df.groupby(fu.scene_groupby_columns)

# Rows that belong to no scene group keep the default of False
is_scene_aborted_array = np.zeros(frvrs_logs_df.shape[0], dtype=bool)

# GroupBy.indices gives the integer row positions of each group, so no
# full-frame mask has to be rebuilt per scene (the helper's result is coerced to bool)
for row_positions in scene_gb.indices.values():
    scene_df = frvrs_logs_df.iloc[row_positions]
    is_scene_aborted_array[row_positions] = fu.get_is_scene_aborted(scene_df)
frvrs_logs_df[new_column_name] = is_scene_aborted_array

# Store the results and show the new data frame shape
nu.store_objects(frvrs_logs_df=frvrs_logs_df)
print(frvrs_logs_df.shape) # (829116, 111)
display(frvrs_logs_df.groupby('is_scene_aborted').size().to_frame().rename(columns={0: 'count'}))

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 111)


Unnamed: 0_level_0,count
is_scene_aborted,Unnamed: 1_level_1
False,829116


In [10]:

# Designate as "Orientation" every run in which all the patient IDs are
# some variant of Mike; every other run keeps the default "Triage" label
if 'scene_type' not in frvrs_logs_df.columns:
    frvrs_logs_df['scene_type'] = 'Triage'
column_value = 'Orientation'
if column_value not in frvrs_logs_df.scene_type.unique():
    
    # Test each scene's patient IDs as a group and broadcast the verdict to its rows;
    # rows without a patient ID count as matches, so they never disqualify a scene
    patient_id_gb = frvrs_logs_df.groupby(fu.scene_groupby_columns).patient_id
    base_mask_series = patient_id_gb.transform(
        lambda srs: srs.str.lower().str.contains('mike', na=True).all()
    )
    frvrs_logs_df.loc[base_mask_series, 'scene_type'] = column_value
    
    # Persist the relabeled data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 112)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 112)


In [11]:

# Flag the files that give a sample with a clear count of responders
new_column_name = 'is_a_one_triage_file'
if new_column_name not in frvrs_logs_df.columns:
    
    # Start every row at False, then overwrite the flag one source file at a time
    frvrs_logs_df[new_column_name] = False
    for file_name in frvrs_logs_df.file_name.unique():
        is_a_one_triage_file = fu.get_is_a_one_triage_file(frvrs_logs_df, file_name)
        
        # Every row that came from this file receives the file's verdict
        mask_series = frvrs_logs_df.file_name == file_name
        frvrs_logs_df.loc[mask_series, new_column_name] = is_a_one_triage_file
    
    # Persist the widened data frame, then report its shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 113)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 113)


In [18]:

# Add a voice capture sentiment score: the VADER compound polarity
# of every voice capture message, and NaN where there is no message
if ('voice_capture_sentiment_score' not in frvrs_logs_df.columns):
    from nltk.sentiment.vader import SentimentIntensityAnalyzer
    try: sid = SentimentIntensityAnalyzer()
    except LookupError:
        
        # The VADER lexicon is not installed yet: download it once and retry
        import nltk
        nltk.download('vader_lexicon')
        sid = SentimentIntensityAnalyzer()
    
    # Score all the messages in one pass; na_action='ignore' leaves empty cells as NaN,
    # so the column is created even when no message exists at all
    frvrs_logs_df['voice_capture_sentiment_score'] = frvrs_logs_df.voice_capture_message.map(
        lambda message: sid.polarity_scores(f'\n{message}')['compound'], na_action='ignore'
    ).astype(float)
    
    # Store the results and show the new data frame shape
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (829116, 114)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\frvrs_logs_df.pkl
(829116, 114)


In [19]:

# Export the finished logs data frame through the notebook utilities
# (the cell output below reports the CSV path it was saved to)
nu.save_data_frames(frvrs_logs_df=frvrs_logs_df)

Saving to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\csv\frvrs_logs_df.csv



## Sample code to answer a couple of research questions


Preliminary Research Questions to consider for OSU Subway Bombing Data Set
<ol type="1" start="1">
    <li>
        Basic Questions
        <ol type="A" start="1">
            <li>Time To Hemorrhage Control for all responders</li>
            <li>Triage Efficiency (how long it takes to triage the scene)</li>
            <li>Triage Accuracy (how many patients did they get right and do they do it in the right order; ie, Still or obvious life threat first (Red, Gray, Black), then wavers, then walkers last)</li>
            <li>Subgroup analyses for each of the 3 items above: Medical Students, Residents, Physicians/Faculty/Fellows, First Responders</li>
        </ol>
    </li>
    <li>
        Efficiency and accuracy in triage of mass casualty
        <ol type="A" start="1">
            <li>Distance travelled and efficiency vs accuracy (ie in looking at the maps, does a small footprint correlate to efficient performance). Do novice or lower level learners go back to the same patients multiple times?</li>
            <li>Future: correlation between gaze and intent.  If learner sees bleeding, do they treat it.</li>
        </ol>
    </li>
    <li>
        Determination of triage decisiveness, hover or bag open as an indicator (ie does a novice learner (medical student) open the bag or hover more than a more experienced learner (physician))?
    </li>
    <li>
        Are all mass casualty tasks similar in difficulty (wound packing, needle decompression, tourniquet application, triage tag determination) based on time spent?
    </li>
    <li>
        Additional Metrics
        <ol type="A" start="1">
            <li>
                Performance
                <ol type="a" start="1">
                    <li>Number of patients engaged (this could be including the ones that wave, walk, put hands up)</li>
                    <li>Number of patients treated</li>
                    <li>Time to first treatment</li>
                    <li>Patient accuracy rate (how many patients correct / number of patients treated)</li>
                    <li>Number of pulses taken by number of patients treated</li>
                    <li>Treatment placement error (how far is treatment applied to location of wound. Zero would correspond to placing the treatment exactly on the wound)</li>
                </ol>
            </li>
            <li>
                Scene Efficiency
                <ol type="a" start="1">
                    <li>How many patients are treated between walk / wave commands</li>
                    <li>Total number of teleports</li>
                    <li>Total user actions taken</li>
                    <li>Average time between user actions</li>
                    <li>Interactions per patient</li>
                </ol>
            </li>
            <li>
                Communication / Interaction
                <ol type="a" start="1">
                    <li>How many times are walk / wave commands issued</li>
                    <li>What is the average time between walk / wave commands</li>
                    <li>Number of voice captures per session (would be interesting to see if this correlates with anything)</li>
                    <li>Voice capture content (themes of what they are saying)</li>
                </ol>
            </li>
        </ol>
    </li>
    <li>
        Conditions to Analyze by
        <ol type="A" start="1">
            <li>Analyze results by which triage method deployed (SALT, START) if that can be distinguished</li>
        </ol>
    </li>
    <li>
        Questions
        <ol type="A" start="1">
            <li>Is low time to first treatment associated with higher performance?</li>
            <li>Are those that talk more, less efficient, lower performance?</li>
            <li>Are those that have lower average time between actions, higher in performance?</li>
        </ol>
    </li>
</ol>

In [4]:

# Estimating the time to hemorrhage control: one row per patient whose
# treated injury required a tourniquet or wound packing
if nu.pickle_exists('notice_to_control_time_df'):
    notice_to_control_time_df = nu.load_object('notice_to_control_time_df')
else:
    
    # Create a mask to filter rows where 'injury_treated_required_procedure' is 'tourniquet' or 'woundpack'
    mask_series = frvrs_logs_df.injury_treated_required_procedure.isin(['tourniquet', 'woundpack'])
    
    # Group the hemorrhage-control rows by patient, in tick order
    gb = frvrs_logs_df[mask_series].sort_values(['action_tick']).groupby(fu.patient_groupby_columns)
    
    # Group the complete log once, so that each patient's entire history is a
    # lookup instead of a fresh comparison against every row of the data frame
    patient_history_gb = frvrs_logs_df.groupby(fu.patient_groupby_columns)
    
    # Create an empty list to store the row dictionaries
    rows_list = []
    
    # Iterate over the groupby object
    for group_key, controlled_bleeder_df in gb:
    
        # Start the row with the groupby columns, paired with this group's key values
        row_dict = dict(zip(fu.patient_groupby_columns, group_key))
        
        # Add the logger version and scene categories
        row_dict['logger_version'] = fu.get_logger_version(controlled_bleeder_df)
        row_dict['is_scene_aborted'] = fu.get_is_scene_aborted(controlled_bleeder_df)
        row_dict['scene_type'] = fu.get_scene_type(controlled_bleeder_df)
        
        # Get the first time to hemorrhage control
        first_control = controlled_bleeder_df.action_tick.min()
        row_dict['first_control'] = first_control
    
        # Get the patient's entire history and first notice
        patient_df = patient_history_gb.get_group(group_key)
        
        # NOTE(review): first_notice and first_action both come from this one helper, so
        # the two durations below are always equal - confirm whether a separate
        # "first notice" helper was intended
        first_interaction = fu.get_first_patient_interaction(patient_df)
        first_notice = first_interaction
        row_dict['first_notice'] = first_notice
        
        # Get the body region info as a single value; when the patient has several
        # distinct regions, the first one in log order is kept - NOTE(review): confirm
        body_regions_array = patient_df.injury_treated_body_region.dropna().unique()
        if body_regions_array.size:
            row_dict['injury_treated_body_region'] = body_regions_array[0]
    
        # Calculate the time from notice to control
        row_dict['notice_to_control_time'] = first_control - first_notice
    
        # Get the first action with the patient
        first_action = first_interaction
        row_dict['first_action'] = first_action
    
        # Calculate the time from action to control
        row_dict['action_to_control_time'] = first_control - first_action
    
        # Add the row dictionary to the list
        rows_list.append(row_dict)
    
    # Create a data frame from the list of row dictionaries
    notice_to_control_time_df = DataFrame(rows_list)
    nu.store_objects(notice_to_control_time_df=notice_to_control_time_df)

In [4]:

# Get the time that patients are engaged: one row per still
# patient per scene, with the ticks of first notice and first engagement
if nu.pickle_exists('still_patients_df'):
    still_patients_df = nu.load_object('still_patients_df')
else:
    
    # Group the data by session and scene
    gb = frvrs_logs_df.sort_values(['action_tick']).groupby(fu.scene_groupby_columns)
    
    # Get a list of triage SORT columns
    sort_columns_list = [cn for cn in frvrs_logs_df.columns if cn.endswith('_sort')]
    
    # Iterate over each run to get the data rows
    rows_list = []
    for scene_key, scene_df in gb:
        
        # Pair the scene groupby columns with this scene's key values
        scene_key_dict = dict(zip(fu.scene_groupby_columns, scene_key))
    
        # Get the logger version and scene categories
        logger_version = fu.get_logger_version(scene_df)
        is_scene_aborted = fu.get_is_scene_aborted(scene_df)
        scene_type = fu.get_scene_type(scene_df)
        
        # Get list of patients in a scene that are still; any() over the SORT columns
        # also works when there are none, and rows without a patient ID are skipped
        mask_series = (scene_df[sort_columns_list] == 'still').any(axis='columns')
        still_list = scene_df[mask_series].patient_id.dropna().unique().tolist()
    
        # Get the list of first engagements with all still patients
        for patient_id in still_list:
            row_dict = {}

            # Add scene category info
            row_dict['logger_version'] = logger_version
            for cn in fu.scene_groupby_columns: row_dict[cn] = scene_key_dict[cn]
            row_dict['is_scene_aborted'] = is_scene_aborted
            row_dict['scene_type'] = scene_type
            
            # Add the patient groupby columns (the scene key plus this patient's ID)
            patient_key_dict = {**scene_key_dict, 'patient_id': patient_id}
            for cn in fu.patient_groupby_columns: row_dict[cn] = patient_key_dict[cn]
            
            # Get the patient's first notice
            # NOTE(review): first_notice and first_action both come from this one helper,
            # so they are always equal - confirm whether a separate helper was intended
            patient_mask_series = (scene_df.patient_id == patient_id)
            first_interaction = fu.get_first_patient_interaction(scene_df[patient_mask_series])
            first_notice = first_interaction
            row_dict['first_notice'] = first_notice
            
            # Get the first action with the patient
            first_action = first_interaction
            row_dict['first_action'] = first_action
            
            # Get the patient's first engagement, or NaN when the patient was never engaged
            engaged_df = scene_df[patient_mask_series & (scene_df.action_type == 'PATIENT_ENGAGED')]
            if engaged_df.shape[0]:
                first_engagement = engaged_df.action_tick.min()
                notice_to_engagement = first_engagement - first_notice
                action_to_engagement = first_engagement - first_action
            else:
                first_engagement = np.nan
                notice_to_engagement = np.nan
                action_to_engagement = np.nan
            row_dict['first_engagement'] = first_engagement
            row_dict['notice_to_engagement'] = notice_to_engagement
            row_dict['action_to_engagement'] = action_to_engagement
            
            rows_list.append(row_dict)
    still_patients_df = DataFrame(rows_list)
    nu.store_objects(still_patients_df=still_patients_df)

In [15]:

# Count the patients for every combination of "treated with the wrong
# treatment" and "treated", per patient per scene per session
if nu.pickle_exists('patient_count_df'):
    patient_count_df = nu.load_object('patient_count_df')
else:
    
    # Group the frvrs_logs_df dataframe by the groupby columns, in tick order
    gb = frvrs_logs_df.sort_values(['action_tick']).groupby(fu.patient_groupby_columns)
    
    # Group the log in its stored row order as well, so that each patient's rows are a
    # lookup instead of a fresh comparison against every row of the data frame
    unsorted_gb = frvrs_logs_df.groupby(fu.patient_groupby_columns)
    
    # Initialize an empty list to store rows
    rows_list = []
    
    # Iterate over each patient in each scene in each file
    for group_key, patient_df in gb:
        
        # Pair the groupby columns with this group's key values
        key_dict = dict(zip(fu.patient_groupby_columns, group_key))
        
        # Get the logger version and the scene categories
        logger_version = fu.get_logger_version(patient_df)
        is_scene_aborted = fu.get_is_scene_aborted(patient_df)
        scene_type = fu.get_scene_type(patient_df)
        
        # Get this patient's rows, in the same order as they appear in frvrs_logs_df
        patient_rows_df = unsorted_gb.get_group(group_key)
    
        # Iterate over the possible values for the two columns
        for wrong in [True, False]:
            for eventually in [True, False]:
                
                # Start the row with the values of the groupby columns
                row_dict = dict(key_dict)
                
                # Add the logger version and the scene categories to the row dictionary
                row_dict['logger_version'] = logger_version
                row_dict['is_scene_aborted'] = is_scene_aborted
                row_dict['scene_type'] = scene_type
                
                # Add the current values of the two columns to the row dictionary
                row_dict['injury_treated_injury_treated_with_wrong_treatment'] = wrong
                row_dict['injury_treated_injury_treated'] = eventually
    
                # Keep only this patient's rows that match both column values
                mask_series = (patient_rows_df.injury_treated_injury_treated_with_wrong_treatment == wrong)
                mask_series &= (patient_rows_df.injury_treated_injury_treated == eventually)
    
                # Calculate the unique patient count for the filtered rows
                row_dict['patient_count'] = fu.get_patient_count(patient_rows_df[mask_series])
    
                # Add the row dictionary to the rows list
                rows_list.append(row_dict)
    
    # Create a DataFrame from the list of rows
    patient_count_df = DataFrame(rows_list)
    nu.store_objects(patient_count_df=patient_count_df)

Pickling to C:\Users\DaveBabbitt\Documents\GitHub\itm-analysis-reporting\saves\pkl\patient_count_df.pkl
