In [1]:

%pprint
# Put the sibling ../py folder on the module search path (only once, so re-running this cell is harmless).
# Presumably this is what makes the FRVRS package imported in a later cell resolvable - confirm.
import sys
if ('../py' not in sys.path): sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [15]:

import ast
import math
import os
import os.path as osp
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from pandas import DataFrame, Index, Series, concat

from FRVRS import nu, fu

In [3]:

# Load the pickled data frames through the project's notebook utilities.
# Two frames are requested, but only frvrs_logs_df is pulled out of the returned mapping in this cell;
# category_history_df stays inside data_frames_list.
data_frames_list = nu.load_data_frames(frvrs_logs_df='frvrs_logs_df', category_history_df='category_history_df')
frvrs_logs_df = data_frames_list['frvrs_logs_df']
print(frvrs_logs_df.shape) # (829116, 125)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/category_history_df.pkl.
(829116, 125)


In [7]:

# Injury- and patient-level column names kept together as a candidate feature list.
# NOTE(review): none of the cells visible in this notebook reads input_features - confirm it is still needed.
input_features = [
    'injury_id', 'injury_severity', 'injury_required_procedure', 'patient_salt', 'patient_sort', 'patient_pulse', 'patient_breath',
    'patient_hearing', 'patient_mood', 'patient_pose'
    ]

In [39]:

# For every scene, summarize each engaged patient's ML triage-priority predictions and then search for the
# (statistic, model column, sort direction) whose derived patient ordering best matches the order in which
# the responder actually engaged the patients. The winning combination is recorded on every row of the scene.
ml_columns = ['lr_triage_priority_model_prediction', 'dtr_triage_priority_model_prediction']
stats_column_name = 'stats_type'
stats_columns_list = fu.scene_groupby_columns + ['patient_id', stats_column_name] + ml_columns

# Collect one frame per scene and concatenate once at the end instead of growing a frame inside the loop
scene_stats_frames_list = []
for (session_uuid, scene_id), scene_df in frvrs_logs_df.groupby(fu.scene_groupby_columns):

    # Pair each groupby column with this scene's key value (replaces eval() on the column names)
    scene_key_dict = dict(zip(fu.scene_groupby_columns, (session_uuid, scene_id)))

    # Create the patient sort tuples list
    engagement_starts_list = []
    patient_stats_frames_list = []
    for patient_id, patient_df in scene_df.groupby('patient_id'):

        # Get the most recent non-null patient_sort value, if available
        mask_series = ~patient_df.patient_sort.isnull()
        patient_sort = (
            patient_df[mask_series].sort_values('action_tick').iloc[-1].patient_sort
            if mask_series.any()
            else None
        )

        # Check if the responder even interacted with this patient
        mask_series = patient_df.action_type.isin(fu.responder_negotiations_list)
        if not mask_series.any():
            continue

        # Keep only the engagement rows that also carry a location
        engagements_list = patient_df[mask_series].action_tick
        mask_series = patient_df.action_tick.isin(engagements_list) & ~patient_df.location_id.isnull()
        if not mask_series.any():
            continue
        df = patient_df[mask_series].sort_values('action_tick')

        # Get the first engagement start and location
        engagement_start = df.iloc[0].action_tick

        # The location is logged as the string form of a tuple; literal_eval parses it without
        # executing arbitrary code the way eval() would
        engagement_location = ast.literal_eval(df.iloc[0].location_id)
        location_tuple = (engagement_location[0], engagement_location[2])

        # Add engagement information to the list
        engagement_tuple = (patient_id, engagement_start, location_tuple, patient_sort)
        engagement_starts_list.append(engagement_tuple)

        # Get the patient stats, one row per statistic
        df = nu.get_statistics(patient_df, ml_columns).reset_index(drop=False).rename(columns={'index': stats_column_name})
        for cn, key_value in scene_key_dict.items(): df[cn] = key_value
        df['patient_id'] = patient_id
        patient_stats_frames_list.append(df[stats_columns_list])

    # Combine this scene's patients (patient_stats_df stays a notebook global that later cells inspect)
    if patient_stats_frames_list:
        patient_stats_df = concat(patient_stats_frames_list, axis='index').reset_index(drop=True)
    else:
        patient_stats_df = DataFrame([], columns=stats_columns_list)

    # Sort the starts list chronologically
    actual_engagement_order = sorted(engagement_starts_list, key=lambda x: x[1], reverse=False)
    engagement_starts_dict = {patient_id: engagement_start for patient_id, engagement_start, location_tuple, patient_sort in actual_engagement_order}

    # Calculate the R-squared adjusted value as a measure of derived ordering
    y = Series([t[1] for t in actual_engagement_order]).values.reshape(-1, 1)

    # Placeholders that survive only when no regression produced a measure above -2
    max_measure = -2
    max_stats_type = 'stats_type'
    max_ml_column = 'ml_column'
    max_ascending = 'ascending'
    for stats_type in patient_stats_df[stats_column_name].unique():

        # Filter once per statistic rather than once per (column, direction) combination
        mask_series = (patient_stats_df[stats_column_name] == stats_type)
        stats_type_df = patient_stats_df[mask_series]
        for ml_column in ml_columns:
            for ascending in [True, False]:
                derived_engagement_order = stats_type_df.sort_values(ml_column, ascending=ascending).patient_id.tolist()
                X = Series([engagement_starts_dict[patient_id] for patient_id in derived_engagement_order]).values.reshape(-1, 1)
                if not X.shape[0]:
                    continue
                X1 = sm.add_constant(X)
                try:
                    measure_of_derived_ordering = sm.OLS(y, X1).fit().rsquared_adj
                except Exception:

                    # Best effort: skip combinations whose regression cannot be fit (for instance if X and y
                    # end up with different lengths) without also swallowing KeyboardInterrupt
                    continue
                if (max_measure < measure_of_derived_ordering):
                    max_measure = measure_of_derived_ordering
                    max_stats_type = stats_type
                    max_ml_column = ml_column
                    max_ascending = ascending
    patient_stats_df['max_measure'] = max_measure
    patient_stats_df['max_stats_type'] = max_stats_type
    patient_stats_df['max_ml_column'] = max_ml_column
    patient_stats_df['max_ascending'] = max_ascending

    # Queue this scene's frame; scenes without engaged patients contribute no rows
    if not patient_stats_df.empty:
        scene_stats_frames_list.append(patient_stats_df)

# Build the scene stats data frame in a single concatenation
if scene_stats_frames_list:
    scene_stats_df = concat(scene_stats_frames_list, axis='index').reset_index(drop=True)
else:
    scene_stats_df = DataFrame([], columns=stats_columns_list)

In [51]:

# Tally which statistic won in scenes whose best adjusted R-squared is exactly 1.0.
# The max_* values are repeated on every row of a scene, so these are row counts rather than scene counts.
mask_series = scene_stats_df.max_measure.eq(1.0)
display(scene_stats_df.loc[mask_series, 'max_stats_type'].value_counts())

mean    837
25%     432
SD      378
max     261
mode    171
min      45
75%      36
Name: max_stats_type, dtype: int64

In [57]:

# Narrow the perfect-fit rows to those won by the 'mean' statistic and tally the winning model column
# (row counts, since the max_* values are repeated on every row of a scene)
is_perfect_fit = scene_stats_df.max_measure.eq(1.0)
is_mean_winner = scene_stats_df.max_stats_type.eq('mean')
mask_series = is_perfect_fit & is_mean_winner
display(scene_stats_df.loc[mask_series, 'max_ml_column'].value_counts())

dtr_triage_priority_model_prediction    630
lr_triage_priority_model_prediction     207
Name: max_ml_column, dtype: int64

In [58]:

# For perfect-fit rows won by the mean of the dtr model's prediction, tally the winning sort direction
mask_series = (
    scene_stats_df.max_measure.eq(1.0)
    & scene_stats_df.max_stats_type.eq('mean')
    & scene_stats_df.max_ml_column.eq('dtr_triage_priority_model_prediction')
)
display(scene_stats_df.loc[mask_series, 'max_ascending'].value_counts())

True     522
False    108
Name: max_ascending, dtype: int64

In [61]:

# Show the per-patient statistics for rows whose scene was best explained (adjusted R-squared of exactly 1.0)
# by sorting on the mean of the dtr model's prediction in ascending order
mask_series = (scene_stats_df.max_measure == 1.0) & (scene_stats_df.max_stats_type == 'mean') & (scene_stats_df.max_ml_column == 'dtr_triage_priority_model_prediction')

# max_ascending holds the placeholder string 'ascending' for scenes where no regression beat the initial
# measure, so compare against True explicitly instead of AND-ing the raw, possibly mixed-type column
mask_series &= scene_stats_df.max_ascending.eq(True)
columns_list = [
    'patient_id', 'stats_type', 'lr_triage_priority_model_prediction', 'dtr_triage_priority_model_prediction'
]
display(scene_stats_df.loc[mask_series, columns_list])

Unnamed: 0,patient_id,stats_type,lr_triage_priority_model_prediction,dtr_triage_priority_model_prediction
5868,Mike_0 Root,mean,7.081984,7.375000
5869,Mike_0 Root,mode,8.623898,9.000000
5870,Mike_0 Root,median,8.623898,9.000000
5871,Mike_0 Root,SD,2.075794,2.939736
5872,Mike_0 Root,min,3.926232,2.500000
...,...,...,...,...
41692,Mike_3 Root,min,5.734049,8.000000
41693,Mike_3 Root,25%,6.206855,8.000000
41694,Mike_3 Root,50%,8.623898,9.000000
41695,Mike_3 Root,75%,8.623898,9.000000


In [None]:

# Order patients by the mean of the dtr model's prediction, lowest first.
# patient_stats_df is whatever frame the scene loop left behind on its final iteration.
mask_series = patient_stats_df.stats_type.eq('mean')
derived_engagement_order = (
    patient_stats_df.loc[mask_series]
    .sort_values(by='dtr_triage_priority_model_prediction', ascending=True)['patient_id']
    .tolist()
)

In [39]:

# Recompute the per-scene patient statistics and the best-matching (statistic, model column, sort direction).
# This cell repeats the scene loop from earlier in the notebook and relies on ml_columns, stats_column_name
# and stats_columns_list already being defined. It rebuilds scene_stats_df from scratch: appending onto the
# existing frame would add every scene a second time whenever this cell runs after the earlier one.
scene_stats_frames_list = []
for (session_uuid, scene_id), scene_df in frvrs_logs_df.groupby(fu.scene_groupby_columns):

    # Pair each groupby column with this scene's key value (replaces eval() on the column names)
    scene_key_dict = dict(zip(fu.scene_groupby_columns, (session_uuid, scene_id)))

    # Create the patient sort tuples list
    engagement_starts_list = []
    patient_stats_frames_list = []
    for patient_id, patient_df in scene_df.groupby('patient_id'):

        # Get the most recent non-null patient_sort value, if available
        mask_series = ~patient_df.patient_sort.isnull()
        patient_sort = (
            patient_df[mask_series].sort_values('action_tick').iloc[-1].patient_sort
            if mask_series.any()
            else None
        )

        # Check if the responder even interacted with this patient
        mask_series = patient_df.action_type.isin(fu.responder_negotiations_list)
        if not mask_series.any():
            continue

        # Keep only the engagement rows that also carry a location
        engagements_list = patient_df[mask_series].action_tick
        mask_series = patient_df.action_tick.isin(engagements_list) & ~patient_df.location_id.isnull()
        if not mask_series.any():
            continue
        df = patient_df[mask_series].sort_values('action_tick')

        # Get the first engagement start and location
        engagement_start = df.iloc[0].action_tick

        # The location is logged as the string form of a tuple; literal_eval parses it without
        # executing arbitrary code the way eval() would
        engagement_location = ast.literal_eval(df.iloc[0].location_id)
        location_tuple = (engagement_location[0], engagement_location[2])

        # Add engagement information to the list
        engagement_tuple = (patient_id, engagement_start, location_tuple, patient_sort)
        engagement_starts_list.append(engagement_tuple)

        # Get the patient stats, one row per statistic
        df = nu.get_statistics(patient_df, ml_columns).reset_index(drop=False).rename(columns={'index': stats_column_name})
        for cn, key_value in scene_key_dict.items(): df[cn] = key_value
        df['patient_id'] = patient_id
        patient_stats_frames_list.append(df[stats_columns_list])

    # Combine this scene's patients with a single concatenation
    if patient_stats_frames_list:
        patient_stats_df = concat(patient_stats_frames_list, axis='index').reset_index(drop=True)
    else:
        patient_stats_df = DataFrame([], columns=stats_columns_list)

    # Sort the starts list chronologically
    actual_engagement_order = sorted(engagement_starts_list, key=lambda x: x[1], reverse=False)
    engagement_starts_dict = {patient_id: engagement_start for patient_id, engagement_start, location_tuple, patient_sort in actual_engagement_order}

    # Calculate the R-squared adjusted value as a measure of derived ordering
    y = Series([t[1] for t in actual_engagement_order]).values.reshape(-1, 1)

    # Placeholders that survive only when no regression produced a measure above -2
    max_measure = -2
    max_stats_type = 'stats_type'
    max_ml_column = 'ml_column'
    max_ascending = 'ascending'
    for stats_type in patient_stats_df[stats_column_name].unique():

        # Filter once per statistic rather than once per (column, direction) combination
        mask_series = (patient_stats_df[stats_column_name] == stats_type)
        stats_type_df = patient_stats_df[mask_series]
        for ml_column in ml_columns:
            for ascending in [True, False]:
                derived_engagement_order = stats_type_df.sort_values(ml_column, ascending=ascending).patient_id.tolist()
                X = Series([engagement_starts_dict[patient_id] for patient_id in derived_engagement_order]).values.reshape(-1, 1)
                if not X.shape[0]:
                    continue
                X1 = sm.add_constant(X)
                try:
                    measure_of_derived_ordering = sm.OLS(y, X1).fit().rsquared_adj
                except Exception:

                    # Best effort: skip combinations whose regression cannot be fit (for instance if X and y
                    # end up with different lengths) without also swallowing KeyboardInterrupt
                    continue
                if (max_measure < measure_of_derived_ordering):
                    max_measure = measure_of_derived_ordering
                    max_stats_type = stats_type
                    max_ml_column = ml_column
                    max_ascending = ascending
    patient_stats_df['max_measure'] = max_measure
    patient_stats_df['max_stats_type'] = max_stats_type
    patient_stats_df['max_ml_column'] = max_ml_column
    patient_stats_df['max_ascending'] = max_ascending

    # Queue this scene's frame; scenes without engaged patients contribute no rows
    if not patient_stats_df.empty:
        scene_stats_frames_list.append(patient_stats_df)

# Build the scene stats data frame in a single concatenation
if scene_stats_frames_list:
    scene_stats_df = concat(scene_stats_frames_list, axis='index').reset_index(drop=True)
else:
    scene_stats_df = DataFrame([], columns=stats_columns_list)