In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


In [3]:
# Read the sponsor-provided, de-identified patient workbook into a DataFrame
# and print a column-by-column summary (non-null counts and dtypes).
df = pd.read_excel(
    io='../../docs/MSDS_database_cleaned_deidentified_revised.xlsx',
    sheet_name='MSDS_database_cleaned_deidentif',
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 93 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            120 non-null    int64  
 1   age                           120 non-null    int64  
 2   sex                           120 non-null    object 
 3   bmi                           120 non-null    float64
 4   C7CSVL_preop
                 120 non-null    float64
 5   SVA_preop                     120 non-null    float64
 6   TK_T5_T12_preop               120 non-null    float64
 7   TK_T10_L2_preop               120 non-null    float64
 8   T4PA_preop                    120 non-null    float64
 9   L1PA_preop                    120 non-null    float64
 10  global_tilt                   120 non-null    float64
 11  L1PA_ideal                    120 non-null    float64
 12  T4L1PA_preop                  120 non-null    float64
 13  LL_pr

In [5]:
# Define predictor variables.
# Each column is listed exactly once: the original list repeated 'HU_UIV_preop',
# which produced a duplicated column in the predictor frame.
# NOTE(review): 'num_levels' is requested here while the surgical plan below uses
# 'num_fused_levels' -- confirm both columns exist in the workbook. A name that is
# missing from df is silently created as an all-NaN column by this constructor.
predictor_columns = ['age', 'sex', 'PI_preop', 'PT_preop', 'LL_preop', 'SS_preop', 'T4PA_preop', 'L1PA_preop',
                     'SVA_preop', 'cobb_main_curve_preop', 'FC_preop', 'tscore_femneck_preop',
                     'HU_UIV_preop', 'HU_UIVplus1_preop', 'HU_UIVplus2_preop', 'num_levels']
df_predictors = pd.DataFrame(df, columns=predictor_columns)

# Encode sex numerically; any value other than 'Male'/'Female' becomes NaN.
df_predictors['sex'] = df_predictors['sex'].map({'Male': 0, 'Female': 1})

# Define surgical plan variables.
surgical_plan = pd.DataFrame(df, columns = ['num_fused_levels','ALIF','XLIF','TLIF',
                                            'num_rods','num_screws','osteotomy'])

#### 1. Compute bonus points for improvement in GAP score

Bonus points are awarded according to the degree of improvement in the patient's spinal condition. For example, a patient whose condition improves from severely disproportioned to proportioned is awarded the highest bonus (option 1 below).

In [7]:
pd.set_option('display.max_rows', None)

# Difference between preop and postop GAP score (preop minus postop).
df['gap_change'] = df['gap_score_preop'] - df['gap_score_postop']

# Bin the postop GAP score into categories: P (<= 2), MD (> 2 and <= 6), SD (> 6 and <= 13).
# Scores above 13 or missing scores fall outside the bins and become NaN.
df['gap_category_postop'] = pd.cut(df['gap_score_postop'], bins=[-np.inf, 2, 6, 13], labels=['P', 'MD', 'SD'])

# Work on plain arrays so the scoring is positional and does not depend on df's index labels.
preop_category = df['gap_category'].astype(object).to_numpy()
postop_category = df['gap_category_postop'].astype(object).to_numpy()

# Option 1 - score by change in GAP category:
#   100 if the patient ends in 'P' coming from 'SD' or 'MD',
#    50 if the patient moves from 'SD' to 'MD',
#     0 otherwise (including rows with a missing category).
reached_proportioned = (postop_category == 'P') & ((preop_category == 'SD') | (preop_category == 'MD'))
severe_to_moderate = (postop_category == 'MD') & (preop_category == 'SD')
gap_category_improvement = np.select(
    [reached_proportioned, severe_to_moderate], [100, 50], default=0
).tolist()

# Option 2 - score by change in GAP score: 100 for a drop of >= 5, 50 for >= 2, else 0.
# Kept in its own list: the original appended these to gap_category_improvement,
# leaving one list of length 2 * len(df) that mixed both scoring schemes.
gap_change_values = df['gap_change'].to_numpy()
gap_change_improvement = np.select(
    [gap_change_values >= 5, gap_change_values >= 2], [100, 50], default=0
).tolist()


#### 2. Calculation of Composite Score

The factors used in computing the composite score are the GAP score, L1PA penalty, L4S1 penalty, T4L1PA penalty, LL penalty, and improvement in GAP score. The first five are given equal weights, while GAP improvement is weighted at 0.5.

Calculation method:

GAP: The GAP score (1-13) is converted to a 0-100 scale on which a lower GAP score gives a higher value: $100 - \frac{100}{13} \times \text{GAP}$.

Penalties: Quadratic penalties are calculated for violation of the respective constraints and normalized based on the maximum violation in the dataset. Then the penalties are subtracted from a base score of 100.

Improvement in GAP: A score of 100 is given if the patient's condition improved from severely disproportioned or moderately disproportioned to proportioned. Otherwise, a score of 50 is given if the patient's condition improved by one category. No score is given if the patient's condition remains in the same category or worsens. 

* note : may need to find a better way to normalize

In [None]:

def _tolerance_violation(values, lower, upper):
    """Return how far each value lies outside [lower, upper]; 0 inside, NaN stays NaN."""
    values = np.asarray(values, dtype=float)
    # np.maximum propagates NaN, so a missing measurement yields a NaN violation.
    return np.maximum(lower - values, 0) + np.maximum(values - upper, 0)


def _penalty_score(violation):
    """Turn violations into scores: 100 minus the quadratic penalty normalized to 0-100."""
    violation = np.asarray(violation, dtype=float)
    observed = violation[~np.isnan(violation)]
    max_violation = observed.max() if observed.size else 0.0
    if max_violation == 0:
        # Nobody violates the constraint: the penalty is 0 everywhere instead of 0/0 = NaN.
        # Multiplying by 0 keeps NaN rows as NaN.
        return 100 - violation * 0
    penalty = violation ** 2 / max_violation ** 2 * 100
    return 100 - penalty


def composite_score_calc(df, w1=1, w2=1, w3=1, w4=1, w5=1, w6=0.5):
    """
    Compute composite score based on gap score and quadratic penalties for constraint violations.

    Each component is a score where higher is better:
      * GAP score: 100 - (100/13) * gap_score_postop.
      * L1PA, T4L1PA: 100 minus a normalized quadratic penalty on the part of
        |mismatch| that exceeds 3.
      * L4S1: 100 minus a normalized quadratic penalty on the distance of
        L4_S1_postop outside the range 35 to 45.
      * LL: same as L1PA, with mismatch = LL_postop - (0.54 * PI_preop + 27.6).
      * GAP improvement: 100 if the patient ends in category 'P' coming from
        'SD' or 'MD', 50 for 'SD' -> 'MD', 0 otherwise.
    Each penalty is normalized by the largest violation found in df, so the
    worst row of df receives a penalty of 100 for that constraint.

    Parameters:
    df: patient surgical data in Pandas DataFrame. Required columns:
        gap_score_postop, L1PA_ideal_mismatch_postop, L4_S1_postop,
        T4L1PA_ideal_mismatch_postop, PI_preop, LL_postop, gap_category.
        gap_category_postop is used when present; otherwise it is derived from
        gap_score_postop with the bins (<=2 'P', <=6 'MD', <=13 'SD').
    w1: weight for gap score
    w2: weight for L1PA penalty
    w3: weight for L4S1 penalty
    w4: weight for T4L1PA penalty
    w5: weight for LL penalty
    w6: weight for gap improvement score

    The weights are rescaled to sum to 1, so they must not sum to zero.

    Returns:
    composite scores as a numpy array with one value per row of df, in row
    order. A row is NaN if any of its numeric inputs is NaN.
    """
    # Calculate relative weights.
    total_weight = w1 + w2 + w3 + w4 + w5 + w6
    rel_w1 = w1 / total_weight
    rel_w2 = w2 / total_weight
    rel_w3 = w3 / total_weight
    rel_w4 = w4 / total_weight
    rel_w5 = w5 / total_weight
    rel_w6 = w6 / total_weight

    # GAP score: linear conversion to a 0-100 scale where a lower GAP score scores higher.
    # (An alternative would be a per-category grade, e.g. 100 / 50 / 0 for P / MD / SD.)
    gap_score_linear = 100 - (100 / 13) * df['gap_score_postop'].to_numpy(dtype=float)

    # L1PA: a mismatch within +/-3 is not penalized.
    # The original left the normalizer as an empty list, which raised TypeError on `** 2`.
    l1pa_score = _penalty_score(
        _tolerance_violation(df['L1PA_ideal_mismatch_postop'], -3, 3))

    # L4S1: values from 35 to 45 are not penalized.
    l4s1_score = _penalty_score(
        _tolerance_violation(df['L4_S1_postop'], 35, 45))

    # T4L1PA: a mismatch within +/-3 is not penalized.
    t4l1pa_score = _penalty_score(
        _tolerance_violation(df['T4L1PA_ideal_mismatch_postop'], -3, 3))

    # LL: mismatch against the ideal value computed from PI_preop; +/-3 is not penalized.
    ll_ideal = 0.54 * df['PI_preop'] + 27.6
    ll_mismatch = df['LL_postop'] - ll_ideal
    ll_score = _penalty_score(_tolerance_violation(ll_mismatch, -3, 3))

    # GAP improvement score based on category change. Plain object arrays make the
    # comparison positional, so it works for any index on df.
    if 'gap_category_postop' in df.columns:
        postop_source = df['gap_category_postop']
    else:
        postop_source = pd.cut(df['gap_score_postop'],
                               bins=[-np.inf, 2, 6, 13], labels=['P', 'MD', 'SD'])
    postop_category = postop_source.astype(object).to_numpy()
    preop_category = df['gap_category'].astype(object).to_numpy()

    reached_proportioned = (postop_category == 'P') & (
        (preop_category == 'SD') | (preop_category == 'MD'))
    severe_to_moderate = (postop_category == 'MD') & (preop_category == 'SD')
    gap_score_improvement = np.select(
        [reached_proportioned, severe_to_moderate], [100, 50], default=0)

    # Composite score: weighted average of the six component scores.
    composite = (rel_w1 * gap_score_linear +
                 rel_w2 * l1pa_score +
                 rel_w3 * l4s1_score +
                 rel_w4 * t4l1pa_score +
                 rel_w5 * ll_score +
                 rel_w6 * gap_score_improvement)

    return composite


# Score every patient. The weights are spelled out (same values as the function
# defaults) so they can be tuned here: w1 GAP, w2 L1PA, w3 L4S1, w4 T4L1PA,
# w5 LL, w6 GAP improvement.
composite_scores = composite_score_calc(df, w1=1, w2=1, w3=1, w4=1, w5=1, w6=0.5)

composite_scores

array([89.74465424, 81.60831412, 87.55295893, 92.74968039, 98.6013986 ,
       83.28100122, 85.0854078 , 83.11373348, 86.53438076, 97.26205587,
       71.17878781, 98.04964561, 98.20620854, 89.27356397, 86.83759553,
       97.91336354, 72.77289376, 81.07065437, 76.4932694 , 83.92216759,
       79.92038552, 39.86562298, 28.09413685, 96.68271654, 99.35148945,
       68.36419619, 80.41718919, 79.60949981, 84.85702654, 73.94501783,
       78.44108043, 98.59270147, 96.55884132, 64.93112067, 75.735941  ,
       87.06310841, 68.75438784, 83.13108081, 71.96922135, 65.85950732,
       81.38883739, 93.5211849 , 67.92878662, 77.29127973, 65.044447  ,
       80.35658492, 82.01429545, 87.7177272 , 73.86300884, 73.16412917,
       74.21624803, 97.37617781, 83.65915247, 78.00434688, 84.39909737,
       86.14074607, 82.52398587, 76.15031119, 69.52238496, 83.27967396,
       82.11599481,         nan, 77.34568014, 93.26642039, 96.85895078,
       84.49527807, 96.314356  , 74.71289161, 76.50938201, 98.59

##### 2.1 Add Composite Scores to Data Frame

In [6]:
# Show all rows when printing.
pd.set_option('display.max_rows', None)

# composite_scores holds one value per row of df, in row order, so assign the
# rounded array positionally. Wrapping it in a DataFrame first would align on
# index labels 0..n-1 and silently produce NaN if df had any other index.
df['composite_scores'] = np.round(composite_scores, 2)
print(df[['gap_score_postop','composite_scores']])

     gap_score_postop  composite_scores
0                 1.0             91.14
1                 4.0             78.11
2                 4.0             84.06
3                 2.0             95.55
4                 1.0            100.00
5                 1.0             84.68
6                 6.0             84.39
7                 4.0             79.62
8                 6.0             85.84
9                 1.0             98.66
10                7.0             62.79
11                1.0             99.45
12                1.0             99.60
13                1.0             90.67
14                4.0             83.34
15                1.0             99.31
16                4.0             69.28
17                7.0             72.68
18                7.0             68.10
19                3.0             79.03
20                1.0             81.32
21               11.0             37.07
22               10.0             23.90
23                1.0             98.08
