In [1]:
import pandas as pd
import numpy as np


In [2]:
# Read the cleaned patient dataset into a DataFrame and print its column summary
df = pd.read_csv(filepath_or_buffer="../data/cleaned/MSDS_cleaned_0122.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 277 entries, 0 to 276
Data columns (total 98 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            277 non-null    int64  
 1   age                           277 non-null    int64  
 2   sex                           277 non-null    object 
 3   bmi                           250 non-null    float64
 4   Race                          276 non-null    object 
 5   LOS_min                       276 non-null    float64
 6   PROC_TIME_MIN                 276 non-null    float64
 7   SMOKING                       228 non-null    object 
 8   C7CSVL_preop                  274 non-null    float64
 9   SVA_preop                     275 non-null    float64
 10  TK_T4_T12_preop               275 non-null    float64
 11  TK_T10_L2_preop               275 non-null    float64
 12  T4PA_preop                    275 non-null    float64
 13  L1PA_

#### 1. Calculation of Composite Score

The parameters used in computing the composite score are the GAP score, L1PA penalty, L4S1 penalty, T4L1PA penalty, LL penalty, and improvement in GAP category (P, MD, SD). By default, the parameters are given equal weights.

Calculation method:

GAP: The GAP score (1-13) is rescaled to a 0-100 scale by multiplying it by 100/13.

Penalties: Quadratic penalties are calculated for violation of the respective constraints and normalized based on the maximum violation in the dataset. Then the (weighted) penalties are added to the composite score.

Improvement in GAP (Option 2 in calculation doc): 

No penalty is applied if the patient is in the proportioned (P) category postoperatively, i.e. the patient improved from severely disproportioned (SD) or moderately disproportioned (MD) to proportioned, or was proportioned and remained proportioned. A penalty of 30 is applied if the patient's condition improved from severely disproportioned to moderately disproportioned. A penalty of 100 is applied in all other cases, i.e. if the patient remains in the MD or SD category or worsens.

* note : may need to find a better way to normalize

In [3]:
# Create a score system for GAP change based on the change in GAP category.
# NOTE(review): this comment originally said "option 1", but the notebook text
# describes the category-improvement rule as "Option 2" — confirm which is intended.
# Disabled alternative: bin the post-op GAP score into P / MD / SD categories.
# df["gap_category_postop"] = pd.cut(df["gap_score_postop"], bins=[-np.inf, 2, 6, 13], labels=["P", "MD", "SD"])
# Coerce post-op GAP scores to numeric; entries that cannot be parsed become NaN.
df["gap_score_postop"] = pd.to_numeric(df["gap_score_postop"], errors="coerce")

def _quadratic_excess_penalty(values, tolerance=3):
    """Return (|v| - tolerance)**2 where |v| exceeds tolerance, else 0 (NaN stays NaN)."""
    deviation = np.abs(np.asarray(values, dtype=float))
    # NaN fails the `<=` test, so it falls into the NaN-propagating branch
    return np.where(deviation <= tolerance, 0.0, (deviation - tolerance) ** 2)


def _quadratic_range_penalty(values, low, high):
    """Return the squared distance outside [low, high], 0 inside it (NaN stays NaN)."""
    v = np.asarray(values, dtype=float)
    # NaN fails both comparisons and ends up in (v - high) ** 2, i.e. NaN
    return np.where(v < low, (low - v) ** 2,
                    np.where(v <= high, 0.0, (v - high) ** 2))


def _scale_to_100(penalty):
    """Scale penalties to 0-100 by the largest non-NaN penalty in the array."""
    penalty = np.asarray(penalty, dtype=float)
    observed = penalty[~np.isnan(penalty)]
    peak = observed.max() if observed.size else 0.0
    if peak == 0:
        # No violation anywhere (or no data): keep the zeros/NaNs instead of dividing 0 by 0
        return penalty
    return penalty / peak * 100


def composite_score_calc(df, w1=1, w2=1, w3=1, w4=1, w5=1, w6=1):
    """
    Compute composite scores from the post-op GAP score, quadratic penalties for
    constraint violations, and a GAP category improvement penalty.

    Each of the six terms is on a 0-100 scale and the terms are combined as a
    weighted average (weights are divided by their sum).

    Parameters:
    df: patient surgical data in a Pandas DataFrame. Columns read:
        gap_score_postop, L1PA_ideal_mismatch_postop, L4_S1_postop,
        T4L1PA_ideal_mismatch_postop, PI_preop, LL_postop,
        gap_category, gap_category_postop
    w1: weight for the rescaled GAP score
    w2: weight for L1PA penalty
    w3: weight for L4S1 penalty
    w4: weight for T4L1PA penalty
    w5: weight for LL penalty
    w6: weight for GAP category improvement penalty

    Returns:
    numpy array of composite scores, one per row of df (in row order).
    A row with a missing value in any numeric input gets a NaN score.
    """
    # calculate relative weights
    weights = [w1, w2, w3, w4, w5, w6]
    total_weight = sum(weights)
    rel_weights = [w / total_weight for w in weights]

    # rescale GAP scores to 0-100
    gap_score_postop = np.asarray(df["gap_score_postop"], dtype=float) * 100 / 13

    # 1) L1PA: quadratic penalty beyond +/-3 of the ideal, normalized by the dataset maximum
    l1pa_pen = _scale_to_100(_quadratic_excess_penalty(df["L1PA_ideal_mismatch_postop"]))

    # 2) L4S1: quadratic penalty outside the 35-45 range
    l4s1_pen = _scale_to_100(_quadratic_range_penalty(df["L4_S1_postop"], 35, 45))

    # 3) T4L1PA: quadratic penalty beyond +/-3 of the ideal
    t4l1pa_pen = _scale_to_100(_quadratic_excess_penalty(df["T4L1PA_ideal_mismatch_postop"]))

    # 4) LL: quadratic penalty beyond +/-3 of the ideal LL computed from pre-op PI
    ll_ideal = 0.54 * df["PI_preop"] + 27.6
    ll_mismatch = df["LL_postop"] - ll_ideal
    ll_pen = _scale_to_100(_quadratic_excess_penalty(ll_mismatch))

    # GAP category improvement penalty: 0 if proportioned post-op, 30 for SD -> MD,
    # 100 otherwise. Iterating over values (not labels) keeps this correct for any index.
    gap_category_improvement = np.array([
        0 if post == "P" and pre in ("SD", "MD", "P")
        else 30 if post == "MD" and pre == "SD"
        else 100
        for pre, post in zip(df["gap_category"], df["gap_category_postop"])
    ])

    # Composite score: weighted average of the six 0-100 terms
    composite = (rel_weights[0] * gap_score_postop +
                 rel_weights[1] * l1pa_pen +
                 rel_weights[2] * l4s1_pen +
                 rel_weights[3] * t4l1pa_pen +
                 rel_weights[4] * ll_pen +
                 rel_weights[5] * gap_category_improvement)

    return composite


# Score every patient; the weights are spelled out (all equal) so they can be adjusted here
composite_scores = composite_score_calc(df, w1=1, w2=1, w3=1, w4=1, w5=1, w6=1)

print(composite_scores)

[4.53911818e+00 4.73532146e+00 3.37820757e+01 1.43607469e+01
 1.10141362e+01 5.66084573e+00 1.28205128e+00 2.88521608e+01
 3.00748136e+01 4.02580157e+00 1.48550934e+00 3.36793997e+01
 3.35964618e+00 1.29463426e+01 3.11357738e+01 2.31154303e+01
 1.37546008e+01 2.42429781e+01 2.35439738e+00 2.72105719e+01
 1.29407836e+01 1.75295319e+00 5.42402497e+01 2.96615515e+01
 1.53206754e+00            nan 1.81848439e+00 2.56457224e+01
 2.45609025e+01 1.41785348e+00 8.85545386e+00 4.83809088e+00
 3.01349654e+01 2.09467020e+01 1.12888276e+01 1.54585156e+00
 2.61419445e+01 2.31277670e+01 2.56595442e+01 2.78038499e+01
 2.08265348e+01 3.59840168e+01 7.09159154e+00 3.24835787e+01
 1.74093372e+01 1.40998472e+00 1.49353811e+00 3.25710276e+01
 1.02145170e+01 2.15696797e+01 1.85893969e+00 8.72125669e+00
 5.21667640e+01 1.73787341e+00 1.48940688e+00 3.24633378e+01
 2.16596391e+00 2.64486145e+01 2.28812268e-01 3.08441103e+01
 2.17974003e+00 2.33735710e+01 1.29994929e+01 1.54837776e+01
 3.44751574e+01 2.885439

##### 2. Add Composite Scores to Data Frame

In [4]:
#show all rows
pd.set_option("display.max_rows", None)

# Assign the score array directly so values are placed by row position.
# Wrapping it in pd.DataFrame would make pandas align on index labels, which
# silently yields NaN/misplaced scores if df does not have a 0..n-1 index.
df["composite_scores"] = composite_scores
print(df[["gap_score_postop","composite_scores"]])

     gap_score_postop  composite_scores
0                 2.0          4.539118
1                 1.0          4.735321
2                 9.0         33.782076
3                 4.0         14.360747
4                 4.0         11.014136
5                 2.0          5.660846
6                 1.0          1.282051
7                 9.0         28.852161
8                 7.0         30.074814
9                 1.0          4.025802
10                1.0          1.485509
11                9.0         33.679400
12                2.0          3.359646
13                4.0         12.946343
14               10.0         31.135774
15                5.0         23.115430
16                6.0         13.754601
17                4.0         24.242978
18                1.0          2.354397
19                7.0         27.210572
20                6.0         12.940784
21                1.0          1.752953
22               13.0         54.240250
23                7.0         29.661552


In [5]:
# Write the scored dataframe (row index omitted) to the intermediate data folder
output_path = "../data/intermediate/MSDS_database_with_composite_scores.csv"
df.to_csv(path_or_buf=output_path, index=False)
print(f"Saved dataframe with composite scores to {output_path}")

Saved dataframe with composite scores to ../data/intermediate/MSDS_database_with_composite_scores.csv
