# IMPORT TOOLS

In [5]:
# BASIC TOOLS
import numpy as np
import pandas as pd
import datetime as dt

# STATISTIC TOOLS
import matplotlib.pyplot as plt 
import seaborn as sns
import scipy.stats as stats
import pingouin as pg 
import statsmodels.formula.api as smf
from statsmodels.tools.tools import add_constant
from scipy.optimize import linear_sum_assignment
from lifelines import KaplanMeierFitter, NelsonAalenFitter, CoxPHFitter

# PRE - PROCESSING TOOLS 
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer, LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# SUPERVISED LEARNING TOOLS
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, RandomForestRegressor, AdaBoostClassifier, AdaBoostRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, classification_report, roc_curve, roc_auc_score, pairwise_distances

# UNSUPERVISED LEARNING TOOLS
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from sklearn.manifold import TSNE 
from sklearn.decomposition import PCA, NMF

# DEEP LEARNING TOOLS 
from sklearn.neural_network import MLPRegressor, MLPClassifier

# IMPORT DATA

In [6]:
# Open the study workbook and list its sheets.
# NOTE(review): this is an absolute, machine-specific location; the notebook
# only runs where this exact path exists.
workbook_path = 'D:\\Users\\Desktop\\Python code\\GitHub\\Flowstateofmind\\Data\\csv\\Final_aommo.xlsx'
xls = pd.ExcelFile(workbook_path)
print(xls.sheet_names)

['yes (2)', 'Summary']


In [7]:
# Read the first worksheet of the workbook into a DataFrame.
df = xls.parse(sheet_name=0)

# Keep everything from the fourth column onward as the analysis table.
# NOTE(review): presumably the first three columns are identifiers that are
# not analysed - confirm against the sheet.
first_analysis_col = 3
sample = df.iloc[:, first_analysis_col:]
sample

Unnamed: 0,Sex,Age,Level,EBL,Total,op time,Spinopelvic_preop_SVA,Spinopelvic_preop_PI,Spinopelvic_preop_LL,Spinopelvic_preop_PT,...,VAS_3mo,VAS_6mo,VAS_12mo,VAS_24mo,ODI_preop,ODI_1mo,ODI_3mo,ODI_6mo,ODI_12mo,ODI_24mo
0,2,63,3,259,139,83,45.97,60.2,24.6,38.8,...,6,4,3,5,0.88,0.8,0.72,0.6,0.38,0.4
1,1,69,4,270,133,102,-10.1,60.64,32.3,24.5,...,8,6,2,3,0.84,0.78,0.78,0.58,0.4,0.38
2,2,56,3,318,62,95,24.61,70.49,27.2,39.66,...,8,6,4,3,0.74,0.8,0.74,0.56,0.44,0.3
3,2,59,2,224,191,70,31.22,54.44,20.36,47.57,...,7,6,5,3,0.8,0.8,0.78,0.54,0.4,0.48
4,2,52,3,200,165,109,75.13,56.24,35.7,28.72,...,7,6,5,5,0.74,0.84,0.78,0.58,0.4,0.34
5,2,70,3,293,114,78,25.61,63.5,22.65,40.76,...,6,6,4,5,0.8,0.82,0.72,0.54,0.42,0.34
6,1,60,4,193,64,105,50.2,54.66,34.22,29.86,...,7,4,5,5,0.76,0.84,0.82,0.62,0.48,0.38
7,2,64,3,208,176,84,100.0,55.62,31.4,30.6,...,7,3,2,4,0.8,0.84,0.78,0.62,0.48,0.46
8,1,65,3,220,74,74,30.94,57.38,41.9,23.98,...,6,5,2,5,0.82,0.78,0.74,0.66,0.42,0.26
9,2,57,4,259,109,102,17.6,55.0,35.02,30.36,...,6,6,5,2,0.84,0.72,0.7,0.66,0.32,0.42


# UNDERSTAND THE DATA

In [8]:
# Number of missing values in each column of the analysis table.
missing_mask = sample.isna()
missing_mask.sum()

Sex                      0
Age                      0
Level                    0
EBL                      0
Total                    0
op time                  0
Spinopelvic_preop_SVA    0
Spinopelvic_preop_PI     0
Spinopelvic_preop_LL     0
Spinopelvic_preop_PT     0
Spinopelvic_preop_SS     0
Spinopelvic_1mo_SVA      0
Spinopelvic_1mo_PI       0
Spinopelvic_1mo_LL       0
Spinopelvic_1mo_PT       0
Spinopelvic_1mo_SS       0
Spinopelvic_3mo_SVA      0
Spinopelvic_3mo_PI       0
Spinopelvic_3mo_LL       0
Spinopelvic_3mo_PT       0
Spinopelvic_3mo_SS       0
Spinopelvic_6mo_SVA      0
Spinopelvic_6mo_PI       0
Spinopelvic_6mo_LL       0
Spinopelvic_6mo_PT       0
Spinopelvic_6mo_SS       0
Spinopelvic_12mo_SVA     0
Spinopelvic_12mo_PI      0
Spinopelvic_12mo_LL      0
Spinopelvic_12mo_PT      0
Spinopelvic_12mo_SS      0
Spinopelvic_24mo_SVA     0
Spinopelvic_24mo_PI      0
Spinopelvic_24mo_LL      0
Spinopelvic_24mo_PT      0
Spinopelvic_24mo_SS      0
EQ-5D-5L_preop           0
E

In [9]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly: wrapping it in print() only adds a stray "None"
# line after the summary.
sample.info()

# Full list of column names, shown as the cell's output.
sample.columns


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 54 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Sex                    42 non-null     int64  
 1   Age                    42 non-null     int64  
 2   Level                  42 non-null     int64  
 3   EBL                    42 non-null     int64  
 4   Total                  42 non-null     int64  
 5   op time                42 non-null     int64  
 6   Spinopelvic_preop_SVA  42 non-null     float64
 7   Spinopelvic_preop_PI   42 non-null     float64
 8   Spinopelvic_preop_LL   42 non-null     float64
 9   Spinopelvic_preop_PT   42 non-null     float64
 10  Spinopelvic_preop_SS   42 non-null     float64
 11  Spinopelvic_1mo_SVA    42 non-null     float64
 12  Spinopelvic_1mo_PI     42 non-null     float64
 13  Spinopelvic_1mo_LL     42 non-null     float64
 14  Spinopelvic_1mo_PT     42 non-null     float64
 15  Spinopel

Index(['Sex', 'Age', 'Level', 'EBL', 'Total', 'op time',
       'Spinopelvic_preop_SVA', 'Spinopelvic_preop_PI', 'Spinopelvic_preop_LL',
       'Spinopelvic_preop_PT', 'Spinopelvic_preop_SS', 'Spinopelvic_1mo_SVA',
       'Spinopelvic_1mo_PI', 'Spinopelvic_1mo_LL', 'Spinopelvic_1mo_PT',
       'Spinopelvic_1mo_SS', 'Spinopelvic_3mo_SVA', 'Spinopelvic_3mo_PI',
       'Spinopelvic_3mo_LL', 'Spinopelvic_3mo_PT', 'Spinopelvic_3mo_SS',
       'Spinopelvic_6mo_SVA', 'Spinopelvic_6mo_PI', 'Spinopelvic_6mo_LL',
       'Spinopelvic_6mo_PT', 'Spinopelvic_6mo_SS', 'Spinopelvic_12mo_SVA',
       'Spinopelvic_12mo_PI', 'Spinopelvic_12mo_LL', 'Spinopelvic_12mo_PT',
       'Spinopelvic_12mo_SS', 'Spinopelvic_24mo_SVA', 'Spinopelvic_24mo_PI',
       'Spinopelvic_24mo_LL', 'Spinopelvic_24mo_PT', 'Spinopelvic_24mo_SS',
       'EQ-5D-5L_preop', 'EQ-5D-5L_1mo', 'EQ-5D-5L_3mo', 'EQ-5D-5L_6mo',
       'EQ-5D-5L_12mo', 'EQ-5D-5L_24mo', 'VAS_preop', 'VAS_1mo', 'VAS_3mo',
       'VAS_6mo', 'VAS_12mo', 'VAS_24m

# EXPLORATORY DATA ANALYSIS

## Baseline Characteristics

In [10]:
# Number of patients per value of the Sex column.
# NOTE(review): the meaning of the codes 1 and 2 is not documented here.
sex_counts = sample['Sex'].value_counts()
sex_counts

2    26
1    16
Name: Sex, dtype: int64

In [11]:
# Shapiro-Wilk normality test for each baseline characteristic.
# The test is run on the `baseline` subset itself, so the values tested are
# always the ones the loop iterates over (not those of the raw sheet `df`).
baseline = sample[['Age','Level','EBL','Total','op time']]
for col in baseline.columns:
    print(col, stats.shapiro(baseline[col]))

Age ShapiroResult(statistic=0.9812403917312622, pvalue=0.7094213962554932)
Level ShapiroResult(statistic=0.7975444197654724, pvalue=3.9276892493944615e-06)
EBL ShapiroResult(statistic=0.924625039100647, pvalue=0.008569853380322456)
Total ShapiroResult(statistic=0.9370397925376892, pvalue=0.022543461993336678)
op time ShapiroResult(statistic=0.9303672313690186, pvalue=0.013330301269888878)


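For each baseline characteristic, use the Shapiro–Wilk p-value above to choose the summary statistic:
- if p-value > 0.05, normality is not rejected: report the mean
- if p-value ≤ 0.05, normality is rejected: report the median (the 50% row of `describe()`)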

In [12]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the
# baseline characteristics.
baseline_cols = ['Age', 'Level', 'EBL', 'Total', 'op time']
sample[baseline_cols].describe()

Unnamed: 0,Age,Level,EBL,Total,op time
count,42.0,42.0,42.0,42.0,42.0
mean,62.309524,3.142857,239.785714,127.761905,84.761905
std,6.671703,0.6833,55.666049,44.751242,14.183139
min,47.0,2.0,157.0,56.0,61.0
25%,58.25,3.0,193.0,98.25,73.0
50%,61.5,3.0,230.5,128.5,83.0
75%,66.5,4.0,286.75,168.5,96.5
max,77.0,4.0,336.0,199.0,110.0


## Spinopelvic one sample comparison

In [13]:
# Shapiro-Wilk normality test for every spinopelvic measurement at every visit.
# Column names follow the pattern Spinopelvic_<visit>_<measure>; they are
# generated in visit-major order (all measures for preop, then 1mo, ...).
spinopelvic_visits = ['preop', '1mo', '3mo', '6mo', '12mo', '24mo']
spinopelvic_measures = ['SVA', 'PI', 'LL', 'PT', 'SS']
spinopelvic = sample[[f"Spinopelvic_{visit}_{measure}"
                      for visit in spinopelvic_visits
                      for measure in spinopelvic_measures]]

# Test the `spinopelvic` subset itself, so the values tested are always the
# ones the loop iterates over (not those of the raw sheet `df`).
for col in spinopelvic.columns:
    print(col, stats.shapiro(spinopelvic[col]))

Spinopelvic_preop_SVA ShapiroResult(statistic=0.9310577511787415, pvalue=0.014066877774894238)
Spinopelvic_preop_PI ShapiroResult(statistic=0.9636607766151428, pvalue=0.19938157498836517)
Spinopelvic_preop_LL ShapiroResult(statistic=0.9422385692596436, pvalue=0.034231625497341156)
Spinopelvic_preop_PT ShapiroResult(statistic=0.9468523859977722, pvalue=0.04985716566443443)
Spinopelvic_preop_SS ShapiroResult(statistic=0.9710264205932617, pvalue=0.3572752773761749)
Spinopelvic_1mo_SVA ShapiroResult(statistic=0.8904237747192383, pvalue=0.0007533872849307954)
Spinopelvic_1mo_PI ShapiroResult(statistic=0.960454523563385, pvalue=0.15332069993019104)
Spinopelvic_1mo_LL ShapiroResult(statistic=0.8743226528167725, pvalue=0.000267354364041239)
Spinopelvic_1mo_PT ShapiroResult(statistic=0.9689175486564636, pvalue=0.30361154675483704)
Spinopelvic_1mo_SS ShapiroResult(statistic=0.8803831934928894, pvalue=0.000391945504816249)
Spinopelvic_3mo_SVA ShapiroResult(statistic=0.9754557013511658, pvalue=0.4

In [14]:
# Follow-up visits and spinopelvic measurements compared with the preoperative value
time_points = ['1mo', '3mo', '6mo', '12mo', '24mo']
parameters = ['SVA', 'PI', 'LL', 'PT', 'SS']

for param in parameters:
    preop_col = f"Spinopelvic_preop_{param}"
    # The preoperative series is the same for every follow-up visit of this parameter
    preop_values = spinopelvic[preop_col]

    for tp in time_points:
        followup_col = f"Spinopelvic_{tp}_{param}"
        followup_values = spinopelvic[followup_col]

        # Per-patient change, preoperative minus follow-up, with its sample SD
        differences = preop_values - followup_values
        mean_diff = differences.mean()
        sd_diff = differences.std(ddof=1)

        # Two-sided paired t-test between the preoperative and follow-up values
        result = pg.ttest(preop_values, followup_values, paired=True, alternative='two-sided')

        # Report
        print(f"T-test: {param} - Preoperative vs {tp}")
        print(f"Mean Difference: {mean_diff:.2f} ± {sd_diff:.2f}")
        print(result)
        print("\n")

T-test: SVA - Preoperative vs 1mo
Mean Difference: -0.06 ± 24.13
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -0.015924   41   two-sided  0.987372  [-7.58, 7.46]  0.002722  0.167   

           power  
T-test  0.050034  


T-test: SVA - Preoperative vs 3mo
Mean Difference: -0.04 ± 25.08
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -0.009411   41   two-sided  0.992537  [-7.85, 7.78]  0.001681  0.167   

           power  
T-test  0.050013  


T-test: SVA - Preoperative vs 6mo
Mean Difference: -7.33 ± 26.19
               T  dof alternative     p-val           CI95%   cohen-d   BF10  \
T-test -1.814817   41   two-sided  0.076873  [-15.49, 0.83]  0.349782  0.746   

           power  
T-test  0.600103  


T-test: SVA - Preoperative vs 12mo
Mean Difference: 0.19 ± 27.69
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test  0.045354   41   two-sided  0.964045  [-8.44, 8.82]  0.

## EQ-5D-5L

In [15]:
# EQ-5D-5L scores at the preoperative visit and at each follow-up visit
eq_columns = [f"EQ-5D-5L_{visit}" for visit in ('preop', '1mo', '3mo', '6mo', '12mo', '24mo')]
EQ = sample[eq_columns]
EQ.head()

Unnamed: 0,EQ-5D-5L_preop,EQ-5D-5L_1mo,EQ-5D-5L_3mo,EQ-5D-5L_6mo,EQ-5D-5L_12mo,EQ-5D-5L_24mo
0,-0.174,0.062,0.228,0.208,0.645,0.829
1,-0.028,0.106,-0.165,0.301,0.801,0.879
2,-0.174,0.151,0.199,0.287,0.753,0.768
3,-0.104,0.004,-0.127,0.127,0.925,0.738
4,-0.16,0.179,0.104,0.628,0.884,0.814


In [16]:
# Follow-up visits compared with the preoperative EQ-5D-5L score
time_points = ['1mo', '3mo', '6mo', '12mo', '24mo']

# The preoperative series is shared by every comparison
eq_preop = EQ['EQ-5D-5L_preop']

for tp in time_points:
    followup_col = f"EQ-5D-5L_{tp}"
    eq_followup = EQ[followup_col]

    # Per-patient change, preoperative minus follow-up, with its sample SD
    differences = eq_preop - eq_followup
    mean_diff = differences.mean()
    sd_diff = differences.std(ddof=1)

    # Two-sided paired t-test between the preoperative and follow-up scores
    result = pg.ttest(eq_preop, eq_followup, paired=True, alternative='two-sided')

    # Report
    print(f"EQ-5D-5L: Preoperative vs {tp}")
    print(f"Mean Difference: {mean_diff:.2f} ± {sd_diff:.2f}")
    print(result)
    print("\n")

EQ-5D-5L: Preoperative vs 1mo
Mean Difference: -0.05 ± 0.21
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test -1.732645   41   two-sided  0.090676  [-0.12, 0.01]  0.366446  0.656   

           power  
T-test  0.640204  


EQ-5D-5L: Preoperative vs 3mo
Mean Difference: -0.08 ± 0.23
               T  dof alternative     p-val           CI95%   cohen-d   BF10  \
T-test -2.244255   41   two-sided  0.030277  [-0.15, -0.01]  0.564005  1.578   

           power  
T-test  0.946118  


EQ-5D-5L: Preoperative vs 6mo
Mean Difference: -0.26 ± 0.28
               T  dof alternative         p-val           CI95%   cohen-d  \
T-test -5.931938   41   two-sided  5.407075e-07  [-0.34, -0.17]  1.556509   

           BF10  power  
T-test  3.0e+04    1.0  


EQ-5D-5L: Preoperative vs 12mo
Mean Difference: -0.80 ± 0.15
                T  dof alternative         p-val           CI95%   cohen-d  \
T-test -34.235256   41   two-sided  8.760908e-32  [-0.85, -0.75]  6.558118

## VAS

In [17]:
# VAS scores at the preoperative visit and at each follow-up visit
vas_columns = [f"VAS_{visit}" for visit in ('preop', '1mo', '3mo', '6mo', '12mo', '24mo')]
VAS = sample[vas_columns]
VAS.head()

Unnamed: 0,VAS_preop,VAS_1mo,VAS_3mo,VAS_6mo,VAS_12mo,VAS_24mo
0,8,8,6,4,3,5
1,8,7,8,6,2,3
2,10,9,8,6,4,3
3,9,7,7,6,5,3
4,9,8,7,6,5,5


In [18]:
# Follow-up visits compared with the preoperative VAS score
time_points = ['1mo', '3mo', '6mo', '12mo', '24mo']

# The preoperative series is shared by every comparison
vas_preop = VAS['VAS_preop']

for tp in time_points:
    followup_col = f"VAS_{tp}"
    vas_followup = VAS[followup_col]

    # Per-patient change, preoperative minus follow-up, with its sample SD
    differences = vas_preop - vas_followup
    mean_diff = differences.mean()
    sd_diff = differences.std(ddof=1)

    # Two-sided paired t-test between the preoperative and follow-up scores
    result = pg.ttest(vas_preop, vas_followup, paired=True, alternative='two-sided')

    # Report
    print(f"VAS: Preoperative vs {tp}")
    print(f"Mean Difference: {mean_diff:.2f} ± {sd_diff:.2f}")
    print(result)
    print("\n")

VAS: Preoperative vs 1mo
Mean Difference: 0.26 ± 1.13
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test  1.505434   41   two-sided  0.139878  [-0.09, 0.61]  0.281699  0.473   

           power  
T-test  0.429793  


VAS: Preoperative vs 3mo
Mean Difference: 1.57 ± 1.33
               T  dof alternative         p-val         CI95%   cohen-d  \
T-test  7.669817   41   two-sided  1.898962e-09  [1.16, 1.99]  1.602921   

             BF10  power  
T-test  6.176e+06    1.0  


VAS: Preoperative vs 6mo
Mean Difference: 3.67 ± 1.12
                T  dof alternative         p-val         CI95%   cohen-d  \
T-test  21.236761   41   two-sided  9.719980e-24  [3.32, 4.02]  3.434218   

             BF10  power  
T-test  3.663e+20    1.0  


VAS: Preoperative vs 12mo
Mean Difference: 5.19 ± 1.69
                T  dof alternative         p-val         CI95%   cohen-d  \
T-test  19.957457   41   two-sided  9.967922e-23  [4.67, 5.72]  4.641964   

             BF

## ODI

In [19]:
# ODI scores at the preoperative visit and at each follow-up visit
odi_columns = [f"ODI_{visit}" for visit in ('preop', '1mo', '3mo', '6mo', '12mo', '24mo')]
ODI = sample[odi_columns]
ODI.head()

Unnamed: 0,ODI_preop,ODI_1mo,ODI_3mo,ODI_6mo,ODI_12mo,ODI_24mo
0,0.88,0.8,0.72,0.6,0.38,0.4
1,0.84,0.78,0.78,0.58,0.4,0.38
2,0.74,0.8,0.74,0.56,0.44,0.3
3,0.8,0.8,0.78,0.54,0.4,0.48
4,0.74,0.84,0.78,0.58,0.4,0.34


In [20]:
# Follow-up visits compared with the preoperative ODI score
time_points = ['1mo', '3mo', '6mo', '12mo', '24mo']

# The preoperative series is shared by every comparison
odi_preop = ODI['ODI_preop']

for tp in time_points:
    followup_col = f"ODI_{tp}"
    odi_followup = ODI[followup_col]

    # Per-patient change, preoperative minus follow-up, with its sample SD
    differences = odi_preop - odi_followup
    mean_diff = differences.mean()
    sd_diff = differences.std(ddof=1)

    # Two-sided paired t-test between the preoperative and follow-up scores
    result = pg.ttest(odi_preop, odi_followup, paired=True, alternative='two-sided')

    # Report
    print(f"ODI: Preoperative vs {tp}")
    print(f"Mean Difference: {mean_diff:.2f} ± {sd_diff:.2f}")
    print(result)
    print("\n")

ODI: Preoperative vs 1mo
Mean Difference: 0.00 ± 0.07
               T  dof alternative     p-val          CI95%   cohen-d   BF10  \
T-test  0.127697   41   two-sided  0.899013  [-0.02, 0.02]  0.028248  0.168   

           power  
T-test  0.053671  


ODI: Preoperative vs 3mo
Mean Difference: 0.02 ± 0.07
               T  dof alternative     p-val         CI95%  cohen-d   BF10  \
T-test  1.704261   41   two-sided  0.095901  [-0.0, 0.04]  0.39577  0.628   

           power  
T-test  0.706942  


ODI: Preoperative vs 6mo
Mean Difference: 0.21 ± 0.07
                T  dof alternative         p-val         CI95%   cohen-d  \
T-test  19.244791   41   two-sided  3.846462e-22  [0.19, 0.24]  4.196205   

            BF10  power  
T-test  1.03e+19    1.0  


ODI: Preoperative vs 12mo
Mean Difference: 0.41 ± 0.08
                T  dof alternative         p-val         CI95%   cohen-d  \
T-test  34.698362   41   two-sided  5.141563e-32  [0.38, 0.43]  8.397093   

             BF10  power  
T-