# Step 5: K-L Regression and ML Classification Models

In [1]:
import pandas as pd
from IPython.display import display, Markdown, JSON
from collections import Counter

### Objective
- Evaluate the extent to which BML scores relate with K-L severity
- Explore the predictive power that BML scores have on K-L scores down the line

### Statistical Tests/Models
- Linear regression (LR)
- Partial Least Squares (PLS)
- L1 Regularisation (Lasso)
- Multinomial Logistic Regression (logR)
- HistGradientBoost Classifier (HGBC)
- XGBoost Classifier

### Inputs and Outputs
- 45 BML variables at baseline and 12-month follow-up from full dataset, ungrouped (Input)
- K-L score at 12-month and 24-month follow-ups (Input)
- Coefficients and p-values for each BML variable from LR, PLS, Lasso (Output)
- Top-5 impactful BML variables per time point (Output)
- Confusion matrices for model predictions (Output)
- AUC-ROC curves for XGBoost Classifier (Output)

## 5.1 K-L Linear Regression

In [2]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v01_kl_drop_coef_df, v00_v01_kl_drop_results

display(Markdown('### 5.1.1 Baseline BML variables\' coefficients and p-values against K-L score at 12-month follow-up'))
display(v00_v01_kl_drop_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.1.2 Baseline BML variables\' evaulators against K-L score at 12-month follow-up'))
display(v00_v01_kl_drop_results)




### 5.1.1 Baseline BML variables' coefficients and p-values against K-L score at 12-month follow-up

Unnamed: 0,Feature,Coefficient,p_value
0,V00MBMNFLC,0.693356,0.01939781
1,Intercept,0.684502,5.1985210000000004e-157
2,V00MBMSFLC,0.438083,0.001075904
3,V00MBMSFMC,0.324357,5.336657e-06
4,V00MBMPTLA,0.310372,0.4513839
5,V00MBMPTMC,0.261374,7.443128e-08
6,V00MBMNTLP,0.25399,0.2774733
7,V00MBMNTMA,0.241656,0.09654064
9,V00MBMNTMP,0.228223,0.2766297
10,V00MBMNSS,0.216739,4.818014e-05


### 5.1.2 Baseline BML variables' evaulators against K-L score at 12-month follow-up

Unnamed: 0,Value
R2,0.495054
RMSE,0.846109
Intercept,0.684502


In [3]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v03_kl_drop_coef_df, v00_v03_kl_drop_results

display(Markdown('### 5.1.3 Baseline BML variables\' coefficients and p-values against K-L score at 24-month follow-up'))
display(v00_v03_kl_drop_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.1.4 Baseline BML variables\' evaulators against K-L score at 24-month follow-up'))
display(v00_v03_kl_drop_results)

### 5.1.3 Baseline BML variables' coefficients and p-values against K-L score at 24-month follow-up

Unnamed: 0,Feature,Coefficient,p_value
0,Intercept,0.792967,1.06775e-180
1,V00MBMNFLC,0.593929,0.07505313
2,V00MBMNTMA,0.46535,0.02144379
3,V00MBMSFLC,0.401643,0.004166778
4,V00MBMSFMC,0.304253,0.0002267976
5,V00MBMPTMC,0.27953,7.251611e-08
6,V00MBMNTLC,0.254015,0.2330686
8,V00MBMNTMP,0.235422,0.3147666
10,V00MBMPFLP,0.215931,0.05598832
11,V00MBMNFLA,0.199898,0.0003313782


### 5.1.4 Baseline BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,Value
R2,0.46385
RMSE,0.903957
Intercept,0.792967


In [4]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v01_v03_kl_drop_coef_df, v01_v03_kl_drop_results

display(Markdown('### 5.1.5 12-month follow-up BML variables\' coefficients and p-values against K-L score at 24-month follow-up'))
display(v01_v03_kl_drop_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.1.6 12-month follow-up BML variables\' evaulators against K-L score at 24-month follow-up'))
display(v01_v03_kl_drop_results)

### 5.1.5 12-month follow-up BML variables' coefficients and p-values against K-L score at 24-month follow-up

Unnamed: 0,Feature,Coefficient,p_value
0,Intercept,0.762738,3.2961e-171
1,V01MBMNTMP,0.515667,0.05204514
2,V01MBMSTLA,0.489306,0.03087092
3,V01MBMNFLC,0.467998,0.024258
4,V01MBMNTMA,0.398415,0.07029093
5,V01MBMSFLC,0.341872,0.07249253
7,V01MBMNTMC,0.253943,0.01511188
9,V01MBMNFLA,0.233169,6.929011e-07
10,V01MBMNFMP,0.227333,0.2407375
11,V01MBMNFMA,0.220474,0.2298041


### 5.1.6 12-month follow-up BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,Value
R2,0.475754
RMSE,0.893866
Intercept,0.762738


In [5]:
coef_dataframes = [v00_v01_kl_drop_coef_df, v00_v03_kl_drop_coef_df, v01_v03_kl_drop_coef_df]

top_features = []
for dataframe in coef_dataframes:
    top_5_features = dataframe.nlargest(5, 'Coefficient')
    top_features.extend(top_5_features['Feature'].tolist())

feature_count = Counter(top_features)
feature_count_dataframe = pd.DataFrame.from_dict(feature_count,orient='index',columns=['Count']).sort_values('Count', ascending=False)

display(Markdown('### 5.1.7 Features with top five coefficients from LR analysis'))
display(feature_count_dataframe)

### 5.1.7 Features with top five coefficients from LR analysis

Unnamed: 0,Count
Intercept,3
V00MBMNFLC,2
V00MBMSFLC,2
V00MBMSFMC,2
V00MBMPTLA,1
V00MBMNTMA,1
V01MBMNTMP,1
V01MBMSTLA,1
V01MBMNFLC,1
V01MBMNTMA,1


# 5.2 K-L Partial Least Squares
PLS finds BLS variables that both capture variation in the predictors and maximize their ability to explain variation in the K-L score.

In [6]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v01_kl_drop_pls_coef_df, v00_v01_kl_drop_pls_results

display(Markdown('### 5.2.1 Baseline BML variables\' coefficients against K-L score at 12-month follow-up'))
display(v00_v01_kl_drop_pls_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.2.2 Baseline BML variables\' evaulators against K-L score at 12-month follow-up'))
display(v00_v01_kl_drop_pls_results)

### 5.2.1 Baseline BML variables' coefficients against K-L score at 12-month follow-up

Unnamed: 0,Feature,Coefficient,Target
0,Intercept,0.72072,V01XRKL
39,V00MBMNTLA,0.295288,V01XRKL
40,V00MBMNTMC,0.221544,V01XRKL
38,V00MBMNTMA,0.204023,V01XRKL
34,V00MBMNFLC,0.200364,V01XRKL
42,V00MBMNTMP,0.195576,V01XRKL
35,V00MBMNFMP,0.145112,V01XRKL
4,V00MBMSFLC,0.143615,V01XRKL
9,V00MBMSTLA,0.141868,V01XRKL
10,V00MBMSTMC,0.141558,V01XRKL


### 5.2.2 Baseline BML variables' evaulators against K-L score at 12-month follow-up

Unnamed: 0,Feature,Value
0,R2,{'V01XRKL': 0.48376695671765335}
1,RMSE,{'V01XRKL': 0.855513144902164}
2,n_components,1


In [7]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v03_kl_drop_pls_coef_df, v00_v03_kl_drop_pls_results

display(Markdown('### 5.2.3 Baseline BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v00_v03_kl_drop_pls_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.2.4 Baseline BML variables\' evaulators against K-L score at 24-month follow-up'))
display(v00_v03_kl_drop_pls_results)

### 5.2.3 Baseline BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Coefficient,Target
0,Intercept,0.829059,V03XRKL
39,V00MBMNTLA,0.25156,V03XRKL
40,V00MBMNTMC,0.218649,V03XRKL
38,V00MBMNTMA,0.203404,V03XRKL
42,V00MBMNTMP,0.193683,V03XRKL
34,V00MBMNFLC,0.191609,V03XRKL
35,V00MBMNFMP,0.142667,V03XRKL
3,V00MBMSFMC,0.138837,V03XRKL
10,V00MBMSTMC,0.138831,V03XRKL
4,V00MBMSFLC,0.134873,V03XRKL


### 5.2.4 Baseline BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,Feature,Value
0,R2,{'V03XRKL': 0.44797793236948213}
1,RMSE,{'V03XRKL': 0.9172397964453746}
2,n_components,1


In [8]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v01_v03_kl_drop_pls_coef_df, v01_v03_kl_drop_pls_results

display(Markdown('### 5.2.5 12-month follow-up BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v01_v03_kl_drop_pls_coef_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.2.5 12-month follow-up BML variables\' evaulators against K-L score at 24-month follow-up'))
display(v01_v03_kl_drop_pls_results)

### 5.2.5 12-month follow-up BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Coefficient,Target
0,Intercept,0.811077,V03XRKL
39,V01MBMNTLA,0.284593,V03XRKL
40,V01MBMNTMC,0.209449,V03XRKL
38,V01MBMNTMA,0.201263,V03XRKL
42,V01MBMNTMP,0.181725,V03XRKL
34,V01MBMNFLC,0.172107,V03XRKL
35,V01MBMNFMP,0.14661,V03XRKL
4,V01MBMSFLC,0.13847,V03XRKL
9,V01MBMSTLA,0.135575,V03XRKL
10,V01MBMSTMC,0.12731,V03XRKL


### 5.2.5 12-month follow-up BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,Feature,Value
0,R2,{'V03XRKL': 0.4550467683642422}
1,RMSE,{'V03XRKL': 0.9113480858303634}
2,n_components,1


In [9]:
coef_dataframes = [v00_v01_kl_drop_pls_coef_df, v00_v03_kl_drop_pls_coef_df, v01_v03_kl_drop_pls_coef_df]

top_features = []
for dataframe in coef_dataframes:
    top_5_features = dataframe.nlargest(5, 'Coefficient')
    top_features.extend(top_5_features['Feature'].tolist())

feature_count = Counter(top_features)
feature_count_dataframe = pd.DataFrame.from_dict(feature_count,orient='index',columns=['Count']).sort_values('Count', ascending=False)

display(Markdown('### 5.2.7 Features with top five coefficients from PLS regression analysis'))
display(feature_count_dataframe)

### 5.2.7 Features with top five coefficients from PLS regression analysis

Unnamed: 0,Count
Intercept,3
V00MBMNTLA,2
V00MBMNTMC,2
V00MBMNTMA,2
V00MBMNFLC,1
V00MBMNTMP,1
V01MBMNTLA,1
V01MBMNTMC,1
V01MBMNTMA,1
V01MBMNTMP,1


## 5.3 K-L L1 Regularisation
Lasso adds a penalty on the absolute size of coefficients, shrinking some to zero and thus performing feature selection.

In [10]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v01_kl_drop_lasso_coef_df, v00_v01_kl_drop_lasso_results

n_nonzero_coefs = (v00_v01_kl_drop_lasso_coef_df['Coefficient'] > 1e-10).sum()
display(Markdown('### 5.3.1 Baseline BML variables\' coefficients against K-L score at 12-month follow-up'))
display(v00_v01_kl_drop_lasso_coef_df.sort_values(by='Coefficient', ascending=False).head(n_nonzero_coefs))
display(Markdown('### 5.2.5 Baseline BML variables\' evaulators against K-L score at 12-month follow-up'))
display(pd.DataFrame.from_dict(v00_v01_kl_drop_lasso_results, orient='index'))

### 5.3.1 Baseline BML variables' coefficients against K-L score at 12-month follow-up

Unnamed: 0,Feature,Target,Coefficient
0,Intercept,V01XRKL,1.252714
25,V00MBMPTMC,V01XRKL,0.22932
3,V00MBMSFMC,V01XRKL,0.183604
37,V00MBMNSS,V01XRKL,0.122315
4,V00MBMSFLC,V01XRKL,0.106373
27,V00MBMPTMP,V01XRKL,0.076437
20,V00MBMPFMP,V01XRKL,0.073636
32,V00MBMNFLA,V01XRKL,0.068672
34,V00MBMNFLC,V01XRKL,0.066509
8,V00MBMSTMA,V01XRKL,0.062404


### 5.2.5 Baseline BML variables' evaulators against K-L score at 12-month follow-up

Unnamed: 0,V01XRKL
R2,0.498163
RMSE,0.8435


In [11]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v00_v03_kl_drop_lasso_coef_df, v00_v03_kl_drop_lasso_results

n_nonzero_coefs = (v00_v03_kl_drop_lasso_coef_df['Coefficient'] > 1e-10).sum()
display(Markdown('### 5.3.3 Baseline BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v00_v03_kl_drop_lasso_coef_df.sort_values(by='Coefficient', ascending=False).head(n_nonzero_coefs))
display(Markdown('### 5.3.4 Baseline BML variables\' evaulators against K-L score at 24-month follow-up'))
display(pd.DataFrame.from_dict(v00_v03_kl_drop_lasso_results, orient='index'))

### 5.3.3 Baseline BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Target,Coefficient
0,Intercept,V03XRKL,1.357794
25,V00MBMPTMC,V03XRKL,0.237948
3,V00MBMSFMC,V03XRKL,0.176375
32,V00MBMNFLA,V03XRKL,0.109024
38,V00MBMNTMA,V03XRKL,0.106684
4,V00MBMSFLC,V03XRKL,0.096216
20,V00MBMPFMP,V03XRKL,0.094943
37,V00MBMNSS,V03XRKL,0.090143
27,V00MBMPTMP,V03XRKL,0.062474
41,V00MBMNTLC,V03XRKL,0.060353


### 5.3.4 Baseline BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,V03XRKL
R2,0.462279
RMSE,0.905281


In [12]:
from scripts.mbm.kl_oriented.kl_mbm_relationships import v01_v03_kl_drop_lasso_coef_df, v01_v03_kl_drop_lasso_results

n_nonzero_coefs = (v01_v03_kl_drop_lasso_coef_df['Coefficient'] > 1e-10).sum()
display(Markdown('### 5.3.5 12-month follow-up BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v01_v03_kl_drop_lasso_coef_df.sort_values(by='Coefficient', ascending=False).head(n_nonzero_coefs))
display(Markdown('### 5.2.6 12-month follow-up BML variables\' evaulators against K-L score at 24-month follow-up'))
display(pd.DataFrame.from_dict(v01_v03_kl_drop_lasso_results, orient='index'))

### 5.3.5 12-month follow-up BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Target,Coefficient
0,Intercept,V03XRKL,1.357794
25,V01MBMPTMC,V03XRKL,0.167778
32,V01MBMNFLA,V03XRKL,0.126019
37,V01MBMNSS,V03XRKL,0.119651
3,V01MBMSFMC,V03XRKL,0.116806
18,V01MBMPFMC,V03XRKL,0.113142
40,V01MBMNTMC,V03XRKL,0.099819
4,V01MBMSFLC,V03XRKL,0.07606
10,V01MBMSTMC,V03XRKL,0.072614
34,V01MBMNFLC,V03XRKL,0.064344


### 5.2.6 12-month follow-up BML variables' evaulators against K-L score at 24-month follow-up

Unnamed: 0,V03XRKL
R2,0.474683
RMSE,0.894778


In [13]:
coef_dataframes = [v00_v01_kl_drop_lasso_coef_df, v00_v03_kl_drop_lasso_coef_df, v01_v03_kl_drop_lasso_coef_df]

top_features = []
for dataframe in coef_dataframes:
    top_5_features = dataframe.nlargest(5, 'Coefficient')
    top_features.extend(top_5_features['Feature'].tolist())

feature_count = Counter(top_features)
feature_count_dataframe = pd.DataFrame.from_dict(feature_count,orient='index',columns=['Count']).sort_values('Count', ascending=False)

display(Markdown('### 5.3.7 Features with top five coefficients from lasso regression analysis'))
display(feature_count_dataframe)

### 5.3.7 Features with top five coefficients from lasso regression analysis

Unnamed: 0,Count
Intercept,3
V00MBMPTMC,2
V00MBMSFMC,2
V00MBMNSS,1
V00MBMSFLC,1
V00MBMNFLA,1
V00MBMNTMA,1
V01MBMPTMC,1
V01MBMNFLA,1
V01MBMNSS,1


# 5.4 K-L L1 Penalised Multinomial Logistic Regression Model
Multinomial logistic regression is a model that predicts probabilities of outcomes when the target variable has more than two categories (0,1,3,4) for K-L.
- Note: K-L score 3-4 aggregated into one class for class balance

In [14]:
from scripts.mbm.kl_oriented.kl_mbm_logR import v00_v01_moaks_kl_coefs_df, v00_v01_report, v00_v01_confusion_matrix

display(Markdown('### 5.4.1 Baseline BML variables\' coefficients against K-L score at 12-month follow-up'))
display(v00_v01_moaks_kl_coefs_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.4.2 Baseline BML variables\' classification report against K-L score at 12-month follow-up'))
display(display(JSON(v00_v01_report, expanded=True)))
display(Markdown('### 5.4.3 Baseline BML variables\' confusion matrix against K-L score at 12-month follow-up'))
display(v00_v01_confusion_matrix)

### 5.4.1 Baseline BML variables' coefficients against K-L score at 12-month follow-up

Unnamed: 0,Class,Feature,Coefficient
170,2,V00MBMNTLP,0.84651
11,3,V00MBMSFMC,0.790315
135,3,V00MBMNFLC,0.790254
72,0,V00MBMPFLC,0.660596
15,3,V00MBMSFLC,0.608023
14,2,V00MBMSFLC,0.59914
160,0,V00MBMNTLC,0.52149
109,1,V00MBMPTLP,0.473988
150,2,V00MBMNTMA,0.454274
31,3,V00MBMSTMA,0.438742


### 5.4.2 Baseline BML variables' classification report against K-L score at 12-month follow-up

<IPython.core.display.JSON object>

None

### 5.4.3 Baseline BML variables' confusion matrix against K-L score at 12-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,125,39,14,5
True 1,93,66,14,5
True 2,44,36,28,8
True 3,12,13,12,62


In [15]:
from scripts.mbm.kl_oriented.kl_mbm_logR import v00_v03_moaks_kl_coefs_df, v00_v03_report, v00_v03_confusion_matrix

display(Markdown('### 5.4.4 Baseline BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v00_v03_moaks_kl_coefs_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.4.5 Baseline BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v00_v03_report, expanded=True)))
display(Markdown('### 5.4.6 Baseline BML variables\' confusion matrix against K-L score at 24-month follow-up'))
display(v00_v03_confusion_matrix)

### 5.4.4 Baseline BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Class,Feature,Coefficient
11,3,V00MBMSFMC,0.764489
150,2,V00MBMNTMA,0.604317
135,3,V00MBMNFLC,0.518164
39,3,V00MBMSTMC,0.503613
77,1,V00MBMPFMP,0.492347
14,2,V00MBMSFLC,0.448797
158,2,V00MBMNTMC,0.411023
101,1,V00MBMPTLC,0.404364
21,1,V00MBMSFLP,0.398541
15,3,V00MBMSFLC,0.395757


### 5.4.5 Baseline BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.4.6 Baseline BML variables' confusion matrix against K-L score at 24-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,121,28,17,6
True 1,91,36,25,6
True 2,47,22,50,13
True 3,15,11,18,70


In [16]:
from scripts.mbm.kl_oriented.kl_mbm_logR import v01_v03_moaks_kl_coefs_df, v01_v03_report, v01_v03_confusion_matrix

display(Markdown('### 5.4.7 12-month follow-up BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v01_v03_moaks_kl_coefs_df.sort_values(by='Coefficient', ascending=False).head(10))
display(Markdown('### 5.4.8 12-month follow-up BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v01_v03_report, expanded=True)))
display(Markdown('### 5.4.9 12-month follow-up BML variables\' confusion matrix against K-L score at 24-month follow-up'))
display(v01_v03_confusion_matrix)

### 5.4.7 12-month follow-up BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Class,Feature,Coefficient
77,1,V01MBMPFMP,1.276188
80,0,V01MBMPFLP,1.014974
150,2,V01MBMNTMA,0.950897
141,1,V01MBMNFLP,0.685036
139,3,V01MBMNFMP,0.645758
44,0,V01MBMSTMP,0.631005
110,2,V01MBMPTLP,0.553841
43,3,V01MBMSTLC,0.542805
133,1,V01MBMNFLC,0.504953
143,3,V01MBMNFLP,0.491301


### 5.4.8 12-month follow-up BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.4.9 12-month follow-up BML variables' confusion matrix against K-L score at 24-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,117,33,14,8
True 1,89,35,31,3
True 2,46,28,42,16
True 3,12,12,14,76


# 5.5 K-L HistGradientBoostClassifier Model
The HistGradientBoostingClassifier is a fast gradient boosting model that builds decision trees from binned feature values to efficiently handle large datasets.

In [17]:
from scripts.mbm.kl_oriented.kl_mbm_hist import v00_v01_moaks_kl_coefs_df, v00_v01_report, v00_v01_confusion_matrix

display(Markdown('### 5.5.1 Baseline BML variables\' coefficients against K-L score at 12-month follow-up'))
display(v00_v01_moaks_kl_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.5.2 Baseline BML variables\' classification report against K-L score at 12-month follow-up'))
display(display(JSON(v00_v01_report, expanded=True)))
display(Markdown('### 5.5.3 Baseline BML variables\' confusion matrix against K-L score at 12-month follow-up'))
display(v00_v01_confusion_matrix)

       Feature  Importance  Importance_std
17  V00MBMPFMC    0.032639        0.018582
28   V00MBMPPM    0.030556        0.013586
9   V00MBMSTMC    0.023611        0.011013
24  V00MBMPTMC    0.022743        0.010313
21   V00MBMPSS    0.022743        0.007261
13   V00MBMSPM    0.020312        0.006636
7   V00MBMSTMA    0.018056        0.006459
15  V00MBMPFMA    0.017708        0.004710
6    V00MBMSSS    0.016493        0.008689
22  V00MBMPTMA    0.012847        0.009515
1   V00MBMSFLA    0.011285        0.008620
0   V00MBMSFMA    0.009375        0.004406
30  V00MBMNFMA    0.007812        0.005612
34  V00MBMNFMP    0.007118        0.006517
19  V00MBMPFMP    0.006771        0.004557
44   V00MBMNPL    0.006076        0.003494
16  V00MBMPFLA    0.005729        0.006165
2   V00MBMSFMC    0.005729        0.004789
39  V00MBMNTMC    0.005208        0.007325
14   V00MBMSPL    0.005035        0.010573
26  V00MBMPTMP    0.004167        0.003740
29   V00MBMPPL    0.003993        0.006262
36   V00MBM

### 5.5.1 Baseline BML variables' coefficients against K-L score at 12-month follow-up

Unnamed: 0,Feature,Importance,Importance_std
17,V00MBMPFMC,0.032639,0.018582
28,V00MBMPPM,0.030556,0.013586
9,V00MBMSTMC,0.023611,0.011013
24,V00MBMPTMC,0.022743,0.010313
21,V00MBMPSS,0.022743,0.007261
13,V00MBMSPM,0.020312,0.006636
7,V00MBMSTMA,0.018056,0.006459
15,V00MBMPFMA,0.017708,0.00471
6,V00MBMSSS,0.016493,0.008689
22,V00MBMPTMA,0.012847,0.009515


### 5.5.2 Baseline BML variables' classification report against K-L score at 12-month follow-up

<IPython.core.display.JSON object>

None

### 5.5.3 Baseline BML variables' confusion matrix against K-L score at 12-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,134,41,8,0
True 1,80,82,14,2
True 2,40,28,40,8
True 3,7,17,8,67


In [18]:
from scripts.mbm.kl_oriented.kl_mbm_hist import v00_v03_moaks_kl_coefs_df, v00_v03_report, v00_v03_confusion_matrix

display(Markdown('### 5.5.4 Baseline BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v00_v03_moaks_kl_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.5.5 Baseline BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v00_v03_report, expanded=True)))
display(Markdown('### 5.5.6 Baseline BML variables\' confusion matrix against K-L score at 24-month follow-up'))
display(v00_v03_confusion_matrix)

### 5.5.4 Baseline BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Importance,Importance_std
24,V00MBMPTMC,0.044965,0.006329
28,V00MBMPPM,0.040104,0.009865
2,V00MBMSFMC,0.030035,0.006771
4,V00MBMSFMP,0.023611,0.006024
14,V00MBMSPL,0.022743,0.01063
7,V00MBMSTMA,0.022222,0.00918
13,V00MBMSPM,0.019965,0.009325
16,V00MBMPFLA,0.019792,0.004406
9,V00MBMSTMC,0.017882,0.008578
1,V00MBMSFLA,0.015278,0.005424


### 5.5.5 Baseline BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.5.6 Baseline BML variables' confusion matrix against K-L score at 24-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,127,32,12,1
True 1,72,56,22,8
True 2,41,31,51,9
True 3,11,11,18,74


In [19]:
from scripts.mbm.kl_oriented.kl_mbm_hist import v01_v03_moaks_kl_coefs_df, v01_v03_report, v01_v03_confusion_matrix

display(Markdown('### 5.5.7 12-month follow-up BML variables\' coefficients against K-L score at 24-month follow-up'))
display(v01_v03_moaks_kl_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.5.8 12-month follow-up BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v01_v03_report, expanded=True)))
display(Markdown('### 5.5.9 12-month follow-up BML variables\' confusion matrix against K-L score at 24-month follow-up'))
display(v01_v03_confusion_matrix)

### 5.5.7 12-month follow-up BML variables' coefficients against K-L score at 24-month follow-up

Unnamed: 0,Feature,Importance,Importance_std
1,V01MBMSFLA,0.050694,0.007192
13,V01MBMSPM,0.050347,0.010212
14,V01MBMSPL,0.034375,0.014931
24,V01MBMPTMC,0.034201,0.012079
21,V01MBMPSS,0.032118,0.005337
28,V01MBMPPM,0.028993,0.005153
9,V01MBMSTMC,0.025347,0.01123
2,V01MBMSFMC,0.024479,0.008123
29,V01MBMPPL,0.017882,0.003963
17,V01MBMPFMC,0.017014,0.00676


### 5.5.8 12-month follow-up BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.5.9 12-month follow-up BML variables' confusion matrix against K-L score at 24-month follow-up

Unnamed: 0,Pred 0,Pred 1,Pred 2,Pred 3
True 0,130,18,21,3
True 1,72,58,22,6
True 2,25,27,66,14
True 3,14,9,10,81


# 5.6 K-L XGBoost Classifier Model with Lasso Feature Space
The XGBoost classifier with softmax predicts multi-class outcomes by applying gradient-boosted decision trees and using the softmax function to output class probabilities. Here, the features selected for prediction are those selected by Lasso in the previous steps.

In [23]:
from scripts.mbm.kl_oriented.kl_mbm_xgBoost import v00_v01_moaks_kl_lasso_coefs_df, v00_v01_moaks_kl_lasso_metrics, v00_v01_moaks_kl_roc

display(Markdown('### 5.6.1 Baseline BML variables\' importance against K-L score at 12-month follow-up'))
display(v00_v01_moaks_kl_lasso_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.6.2 Baseline BML variables\' classification report against K-L score at 12-month follow-up'))
display(display(JSON(v00_v01_moaks_kl_lasso_metrics)))
display(Markdown('### 5.6.3 Baseline BML variables\' AUC-ROC curve against K-L score at 12-month follow-up'))
v00_v01_moaks_kl_roc.show()

### 5.6.1 Baseline BML variables' importance against K-L score at 12-month follow-up

Unnamed: 0,Feature,Importance
0,V00MBMPTMC,0.126541
1,V00MBMSTMC,0.110308
2,V00MBMNTMC,0.053654
3,V00MBMSTLC,0.052296
4,V00MBMSFLC,0.047329
5,V00MBMSFMC,0.045549
6,V00MBMPFMC,0.037608
7,V00MBMPSS,0.036488
8,V00MBMPFMP,0.033823
9,V00MBMPTMP,0.033543


### 5.6.2 Baseline BML variables' classification report against K-L score at 12-month follow-up

<IPython.core.display.JSON object>

None

### 5.6.3 Baseline BML variables' AUC-ROC curve against K-L score at 12-month follow-up

In [21]:
from scripts.mbm.kl_oriented.kl_mbm_xgBoost import v00_v03_moaks_kl_lasso_coefs_df, v00_v03_moaks_kl_lasso_metrics, v00_v03_moaks_kl_roc

display(Markdown('### 5.6.4 Baseline BML variables\' importance against K-L score at 24-month follow-up'))
display(v00_v03_moaks_kl_lasso_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.6.5 Baseline BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v00_v03_moaks_kl_lasso_metrics, expanded=True)))
display(Markdown('### 5.6.6 Baseline BML variables\' AUC-ROC curve against K-L score at 24-month follow-up'))
v00_v03_moaks_kl_roc.show()

### 5.6.4 Baseline BML variables' importance against K-L score at 24-month follow-up

Unnamed: 0,Feature,Importance
0,V00MBMPTMC,0.264424
1,V00MBMSFMC,0.051032
2,V00MBMSTMC,0.049503
3,V00MBMSFLC,0.043225
4,V00MBMPSS,0.038755
5,V00MBMPFMP,0.03872
6,V00MBMSSS,0.035713
7,V00MBMPTMP,0.035057
8,V00MBMNFMP,0.034108
9,V00MBMPFMC,0.033713


### 5.6.5 Baseline BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.6.6 Baseline BML variables' AUC-ROC curve against K-L score at 24-month follow-up

In [22]:
from scripts.mbm.kl_oriented.kl_mbm_xgBoost import v01_v03_moaks_kl_lasso_coefs_df, v00_v03_moaks_kl_lasso_metrics, v00_v03_moaks_kl_roc

display(Markdown('### 5.6.7 12-month follow-up BML variables\' importance against K-L score at 24-month follow-up'))
display(v01_v03_moaks_kl_lasso_coefs_df.sort_values(by='Importance', ascending=False).head(10))
display(Markdown('### 5.6.8 12-month follow-up BML variables\' classification report against K-L score at 24-month follow-up'))
display(display(JSON(v00_v03_moaks_kl_lasso_metrics, expanded=True)))
display(Markdown('### 5.6.9 12-month follow-up BML variables\' AUC-ROC curve against K-L score at 24-month follow-up'))
v00_v03_moaks_kl_roc.show()

### 5.6.7 12-month follow-up BML variables' importance against K-L score at 24-month follow-up

Unnamed: 0,Feature,Importance
0,V01MBMPTMC,0.289548
1,V01MBMNTMA,0.064569
2,V01MBMPFMC,0.05207
3,V01MBMSFLC,0.042981
4,V01MBMPFLP,0.039396
5,V01MBMPFMP,0.037807
6,V01MBMPSS,0.036423
7,V01MBMSTMC,0.034302
8,V01MBMSTLC,0.030597
9,V01MBMSFMC,0.028865


### 5.6.8 12-month follow-up BML variables' classification report against K-L score at 24-month follow-up

<IPython.core.display.JSON object>

None

### 5.6.9 12-month follow-up BML variables' AUC-ROC curve against K-L score at 24-month follow-up

## Results
BML variables have a degree of predictive power over the K-L score. XGBoost Classifier with Lasso-selected variables performs the best and generates a moderate AUC.