In [1]:
from imblearn.over_sampling import SMOTE
from scipy import stats
from sklearn import metrics
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve,f1_score,average_precision_score,precision_recall_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy
import numpy as np
import pandas as pd
import pickle
import xgboost as xgb

#### MOF (major osteoporotic fracture)

In [2]:
# Dataset with FRAX CRFs and GRS.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load 'ready_whi_sp23' when it comes from a trusted source.
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the keys directly so a missing key raises a KeyError that names it.
# A data.get(key, []) fallback cannot help here: a list has no .values, so a
# missing key would only surface as a confusing AttributeError.
X1, Y1 = data['X'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=98)

# Oversample the training portion only; the test portion stays as split.
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [3]:
# Dataset with FRAX CRFs only (no GRS).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load 'ready_whi_sp23' when it comes from a trusted source.
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the keys directly so a missing key raises a KeyError that names it.
# A data.get(key, []) fallback cannot help here: a list has no .values, so a
# missing key would only surface as a confusing AttributeError.
X2, Y2 = data['X_nogrs'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=98)

# Oversample the training portion only; the test portion stays as split.
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [4]:
# Model 4 (FRAX CRFs + GRS)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=37,
    max_depth=6,
    learning_rate=0.6897422327789582,
    subsample=0.44119502971375474,
    gamma=4.667459888168229,
    reg_alpha=5.948836051062213,
    reg_lambda=10.831670870285606,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s1, y_train_s1)
y_pred1 = model_xgboost.predict(x_test1)
yscore_raw = model_xgboost.predict_proba(x_test1)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test1, yscore)
prauc = average_precision_score(y_test1, yscore)
f1 = f1_score(y_test1, y_pred1, average="weighted")

# ROC points from the scores (fpr/tpr) and from the hard labels (fpr1/tpr1).
fpr, tpr, thresh = roc_curve(y_test1, yscore)
fpr1, tpr1, thresh1 = roc_curve(y_test1, y_pred1)

print("Model 4:")
print("XGB Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 4:
XGB Accuracy : 0.9123181377303589
XGB AUC : 0.7387495376080916
XGB PRAUC : 0.5703488372093023
XGB F1 : 0.9041235620908141


In [5]:
# Model 3 (FRAX CRFs + GRS)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=400,
    max_depth=5,
    learning_rate=1,
    subsample=0.8,
    gamma=0.001,
    reg_alpha=10,
    reg_lambda=10,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s1, y_train_s1)
y_pred1 = model_xgboost.predict(x_test1)
yscore_raw = model_xgboost.predict_proba(x_test1)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test1, yscore)
prauc = average_precision_score(y_test1, yscore)
f1 = f1_score(y_test1, y_pred1, average="weighted")

# ROC points from the scores (fpr/tpr) and from the hard labels (fpr1/tpr1).
fpr, tpr, thresh = roc_curve(y_test1, yscore)
fpr1, tpr1, thresh1 = roc_curve(y_test1, y_pred1)

print("Model 3:")
print("XGB Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 3:
XGB Accuracy : 0.9101662463627546
XGB AUC : 0.7071555338161541
XGB PRAUC : 0.5703484018264849
XGB F1 : 0.9022044197488457


In [6]:
# Model 2 (FRAX CRFs)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=137,
    max_depth=4,
    learning_rate=0.8204518092103462,
    subsample=0.7960861103728022,
    gamma=2.657459888168224,
    reg_alpha=4.843365967683508,
    reg_lambda=6.781179876154992,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s2, y_train_s2)
y_pred2 = model_xgboost.predict(x_test2)
yscore_raw = model_xgboost.predict_proba(x_test2)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test2, yscore)
prauc = average_precision_score(y_test2, yscore)
f1 = f1_score(y_test2, y_pred2, average="weighted")

# ROC points computed from the scores.
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 2:")
print("XGB Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 2:
XGB Accuracy : 0.9101662463627546
XGB AUC : 0.7012791751572777
XGB PRAUC : 0.5202288557213933
XGB F1 : 0.9010564919695884


In [7]:
# Model 1 (FRAX CRFs)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=400,
    max_depth=5,
    learning_rate=1,
    subsample=0.8,
    gamma=0.1,
    reg_alpha=1,
    reg_lambda=100,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s2, y_train_s2)
y_pred2 = model_xgboost.predict(x_test2)
yscore_raw = model_xgboost.predict_proba(x_test2)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test2, yscore)
prauc = average_precision_score(y_test2, yscore)
f1 = f1_score(y_test2, y_pred2, average="weighted")

# ROC points computed from the scores.
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 1:")
print("XGB Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 1:
XGB Accuracy : 0.9082444228903976
XGB AUC : 0.6907618659108573
XGB PRAUC : 0.520095238095238
XGB F1 : 0.8991749027804148


#### Hip fracture

In [8]:
# Dataset with FRAX CRFs and GRS.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load 'ready_whi_sp23_hip_py38' when it comes from a trusted source.
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the keys directly so a missing key raises a KeyError that names it.
# A data.get(key, []) fallback cannot help here: a list has no .values, so a
# missing key would only surface as a confusing AttributeError.
X1, Y1 = data['X'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=98)

# Oversample the training portion only; the test portion stays as split.
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [9]:
# Dataset with FRAX CRFs only (no GRS).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load 'ready_whi_sp23_hip_py38' when it comes from a trusted source.
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the keys directly so a missing key raises a KeyError that names it.
# A data.get(key, []) fallback cannot help here: a list has no .values, so a
# missing key would only surface as a confusing AttributeError.
X2, Y2 = data['X_nogrs'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=98)

# Oversample the training portion only; the test portion stays as split.
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [10]:
# Model 4 (FRAX CRFs + GRS)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=38,
    max_depth=6,
    learning_rate=0.6897253437008639,
    subsample=0.4411950009198089,
    gamma=8.667459948122074,
    reg_alpha=5.948836051062213,
    reg_lambda=7.831670870285606,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s1, y_train_s1)
y_pred1 = model_xgboost.predict(x_test1)
yscore_raw = model_xgboost.predict_proba(x_test1)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test1, yscore)
prauc = average_precision_score(y_test1, yscore)
f1 = f1_score(y_test1, y_pred1, average="weighted")

# ROC points from the scores (fpr/tpr) and from the hard labels (fpr1/tpr1).
fpr, tpr, thresh = roc_curve(y_test1, yscore)
fpr1, tpr1, thresh1 = roc_curve(y_test1, y_pred1)

print("Model 4: ")
print("XGB Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 4:
XGB Accuracy : 0.9857045675413022
XGB AUC : 0.7976665372560652
XGB PRAUC : 0.5768571428571428
XGB F1 : 0.9763283816339704


In [11]:
# Model 3 (FRAX CRFs + GRS)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=400,
    max_depth=5,
    learning_rate=1,
    subsample=0.8,
    gamma=0.001,
    reg_alpha=10,
    reg_lambda=10,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s1, y_train_s1)
y_pred1 = model_xgboost.predict(x_test1)
yscore_raw = model_xgboost.predict_proba(x_test1)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test1, yscore)
prauc = average_precision_score(y_test1, yscore)
f1 = f1_score(y_test1, y_pred1, average="weighted")

# ROC points from the scores (fpr/tpr) and from the hard labels (fpr1/tpr1).
fpr, tpr, thresh = roc_curve(y_test1, yscore)
fpr1, tpr1, thresh1 = roc_curve(y_test1, y_pred1)

print("Model 3: ")
print("XGB Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 3:
XGB Accuracy : 0.9852283770651118
XGB AUC : 0.7586610517083493
XGB PRAUC : 0.5768571428571428
XGB F1 : 0.9813544834704495


In [12]:
# Model 2 (FRAX CRFs)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=109,
    max_depth=4,
    learning_rate=0.99070348870674,
    subsample=0.24000103603942474,
    gamma=2.6699906002181897,
    reg_alpha=5.9097900106221335,
    reg_lambda=10.80125087028611,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s2, y_train_s2)
y_pred2 = model_xgboost.predict(x_test2)
yscore_raw = model_xgboost.predict_proba(x_test2)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test2, yscore)
prauc = average_precision_score(y_test2, yscore)
f1 = f1_score(y_test2, y_pred2, average="weighted")

# ROC points computed from the scores.
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 2: ")
print("XGB Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 2:
XGB Accuracy : 0.9770651117589894
XGB AUC : 0.7521217319523803
XGB PRAUC : 0.5579491525423728
XGB F1 : 0.9762762428415672


In [13]:
# Model 1 (FRAX CRFs)
# Fixed hyperparameters; every other XGBClassifier setting stays at its default.
xgb_rscv = xgb.XGBClassifier(
    n_estimators=400,
    max_depth=5,
    learning_rate=1,
    subsample=0.5,
    gamma=0.1,
    reg_alpha=1,
    reg_lambda=100,
)

# Fit on the resampled training arrays, then score the held-out test arrays.
model_xgboost = xgb_rscv.fit(x_train_s2, y_train_s2)
y_pred2 = model_xgboost.predict(x_test2)
yscore_raw = model_xgboost.predict_proba(x_test2)
# Keep column index 1 of the probability matrix as the ranking score.
yscore = list(yscore_raw[:, 1])

# Scalar metrics: AUC and PRAUC use the scores, F1 uses the hard labels.
auc = roc_auc_score(y_test2, yscore)
prauc = average_precision_score(y_test2, yscore)
f1 = f1_score(y_test2, y_pred2, average="weighted")

# ROC points computed from the scores.
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 1: ")
print("XGB Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("XGB AUC : ", auc)
print("XGB PRAUC : ", prauc)
print("XGB F1 : ", f1)

Model 1:
XGB Accuracy : 0.9770621963070942
XGB AUC : 0.7511447034089589
XGB PRAUC : 0.5584782608695652
XGB F1 : 0.9804868875127666
