In [1]:
from imblearn.over_sampling import SMOTE
from scipy import stats
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve,f1_score,average_precision_score,precision_recall_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy
import numpy as np
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.naive_bayes import GaussianNB

#### MOF (major osteoporotic fracture) models

In [2]:
# Dataset with FRAX CRFs and GRS, read from the pickled bundle 'ready_whi_sp23'.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the bundle directly. The earlier `.get(key, [])` fallback could never
# work (a list has no `.values`) and turned a missing key into a confusing
# AttributeError; a KeyError naming the absent entry is clearer.
X1, Y1 = data['X'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=98)

# Oversample with SMOTE on the training portion only; the test split is left as-is.
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [3]:
# Dataset with FRAX CRFs only (no GRS), read from the pickled bundle 'ready_whi_sp23'.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the bundle directly. The earlier `.get(key, [])` fallback could never
# work (a list has no `.values`) and turned a missing key into a confusing
# AttributeError; a KeyError naming the absent entry is clearer.
X2, Y2 = data['X_nogrs'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=98)

# Oversample with SMOTE on the training portion only; the test split is left as-is.
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [4]:
# Model 4 (FRAX CRFs + GRS): random forest fitted on the SMOTE-resampled
# training data and evaluated on the untouched test split.
rf = RandomForestClassifier(
    n_estimators=90,
    max_depth=8,
    min_samples_split=2,
    max_features=0.9985845161097171,  # float => fraction of features tried per split
    random_state=45,
)
model_rf = rf.fit(x_train_s1, y_train_s1)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf.predict_proba(x_test1)
y_pred1 = model_rf.predict(x_test1)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test1, y_pred1, average="weighted")
prauc = average_precision_score(y_test1, yscore)
auc = roc_auc_score(y_test1, yscore)
fpr, tpr, thresh = roc_curve(y_test1, yscore)

print("Model 4: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 4:
RF Accuracy : 0.7846750727449079
RF AUC : 0.7577652840665227
RF PRAUC : 0.5774018691588785
RF F1 : 0.8351205412859825


In [5]:
# Model 3 (FRAX CRFs + GRS): random forest fitted on the SMOTE-resampled
# training data and evaluated on the untouched test split.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=8,
    min_samples_split=10,
    max_features=0.999,  # float => fraction of features tried per split
    random_state=45,
)
model_rf = rf.fit(x_train_s1, y_train_s1)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf.predict_proba(x_test1)
y_pred1 = model_rf.predict(x_test1)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test1, y_pred1, average="weighted")
prauc = average_precision_score(y_test1, yscore)
auc = roc_auc_score(y_test1, yscore)
fpr, tpr, thresh = roc_curve(y_test1, yscore)

print("Model 3: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 3:
RF Accuracy : 0.7819592628516003
RF AUC : 0.7568214890115059
RF PRAUC : 0.5699818496110631
RF F1 : 0.8333049599517579


In [6]:
# Model 2 (FRAX CRFs): random forest fitted on the SMOTE-resampled no-GRS
# training data and evaluated on the untouched no-GRS test split.
rf_nogrs = RandomForestClassifier(
    n_estimators=100,
    max_depth=8,
    min_samples_split=9,
    max_features=0.9986730679991317,  # float => fraction of features tried per split
    random_state=45,
)
model_rf_nogrs = rf_nogrs.fit(x_train_s2, y_train_s2)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf_nogrs.predict_proba(x_test2)
y_pred2 = model_rf_nogrs.predict(x_test2)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test2, y_pred2, average="weighted")
prauc = average_precision_score(y_test2, yscore)
auc = roc_auc_score(y_test2, yscore)
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 2: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 2:
RF Accuracy : 0.7627546071774975
RF AUC : 0.6946338049408588
RF PRAUC : 0.5138224299065423
RF F1 : 0.8210035447920947


In [7]:
# Model 1 (FRAX CRFs): random forest fitted on the SMOTE-resampled no-GRS
# training data and evaluated on the untouched no-GRS test split.
rf_nogrs = RandomForestClassifier(
    n_estimators=20,
    max_depth=8,
    min_samples_split=2,
    max_features=0.999,  # float => fraction of features tried per split
    random_state=45,
)
model_rf_nogrs = rf_nogrs.fit(x_train_s2, y_train_s2)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf_nogrs.predict_proba(x_test2)
y_pred2 = model_rf_nogrs.predict(x_test2)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test2, y_pred2, average="weighted")
prauc = average_precision_score(y_test2, yscore)
auc = roc_auc_score(y_test2, yscore)
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 1: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 1:
RF Accuracy : 0.7624606207565471
RF AUC : 0.6821967838145431
RF PRAUC : 0.5200904130943103
RF F1 : 0.8201444801937245


#### Hip fracture models

In [8]:
# Dataset with FRAX CRFs and GRS, read from the pickled bundle 'ready_whi_sp23_hip_py38'.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the bundle directly. The earlier `.get(key, [])` fallback could never
# work (a list has no `.values`) and turned a missing key into a confusing
# AttributeError; a KeyError naming the absent entry is clearer.
X1, Y1 = data['X'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2, random_state=98)

# Oversample with SMOTE on the training portion only; the test split is left as-is.
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [9]:
# Dataset with FRAX CRFs only (no GRS), read from the pickled bundle 'ready_whi_sp23_hip_py38'.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)

# Index the bundle directly. The earlier `.get(key, [])` fallback could never
# work (a list has no `.values`) and turned a missing key into a confusing
# AttributeError; a KeyError naming the absent entry is clearer.
X2, Y2 = data['X_nogrs'].values, data['Y'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2, random_state=98)

# Oversample with SMOTE on the training portion only; the test split is left as-is.
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [10]:
# Model 4 (FRAX CRFs + GRS): random forest fitted on the SMOTE-resampled
# training data and evaluated on the untouched test split.
rf = RandomForestClassifier(
    n_estimators=90,
    max_depth=8,
    min_samples_split=2,
    max_features=0.7700001278592569,  # float => fraction of features tried per split
    random_state=45,
)
model_rf = rf.fit(x_train_s1, y_train_s1)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf.predict_proba(x_test1)
y_pred1 = model_rf.predict(x_test1)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test1, y_pred1, average="weighted")
prauc = average_precision_score(y_test1, yscore)
auc = roc_auc_score(y_test1, yscore)
fpr, tpr, thresh = roc_curve(y_test1, yscore)

print("Model 4: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 4:
RF Accuracy : 0.8530612244897959
RF AUC : 0.8002603410248804
RF PRAUC : 0.6701967213114755
RF F1 : 0.9101745644401951


In [11]:
# Model 3 (FRAX CRFs + GRS): random forest fitted on the SMOTE-resampled
# training data and evaluated on the untouched test split.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    min_samples_split=10,
    max_features=0.999,  # float => fraction of features tried per split
    random_state=45,
)
model_rf = rf.fit(x_train_s1, y_train_s1)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf.predict_proba(x_test1)
y_pred1 = model_rf.predict(x_test1)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test1, y_pred1, average="weighted")
prauc = average_precision_score(y_test1, yscore)
auc = roc_auc_score(y_test1, yscore)
fpr, tpr, thresh = roc_curve(y_test1, yscore)

print("Model 3: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test1, y_pred1))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 3:
RF Accuracy : 0.8161321671525753
RF AUC : 0.7970263507500226
RF PRAUC : 0.5767704918032787
RF F1 : 0.8881644799890457


In [12]:
# Model 2 (FRAX CRFs): random forest fitted on the SMOTE-resampled no-GRS
# training data and evaluated on the untouched no-GRS test split.
rf_nogrs = RandomForestClassifier(
    n_estimators=282,
    max_depth=7,
    min_samples_split=9,
    max_features=0.565296273916827,  # float => fraction of features tried per split
    random_state=45,
)
model_rf_nogrs = rf_nogrs.fit(x_train_s2, y_train_s2)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf_nogrs.predict_proba(x_test2)
y_pred2 = model_rf_nogrs.predict(x_test2)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test2, y_pred2, average="weighted")
prauc = average_precision_score(y_test2, yscore)
auc = roc_auc_score(y_test2, yscore)
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 2: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 2:
RF Accuracy : 0.8167152575315842
RF AUC : 0.6880019785634133
RF PRAUC : 0.6333770491803278
RF F1 : 0.8880188571478942


In [13]:
# Model 1 (FRAX CRFs): random forest fitted on the SMOTE-resampled no-GRS
# training data and evaluated on the untouched no-GRS test split.
rf_nogrs = RandomForestClassifier(
    n_estimators=20,
    max_depth=8,
    min_samples_split=2,
    max_features=0.999,  # float => fraction of features tried per split
    random_state=45,
)
model_rf_nogrs = rf_nogrs.fit(x_train_s2, y_train_s2)

# Class probabilities drive the ranking metrics; hard labels drive accuracy and F1.
yscore_raw = model_rf_nogrs.predict_proba(x_test2)
y_pred2 = model_rf_nogrs.predict(x_test2)
yscore = list(yscore_raw[:, 1])  # second column = probability of the second class

# Threshold-based and ranking metrics on the test split.
f1 = f1_score(y_test2, y_pred2, average="weighted")
prauc = average_precision_score(y_test2, yscore)
auc = roc_auc_score(y_test2, yscore)
fpr, tpr, thresh = roc_curve(y_test2, yscore)

print("Model 1: ")
print("RF Accuracy : ", metrics.accuracy_score(y_test2, y_pred2))
print("RF AUC : ", auc)
print("RF PRAUC : ", prauc)
print("RF F1 : ", f1)

Model 1:
RF Accuracy : 0.8069203109815354
RF AUC : 0.6851565825282789
RF PRAUC : 0.5581967213114754
RF F1 : 0.8130117072337278
