In [1]:
from imblearn.over_sampling import SMOTE
from scipy import stats
from sklearn import metrics
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve,f1_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy
import pandas as pd
import pickle
import time
from bayes_opt import BayesianOptimization
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import warnings
warnings.filterwarnings("ignore")

#### mof

In [2]:
# dataset with FRAX CRFs and GRS
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)
    X1, Y1 = data.get('X', []).values, data.get('Y', []).values
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2,random_state=98)
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [3]:
# dataset with FRAX CRFs (no grs)
with open('ready_whi_sp23', 'rb') as file_handler:
    data = pickle.load(file_handler)
    X2, Y2 = data.get('X_nogrs', []).values, data.get('Y', []).values
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2,random_state=98)
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [4]:
# Bayesian optimization for Model 4 (FRAX CRFs + GRS)
def svc_cv(C, gamma, data, targets):
    estimator = SVC(C=C, gamma=gamma, random_state=2)
    cval = cross_val_score(estimator, data, targets, scoring='roc_auc', 
                           cv=10)
    return cval.mean()

def optimize_svc(data, targets):
    def svc_crossval(C, gamma):
        C = C
        gamma = gamma
        return svc_cv(C=C, gamma=gamma, data=data, targets=targets)
    optimizer = BayesianOptimization( f=svc_crossval,
            pbounds={"C": (0.01,100), 
                     "gamma": (0.01,100)
                    },
            verbose=0,
            allow_duplicate_points=True
            )
    optimizer.maximize(n_iter=4)

    print("Final result:", optimizer.max)
    
if __name__ == "__main__":
    print("--- Optimizing SVM with GRS---")
    optimize_svc(data=x_train_s1, targets=y_train_s1)

--- Optimizing SVM with GRS---
[91mData point [8.01144642 8.51685839] is not unique. 1 duplicates registered. Continuing ...[0m
[91mData point [8.01144642 8.51685839] is not unique. 2 duplicates registered. Continuing ...[0m
Final result: {'target': 0.9294468144853625, 'params': {'C': 8.011446423096377, 'gamma': 8.516858389169544}}


In [5]:
# Bayesian optimization for Model 2 (FRAX CRFs)
def svc_cv(C, gamma, data, targets):
    estimator = SVC(C=C, gamma=gamma, random_state=2)
    cval = cross_val_score(estimator, data, targets, scoring='roc_auc', 
                           cv=10)
    return cval.mean()

def optimize_svc(data, targets):
    def svc_crossval(C, gamma):
        C = C
        gamma = gamma
        return svc_cv(C=C, gamma=gamma, data=data, targets=targets)
    optimizer = BayesianOptimization( f=svc_crossval,
            pbounds={"C": (0.01,100), 
                     "gamma": (0.01,100)
                    },
            verbose=0,
            allow_duplicate_points=True
            )
    optimizer.maximize(n_iter=4)

    print("Final result:", optimizer.max)
    
if __name__ == "__main__":
    print("--- Optimizing SVM with no GRS---")
    optimize_svc(data=x_train_s2, targets=y_train_s2)

--- Optimizing SVM with no GRS---
[91mData point [11.71429574 20.90040947] is not unique. 1 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 2 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 3 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 4 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 5 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 6 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 7 duplicates registered. Continuing ...[0m
[91mData point [11.71429574 20.90040947] is not unique. 8 duplicates registered. Continuing ...[0m
Final result: {'target': 0.9105075211260415, 'params': {'C': 11.714295736628298, 'gamma': 20.900409473930125}}


In [14]:
# grid search for Model 3 (FRAX CRFs + GRS)
parameters = {
        'C': [0.01,10,100],
        'gamma': [0.01,10,100]
     }
svc = SVC(random_state=2)

model = GridSearchCV(estimator=svc, 
                    param_grid=parameters,cv=10).fit(x_train_s1, y_train_s1)
print(model.best_params_)

{'C': 10, 'gamma': 10}


In [15]:
# grid search for Model 1 (FRAX CRFs)
parameters = {
        'C': [0.01,10,100],
        'gamma': [0.01,10,100]
     }
svc = SVC(random_state=2)

model = GridSearchCV(estimator=svc, 
                    param_grid=parameters,cv=10).fit(x_train_s2, y_train_s2)
print(model.best_params_)

{'C': 10, 'gamma': 10}


#### hip

In [16]:
# dataset with FRAX CRFs and GRS
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)
    X1, Y1 = data.get('X', []).values, data.get('Y', []).values
x_train1, x_test1, y_train1, y_test1 = train_test_split(X1, Y1, test_size=0.2,random_state=98)
sm = SMOTE(random_state=2)
x_train_s1, y_train_s1 = sm.fit_resample(x_train1, y_train1)

In [17]:
# dataset with FRAX CRF (no grs)
with open('ready_whi_sp23_hip_py38', 'rb') as file_handler:
    data = pickle.load(file_handler)
    X2, Y2 = data.get('X_nogrs', []).values, data.get('Y', []).values
x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, Y2, test_size=0.2,random_state=98)
sm = SMOTE(random_state=2)
x_train_s2, y_train_s2 = sm.fit_resample(x_train2, y_train2)

In [18]:
# Bayesisn optimization for Model 4 (FRAX CRFs + GRS)
def svc_cv(C, gamma, data, targets):
    estimator = SVC(C=C, gamma=gamma, random_state=2)
    cval = cross_val_score(estimator, data, targets, scoring='roc_auc', 
                           cv=10) 
    return cval.mean()

def optimize_svc(data, targets):
    def svc_crossval(C, gamma):
        C = C
        gamma = gamma
        return svc_cv(C=C, gamma=gamma, data=data, targets=targets)
    optimizer = BayesianOptimization( f=svc_crossval,
            pbounds={"C": (0.01,100), 
                     "gamma": (0.01,100)
                    },
            verbose=0,
            allow_duplicate_points=True
            )
    optimizer.maximize(n_iter=4)

    print("Final result:", optimizer.max)
    
if __name__ == "__main__":
    print("--- Optimizing SVM with GRS---")
    optimize_svc(data=x_train_s1, targets=y_train_s1)

--- Optimizing SVM with GRS---
Final result: {'target': 0.9623814314467856, 'params': {'C': 1.2351702914303146, 'gamma': 1.032948099274697}}


In [19]:
# Bayesisn optimization for Model 2 (FRAX CRFs)
def svc_cv(C, gamma, data, targets):
    estimator = SVC(C=C, gamma=gamma, random_state=2)
    cval = cross_val_score(estimator, data, targets, scoring='roc_auc', 
                           cv=10)
    return cval.mean()

def optimize_svc(data, targets):
    def svc_crossval(C, gamma):
        C = C
        gamma = gamma
        return svc_cv(C=C, gamma=gamma, data=data, targets=targets)
    optimizer = BayesianOptimization( f=svc_crossval,
            pbounds={"C": (0.01,100), 
                     "gamma": (0.01,100)
                    },
            verbose=0,
            allow_duplicate_points=True
            )
    optimizer.maximize(n_iter=4)

    print("Final result:", optimizer.max)
    
if __name__ == "__main__":
    print("--- Optimizing SVM with no GRS---")
    optimize_svc(data=x_train_s2, targets=y_train_s2)

--- Optimizing SVM with no GRS---
[91mData point [1.09533018 1.07172558] is not unique. 1 duplicates registered. Continuing ...[0m
[91mData point [1.09533018 1.07172558] is not unique. 2 duplicates registered. Continuing ...[0m
[91mData point [1.09533018 1.07172558] is not unique. 3 duplicates registered. Continuing ...[0m
[91mData point [1.09533018 1.07172558] is not unique. 4 duplicates registered. Continuing ...[0m
Final result: {'target': 0.9630563501453739, 'params': {'C': 1.0953301847329402, 'gamma': 1.0717255846156497}}


In [20]:
# grid search for Model 3 (FRAX CRFs + GRS)
parameters = {
        'C': [0.01,5,100],
        'gamma': [0.01,5,100]
     }
svc = SVC(random_state=2)

model = GridSearchCV(estimator=svc, 
                    param_grid=parameters,cv=10).fit(x_train_s1, y_train_s1) 
print(model.best_params_)

{'C': 5, 'gamma': 5}


In [21]:
# grid search for Model 1 (FRAX CRFs)
parameters = {
        'C': [0.01,5,100],
        'gamma': [0.01,5,100]
     }

svc = SVC(random_state=2)

model = GridSearchCV(estimator=svc, 
                    param_grid=parameters,cv=10).fit(x_train_s2, y_train_s2) 
print(model.best_params_)

{'C': 5, 'gamma': 5}
