In [30]:
import xgboost as xgb
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
import pandas as pd

In [31]:
df = pd.read_csv('../data/diabetes.csv')

In [32]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [33]:
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

In [34]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [35]:
def xGb():
    clf = xgb.XGBClassifier()
    clf.fit(X_train , y_train)
    pred = clf.predict(X_test) 
    return pred

In [36]:
from optuna import create_study, Trial

def objective(trial: Trial):
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 50, 400),
        'gamma': trial.suggest_loguniform('gamma', 0.1, 5),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1),
    }


def optuna_xgb():
    study = create_study(direction='maximize')
    n_trials = 75
    try:
        study.optimize(objective, n_trials=n_trials)
    except :
        print("Optimization stopped")    
    best_trial = study.best_trial
    best_params = best_trial.params
    best_model = xgb.XGBClassifier(**best_params)
    best_model.fit(X_train, y_train)
    predictions = best_model.predict(X_test)
    return predictions


In [37]:
from Optimizer import Optimizer

def gen_xgb():

    hyperparameter_ranges = {
  "learning_rate": [(0.01, 0.3), float],
  "n_estimators": [(50, 200), int],
  "max_depth": [(3, 10), int],
  "min_child_weight": [(1, 10), int],
  "subsample": [(0.5, 1.0), float],
  "colsample_bytree": [(0.5, 1.0), float],
  "gamma": [(0, 1), float],
  "objective": [["binary:logistic"], str]
    }

    model = xgb.XGBClassifier()
    model_type = "classifier"
    pops = 10
    generations = 100
    mutation_rate = 0.3
    optimizer = Optimizer(X_train,y_train,model=model,model_type=model_type ,ranges = hyperparameter_ranges,population=pops , max_generation=generations , mutation_rate=mutation_rate)
    optimizer.search()
    best_params = optimizer.get_best_params()

    tuned_model = model.set_params(**best_params)

    model.fit(X_train , y_train)
    tuned_model.fit(X_train , y_train)

    pred = model.predict(X_test)
    tuned_pred  = tuned_model.predict(X_test)

    return tuned_pred


In [38]:
from sklearn.metrics import accuracy_score,classification_report

pred = xGb()
score = accuracy_score(pred , y_test)
report = classification_report(xGb() , y_test)
print(score , report)


0.7077922077922078               precision    recall  f1-score   support

           0       0.74      0.79      0.76        92
           1       0.65      0.58      0.62        62

    accuracy                           0.71       154
   macro avg       0.70      0.69      0.69       154
weighted avg       0.70      0.71      0.70       154



In [39]:
from sklearn.metrics import accuracy_score,classification_report

pred = optuna_xgb()
score = accuracy_score(pred , y_test)
report = classification_report(xGb() , y_test)
print(score , report)


[I 2024-02-06 06:46:49,720] A new study created in memory with name: no-name-6cfc0c1d-9d24-4408-b995-a98a625084b9
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'gamma': trial.suggest_loguniform('gamma', 0.1, 5),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1),
[W 2024-02-06 06:46:49,724] Trial 0 failed with parameters: {'max_depth': 6, 'learning_rate': 0.22959529624132619, 'n_estimators': 152, 'gamma': 0.1204037788133369, 'min_child_weight': 4, 'subsample': 0.9331057422081962, 'colsample_bytree': 0.5873464774895779} because of the following error: The value None could not be cast to float..
[W 2024-02-06 06:46:49,725] Trial 0 failed with value None.
[W 2024-02-06 06:46:49,727] Trial 1 failed with parameters: {'max_depth': 3, 'learning_rate': 0.02313774424320856, 'n_estimators': 221, 'gamma': 0.6551995674425265, 'min_child_weight': 3, 'subsample': 0.7426735208315476, 'colsampl

ValueError: No trials are completed yet.

In [None]:
from sklearn.metrics import accuracy_score,classification_report

pred = gen_xgb()
score = accuracy_score(pred , y_test)
report = classification_report(xGb() , y_test)
print(score , report)
