In [None]:
pip install xgboost

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_splits, StratifiedKFold, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import StandardScaler
from xgboost import XBGClassifier

In [None]:
df = pd.read_csv('diabetes.csv')
df

In [None]:
x = df.drop('class', axis=1)
y = df['class']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(x_train_scaled)
print("-----------------------------------------------")
print(x_test_scaled)

In [None]:
xgb = XBGClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)

param_grid = {
    'n_estimators': [100, 150, 200, 300],
    'learning_rate': [0.01, 0.1, 0.15],
    'max_depth': [2, 3, 4, 5],
    'subsample': [0.8, 1.0],
    'colsample_bytree':[0.8, 1.0]
}

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(estimator=xgb,
                           param_grid=param_grid,
                           scoring='recall',
                           cv=skf,
                           n_jobs=-1,
                           verbose=1)


In [None]:
# Run the grid search on the scaled training data: the candidates from `param_grid`
# are evaluated with the `cv` splitter and `scoring` metric configured on `grid_search`.
grid_search.fit(x_train_scaled, y_train)


# Keep the estimator selected by the search and report the winning parameters and score.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Recall:", grid_search.best_score_)

# Predict labels for the held-out test rows (scaled with the scaler fitted on the training rows).
y_pred = best_model.predict(x_test_scaled)

In [None]:
# Compare the test-set predictions with the true labels: first the confusion
# matrix, then scikit-learn's text classification report.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Identify feature importance scores using XGBClassifier

In [None]:
# Display the raw importance scores exposed by the selected model
# (an unlabeled array; presumably one value per input feature, in training column order).
best_model.feature_importances_

In [None]:
# Label each importance score with the name of the feature it belongs to.
# `x` is the feature frame the train/test split was taken from, so its column order
# matches the columns the model was trained on regardless of where `class` sits in `df`.
features = pd.DataFrame(
    best_model.feature_importances_,
    index=x.columns,
    columns=["Importance"],
)

# Sort ascending by the single "Importance" column (the sort key must match the column name exactly).
df1 = features.sort_values(by="Importance")
df1

In [None]:
import seaborn as sns
sns.barplot(data = df1, 