In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

In [58]:
X,y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    n_classes=2,
    random_state=42
)

In [101]:
gbc = GradientBoostingClassifier(
    loss='log_loss',
    learning_rate=0.001,
    n_estimators=100,
    max_depth=3,
    subsample=0.8,
    random_state=42
)

In [102]:
X_train,X_test,y_train,y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

In [103]:
gbc.fit(X_train,y_train)

In [104]:
# Evaluate model prediction
from sklearn.metrics import accuracy_score, classification_report

y_pred = gbc.predict(X_test)
print(f"Accuracy : {accuracy_score(y_test,y_pred)}")
print(f"Classification Report : \n{classification_report(y_test,y_pred)}")

Accuracy : 0.735
Classification Report : 
              precision    recall  f1-score   support

           0       0.79      0.68      0.73       106
           1       0.69      0.80      0.74        94

    accuracy                           0.73       200
   macro avg       0.74      0.74      0.73       200
weighted avg       0.74      0.73      0.73       200



In [105]:
# Checking Model is Overfitting & Underfitting
y_train_pred = gbc.predict(X_train)
y_test_pred = gbc.predict(X_test)

print(f"Training Accuracy : {accuracy_score(y_train, y_train_pred)}")
print(f"Testing Accuracy : {accuracy_score(y_test, y_test_pred)}")

Training Accuracy : 0.81
Testing Accuracy : 0.735


In [117]:
# Model shows slight overfitting

In [107]:
from sklearn.model_selection import cross_val_score

cv_scores = cross_val_score(gbc, X_train, y_train, cv=5)
print(cv_scores.mean())


0.74625


In [108]:
# Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

model = GradientBoostingClassifier()

parameters = {
    "learning_rate":[0.0001,0.001,0.01,0.1],
    "max_depth":[2,3,4,5],
    "n_estimators":[80,90,100,120]
}

gridsearchcv = GridSearchCV(
    estimator = model,
    param_grid= parameters,
    cv = 5,
    scoring ="accuracy"
)

In [110]:
gridsearchcv.fit(X_train,y_train)

In [111]:
print("Best Parameter:", gridsearchcv.best_params_)
print("Best Score:", gridsearchcv.best_score_)
print("Best Estimators:", gridsearchcv.best_estimator_)

Best Parameter: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100}
Best Score: 0.8787499999999999
Best Estimators: GradientBoostingClassifier(max_depth=4)


In [114]:
# Check gridsearchcv Overfitting & underfitting
y_pred_train_grid = gridsearchcv.predict(X_train)
y_pred_test_grid = gridsearchcv.predict(X_test)

print("GridSearch Training Acuracy:", accuracy_score(y_train,y_pred_train_grid))
print("GridSearch Testing Accuracy:", accuracy_score(y_test, y_pred_test_grid))

GridSearch Training Acuracy: 0.9975
GridSearch Testing Accuracy: 0.845
