# Regression


In [25]:
from xgboost import XGBRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [26]:
# Synthetic regression problem: 1000 samples, 10 features, noisy targets.
# random_state is pinned so the dataset is identical on every run.
x, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=10,
    random_state=1,
)

In [27]:
# Hold out 20% of the samples for testing; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

In [28]:
# Baseline regressor with library-default hyperparameters.
xgb = XGBRegressor()
# The model must be fitted before any predict() call; without this step the
# notebook fails on a fresh kernel (Restart & Run All) with a not-fitted error.
# Only the training split is used, so the test split stays unseen.
xgb.fit(X_train, y_train)

In [30]:
# Baseline predictions for the held-out test split; scored in the next cell.
y_pred = xgb.predict(X_test)

In [31]:
# Report the baseline regressor's test-set metrics, one per line.
print(
    f"R2 Score: {r2_score(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    sep="\n",
)

R2 Score: 0.8642269333605866
Mean Squared Error: 4589.770499843805
Mean Absolute Error: 51.14635320325199


In [32]:
# Hyperparameter grid for the regressor search:
# 3 tree counts x 5 learning rates = 15 candidate combinations.
param_grid = {
    "n_estimators": [100, 200, 300],
    "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
}

In [33]:
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold cross-validated search over param_grid for the regressor.
# n_jobs=-1 asks for all available cores; verbose=2 logs progress per fit.
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [34]:
# Run the cross-validated search on the training split only; the test split
# is not passed to the search.
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


In [35]:
# Show the hyperparameter combination the search selected from param_grid.
grid_search.best_params_

{'learning_rate': 0.05, 'n_estimators': 300}

In [36]:
# Keep a handle on the search's best regressor for evaluation on the test split.
best_model_r = grid_search.best_estimator_

In [37]:
# Predictions from the tuned regressor on the same held-out test split.
y_predr = best_model_r.predict(X_test)

In [38]:
# Report the tuned regressor's test-set metrics, one per line.
print(
    f"R2 Score: {r2_score(y_test, y_predr)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_predr)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_predr)}",
    sep="\n",
)

R2 Score: 0.8853120139259136
Mean Squared Error: 3876.995255379563
Mean Absolute Error: 46.7884218607966


# Classification

In [39]:
from xgboost import XGBClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [40]:
# Synthetic binary classification problem: 1000 samples, 10 features.
# NOTE: this rebinds x and y, replacing the regression data created earlier.
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=1,
)

In [41]:
# Hold out 20% of the samples for testing; the seed keeps the split stable.
# NOTE: this rebinds the train/test names used by the regression section.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

In [42]:
# Baseline classifier; no hyperparameters are overridden here.
xgc = XGBClassifier()

In [43]:
# Fit the baseline classifier on the training split only.
xgc.fit(X_train, y_train)

In [44]:
# Baseline class predictions for the held-out test split.
y_predC = xgc.predict(X_test)

In [47]:
# Report the baseline classifier's test-set accuracy, per-class report and
# confusion matrix.
print(
    f"Accuracy Score: {accuracy_score(y_test, y_predC)}",
    f"Classification Report:\n {classification_report(y_test, y_predC)}",
    f"Confusion Matrix:\n {confusion_matrix(y_test, y_predC)}",
    sep="\n",
)

Accuracy Score: 0.89
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.87      0.89       105
           1       0.86      0.92      0.89        95

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200

Confusion Matrix:
 [[91 14]
 [ 8 87]]


In [48]:
# Hyperparameter grid for the classifier search:
# 3 tree counts x 5 learning rates = 15 candidate combinations.
param_gridc = {
    "n_estimators": [100, 200, 300],
    "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3],
}

# Exhaustive 5-fold cross-validated search over param_gridc for the classifier.
# n_jobs=-1 asks for all available cores; verbose=2 logs progress per fit.
grid_searchC = GridSearchCV(
    estimator=xgc,
    param_grid=param_gridc,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [49]:
# Run the cross-validated search on the training split only; the test split
# is not passed to the search.
grid_searchC.fit(X_train, y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


In [50]:
# Show the hyperparameter combination the search selected from param_gridc.
grid_searchC.best_params_

{'learning_rate': 0.01, 'n_estimators': 300}

In [51]:
# Keep a handle on the search's best classifier for evaluation on the test split.
best_model_c = grid_searchC.best_estimator_

In [52]:
# Class predictions from the tuned classifier on the same held-out test split.
y_pred_c = best_model_c.predict(X_test)

In [53]:
# Report the tuned classifier's test-set accuracy, per-class report and
# confusion matrix.
print(
    f"Accuracy Score: {accuracy_score(y_test, y_pred_c)}",
    f"Classification Report:\n {classification_report(y_test, y_pred_c)}",
    f"Confusion Matrix:\n {confusion_matrix(y_test, y_pred_c)}",
    sep="\n",
)

Accuracy Score: 0.89
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.88      0.89       105
           1       0.87      0.91      0.89        95

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200

Confusion Matrix:
 [[92 13]
 [ 9 86]]
