# **GRADIENT BOOSTING CLASSIFIER ALGORITHM**

In [None]:
# Import models and utility functions
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits



# Load the scikit-learn digits dataset as a feature matrix X and label vector y
X, y = load_digits(return_X_y=True)

# Hold out 25% of the samples for testing; the fixed seed keeps the split repeatable
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.25, random_state=23
)

# Build the gradient boosting classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
gbc = GradientBoostingClassifier(
    n_estimators=300,
    learning_rate=0.05,
    random_state=100,
    max_features=5,
).fit(train_X, train_y)

# Predict labels for the held-out samples
pred_y = gbc.predict(test_X)

# Report the fraction of test samples classified correctly
acc = accuracy_score(test_y, pred_y)
print("Gradient Boosting Classifier accuracy is : {:.2f}".format(acc))


Gradient Boosting Classifier accuracy is : 0.98


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic classification problem: 1000 samples, 20 features, fixed seed
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Deliberately constrained gradient boosting model
gbm = GradientBoostingClassifier(
    n_estimators=30,         # very few boosting stages
    learning_rate=0.35,      # large step per stage
    max_depth=1,             # depth-1 trees
    min_samples_split=50,    # high threshold before a node may split
    min_samples_leaf=20,     # high minimum leaf size
    subsample=0.4,           # aggressive row subsampling
    random_state=42,
)

# Fit on the training split
gbm.fit(X_train, y_train)

# Score the held-out split
y_pred = gbm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

Model Accuracy: 0.8700


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Same synthetic problem as before: 1000 samples, 20 features, fixed seed
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Gradient boosting model with relaxed constraints
gbm = GradientBoostingClassifier(
    n_estimators=100,        # more boosting stages
    learning_rate=0.1,       # smaller step per stage
    max_depth=4,             # deeper trees
    min_samples_split=10,    # lower threshold, so more splits are allowed
    min_samples_leaf=5,      # smaller leaves are allowed
    subsample=0.8,           # milder row subsampling
    random_state=42,
)

# Fit on the training split
gbm.fit(X_train, y_train)

# Score the held-out split
y_pred = gbm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

Model Accuracy: 0.9000


# **OVERFITTING SITUATION**

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic classification problem: 1000 samples, 20 features, fixed seed
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Gradient boosting model configured with almost no regularisation
gbm = GradientBoostingClassifier(
    n_estimators=600,        # many boosting stages
    learning_rate=0.005,     # very small step per stage
    max_depth=7,             # deep trees
    min_samples_split=2,     # no constraint on splits
    min_samples_leaf=1,      # no constraint on leaf size
    subsample=1.0,           # no subsampling: every tree sees all rows
    random_state=42,
)

# Fit on the training split
gbm.fit(X_train, y_train)

# Accuracy on the data the model was trained on
y_train_pred = gbm.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Accuracy on the held-out data
y_test_pred = gbm.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Print both so the train/test gap is visible
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Training Accuracy: 1.0000
Test Accuracy: 0.8950


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic classification problem: 1000 samples, 20 features, fixed seed
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Gradient boosting model with moderate settings and some regularisation
gbm = GradientBoostingClassifier(
    n_estimators=200,        # moderate number of boosting stages
    learning_rate=0.1,       # moderate step per stage
    max_depth=5,             # medium tree depth
    min_samples_split=10,    # constrain when a node may split
    min_samples_leaf=5,      # constrain minimum leaf size
    subsample=0.8,           # row subsampling as regularisation
    random_state=42,
)

# Fit on the training split
gbm.fit(X_train, y_train)

# Accuracy on the data the model was trained on
y_train_pred = gbm.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Accuracy on the held-out data
y_test_pred = gbm.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Print both so the train/test gap is visible
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Training Accuracy: 1.0000
Test Accuracy: 0.9100


# **GRADIENT BOOSTING VS. XGBOOST**

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline scikit-learn gradient boosting model.
# random_state is set explicitly so repeated runs build the same model and
# report the same accuracy; without it the estimator is unseeded.
gbm = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
gbm.fit(X_train, y_train)

# Predict labels for the held-out split
y_pred = gbm.predict(X_test)

# Report test-set accuracy
print(f'GBM Accuracy: {accuracy_score(y_test, y_pred):.2f}')


GBM Accuracy: 0.96


In [None]:
import xgboost as xgb

# Fit an XGBoost classifier on the existing training split.
# X_train, y_train, X_test, y_test and accuracy_score are not defined in this
# cell; they must already exist from an earlier cell.
# fit() returns the estimator itself, so construction and training are chained.
xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out split
y_pred_xgb = xgb_model.predict(X_test)

# Report test-set accuracy
print(f'XGBoost Accuracy: {accuracy_score(y_test, y_pred_xgb):.2f}')


XGBoost Accuracy: 0.96


# **XGBOOST REGULARIZATION AND TUNING**

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# 80/20 train/test split, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# XGBoost classifier with hand-picked tree, sampling and regularisation settings
xgb_clf = xgb.XGBClassifier(
    # ensemble size and tree shape
    n_estimators=150,
    learning_rate=0.2,
    max_depth=6,
    # row and column subsampling
    subsample=0.8,
    colsample_bytree=0.7,
    # L1 (alpha) and L2 (lambda) penalties
    reg_alpha=1,
    reg_lambda=5,
    # single worker thread
    n_jobs=1,
)

# Fit on the training split
xgb_clf.fit(X_train, y_train)

# score() is evaluated on the held-out split and printed as a percentage
accuracy = xgb_clf.score(X_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 95.61%


In [None]:
from sklearn.model_selection import GridSearchCV
import xgboost as xgb

# Candidate regularisation strengths, keyed by the scikit-learn wrapper's
# parameter names (reg_alpha / reg_lambda)
param_grid = {
    'reg_alpha': [0, 0.1, 1, 5, 10],   # L1 penalty values
    'reg_lambda': [1, 10, 50, 100],    # L2 penalty values
}

# X_train / y_train are not defined in this cell; they come from the earlier
# train_test_split on the breast cancer data, where y holds class labels.
# The search therefore tunes a classifier and scores it by accuracy, rather
# than fitting a squared-error regressor scored by R^2 on the labels.
xgb_model = xgb.XGBClassifier(n_estimators=100)

# Exhaustive search over the grid with 3-fold cross-validation
grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Show the best-scoring combination
print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'alpha': 0.1, 'lambda': 50}
